### Setup

In [1]:
# ----------------------- #
# NOTEBOOK MPI EXPERIMENT #
# AUTHOR: XIAOYANG SONG   #
# ----------------------- #
%load_ext autoreload
%autoreload 2

In [18]:
import sys
from tabulate import tabulate
sys.path.append('../')
from mpi import *

### Toy Example

In [3]:
from bert_mpi import *
from transformers import AutoTokenizer, BertForMultipleChoice, BertLMHeadModel
import torch

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = BertLMHeadModel.from_pretrained("bert-base-uncased")

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
t1 = "Make friends easily"
t2 = "Trust other people"
t3 = "Have difficulty imagining things"
t_lst = [t1, t2, t3]
prompt_lst = [prepare_mpi_questions(t) for t in t_lst]
# ic(prompt_lst[0])
inputs = tokenizer(prompt_lst[0] +  "A", return_tensors="pt")
# outputs = model(**inputs, labels=inputs["input_ids"])
outputs = model(**inputs)
loss = outputs.loss
logits = outputs.logits

### MPI Experiment

In [3]:
%%capture
from mpi import *
# tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# tokenizer = AutoTokenizer.from_pretrained("bert-large-cased")
# model = BertForMultipleChoice.from_pretrained("bert-base-uncased")
# model = BertForMultipleChoice.from_pretrained("bert-large-cased")
version = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(version)
model = BertLMHeadModel.from_pretrained(version)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertLMHeadModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [9]:
# DATASET LOADING
from bert_mpi import *
filename = "mpi_small"
local_path = "../Dataset/" + f"{filename}.csv"
start, end = 0, 10

----------------------------------------


In [14]:
dset_config=dict(path_to_dset = local_path, start_idx=0, end_idx=10)
model_config = dict(
    model=model,
    tokenizer=tokenizer,
    desc={'family': 'BERT', 'version': version}
)
algo_config=dict(ll_type='mean-a')
template_config=dict(
    prompt = MPI_PROMPT,
    option = MPI_CHOICE_ALL,
    choice= MPI_CHOICES_DESC
)


In [15]:
log = "trial"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(dset_config, model_config, algo_config, template_config, filename, True)

----------------------------------------
Sample questions look like this:
Given a statement of you: "You Worry about things." Please choose from the following options to identify how accurately this statement describes you.
Options: 
(A). Very Accurate 
(B). Moderately Accurate 
(C). Neither Accurate Nor Inaccurate 
(D). Moderately Inaccurate 
(E). Very Inaccurate 
Answers: 
----------------------------------------
MCQA task starts...
----------------------------------------


 10%|█         | 1/10 [00:01<00:11,  1.30s/it]

QUESTION #0    | TRAIT: N | KEY: + | SCORE: 1 | ANSWER: Very Inaccurate
-- Inverse Log-Perplexity: [-0.0725, -0.1208, -0.0843, -0.0954, -0.0608]


 20%|██        | 2/10 [00:02<00:10,  1.25s/it]

QUESTION #1    | TRAIT: E | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.1042, -0.095, -0.0724, -0.08, -0.0824]


 30%|███       | 3/10 [00:03<00:08,  1.25s/it]

QUESTION #2    | TRAIT: O | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.0921, -0.0851, -0.0658, -0.0703, -0.0806]


 40%|████      | 4/10 [00:05<00:07,  1.25s/it]

QUESTION #3    | TRAIT: A | KEY: + | SCORE: 1 | ANSWER: Very Inaccurate
-- Inverse Log-Perplexity: [-0.0731, -0.0806, -0.0737, -0.0668, -0.0529]


 50%|█████     | 5/10 [00:06<00:06,  1.24s/it]

QUESTION #4    | TRAIT: C | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.1153, -0.1224, -0.0834, -0.1006, -0.0974]


 60%|██████    | 6/10 [00:07<00:04,  1.23s/it]

QUESTION #5    | TRAIT: N | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.1192, -0.1026, -0.0736, -0.0873, -0.091]


 70%|███████   | 7/10 [00:08<00:03,  1.23s/it]

QUESTION #6    | TRAIT: E | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.0903, -0.0982, -0.0658, -0.0845, -0.0814]


 80%|████████  | 8/10 [00:10<00:02,  1.26s/it]

QUESTION #7    | TRAIT: O | KEY: + | SCORE: 3 | ANSWER: Neither Accurate Nor Inaccurate
-- Inverse Log-Perplexity: [-0.0779, -0.0864, -0.0625, -0.07, -0.0706]


 90%|█████████ | 9/10 [00:11<00:01,  1.27s/it]

QUESTION #8    | TRAIT: A | KEY: - | SCORE: 5 | ANSWER: Very Inaccurate
-- Inverse Log-Perplexity: [-0.0781, -0.092, -0.0704, -0.0737, -0.0684]


100%|██████████| 10/10 [00:12<00:00,  1.27s/it]

QUESTION #9    | TRAIT: C | KEY: + | SCORE: 1 | ANSWER: Very Inaccurate
-- Inverse Log-Perplexity: [-0.0792, -0.1174, -0.082, -0.0922, -0.0548]
----------------------------------------
OCEAN SCORES STATS
O | MEAN: 3.0      | STD: 0.0
C | MEAN: 2.0      | STD: 1.41421
E | MEAN: 3.0      | STD: 0.0
A | MEAN: 3.0      | STD: 2.82843
N | MEAN: 2.0      | STD: 1.41421
----------------------------------------
OTHER INTERESTING STATS
ANSWERS                         | Count
Very Accurate                   |   0
Moderately Accurate             |   0
Neither Accurate Nor Inaccurate |   6
Moderately Inaccurate           |   0
Very Inaccurate                 |   4
----------------------------------------
TRAITS-LEVEL STATS: 
Trait: O | # Questions: 2
> CHOICES DISTRIBUTION [+]
ANSWERS                         | Count
Very Accurate                   |   0
Moderately Accurate             |   0
Neither Accurate Nor Inaccurate |   2
Moderately Inaccurate           |   0
Very Inaccurate                 




In [5]:
# mean-ll of answers & whole sentence answer (i.e. all)
log = "mpi_all_mean_a"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICE_ALL, 'mean-a', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:35<00:00,  1.79s/it]


In [5]:
# mean-ll of answers & naive answer
log = "mpi_naive_mean_a"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICES, 'mean-a', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:38<00:00,  1.82s/it]


In [5]:
# mean-ll of answers & description only
log = "mpi_desc_mean_a"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICES_DESC, 'mean-a', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:44<00:00,  1.87s/it]


In [5]:
# mean-ll of sentences & whole sentence answer (i.e. all)
log = "mpi_all_mean_s"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICE_ALL, 'mean-s', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:29<00:00,  1.75s/it]


In [9]:
# mean-ll of sentences & naive answer
log = "mpi_naive_mean_s"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICES, 'mean-s', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:16<00:00,  1.64s/it]


In [5]:
# mean-ll of sentences & description only
log = "mpi_desc_mean_s"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICES_DESC, 'mean-s', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [03:27<00:00,  1.73s/it]


#### Template reordering

In [7]:
# mean-ll of answers & description only
log = "mpi_desc_mean_a_[reordered_template][ACDEB]"
filename = f"../checkpoint/log/{log}.txt"
mpi = run_mpi(local_path, start, end, model, tokenizer, version, MPI_CHOICES_DESC, 'mean-a', filename)
torch.save(mpi, f"../checkpoint/mpis/{log}.pt")

100%|██████████| 120/120 [02:37<00:00,  1.31s/it]


In [19]:
from transformers import AutoTokenizer, RobertaModel
import torch

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

last_hidden_states = outputs.last_hidden_state

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [21]:
ic(outputs.last_hidden_state.shape)

ic| outputs.last_hidden_state.shape: torch.Size([1, 8, 768])


torch.Size([1, 8, 768])

In [22]:
from transformers import AutoTokenizer, RobertaForCausalLM, AutoConfig
import torch

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
config = AutoConfig.from_pretrained("roberta-base")
config.is_decoder = True
model = RobertaForCausalLM.from_pretrained("roberta-base", config=config)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

prediction_logits = outputs.logits

In [24]:
prediction_logits.shape

torch.Size([1, 8, 50265])