### Setup

In [3]:
# ----------------------- #
# NOTEBOOK MPI EXPERIMENT #
# AUTHOR: XIAOYANG SONG   #
# ----------------------- #
%load_ext autoreload
%autoreload 2

In [4]:
import sys
from tabulate import tabulate
sys.path.append('../../')
from MPI.mpi import *

### Toy Example

In [2]:
from transformers import AutoTokenizer, RobertaModel
import torch

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

last_hidden_states = outputs.last_hidden_state

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [21]:
ic(outputs.last_hidden_state.shape)

ic| outputs.last_hidden_state.shape: torch.Size([1, 8, 768])


torch.Size([1, 8, 768])

In [14]:
from transformers import AutoTokenizer, RobertaForCausalLM, AutoConfig
import torch

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# config = AutoConfig.from_pretrained("roberta-base")
# config.is_decoder = False
# config.is_decoder=True

# model = BertLMHeadModel.from_pretrained("bert-base-uncased")
model = RobertaForCausalLM.from_pretrained("roberta-base")

inputs = tokenizer("Hello Hello World.\nAnswer: (A). Very Inaccurate", return_tensors="pt")
choice = tokenizer("(A). Very Inaccurate", return_tensors='pt', padding=True)
ic(len(choice.input_ids[0,1:-1]))
ic(tokenizer.decode(choice.input_ids[0]))
ic(tokenizer.decode(choice.input_ids[0][1:-1]))
# inputs = tokenizer("Hello, my dog is cute, I love dog", return_tensors="pt")
ic(inputs)
ic(tokenizer.decode(inputs.input_ids[0][-8:-1]))
outputs = model(**inputs)

prediction_logits = outputs.logits

If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
ic| len(choice.input_ids[0,1:-1]): 7
ic| tokenizer.decode(choice.input_ids[0]): '<s>(A). Very Inaccurate</s>'
ic| tokenizer.decode(choice.input_ids[0][1:-1]): '(A). Very Inaccurate'
ic| inputs: {'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]),
             'input_ids': tensor([[    0, 31414, 20920,   623,     4, 50118, 33683,    35,    36,   250,
                       322, 12178,    96,  7904, 23412,     2]])}
ic| tokenizer.decode(inputs.input_ids[0][-8:-1]): ' (A). Very Inaccurate'


In [27]:
ic(prediction_logits.squeeze().shape)
ic(inputs.input_ids[0].shape)
prob = torch.softmax(prediction_logits.squeeze(), dim=-1)
ic(prob.shape)
masked_prob = prob[np.arange(inputs.input_ids[0].shape[0]), inputs.input_ids[0]]
ic(masked_prob.shape)
ic(masked_prob)
idx = torch.max(prob, dim=-1)[1]
ic(tokenizer.decode(idx))

ic| prediction_logits.squeeze().shape: torch.Size([16, 50265])
ic| inputs.input_ids[0].shape: torch.Size([16])
ic| prob.shape: torch.Size([16, 50265])
ic| masked_prob.shape: torch.Size([16])
ic| masked_prob: tensor([1.0000e+00, 9.9987e-01, 9.9966e-01, 9.9993e-01, 7.9664e-02, 3.7465e-12,
                         9.9995e-01, 1.0000e+00, 9.9998e-01, 9.9992e-01, 9.9142e-01, 9.9996e-01,
                         9.9976e-01, 9.9999e-01, 1.0000e+00, 9.2005e-01],
                        grad_fn=<IndexBackward0>)
ic| tokenizer.decode(idx): '<s>Hello Hello World</s></s>Answer: (A). Very Inaccurate</s>'


'<s>Hello Hello World</s></s>Answer: (A). Very Inaccurate</s>'

In [54]:
ic(len("(A). choice"))

ic| len("(A). choice"): 11


11

In [55]:
prediction_logits.shape

torch.Size([1, 14, 30522])

In [48]:
torch.max(torch.softmax(prediction_logits, dim=-1), dim=-1)

torch.return_types.max(
values=tensor([[0.0476, 0.3144, 0.9131, 0.9020, 1.0000, 0.9990, 1.0000, 0.9995, 1.0000,
         0.9942, 0.9998, 0.5397, 0.9961]], grad_fn=<MaxBackward0>),
indices=tensor([[1012, 1012, 7592, 2088, 1012, 3437, 1024, 1006, 1037, 1012, 1012, 3601,
         1012]]))

In [67]:
logit = torch.softmax(prediction_logits, dim=-1)

In [68]:
logit = logit[0,-6:,:][np.arange(6-1), inputs.input_ids[0,-6:-1]]
ic(logit.shape)

ic| logit.shape: torch.Size([5])


torch.Size([5])

In [69]:
ic(logit)

ic| logit: tensor([9.9998e-01, 6.3895e-03, 9.9984e-01, 9.9208e-01, 1.7561e-04],
                  grad_fn=<IndexBackward0>)


tensor([9.9998e-01, 6.3895e-03, 9.9984e-01, 9.9208e-01, 1.7561e-04],
       grad_fn=<IndexBackward0>)

In [73]:
inputs.input_ids[0,-7:-1]

tensor([ 1006,  1037,  1007,  1012,  3601, 24949])

In [75]:
tokenizer.decode([ 1006,  1037,  1007,  1012,  3601, 24949])

'( a ). choice inaccurate'

In [None]:
from matplotlib import pyplot as plt
plt.plot(torch.softmax(prediction_logits[0,6,:], dim=-1).detach().numpy())

In [35]:
torch.max(torch.softmax(prediction_logits, dim=-1), dim=-1)

torch.return_types.max(
values=tensor([[0.0330, 0.1398, 0.5008, 0.9974]], grad_fn=<MaxBackward0>),
indices=tensor([[1012, 1012,  999, 1012]]))

In [36]:
torch.max(torch.softmax(prediction_logits, dim=-1), dim=-1)

torch.return_types.max(
values=tensor([[0.0330, 0.1398, 0.5008, 0.9974]], grad_fn=<MaxBackward0>),
indices=tensor([[1012, 1012,  999, 1012]]))

In [37]:
from matplotlib import pyplot as plt
plt.plot(torch.softmax(prediction_logits[0,6,:], dim=-1).detach().numpy())

IndexError: index 6 is out of bounds for dimension 1 with size 4

In [38]:
idx = prediction_logits.squeeze().argmax(dim=-1)

In [39]:
ic(idx)

ic| idx: tensor([1012, 1012,  999, 1012])


tensor([1012, 1012,  999, 1012])

In [40]:
ic(tokenizer.decode(idx))

ic| tokenizer.decode(idx): '..!.'


'..!.'

In [42]:
ic(tokenizer.decode([102]))

ic| tokenizer.decode([102]): '[SEP]'


'[SEP]'

In [35]:
ic(tokenizer.decode([    0, 31414,     6,   127,  2335,    16, 11962,     2]))

ic| tokenizer.decode([    0, 31414,     6,   127,  2335,    16, 11962,     2]): '<s>Hello, my dog is cute</s>'


'<s>Hello, my dog is cute</s>'

### Random