In [None]:
import torch
import transformers

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, StoppingCriteriaList, MaxLengthCriteria
# Load the pretrained "gpt2" tokenizer and language-model head from the hub.
# padding_side="left" makes padded batches carry their padding at the start of
# each row (see the attention_mask displayed further down).
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", padding_side="left")
model = GPT2LMHeadModel.from_pretrained("gpt2")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Reuse the end-of-sequence token as the padding token; the batched
# tokenizer call in the next cell asks for padding=True.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Tokenize four prompts as a single batch of PyTorch tensors; padding=True
# pads the shorter prompts so every row has the same length.
inputs = tokenizer(
    [
        "Hello, my dog is cute",
        "Hello, my cat is cute",
        "Hello, my house is cute",
        "Hello, please be nice to me",
    ],
    padding=True,
    return_tensors="pt",
)

In [None]:
# Tokenize one prompt as PyTorch tensors.
# NOTE(review): this rebinds `inputs`, replacing the four-prompt batch built in
# the previous cell — later cells see whichever of the two ran last.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

In [None]:
# Display the tokenizer output (input_ids and attention_mask).
inputs

{'input_ids': tensor([[15496,    11,   616,  3290,   318, 13779]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [None]:
# Single forward pass used only for inspecting the logits. Running it under
# torch.no_grad() skips autograd bookkeeping, which is not needed because no
# backward pass is performed on this result in the cells shown.
with torch.no_grad():
    outputs = model(**inputs)

In [None]:
# Shape of the logits taken at the last index of the second axis.
outputs.logits[:, -1, :].shape

torch.Size([4, 50257])

In [None]:
# Display the attention mask produced by the tokenizer.
inputs['attention_mask']

tensor([[0, 1, 1, 1, 1, 1, 1],
        [0, 1, 1, 1, 1, 1, 1],
        [0, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1]])

In [None]:
# Greedy decoding (do_sample=False, num_beams=1) for up to 10 new tokens,
# returning a result object that also carries the per-step scores.
# The whole tokenizer output is unpacked so that attention_mask is forwarded
# together with input_ids: the pad token is the EOS token and padding is on the
# left, so padded positions must be identified explicitly by the mask.
outputs = model.generate(
    **inputs,
    do_sample=False,
    num_beams=1,
    max_new_tokens=10,
    pad_token_id=tokenizer.pad_token_id,
    output_scores=True,
    return_dict_in_generate=True,
)

In [None]:
# Display the generation result object.
outputs

GreedySearchDecoderOnlyOutput(sequences=tensor([[15496,    11,   616,  3290,   318, 13779,    13,   314]]), scores=(tensor([[-77.4425, -80.4462, -88.0497,  ..., -96.2564, -93.6345, -84.0666]]), tensor([[-142.3193, -142.1796, -144.0388,  ..., -155.6977, -153.8760,
         -137.7942]])), attentions=None, hidden_states=None)

In [None]:
# Keep a handle on the generated token-id sequences and display them.
b = outputs.sequences
b

tensor([[15496,    11,   616,  3290,   318, 13779,    13,   314]])

In [None]:
# First row of the generated sequences.
b[0]

tensor([15496,    11,   616,  3290,   318, 13779,    13,   314])

In [None]:
# `outputs.scores` is displayed below as a tuple of score tensors.
# `a` refers to that tuple itself, not to a single tensor.
a = outputs.scores
a

(tensor([[-77.4425, -80.4462, -88.0497,  ..., -96.2564, -93.6345, -84.0666]]),
 tensor([[-142.3193, -142.1796, -144.0388,  ..., -155.6977, -153.8760,
          -137.7942]]),
 tensor([[-175.1096, -174.3555, -180.2890,  ..., -186.5559, -176.5305,
          -176.7950]]),
 tensor([[-149.5665, -149.3980, -153.9603,  ..., -158.9080, -155.3729,
          -151.3569]]),
 tensor([[-146.1682, -147.2100, -157.6489,  ..., -158.1570, -155.2079,
          -150.0087]]),
 tensor([[-139.9639, -140.1997, -149.4295,  ..., -151.1516, -150.9630,
          -143.5753]]),
 tensor([[-111.7724, -111.0601, -118.1015,  ..., -119.4616, -118.6231,
          -113.0703]]),
 tensor([[-144.3919, -142.3804, -151.1539,  ..., -153.2746, -146.0315,
          -144.4816]]),
 tensor([[-141.3285, -140.4889, -146.7292,  ..., -147.8345, -143.9044,
          -142.7541]]),
 tensor([[-117.3900, -116.4773, -121.3260,  ..., -126.3712, -123.0911,
          -117.4414]]))

In [None]:
# First row of the first score tensor in the tuple `a`.
c = a[0][0]
c

tensor([-77.4425, -80.4462, -88.0497,  ..., -96.2564, -93.6345, -84.0666])

In [None]:
# Softmax module normalising along dimension 0; it is applied to the 1-D
# score vector `c` in the next cell.
softmax = torch.nn.Softmax(dim=0)

In [None]:
# Turn the score vector `c` into a probability distribution and display it.
d = softmax(c)
d

tensor([8.4113e-02, 4.1721e-03, 2.0807e-06,  ..., 5.6767e-10, 7.8120e-09,
        1.1170e-04])

In [None]:
# Five largest probabilities in `d` together with their positions.
values, indices = d.topk(5)
values, indices

(tensor([0.2837, 0.1696, 0.1335, 0.0841, 0.0440]),
 tensor([  13,  290,   11,    0, 2474]))

In [None]:
# Column indices to mask out in the score tensor; -1 addresses the last column
# when used as an index.
logits_mask = [1, 2, -1]

In [None]:
# Set the columns listed in `logits_mask` to -inf in the first step's scores so
# that argmax can never select them.
# `a` is the tuple `outputs.scores`; a tuple cannot be indexed with
# `[:, logits_mask]`, so the first step's tensor is selected first. The
# assignment modifies that tensor in place, so `outputs.scores[0]` sees it too.
a[0][:, logits_mask] = float("-Inf")
a[0]

tensor([[    -inf,     -inf,     -inf,  ..., -96.2564, -93.6345,     -inf]])

In [None]:
# Highest-scoring column per row of the first step's scores.
next_tokens = outputs.scores[0].argmax(dim=-1)

In [None]:
# Display the first step's score tensor alongside its shape.
outputs.scores[0], outputs.scores[0].shape

(tensor([[    -inf,     -inf,     -inf,  ..., -96.2564, -93.6345,     -inf]]),
 torch.Size([1, 50257]))

In [None]:
# Display the selected token ids alongside their shape.
next_tokens, next_tokens.shape

(tensor([13]), torch.Size([1]))

In [None]:
# Display the prompt token ids alongside their shape.
inputs['input_ids'], inputs['input_ids'].shape

(tensor([[15496,    11,   616,  3290,   318, 13779]]), torch.Size([1, 6]))

In [None]:
# Append the selected token to each prompt row: a trailing axis is added to
# `next_tokens` so it can be concatenated along the last dimension.
torch.cat((inputs['input_ids'], next_tokens.unsqueeze(-1)), dim=-1)

tensor([[15496,    11,   616,  3290,   318, 13779,    13]])

In [None]:
# Check the Python type of one generated sequence.
type(outputs.sequences[0])

torch.Tensor

In [None]:
# Overwrite, in place, the score at row 0 / column 0 of the first step with -inf.
outputs.scores[0][0, 0] = -float("Inf")

In [None]:
# argmax without a `dim` argument: index of the maximum over the whole tensor.
torch.argmax(outputs.scores[0])

tensor(13)

In [None]:
# Display the generated token-id sequences.
outputs.sequences

tensor([[15496,    11,   616,  3290,   318, 13779,    13]])

In [None]:
# Convert the first generated sequence of token ids back to text.
tokenizer.decode(outputs.sequences[0])

"Hello, my dog is cute. I'm not sure if she's a puppy"

In [None]:
# Encode a single word and return the ids as a PyTorch tensor.
tokenizer.encode('Hello', return_tensors="pt")

tensor([[15496]])

In [None]:
# Small float tensor for experimenting with torch.take.
# Built with the torch.tensor factory and float literals rather than the legacy
# torch.Tensor constructor, so the dtype follows from the data itself.
t = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
t

tensor([1., 2., 3., 4., 5., 6.])

In [None]:
# Pick the elements of `t` at flat positions 0, 2 and 4.
t.take(torch.tensor([0, 2, 4]))

tensor([1., 3., 5.])

In [None]:
# Small int -> str mapping built from key/value pairs.
d = dict([(1, "a"), (2, "b"), (3, "c")])


In [None]:
# First generated sequence without its final token.
# At this point `outputs` is the result object returned by generate() with
# return_dict_in_generate=True, so the token ids live in `.sequences`; the
# result object itself cannot be sliced with a (row, slice) pair.
outputs.sequences[0, :-1]

In [None]:
# Stopping criteria list holding one MaxLengthCriteria with max_length=20; it is
# passed to greedy_search in the next cell.
stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=20)])

In [None]:
# Greedy decoding through the lower-level greedy_search entry point, bounded by
# `stopping_criteria`. This rebinds `outputs`.
# NOTE(review): greedy_search appears to be deprecated/removed in newer
# transformers releases — confirm against the installed version.
# NOTE(review): only input_ids is passed here; attention_mask is not forwarded.
outputs = model.greedy_search(input_ids=inputs['input_ids'], pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, stopping_criteria=stopping_criteria)

In [None]:
# Display the result of the greedy_search call.
outputs

In [None]:
# Decode the first entry of the greedy_search result back to text.
tokenizer.decode(outputs[0])

In [None]:
# `logits` is not assigned in any of the visible earlier cells, so it is
# computed here from a forward pass over the current `inputs` before its shape
# is inspected. no_grad is used because the result is only inspected.
with torch.no_grad():
    logits = model(**inputs).logits
logits.shape

In [None]:
# Softmax module over dimension 0 (same construction as the earlier `softmax`
# cell); applied to a 1-D logits vector in the next cell.
softmax = torch.nn.Softmax(dim=0)

In [None]:
# Take one position's logits from the first batch row, normalise them with
# softmax, pick the most probable token id and decode it to text.
# NOTE(review): index 2 selects the third sequence position, not the final one,
# despite the name `last_logits` — confirm which position is intended.
last_logits = logits[0, 2, :]
output_sotfmax = softmax(last_logits)
output_argmax = torch.argmax(output_sotfmax)
tokenizer.decode(output_argmax)

In [None]:
# Encode a prompt that contains the literal text "<|endoftext|>" to see which
# ids the tokenizer produces for it.
tokenizer.encode("Hello, my dog is cute <|endoftext|>")

In [None]:
# Decode the single token id 11 to its text form.
tokenizer.decode(11)

In [None]:
# Toy float vector; its largest entry (11.0) sits at index 1.
t = torch.tensor([10.0, 11.0, 1.0, 1.0, 2.0])
t.argmax()

tensor(1)

In [None]:
# Knock out positions 1 and 3 of `t` with -inf (in place), then look up the
# position of the largest remaining value.
idxs = torch.tensor([1, 3])
t[idxs] = float("-Inf")
t.argmax()

tensor(0)

In [None]:
# Collect the keys of a dict into a set, then add one extra element.
di = {1: 2, 3: 4, 5: 6, 7: 8}
yo = {key for key in di}
yo.add(9)
yo

{1, 3, 5, 7, 9}

In [None]:
# Set containing the integers 0 through 4.
s = {n for n in range(5)}
s

{0, 1, 2, 3, 4}

In [None]:
# Elements of `s` other than 1 and 3, as a list.
# torch has no `list` attribute, so the built-in is used instead; `sorted`
# returns a list and makes the element order deterministic.
sorted(s - {1, 3})

[0, 2, 4]

In [None]:
# Small 1-D integer tensor used in the concatenation example in the next cell.
ala = torch.tensor([4, 5, 6])

In [None]:
# Append the value 1 to `ala`; the value is wrapped in a one-element 1-D tensor
# so that it can be concatenated.
torch.cat((ala, torch.tensor([1])))

tensor([4, 5, 6, 1])

In [None]:
# Small 1-D integer tensor used by the next two cells (this rebinds `t`).
t = torch.tensor([1, 2, 3, 4])

In [None]:
# Position of the single element of `t` equal to 1, as a Python int
# (.item() requires exactly one match).
torch.nonzero(t == 1, as_tuple=True)[0].item()

0

In [None]:
# Print every element of `t` after the first one.
for i in t[1:]:
    print(i)

tensor(2)
tensor(3)
tensor(4)


In [None]:
# Build four equal-length 1-D tensors and stack them into one 2-D tensor,
# one input tensor per row.
t1, t2, t3, t4 = (
    torch.tensor(row)
    for row in (
        [1, 2, 3, 4, 5],
        [5, 6, 7, 8, 11],
        [9, 10, 11, 12, 33],
        [22, 19, 13, 91, 124584],
    )
)

l = [t1, t2, t3, t4]

b = torch.stack(l, dim=0)
b

tensor([[     1,      2,      3,      4,      5],
        [     5,      6,      7,      8,     11],
        [     9,     10,     11,     12,     33],
        [    22,     19,     13,     91, 124584]])

In [None]:
# Display `c` (assigned in an earlier cell).
c

In [None]:
# Find every position in `t` whose value is 1.
t = torch.tensor([1, 1, 2, 3, 1, 1])
pad_idx = torch.nonzero(t == 1, as_tuple=True)[0]
pad_idx