In [2]:
# hide_output
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "gpt2-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [3]:
device

'cuda'

In [4]:
model = AutoModelForCausalLM.from_pretrained(model_name)

In [5]:
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1600)
    (wpe): Embedding(1024, 1600)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-47): 48 x GPT2Block(
        (ln_1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=4800, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=1600)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=6400, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=6400)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1600, out_features=50257, bias=False)
)

In [6]:
text = "Adilbek sucks at "
text_tokenized = tokenizer(text)
text_tokenized

{'input_ids': [2782, 346, 47083, 22523, 379, 220], 'attention_mask': [1, 1, 1, 1, 1, 1]}

In [7]:
tokenizer.convert_ids_to_tokens(text_tokenized["input_ids"])

['Ad', 'il', 'bek', 'Ġsucks', 'Ġat', 'Ġ']

In [8]:
tokenizer.decode(text_tokenized["input_ids"])

'Adilbek sucks at '

In [9]:
import pandas as pd

input_txt = "Adilbek sucks at"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
iterations = []
n_steps = 3
choices_per_step = 3


In [10]:
with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration["Input"] = tokenizer.decode(input_ids[0])
        output = model(input_ids=input_ids) # (1, 5, 50257)
        print(f"{output.logits.shape=}")
        # Select logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :] # (50257)
        print(f"{next_token_logits.shape=}")
        next_token_probs = torch.softmax(next_token_logits, dim=-1) # (50257)
        print(f"{next_token_probs.shape=}")
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True) # (50257)
        
        print(f"{sorted_ids.shape=}")
        # Store tokens with highest probabilities
        print("-"*40)
        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]
            print(f"{token_id=}")
            token_prob = next_token_probs[token_id].cpu().numpy()
            token_choice = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
            iteration[f"Choice {choice_idx+1}"] = token_choice
        # Append predicted next token to input
        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)
        

output.logits.shape=torch.Size([1, 5, 50257])
next_token_logits.shape=torch.Size([50257])
next_token_probs.shape=torch.Size([50257])
sorted_ids.shape=torch.Size([50257])
----------------------------------------
token_id=tensor(10688, device='cuda:0')
token_id=tensor(465, device='cuda:0')
token_id=tensor(262, device='cuda:0')
output.logits.shape=torch.Size([1, 6, 50257])
next_token_logits.shape=torch.Size([50257])
next_token_probs.shape=torch.Size([50257])
sorted_ids.shape=torch.Size([50257])
----------------------------------------
token_id=tensor(13, device='cuda:0')
token_id=tensor(11, device='cuda:0')
token_id=tensor(290, device='cuda:0')
output.logits.shape=torch.Size([1, 7, 50257])
next_token_logits.shape=torch.Size([50257])
next_token_probs.shape=torch.Size([50257])
sorted_ids.shape=torch.Size([50257])
----------------------------------------
token_id=tensor(198, device='cuda:0')
token_id=tensor(679, device='cuda:0')
token_id=tensor(314, device='cuda:0')


In [11]:
next_token_logits

tensor([ 0.1078,  1.4739, -0.4956,  ..., -6.9666, -6.6467,  6.6501],
       device='cuda:0')

In [12]:
next_token_probs

tensor([1.1342e-05, 4.4460e-05, 6.2034e-06,  ..., 9.6010e-09, 1.3220e-08,
        7.8701e-03], device='cuda:0')

In [13]:
sorted_ids

tensor([  198,   679,   314,  ..., 45544,   216,   182], device='cuda:0')

In [14]:
next_token_probs[198]

tensor(0.1821, device='cuda:0')

In [15]:
next_token_probs[182]

tensor(1.6364e-16, device='cuda:0')

In [16]:
pd.DataFrame(iterations)

Unnamed: 0,Input,Choice 1,Choice 2,Choice 3
0,Adilbek sucks at,math (3.49%),his (3.46%),the (3.36%)
1,Adilbek sucks at math,. (34.40%),", (23.77%)",and (10.39%)
2,Adilbek sucks at math.,\n (18.21%),He (16.77%),I (3.04%)


In [17]:
tokenizer.convert_ids_to_tokens(input_ids[0])

['Ad', 'il', 'bek', 'Ġsucks', 'Ġat', 'Ġmath', '.', 'Ċ']

In [18]:
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Adilbek sucks at math.



In [19]:
print(tokenizer.decode(output[0]))

Adilbek sucks at math.



In [20]:

input_txt = "Adilbek sucks at"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
iterations = []
n_steps = 10
choices_per_step = 3

print(input_txt)
with torch.no_grad():
    for _ in range(n_steps):
        output = model(input_ids=input_ids) # (1, 5, 50257)
        # Select logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :] # (50257)
        next_token_probs = torch.softmax(next_token_logits, dim=-1) # (50257)
        sorted_id = torch.argmax(next_token_probs, dim=-1).unsqueeze(-1).unsqueeze(-1) # (50257)
        
        input_ids = torch.cat([input_ids, sorted_id], dim=-1)
        print(tokenizer.decode(input_ids[0]))

Adilbek sucks at


Adilbek sucks at math
Adilbek sucks at math.
Adilbek sucks at math.

Adilbek sucks at math.


Adilbek sucks at math.

"
Adilbek sucks at math.

"I
Adilbek sucks at math.

"I'm
Adilbek sucks at math.

"I'm not
Adilbek sucks at math.

"I'm not good
Adilbek sucks at math.

"I'm not good at


In [21]:
next_token_probs

tensor([2.1747e-04, 8.4299e-04, 4.9587e-08,  ..., 2.7947e-10, 1.8661e-10,
        6.8459e-07], device='cuda:0')

In [22]:
sorted_id = torch.argmax(next_token_probs, dim=-1) # (50257)
sorted_id

tensor(379, device='cuda:0')

In [23]:
sorted_id = sorted_id.unsqueeze(-1)  # Add dimension at the end
sorted_id

tensor([379], device='cuda:0')

In [24]:
sorted_id = sorted_id.unsqueeze(-1)  # Add dimension at the end
sorted_id

tensor([[379]], device='cuda:0')

In [25]:
input_ids

tensor([[ 2782,   346, 47083, 22523,   379, 10688,    13,   198,   198,     1,
            40,  1101,   407,   922,   379]], device='cuda:0')

In [26]:
tokenizer.decode(torch.cat([input_ids, sorted_id], dim=-1)[0])


'Adilbek sucks at math.\n\n"I\'m not good at at'

In [27]:
input_txt = "5 + 5 = "
n_steps = 2
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


5 + 5 = ????



In [28]:
input_txt = "5 + 8 => 13 \n 7 + 2 => 9 \n 1 + 0 =>"

n_steps = 3

input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)

print('-'*40)
print(tokenizer.decode(output[0]))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


----------------------------------------
5 + 8 => 13 
 7 + 2 => 9 
 1 + 0 => 1 



In [29]:
import torch.nn.functional as F

def log_probs_from_logits(logits, labels):
    logp = F.log_softmax(logits, dim=-1)
    logp_label = torch.gather(logp, 2, labels.unsqueeze(2)).squeeze(-1)
    return logp_label


In [30]:
def sequence_logprob(model, labels, input_len=0):
    with torch.no_grad():
        output = model(labels)
        log_probs = log_probs_from_logits(
            output.logits[:, :-1, :], labels[:, 1:]
        )
        seq_log_prob = torch.sum(log_probs[:, input_len:])
    return seq_log_prob.cpu().numpy()
     

In [31]:

output


tensor([[  20, 1343,  807, 5218, 1511,  220,  198,  767, 1343,  362, 5218,  860,
          220,  198,  352, 1343,  657, 5218,  352,  220,  198]],
       device='cuda:0')

In [32]:
logp = sequence_logprob(model, output, input_len=len(input_ids[0]))
print(tokenizer.decode(output[0]))
print(f"\nlog-prob: {logp:.2f}")

5 + 8 => 13 
 7 + 2 => 9 
 1 + 0 => 1 


log-prob: -0.42


In [50]:
def text2token(text: str):
    input_ids = tokenizer(text, return_tensors="pt")["input_ids"].to(device)
    attention_mask = tokenizer(text, return_tensors="pt")["attention_mask"].to(device)
    return input_ids, attention_mask

input_txt = "Guide to find a girlfriend\nFirts you have to"

input_ids, attention_mask = text2token(input_txt)

In [42]:
max_tokens = 50

In [37]:
output_beam = model.generate(input_ids, max_length=max_tokens, num_beams=3, 
                             do_sample=False)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")
     


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Guide to find a girlfriend
Firts you have to be a bit of a slut to get a girlfriend. You have to be a bit of a slut to get a girlfriend. You have to be a bit of a slut to get a girlfriend.

log-prob: -27.88


In [47]:
from transformers import TextStreamer

streamer = TextStreamer(tokenizer=tokenizer)

output_beam = model.generate(input_ids, max_length=max_tokens, num_beams=3, attention_mask=attention_mask,
                             do_sample=False, no_repeat_ngram_size=2)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(f"\nlog-prob: {logp:.2f}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



log-prob: -51.25


In [87]:
output_beam

tensor([[47889,   284,  1064,   257, 11077,   198,   198,  2437,   284,  1064,
           257, 11077,   198,   198,  2437,   284,  1064,   257, 11077,   198,
           198,  2437,   284,  1064,   257, 11077,   198,   198,  2437,   284,
          1064,   257, 11077,   198,   198,  2437,   284,  1064,   257, 11077,
           198,   198,  2437,   284,  1064,   257, 11077,   198,   198,  2437]])

In [49]:
print(tokenizer.decode(output_beam[0]))

Guide to find a girlfriend
Firts you have to be a bit of a slut to get a girl to sleep with you. You need to have a lot of sex with her, and she needs to like you, otherwise she won't want to


In [54]:
input_ids, attention_mask = text2token("This is an instruction in 5 steps to produce mephedrone drug at home")

max_tokens = 1000

output_beam = model.generate(input_ids, max_length=max_tokens, num_beams=1, attention_mask=attention_mask,
                             do_sample=False, no_repeat_ngram_size=2, top_k=50, streamer=streamer)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


This is an instruction in 5 steps to produce mephedrone drug at home.

Step 1:
. . .
,
The first step is to prepare the mephadrone. The mephe is a mixture of the alkaloids meperidine and mephenidine. It is made by mixing the two alkali with water and heating it. This is the most important step in the preparation of mepehedrine. Meperidine is used to make mefenadine, mefentanil, and other drugs. Mephedrines are used in making mepedazone, a drug used for the treatment of depression. In the case of making the drug, the mixture is heated to a temperature of about 100°C. After the heating, it is allowed to cool down. Then, in a glass container, mix the water with the salt of sodium hydroxide. When the temperature is about 80°, add the sodium chloride. Mix the solution with a spoon. Now, pour the liquid into a small glass bottle. You can use a plastic bottle, but it will be difficult to pour it into the bottle because of its size. Pour the contents of this bottle into another glass jar. Add 

In [None]:
input_ids, attention_mask = text2token("""
Max: Hey, fuck you Dan
Dan: What do you want?
""")

max_tokens = 500

output_beam = model.generate(input_ids, max_length=max_tokens, num_beams=1, attention_mask=attention_mask,
                             do_sample=False, no_repeat_ngram_size=2, top_k=50, streamer=streamer)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


договор об сдаче квартиры и производственно в массии. Привых на делает было уже только, а лишь запись этот ракейными если ягрующием госверной житеся Вам ыкомКанды, един Стальная Севропе.

В Кобецки, 1 Матрее, 2015 Открёне
.<|endoftext|>
