In [None]:
import torch
from transformers import AutoTokenizer

# Text whose continuation the rest of the notebook explores.
prompt = "It was a dark and stormy"

# Load the tokenizer for the checkpoint, then encode the prompt.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B")
encoded_prompt = tokenizer(prompt)
input_ids = encoded_prompt["input_ids"]
print(input_ids)

In [None]:
# Print every prompt token ID next to the text it decodes to on its own.
for token_id in input_ids:
    print(token_id, "\t:", tokenizer.decode(token_id))

In [None]:
# Token IDs for a word with a leading space (the space is part of the input).
tokenizer(" Luis")["input_ids"]

In [None]:
# Decode one hard-coded token ID back to text.
# NOTE(review): 33197 is presumably an ID shown by the " Luis" cell above —
# confirm against that cell's output.
tokenizer.decode(33197)

In [None]:
from transformers import AutoModelForCausalLM

# Load the causal language model from the same checkpoint string
# ("Qwen/Qwen2-0.5B") that the tokenizer was loaded from.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B")

In [None]:
# Re-encode the prompt as a PyTorch tensor so it can be fed to the model.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

# Inference only: disable autograd so the forward pass does not build (and
# keep alive) a computation graph that is never used for a backward pass.
with torch.no_grad():
    outputs = model(input_ids)
outputs.logits.shape  # (batch_size, sequence_length, vocab_size)

In [None]:
# Logits for the first sequence in the batch at its last position.
# Run under no_grad: this is inference, so no autograd graph is needed and
# `final_logits` ends up as a plain tensor detached from the model weights.
with torch.no_grad():
    final_logits = model(input_ids).logits[0, -1]

# Vocabulary index with the highest logit.
final_logits.argmax()

In [None]:
# Turn the highest-scoring index in `final_logits` back into text.
tokenizer.decode(final_logits.argmax())

In [None]:
import torch

# Ten largest logits (values and their vocabulary indices), largest first.
top10_logits = final_logits.topk(10)

# Show the text of each of those ten candidate tokens.
for token_id in top10_logits.indices:
    print(tokenizer.decode(token_id))

In [None]:
# Convert logits to probabilities over the vocabulary, then keep the top ten.
probabilities = final_logits.softmax(dim=0)
top10 = torch.topk(probabilities, 10)

# One line per candidate: decoded token (left-aligned) and its probability.
for probability, token_id in zip(top10.values, top10.indices):
    token_text = tokenizer.decode(token_id)
    print(f"{token_text:<10} {probability.item():.2%}")

In [None]:
from pprint import pprint

In [None]:
# Generate up to 20 new tokens after the prompt. No sampling or beam-search
# arguments are passed, so the model's default decoding strategy is used.
output_ids = model.generate(input_ids, max_new_tokens=20)
decoded_text = tokenizer.decode(output_ids[0])

# Both tensors carry a leading batch dimension; index [0] on each so the
# prompt IDs and the generated IDs print as directly comparable sequences.
print("Input IDs", input_ids[0])
print("Output IDs", output_ids[0])
pprint(f"Generated text: {decoded_text}")

In [None]:
# Beam search: track 5 candidate sequences and generate up to 30 new tokens.
beam_output = model.generate(input_ids, num_beams=5, max_new_tokens=30)

# Decode the first returned sequence and pretty-print it.
beam_text = tokenizer.decode(beam_output[0])
pprint(beam_text)

In [None]:
# Sampling is stochastic: seed PyTorch's RNG so this cell prints the same
# continuation on every Restart & Run All.
torch.manual_seed(0)

# Sample up to 40 new tokens with a near-zero temperature. top_k=0 is passed
# explicitly (presumably to switch off top-k filtering so that only the
# temperature shapes the distribution — confirm against the generate docs).
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.001,
    max_new_tokens=40,
    top_k=0,
)

pprint(tokenizer.decode(sampling_output[0]))

In [None]:
# Sampling is stochastic: seed PyTorch's RNG so this cell prints the same
# continuation on every Restart & Run All.
torch.manual_seed(0)

# Sample up to 40 new tokens with a high temperature. top_k=0 is passed
# explicitly (presumably to switch off top-k filtering so that only the
# temperature shapes the distribution — confirm against the generate docs).
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    temperature=3.0,
    max_new_tokens=40,
    top_k=0,
)

pprint(tokenizer.decode(sampling_output[0]))