## From Pipeline

In [74]:
from transformers import pipeline

In [75]:
pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")

Device set to use cuda:0


In [76]:
pipe([{"role": "user", "content": "Who are you?"}])

[{'generated_text': [{'role': 'user', 'content': 'Who are you?'},
   {'role': 'assistant',
    'content': '<think>\nOkay, the user asked, "Who are you?" I need to respond appropriately. As an AI assistant, I should first acknowledge their question and explain my purpose. I should mention that I\'m a language model trained on vast amounts of text and am designed to provide assistance. I should also clarify that I don\'t have a physical form or personality, just a tool for help. It\'s important to keep the response friendly and helpful. Let me make sure the language is natural and easy to understand.\n</think>\n\nI\'m an AI assistant designed to help with questions, provide information, and support conversations. I don\'t have a physical form or personality, but I\'m capable of understanding and responding to your queries. Let me know how I can assist you!'}]}]

In [77]:
pipe([{"role": "user", "content": "When was Albert Einstein born?"}])

[{'generated_text': [{'role': 'user',
    'content': 'When was Albert Einstein born?'},
   {'role': 'assistant',
    'content': "<think>\nOkay, the user is asking when Albert Einstein was born. Let me recall. I know he was born on July 4th, 1879, in Ulm, Germany. Wait, is that correct? I think the date is July 4th, so the birth date is 1879-07-04. I should double-check to make sure there's no confusion with other people's dates. No, Einstein is a well-known figure, so I'm pretty confident about this. Just need to present it clearly.\n</think>\n\nAlbert Einstein was born on **July 4, 1879**, in Ulm, Germany."}]}]

In [78]:
pipe([{"role": "user", "content": "Albert Einstein was born on"}])

[{'generated_text': [{'role': 'user',
    'content': 'Albert Einstein was born on'},
   {'role': 'assistant',
    'content': "<think>\nOkay, the user is asking about Albert Einstein's birth date. I need to make sure I provide the correct information.\n\nFirst, I remember that Einstein was born on July 14, 1879, in Germany. Let me verify this. Yes, that's accurate. I should mention the country and the date again to be clear.\n\nI should also check if there's any recent information that might change this, but as of now, the date remains the same. It's important to note that the date is in the Gregorian calendar, which is what most people use. \n\nI need to present this information in a straightforward way, making sure the user understands the correct details. Let me structure it clearly: the birth date, the country, and a brief note about the significance of the date.\n</think>\n\nAlbert Einstein was born on **July 14, 1879**, in **Germany**. His birthdate is widely recognized and celebr

## From Tokenizer

In [79]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np


def set_seed(seed):
    """Setup the trainer"""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)


set_seed(42)

In [80]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B-Base")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B-Base")
# model = model.to(torch.device("cuda"))
# tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
# model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-70m")

In [81]:
model.device

device(type='cpu')

In [104]:
prompt = "Albert Einstein was born on"
answer = " November 05, 1977"

In [105]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

input = tokenizer(
    answer,
    return_tensors="pt",
    padding=True,
).to(model.device)
input

{'input_ids': tensor([[6702,  220,   15,   20,   11,  220,   16,   24,   22,   22]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [84]:
outputs = model.generate(
    input["input_ids"],
    max_new_tokens=24,
    do_sample=True,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
    temperature=0.7,
)
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(generated_text)

Albert Einstein was born on March 14, 1879 in the town of Würzburg, which is now part of the


In [85]:
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['Albert Einstein was born on March 14, 1879 in the town of Würzburg, which is now part of the']

In [86]:
outputs

tensor([[66622, 54052,   572,  9223,   389,  5470,   220,    16,    19,    11,
           220,    16,    23,    22,    24,   304,   279,  6290,   315,   467,
          5186, 78202,    11,   892,   374,  1431,   949,   315,   279]])

In [87]:
# next_token_logits = outputs["logits"][:, -1, :]
# next_token_logits.shape

In [88]:
idx = tokenizer(
    prompt,
    return_tensors="pt",
    padding=True,
).input_ids
# idx = torch.tensor(idx).unsqueeze(0).to(model.device)
idx

tensor([[66622, 54052,   572,  9223,   389]])

In [89]:
idx_correct = tokenizer(
    answer,
    return_tensors="pt",
    padding=True,
).input_ids
# idx_correct = torch.tensor(idx_correct).unsqueeze(0).to(model.device)
idx_correct

tensor([[5470,  220,   16,   19,   11,  220,   16,   23,   22,   24]])

In [90]:
prompt = "Albert Einstein was born on"
answer = " March 14, 1879"

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

with torch.no_grad():
    idx = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
    ).input_ids

    idx_correct = tokenizer(
        answer,
        return_tensors="pt",
        padding=True,
    ).input_ids

    probs_correct = []
    for i in range(idx_correct.shape[1]):
        logits = model(idx).logits[:, -1, :]
        probs = torch.nn.functional.softmax(logits, dim=-1)
        probs_correct.append(probs[0, idx_correct[0, i]].item())
        idx = torch.cat((idx, idx_correct[:, i].unsqueeze(0)), dim=1)
        print(
            f"Step {i + 1}, Correct token index: {idx_correct[0, i].item()}, Probability: {probs_correct[-1]:.4f}"
        )

    probs = torch.tensor(probs_correct)
    probs = torch.clamp(probs, min=1e-12)  # avoid log(0)

    # Negative log-likelihood (sum and mean)
    # log_likelihood = torch.sum(torch.log(probs))
    # nll_sum = -log_likelihood
    nll_mean = -torch.mean(torch.log(probs))

    # Perplexity = exp(mean NLL)
    ppl = torch.exp(nll_mean)

    # print(f"Probabilities of correct tokens: {probs_correct}")
    # print(f"Negative log-likelihood (sum): {nll_sum.item():.4f}")
    # print(f"Negative log-likelihood (mean per token): {nll_mean.item():.4f}")
    # print(f"Perplexity: {ppl.item():.4f}")

    print([round(float(x), 4) for x in probs.tolist()], ppl.item())

Step 1, Correct token index: 5470, Probability: 0.3802
Step 2, Correct token index: 220, Probability: 0.9899
Step 3, Correct token index: 16, Probability: 0.9992
Step 4, Correct token index: 19, Probability: 0.9936
Step 5, Correct token index: 11, Probability: 0.8983
Step 6, Correct token index: 220, Probability: 0.9760
Step 7, Correct token index: 16, Probability: 0.9992
Step 8, Correct token index: 23, Probability: 0.9974
Step 9, Correct token index: 22, Probability: 0.9776
Step 10, Correct token index: 24, Probability: 0.9979
[0.3802, 0.9899, 0.9992, 0.9936, 0.8983, 0.976, 0.9992, 0.9974, 0.9776, 0.9979] 1.121217966079712


In [91]:
tokenizer.eos_token_id

151643

In [95]:
tokenizer.eos_token

'<|endoftext|>'

In [102]:
i_idx.view(1, -1)

tensor([[467]])

In [101]:
i_idx.item()

467

In [92]:
with torch.no_grad():
    idx = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
    ).input_ids

    messages = []
    temperature = 0.7
    top_k = 5
    max_new_tokens = 16

    for i_token in range(max_new_tokens):
        message = f"Step {i_token + 1}:\n"

        # forward the model to get the logits for the index in the sequence
        # logits, model_input = self.model.inference(idx)
        logits = model(idx).logits[:, -1, :]

        # pluck the logits at the final step and scale by desired temperature
        logits = logits / temperature

        # logits have shape (b,t,v)
        # optionally crop the logits to only the top k options
        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            # check for dim
            if len(v.size()) == 3:
                logits[logits < v[:, :, [-1]]] = -float("Inf")
            else:
                logits[logits < v[:, [-1]]] = -float("Inf")

        #     # apply softmax to convert logits to (normalized) probabilities
        probs = torch.nn.functional.softmax(logits, dim=-1)

        # sample from the distribution
        # check if byte-level and if so, flatten
        if len(probs.size()) == 4:
            B, S, S_c, H = probs.size()
            probs = probs.view(B * S * S_c, H)
            flattened = True
        else:
            flattened = False

        # # For every i_token, collect top_k token info and format messages
        top_k_probs, top_k_indices = torch.topk(probs, top_k)
        print(top_k_probs)
        print(top_k_indices)
        for i_idx, i_prob in zip(top_k_indices.flatten(), top_k_probs.flatten()):
            decoded_token = tokenizer.batch_decode(i_idx.view(1, -1))
            message += f"Token: {decoded_token}, Probability: {i_prob.item():.4f}, Index: {i_idx.item()}\n"

        idx_next = torch.multinomial(probs, num_samples=1)

        # check if byte-level and if so, unflatten
        if flattened:
            idx_next = idx_next.view(B, S)
        elif idx_next == tokenizer.eos_token_id:
            break

        if flattened:
            idx_next = idx_next.unsqueeze(0)
        idx = torch.cat((idx, idx_next), dim=1)

        messages.append(message)

    print(tokenizer.batch_decode(idx.tolist()))
    for message in messages:
        print(message)

tensor([[0.7741, 0.0793, 0.0577, 0.0462, 0.0427]])
tensor([[5470, 6652, 6058, 5534,  220]])
tensor([[9.9965e-01, 2.4251e-04, 4.7379e-05, 4.1698e-05, 2.0134e-05]])
tensor([[ 220,  400, 4102,   11,  198]])
tensor([[9.9999e-01, 6.8441e-06, 2.8919e-06, 2.6354e-06, 2.5673e-06]])
tensor([[ 16,  18, 220,  24,  17]])
tensor([[9.9980e-01, 6.3225e-05, 6.0188e-05, 4.6668e-05, 3.3305e-05]])
tensor([[19, 20, 17, 18, 16]])
tensor([[9.6752e-01, 3.1589e-02, 4.2947e-04, 2.4897e-04, 2.1279e-04]])
tensor([[ 11, 339, 320, 220, 304]])
tensor([[9.9771e-01, 2.0113e-03, 1.6283e-04, 9.3792e-05, 2.5927e-05]])
tensor([[220,  16, 323, 892, 304]])
tensor([[9.9999e-01, 4.7701e-06, 4.3867e-06, 1.8999e-06, 9.5875e-07]])
tensor([[ 16, 220,  17,  18,  19]])
tensor([[9.9987e-01, 1.0870e-04, 1.1299e-05, 4.9139e-06, 1.3793e-06]])
tensor([[23, 24, 19, 21, 22]])
tensor([[9.9628e-01, 3.6646e-03, 2.5791e-05, 1.9385e-05, 1.3273e-05]])
tensor([[22, 19, 24, 18, 21]])
tensor([[9.9995e-01, 2.4370e-05, 1.2801e-05, 1.1473e-05, 3.800