In [None]:
!pip install --upgrade transformers accelerate
!pip install --index-url https://download.pytorch.org/whl/cu121 torch torchvision torchaudio




In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

# Pick the compute device: the GPU when PyTorch reports CUDA as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_id = "distilgpt2"

# Load the tokenizer and the causal language model for `model_id`, then move
# the model onto the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.to(device)

# Reuse the end-of-sequence token as the padding token, on both the tokenizer
# and the model config, so the two agree on the pad id.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Seed after the model is loaded (same position as before) so later sampling
# starts from the same RNG state.
set_seed(25)

# Last expression of the cell: display which device was selected.
device


  from .autonotebook import tqdm as notebook_tqdm
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


'cuda'

In [None]:
import math, random

def perplexity(text: str) -> float:
    """Compute perplexity = exp(loss) for a single string.

    The string is tokenized with the module-level ``tokenizer`` and scored by
    the module-level ``model`` with the input ids passed as labels; the
    returned value is ``exp`` of the loss reported by the model.

    Args:
        text: The string to score.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If ``text`` tokenizes to fewer than two tokens. A
            next-token loss needs at least one (context, target) pair, so
            perplexity is undefined for such input.
    """
    enc = tokenizer(text, return_tensors="pt")
    input_ids = enc["input_ids"]

    # Fail early with a clear message instead of letting the model produce a
    # NaN loss or an opaque error on an empty / single-token sequence.
    n_tokens = input_ids.shape[-1]
    if n_tokens < 2:
        raise ValueError(
            f"perplexity needs at least 2 tokens, got {n_tokens} for text {text!r}"
        )

    input_ids = input_ids.to(device)
    attn = enc["attention_mask"].to(device)

    # Scoring only: no gradients are needed.
    with torch.no_grad():
        out = model(input_ids, attention_mask=attn, labels=input_ids)
        ppl = torch.exp(out.loss).item()
    return ppl

# Sample text for the experiment. Adjacent string literals are concatenated
# verbatim, so every sentence except the last ends with an explicit space;
# without it the sentences fuse ("club.Ali", "2-0.There"), which corrupts both
# the tokenized text and the word list produced by split() below.
paragraph = (
    "Galatasaray SK was founded in October 1905 (the exact day is disputed, but is traditionally accepted as '30 October 1905' according to the Gregorian calendar) by Ali Sami Yen and other students of Galatasaray High School (a high school in Istanbul which was established in 1481) as a football club. "
    "Ali Sami Yen became Galatasaray SK's first president and was given the club's membership number '1'. The team's first match was against Cadi-Keuy FC and Galatasaray won this match with a score of 2-0. "
    "There were discussions about the club's name, in which some suggested Gloria (victory) and others Audace (courage), but it was decided that its name would be Galatasaray."
)

# Build a control text containing the same words in a random order. The seed
# is fixed so the shuffled text is identical on every run of this cell.
words = paragraph.split()
random.seed(0)
random.shuffle(words)
shuffled = " ".join(words)

# Score the natural text and the shuffled control with the same model.
ppl_orig = perplexity(paragraph)
ppl_shuf = perplexity(shuffled)

print("Original PPL :", ppl_orig)
print("Shuffled PPL :", ppl_shuf)
# The recorded output of this cell includes this interpretation line; print it
# here so a fresh run reproduces it.
print("Comment     : Expect original < shuffled because correct word order raises token likelihoods.")


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Original PPL : 35.36817169189453
Shuffled PPL : 845.6408081054688
Comment     : Expect original < shuffled because correct word order raises token likelihoods.


In [None]:
def generate(prompt: str, *, greedy=False, temperature=1.0, max_new_tokens=500):
    """Continue ``prompt`` with the module-level ``model`` and return the text.

    Args:
        prompt: Text to continue.
        greedy: When true, decode without sampling.
        temperature: Sampling temperature. A value of 0 is treated the same as
            ``greedy=True``.
        max_new_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.

    Returns:
        A ``(mode, text)`` tuple. ``mode`` is ``"greedy"`` or
        ``"temp=<temperature>"``; ``text`` is the first returned sequence
        decoded with special tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # Arguments that are the same for both decoding strategies.
    shared_kwargs = dict(
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    sample = not (greedy or temperature == 0)
    if sample:
        # top_k=0 is passed explicitly; presumably this turns top-k filtering
        # off so only the temperature shapes sampling. TODO confirm against
        # the transformers generation docs.
        strategy_kwargs = dict(do_sample=True, temperature=temperature, top_k=0)
        label = f"temp={temperature}"
    else:
        strategy_kwargs = dict(do_sample=False)  # greedy decoding
        label = "greedy"

    sequences = model.generate(**encoded, **shared_kwargs, **strategy_kwargs)
    completion = tokenizer.decode(sequences[0], skip_special_tokens=True)
    return label, completion

prompt = "Once upon a time"

# Deterministic baseline: greedy decoding. Only the first 800 characters of
# each completion are printed to keep the cell output readable.
label, text = generate(prompt, greedy=True)
print(f"\n=== {label} ===\n{text[:800]}...\n")

# Temperature sweep for sampled decoding. 0 is deliberately not in the list:
# generate() treats temperature == 0 as greedy decoding, so including it would
# only repeat the baseline run above and print the same text twice.
for T in [0.3, 0.6, 0.9, 1.2, 1.5]:
    label, text = generate(prompt, temperature=T)
    print(f"\n=== {label} ===\n{text[:800]}...\n")



=== greedy ===
Once upon a time of war, the United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the world to have a military presence. The United States was the only country in the ...


=== greedy ===
Once upon a time of war, the United States was the only country in the world to have a military presence. The United States was the only country in the world to ha