In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load model
print("Loading AI model...")
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

Loading AI model...


Loading weights:   0%|          | 0/76 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: distilgpt2
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
transformer.h.{0, 1, 2, 3, 4, 5}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [14]:
# Simple example
text = "The FitnessGram Pacer Test is a multistage aerobic capacity test that progressively gets more difficult as it continues. The 20 meter pacer test will begin in "
print(f"\nStarting text: '{text}'")
print("\nGenerating word by word...\n")

# Generate 5 words, one at a time
current_text = text
steps = 30
k = 3
for step in range(steps):
    print(f"--- Step {step + 1} ---")
    print(f"Current: '{current_text}'")
    
    # Encode current text
    input_ids = tokenizer.encode(current_text, return_tensors="pt")
    
    # Get predictions
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs.logits
    
    # Get the predictions for the NEXT token
    next_token_logits = predictions[0, -1, :]
    next_token_probs = torch.softmax(next_token_logits, dim=0)
    
    # Get top 5 predictions
    top_probs, top_indices = torch.topk(next_token_probs, 5)
    
    print("Top 5 next word predictions:")
    for i, (prob, idx) in enumerate(zip(top_probs, top_indices)):
        word = tokenizer.decode([idx])
        print(f"  {i+1}. '{word}' ({prob.item()*100:.1f}% confident)")
    
    K = 10  # Only consider top 10 tokens

    # Get top K candidates
    top_k_probs, top_k_indices = torch.topk(next_token_probs, K)
    
    # Renormalize probabilities (make them sum to 1 again)
    top_k_probs_normalized = top_k_probs / top_k_probs.sum()
    
    # Randomly sample from these K tokens according to their probabilities
    sampled_index = torch.multinomial(top_k_probs_normalized, num_samples=1).item()
    
    # Get the actual token
    next_token_id = top_k_indices[sampled_index].item()
    
    next_word = tokenizer.decode([next_token_id])
    current_text += next_word
    
    print(f"✓ Chosen: '{next_word}'")
    print(f"New text: '{current_text}'\n")

print(f"\nFinal generated text: '{current_text}'")


Starting text: 'The FitnessGram Pacer Test is a multistage aerobic capacity test that progressively gets more difficult as it continues. The 20 meter pacer test will begin in '

Generating word by word...

--- Step 1 ---
Current: 'The FitnessGram Pacer Test is a multistage aerobic capacity test that progressively gets more difficult as it continues. The 20 meter pacer test will begin in '
Top 5 next word predictions:
  1. 'vern' (40.5% confident)
  2. 'ips' (4.6% confident)
  3. 'iced' (4.4% confident)
  4. ' ' (4.2% confident)
  5. 'Â' (3.4% confident)
✓ Chosen: 'ips'
New text: 'The FitnessGram Pacer Test is a multistage aerobic capacity test that progressively gets more difficult as it continues. The 20 meter pacer test will begin in ips'

--- Step 2 ---
Current: 'The FitnessGram Pacer Test is a multistage aerobic capacity test that progressively gets more difficult as it continues. The 20 meter pacer test will begin in ips'
Top 5 next word predictions:
  1. 'ilateral' (33.1% confid