<a href="https://colab.research.google.com/github/EkansTCG/AI-hw-2/blob/main/Next_Word_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load the causal-LM checkpoint named by `model_name` together with its matching
# tokenizer. Both objects are reused by the generation cell below.
model_name = "distilgpt2"
print("Loading AI model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

Loading AI model...


In [23]:
# Greedy next-token demo.
#
# At every step the model scores every vocabulary entry as the possible next
# token; we print the most likely candidates and always append the single most
# probable one (greedy decoding). Note that the model predicts *tokens*
# (sub-word pieces), which are not always whole words.
NUM_STEPS = 30  # number of tokens to generate
TOP_K = 5       # number of candidate tokens to display at each step

text = "The Green Bay Packers were up to make their pick at the end of the first round of the NFL draft, picking"
print(f"\nStarting text: '{text}'")
print("\nGenerating word by word...\n")

# Tokenize the prompt ONCE and keep extending the id tensor from there.
# Decoding a single token to a string and re-encoding the whole string on every
# step is not guaranteed to reproduce the ids the model actually generated
# (BPE may merge pieces differently, and a token holding only part of a
# multi-byte character decodes to a replacement character), so the model could
# end up conditioned on a different sequence than the one it produced.
input_ids = tokenizer.encode(text, return_tensors="pt")
current_text = text

for step in range(NUM_STEPS):
    print(f"--- Step {step + 1} ---")
    print(f"Current: '{current_text}'")

    # Forward pass only; no gradients are needed for inference.
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs.logits

    # Scores at the LAST position describe the token that would come next.
    next_token_logits = predictions[0, -1, :]
    next_token_probs = torch.softmax(next_token_logits, dim=-1)

    # Top candidates, for display purposes only.
    top_probs, top_indices = torch.topk(next_token_probs, TOP_K)

    print(f"Top {TOP_K} next word predictions:")
    for i, (prob, idx) in enumerate(zip(top_probs.tolist(), top_indices.tolist())):
        # `idx` is a plain int here, which is what `decode` expects in a list.
        word = tokenizer.decode([idx], clean_up_tokenization_spaces=False)
        print(f"  {i+1}. '{word}' ({prob * 100:.1f}% confident)")

    # Pick the next token using the GREEDY strategy (highest probability).
    next_token_id = torch.argmax(next_token_probs, dim=-1)
    next_word = tokenizer.decode([next_token_id.item()], clean_up_tokenization_spaces=False)
    print(f"✓ Chosen: '{next_word}'")

    # Stop once the model signals the end of the document instead of
    # generating past it.
    if next_token_id.item() == tokenizer.eos_token_id:
        print("End-of-text token predicted; stopping early.\n")
        break

    # Append the chosen id to the running sequence and re-derive the display
    # text from the ids, so the printed text always matches the model input.
    input_ids = torch.cat([input_ids, next_token_id.view(1, 1)], dim=1)
    current_text = tokenizer.decode(input_ids[0].tolist(), clean_up_tokenization_spaces=False)
    print(f"New text: '{current_text}'\n")

print(f"\nFinal generated text: '{current_text}'")


Starting text: 'The Green Bay Packers were up to make their pick at the end of the first round of the NFL draft, picking'

Generating word by word...

--- Step 1 ---
Current: 'The Green Bay Packers were up to make their pick at the end of the first round of the NFL draft, picking'
Top 5 next word predictions:
  1. ' the' (17.3% confident)
  2. ' up' (10.3% confident)
  3. ' No' (5.1% confident)
  4. ' a' (4.8% confident)
  5. ' out' (3.0% confident)
✓ Chosen: ' the'
New text: 'The Green Bay Packers were up to make their pick at the end of the first round of the NFL draft, picking the'

--- Step 2 ---
Current: 'The Green Bay Packers were up to make their pick at the end of the first round of the NFL draft, picking the'
Top 5 next word predictions:
  1. ' 49' (6.0% confident)
  2. ' Packers' (5.9% confident)
  3. ' first' (5.5% confident)
  4. ' second' (3.7% confident)
  5. ' No' (3.6% confident)
✓ Chosen: ' 49'
New text: 'The Green Bay Packers were up to make their pick at the end of 