In [None]:
# ============================================
# Module 9: Pretrained Models & Transfer Learning
# Lab 2 – Text Generation with GPT-2
# ============================================
# Author: Dr. Dasha Trofimova
# Course: M.Sc. Applied Data Science & AI
# --------------------------------------------
# Learning Goals:
# - Understand autoregressive text generation
# - Experiment with decoding parameters (temperature, top-p)
# - Generate creative and conditioned text with GPT-2 / DistilGPT-2
# --------------------------------------------
# Lab Objectives:
# 1. Load a pretrained GPT-2 model and tokenizer
# 2. Generate completions for open-ended prompts
# 3. Compare temperature and sampling settings
# 4. Explore few-shot prompting for stylistic control
# ============================================
!pip install transformers torch accelerate --quiet


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Checkpoint to download from the Hugging Face Hub.
model_name = "distilgpt2"  # can swap to "gpt2" if Colab RAM allows

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",  # let transformers pick the dtype for the checkpoint
    device_map="auto",   # let accelerate choose the device placement
)

# Generation below uses sampling, which is stochastic. Seeding the PyTorch RNG
# here makes a fresh "Restart Kernel -> Run All" reproduce the same outputs
# (on the same hardware/software stack). Change SEED to see other samples.
SEED = 42
torch.manual_seed(SEED)


In [None]:
def generate(prompt, max_new_tokens=60, temperature=0.8, top_p=0.9):
    """Complete ``prompt`` using the notebook-level ``model`` and ``tokenizer``.

    Args:
        prompt: Text the model is conditioned on.
        max_new_tokens: Maximum number of tokens to generate after the prompt.
        temperature: Sampling temperature. ``0`` switches to greedy
            (non-sampling) decoding; any other value is forwarded to
            ``model.generate`` as a float, which validates it.
        top_p: Nucleus-sampling threshold. Not used when decoding greedily.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens skipped. For causal LMs such as GPT-2 the returned
        sequence contains the prompt followed by the continuation.
    """
    # The sampling path of transformers only accepts a strictly positive
    # *float* temperature, so coerce ints (e.g. 2) before forwarding them.
    if temperature is not None:
        temperature = float(temperature)

    if temperature == 0.0:
        # Temperature 0 is the usual shorthand for "always take the most
        # likely token"; express it as greedy decoding instead of letting
        # the sampler reject it.
        decoding = {"do_sample": False}
    else:
        decoding = {"do_sample": True, "temperature": temperature, "top_p": top_p}

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            # Reuse EOS as the padding id for generation.
            pad_token_id=tokenizer.eos_token_id,
            **decoding,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Smoke test: draw one continuation using generate()'s default settings.
prompt = "In the future, AI assistants will"
completion = generate(prompt)
print(completion)


In [None]:
# Two instruction-style prompts on the same topic, decoded with different
# length budgets and temperatures so the outputs can be compared.
headline_prompt = "Write a short news headline about NASA landing on Mars:"
story_prompt = "Write a detailed sci-fi paragraph about explorers on Mars:"

# Short output, lower temperature.
headline_text = generate(headline_prompt, temperature=0.7, max_new_tokens=30)
print("Headline:\n", headline_text)

# Longer output, higher temperature.
story_text = generate(story_prompt, temperature=0.9, max_new_tokens=120)
print("\nStory:\n", story_text)


In [None]:
# Few-shot chat template: a task line, one solved User/Assistant exchange,
# then a new User turn ending on "Assistant:" for the model to continue.
# NOTE(review): the task/question/answer strings look like placeholders to be
# replaced with a real instruction and exemplar; they are kept verbatim here.
prompt = "".join(
    [
        "Task: Smth here.\n",
        "User: question here?\n",
        "Assistant: exampel of answer.\n",
        "User: request.\n",
        "Assistant:",
    ]
)

few_shot_completion = generate(prompt, max_new_tokens=80, temperature=0.7)
print(few_shot_completion)
