# Assignment 3: Fine-tune GPT-2 for Story Generation

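This notebook fine-tunes a pretrained GPT-2 Medium model on 5,000 stories from the TinyStories dataset and then uses it to generate short creative stories from a prompt.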

**Setup:** In Google Colab, go to Runtime → Change runtime type and select a GPU before running any cells.

In [None]:
# Install packages
!pip install -q transformers datasets accelerate

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset

# Check if GPU is available and report which device was found
if torch.cuda.is_available():
    # Name the device so the "Using GPU:" label is followed by a value
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

## Load Model

In [None]:
# Load the pretrained GPT-2 Medium tokenizer and language-model weights
checkpoint = "gpt2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
model = GPT2LMHeadModel.from_pretrained(checkpoint)

# Use the end-of-sequence token as the padding token as well
tokenizer.pad_token = tokenizer.eos_token

print("Model loaded!")

## Prepare Data

In [None]:
# Load the first 5000 examples of the TinyStories training split
dataset = load_dataset(
    "roneneldan/TinyStories",
    split="train[:5000]",
)
story_count = len(dataset)
print(f"Loaded {story_count} stories")

In [None]:
# Tokenize stories
def tokenize(examples):
    """Tokenize a batch of stories, truncating each to at most 256 tokens.

    Args:
        examples: A batch from the dataset; its "text" entry holds the stories.

    Returns:
        The tokenizer output for the batch (unpadded token sequences).

    No padding is applied here. The language-modeling data collator passed to
    the Trainer pads each batch to its longest sequence, so short stories are
    not padded out to the full 256 tokens.
    """
    return tokenizer(examples["text"], truncation=True, max_length=256)

# Only tokenize while the raw "text" column is still present, so re-running
# this cell after it has already replaced `dataset` does not fail.
if "text" in dataset.column_names:
    dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
print("Data ready!")

## Train Model

In [None]:
# Setup training
training_args = TrainingArguments(
    output_dir="/content/story_model",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    logging_steps=100,
    save_steps=500,
    # A checkpoint is written every 500 steps; keep only the two most recent
    # so a multi-epoch run does not fill the disk with old checkpoints.
    save_total_limit=2,
    # Do not report to external experiment trackers, which can otherwise
    # interrupt the run with a login prompt.
    report_to="none",
    # Mixed precision is only enabled when a CUDA device is present
    fp16=torch.cuda.is_available(),
)

# The collator builds causal-LM batches (mlm=False) from the tokenized stories
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

# Training itself starts in the next cell; this cell only builds the Trainer
print("Trainer ready!")

In [None]:
# Run training
# Runs the training loop configured on `trainer` above; the confirmation
# message is printed only after train() has returned.
trainer.train()
print("Training done!")

In [None]:
# Write the model weights and the tokenizer files to the same directory
final_dir = "/content/story_model/final"
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_dir)
print("Model saved!")

## Generate Stories

In [None]:
# Function to generate stories
def generate_story(prompt):
    """Sample a story that continues ``prompt`` using the global model.

    Args:
        prompt: Text the story should start with. If it is empty or only
            whitespace, generation starts from the tokenizer's BOS token.

    Returns:
        The decoded text (prompt plus sampled continuation, capped at 300
        tokens in total) with special tokens removed.
    """
    # An empty prompt encodes to zero tokens, which leaves generate() with
    # nothing to extend; fall back to the beginning-of-sequence token.
    if not prompt or not prompt.strip():
        prompt = tokenizer.bos_token

    # Encode to input_ids + attention_mask and move both to the model's
    # device: after training on a GPU the model lives on CUDA, while freshly
    # tokenized tensors are created on the CPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Training leaves the model in train mode; switch to eval mode so dropout
    # is disabled while sampling.
    model.eval()

    outputs = model.generate(
        **inputs,
        max_length=300,
        temperature=0.8,
        top_p=0.92,
        do_sample=True,
        # Pass the padding id explicitly (the notebook uses EOS for padding)
        pad_token_id=tokenizer.eos_token_id,
    )

    story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return story

print("Ready to generate stories!")

In [None]:
# Try the model on a few example story openings
prompts = [
    "Once upon a time, in a magical forest,",
    "The little girl found a mysterious box",
    "A brave knight set out on a journey",
]
divider = "-" * 50

for prompt in prompts:
    print(f"\nPrompt: {prompt}")
    story = generate_story(prompt)
    print(story)
    print(divider)

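## Try Your Own Prompts

Run all of the cells above first (Runtime → Run all): the cell below relies on the `generate_story` function and the trained model defined earlier, and raises a `NameError` in a fresh runtime.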

In [2]:
# Generate story from your own prompt
# Strip surrounding whitespace so stray leading/trailing spaces typed at the
# input box do not become part of the prompt.
my_prompt = input("Enter Your Prompt Here : ").strip()

if my_prompt:
    print(my_prompt)
    print(generate_story(my_prompt))
else:
    # Nothing was typed; ask for input instead of calling the model
    print("Please enter a non-empty prompt and run this cell again.")

hi i am ayush 


NameError: name 'generate_story' is not defined