# TinyStories Story Infilling Model Demo with BPE Tokenizer

This notebook demonstrates how to use the TinyStories story infilling model with the BPE tokenizer. The model takes the first and last sentences of a story as input and generates the middle part.

In [1]:
import os
import random
import re
import sys

import torch
from datasets import load_dataset

# Add project directory to path to import modules.
# The project root is everything in the working directory's absolute path up to
# the first occurrence of `project_name`, with `project_name` joined back on.
current_path = os.path.abspath('.')
project_name = 'TinyStoriesProject'
project_path = os.path.join(current_path.split(project_name)[0], project_name)
if project_name not in current_path:
    # split() had nothing to cut on, so project_path is just <cwd>/<project_name>;
    # say so now instead of failing later with an opaque import error.
    print(f"Warning: '{project_name}' is not part of {current_path}; imports from src may fail")
# Re-running this cell must not stack duplicate entries on sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)
print(f"Project path: {project_path}")

  from .autonotebook import tqdm as notebook_tqdm


Project path: /Users/shawn/Documents/sjsu/2025-1/DL_CMPE258/TinyStoriesProject


In [2]:
# Import project modules
from src.models import StoryInfillingModel
from src.bpe_tokenizer import BPETokenizerWrapper
from src.generate_story import generate_story

## 1. Load the pre-trained model

First, let's load the pre-trained model from the saved checkpoint.

In [None]:
# Work out where the trained checkpoint should live and report whether it is there
model_path = os.path.join(project_path, 'model', 'tinystories_bpe_infilling_model.pth')
if os.path.exists(model_path):
    print(f"Model found at {model_path}")
else:
    print("Model file not found. You need to train the model first by running src/train_infilling_model.py")

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Load model checkpoint
# NOTE(review): torch.load relies on pickle, so only open checkpoint files from a
# source you trust.
# map_location=device places the stored tensors on the device selected earlier.
checkpoint = torch.load(model_path, map_location=device)
# Saved hyperparameters; they are looked up by name when the model is rebuilt
model_args = checkpoint['args']
# Checkpoints without a 'tokenizer_model' entry fall back to 'gpt2'
tokenizer_model_name = checkpoint.get('tokenizer_model', 'gpt2')

# Report the training metadata stored alongside the weights
print(f"Model was trained for {checkpoint['epoch']} epochs")
print(f"Train loss: {checkpoint['train_loss']:.4f}, Validation loss: {checkpoint['valid_loss']:.4f}")
print(f"Using tokenizer model: {tokenizer_model_name}")

In [None]:
# Initialize BPE tokenizer
# Built from the tokenizer model name recorded in the checkpoint, with "<blank>"
# registered as an extra special token under the key "blank_token".
# NOTE(review): presumably "<blank>" must match the token used during training —
# confirm against src/bpe_tokenizer.py.
tokenizer = BPETokenizerWrapper(
    model_name=tokenizer_model_name,
    special_tokens={"blank_token": "<blank>"}
)
# The vocabulary size is reused below to size the model
vocab_size = tokenizer.get_vocab_size()
print(f"Tokenizer vocabulary size: {vocab_size}")

In [None]:
# Rebuild the network: vocabulary size and special-token ids come from the
# tokenizer, every other hyperparameter from the values saved in the checkpoint
model = StoryInfillingModel(
    vocab_size=vocab_size,
    **{
        name: model_args[name]
        for name in ('embed_dim', 'num_layers', 'num_heads', 'ff_dim', 'max_seq_length', 'dropout')
    },
    pad_token_id=tokenizer.pad_token_id,
    blank_token_id=tokenizer.blank_token_id,
)
model = model.to(device)

# Restore the trained weights, then switch to evaluation mode
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print("Model loaded successfully!")

## 2. Testing with examples from the validation set

Let's load the validation set and test our model with some real examples.

In [None]:
# Load validation dataset
# Requests the "validation" split of roneneldan/TinyStories via datasets.load_dataset.
# NOTE(review): presumably fetched from the Hugging Face Hub on first use and cached
# afterwards, so the first run needs network access — confirm.
valid_dataset = load_dataset("roneneldan/TinyStories", split="validation")
print(f"Validation dataset loaded with {len(valid_dataset)} examples")

In [None]:
# One sentence: a run of non-terminator characters followed either by its
# closing punctuation (plus any closing quotes/brackets) or by the end of text.
_SENTENCE_RE = re.compile("[^.!?]+(?:[.!?]+[\"'\u201d\u2019)]*|$)")


def extract_first_last_sentences(text):
    """Extract the first and last sentences from a story.

    Sentences end at '.', '!' or '?'. The terminator, together with any closing
    quote or bracket right after it, stays attached to its sentence. A trailing
    fragment with no terminator is kept and given a final period.

    Args:
        text: Story text. ``None`` or an empty string is treated as having no
            sentences.

    Returns:
        ``(first_sentence, last_sentence)`` as stripped strings, or
        ``(None, None)`` when fewer than two sentences are found.
    """
    if not text:
        return None, None

    closers = "\"'\u201d\u2019)"
    sentences = []
    for fragment in _SENTENCE_RE.findall(text):
        sentence = fragment.strip()
        # Skip fragments that carry no words (whitespace or stray punctuation only)
        if not sentence.strip(".!?" + closers):
            continue
        # Look past closing quotes/brackets to decide whether a terminator is present
        if sentence.rstrip(closers)[-1:] not in (".", "!", "?"):
            sentence += "."
        sentences.append(sentence)

    if len(sentences) < 2:
        return None, None

    return sentences[0], sentences[-1]

In [None]:
# Test with a random example from the validation set
def test_with_random_example(max_attempts=100):
    """Pick a random validation story, infill its middle, and print the comparison.

    A story from which a first and last sentence cannot be extracted is skipped
    and another index is drawn, up to ``max_attempts`` times.

    Args:
        max_attempts: Upper bound on how many stories are drawn before giving up.

    Returns:
        Tuple ``(first_sentence, last_sentence, story, generated_story)``, where
        ``generated_story`` is whatever ``model.generate`` returns.

    Raises:
        RuntimeError: If no usable story is found within ``max_attempts`` draws.
    """
    # Retry in a loop rather than by recursion so repeated misses cannot exhaust
    # the call stack.
    for _ in range(max_attempts):
        idx = random.randint(0, len(valid_dataset) - 1)
        story = valid_dataset[idx]['text']

        first_sentence, last_sentence = extract_first_last_sentences(story)
        if first_sentence and last_sentence:
            break
        print("Couldn't extract sentences properly. Trying another example.")
    else:
        raise RuntimeError(
            f"No story with an extractable first and last sentence found in {max_attempts} attempts"
        )

    print("Original story:")
    print("-" * 80)
    print(story)
    print("-" * 80)
    print()

    print("First sentence:")
    print(first_sentence)
    print()

    print("Last sentence:")
    print(last_sentence)
    print()

    # Generate the middle part with our model
    generated_story = model.generate(
        first_sentence,
        last_sentence,
        tokenizer,
        max_length=150,
        teacher_forcing_ratio=0.0  # During testing, we don't use teacher forcing
    )

    print("Generated story:")
    print("-" * 80)
    print(generated_story)
    print("-" * 80)

    return first_sentence, last_sentence, story, generated_story

In [None]:
# Run the demo once; the four returned values are kept as notebook-level names
first_sentence, last_sentence, original_story, generated_story = test_with_random_example()

## 3. Custom examples

Now let's try with our own custom first and last sentences.

In [None]:
def generate_with_custom_input(first_sentence, last_sentence, max_tokens=150):
    """Infill a story between two caller-supplied sentences and print the result.

    Args:
        first_sentence: Opening sentence of the story.
        last_sentence: Closing sentence of the story.
        max_tokens: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        Whatever ``model.generate`` returns for the given prompt pair.
    """
    divider = "-" * 80

    # Echo the prompt; the trailing empty string produces the blank separator line
    print("First sentence:", first_sentence, "", sep="\n")
    print("Last sentence:", last_sentence, "", sep="\n")

    # Teacher forcing is switched off (ratio 0.0) for this call
    infilled = model.generate(
        first_sentence,
        last_sentence,
        tokenizer,
        max_length=max_tokens,
        teacher_forcing_ratio=0.0,
    )

    print("Generated story:", divider, infilled, divider, sep="\n")
    return infilled

In [None]:
# Example 1
# A prompt pair about sharing toys; max_tokens is left at the helper's default (150)
custom_first_1 = "Once upon a time, there was a little boy named Tim who loved to play with toys."
custom_last_1 = "Tim learned that sharing his toys made everyone happy, including himself."

generated_story_1 = generate_with_custom_input(custom_first_1, custom_last_1)

In [None]:
# Example 2
# A prompt pair about a family zoo visit; max_tokens is left at the helper's default (150)
custom_first_2 = "Sarah was excited to visit the zoo with her family on Saturday."
custom_last_2 = "They all agreed it was the best day ever and couldn't wait to come back."

generated_story_2 = generate_with_custom_input(custom_first_2, custom_last_2)

In [None]:
# Example 3
# A prompt pair about a rainy day; max_tokens is left at the helper's default (150)
custom_first_3 = "It was a rainy day and Max was feeling sad because he couldn't go outside to play."
custom_last_3 = "Max realized that rainy days could be fun too."

generated_story_3 = generate_with_custom_input(custom_first_3, custom_last_3)

## 4. Experiment with generation parameters

Let's try changing the generation parameters to see how they affect the output.

In [None]:
def generate_with_parameters(first_sentence, last_sentence, max_tokens=150, temperature=1.0, top_k=50, top_p=0.9):
    """Infill a story with explicit sampling settings and print the result.

    The already-loaded ``model`` is reused; the sampling arguments are forwarded
    unchanged to ``model.generate``.

    Args:
        first_sentence: Opening sentence of the story.
        last_sentence: Closing sentence of the story.
        max_tokens: Forwarded as ``max_length``.
        temperature: Controls randomness: values above 1.0 make the output more
            random, values below 1.0 make it more deterministic.
        top_k: Forwarded as ``top_k``.
        top_p: Forwarded as ``top_p``.

    Returns:
        Whatever ``model.generate`` returns for these settings.
    """
    print(f"Generating with temperature={temperature}, top_k={top_k}, top_p={top_p}")

    sampling_options = dict(temperature=temperature, top_k=top_k, top_p=top_p)
    story = model.generate(
        first_sentence,
        last_sentence,
        tokenizer,
        max_length=max_tokens,
        **sampling_options,
        teacher_forcing_ratio=0.0,
    )

    divider = "-" * 80
    print("Generated story:", divider, story, divider, sep="\n")
    return story

In [None]:
# Prompt pair used for the generation-parameter experiments
test_first = "Jake was a little boy who always wanted a puppy for his birthday."
test_last = "Jake was so happy with his new puppy and promised to take care of it forever."

# Show both prompts, each followed by a blank line
print("First sentence:", test_first, "", sep="\n")
print("Last sentence:", test_last, "", sep="\n")

In [None]:
# Generate with different temperatures
# Only temperature varies across the three calls; top_k and top_p stay at the
# helper's defaults (50 and 0.9).
# NOTE(review): no random seed is set in this notebook, so if generation samples,
# run-to-run variation is mixed in with the temperature effect — confirm.
low_temp_story = generate_with_parameters(test_first, test_last, temperature=0.5)
normal_temp_story = generate_with_parameters(test_first, test_last, temperature=1.0)
high_temp_story = generate_with_parameters(test_first, test_last, temperature=1.5)

In [None]:
# Generate with different top_k and top_p values
# Each call overrides a single argument; the others keep the helper's defaults
# (temperature=1.0, top_k=50, top_p=0.9).
low_k_story = generate_with_parameters(test_first, test_last, top_k=10)
high_k_story = generate_with_parameters(test_first, test_last, top_k=100)
low_p_story = generate_with_parameters(test_first, test_last, top_p=0.5)
high_p_story = generate_with_parameters(test_first, test_last, top_p=0.99)

## 5. Trying teacher forcing during generation

Let's see how teacher forcing affects generation when we have a ground truth story.

In [None]:
def generate_with_teacher_forcing(first_sentence, last_sentence, ground_truth, ratio=0.5, max_tokens=200):
    """Infill a story while passing a ground-truth text and teacher-forcing ratio.

    Prints the ground truth first, then the generated story, so the two can be
    compared by eye.

    Args:
        first_sentence: Opening sentence of the story.
        last_sentence: Closing sentence of the story.
        ground_truth: Reference text forwarded to ``model.generate`` as
            ``ground_truth``.
        ratio: Forwarded as ``teacher_forcing_ratio``.
        max_tokens: Forwarded as ``max_length``. Defaults to 200, the value this
            helper previously hard-coded; exposed so it can be tuned like the
            other generation helpers in this notebook.

    Returns:
        Whatever ``model.generate`` returns.
    """
    print(f"Generating with teacher_forcing_ratio={ratio}")
    print("Ground truth:")
    print("-" * 80)
    print(ground_truth)
    print("-" * 80)
    print()

    # Generate with teacher forcing
    generated_story = model.generate(
        first_sentence,
        last_sentence,
        tokenizer,
        max_length=max_tokens,
        teacher_forcing_ratio=ratio,
        ground_truth=ground_truth
    )

    print("Generated story:")
    print("-" * 80)
    print(generated_story)
    print("-" * 80)

    return generated_story

In [None]:
# Get a random story from the validation set
# NOTE(review): this rebinds the notebook-level first_sentence / last_sentence that
# the random-example cell in section 2 also assigns.
idx = random.randint(0, len(valid_dataset) - 1)
# ground_truth is the complete story text; both prompt sentences are derived from it
ground_truth = valid_dataset[idx]['text']
first_sentence, last_sentence = extract_first_last_sentences(ground_truth)

# extract_first_last_sentences yields (None, None) when it finds fewer than two sentences
if not first_sentence or not last_sentence:
    print("Couldn't extract sentences properly. Please run this cell again.")
else:
    # Generate with different teacher forcing ratios
    no_tf_story = generate_with_teacher_forcing(first_sentence, last_sentence, ground_truth, ratio=0.0)
    half_tf_story = generate_with_teacher_forcing(first_sentence, last_sentence, ground_truth, ratio=0.5)
    full_tf_story = generate_with_teacher_forcing(first_sentence, last_sentence, ground_truth, ratio=1.0)

## 6. Continuation Generation (Without Last Sentence)

The model can also generate a continuation when given only the first sentence.

In [None]:
def generate_continuation(first_sentence, max_tokens=200):
    """Continue a story from its opening sentence alone and print the result.

    Args:
        first_sentence: Opening sentence to continue from.
        max_tokens: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        Whatever ``model.generate`` returns when ``last_sentence`` is ``None``.
    """
    # Echo the prompt; the trailing empty string produces the blank separator line
    print("First sentence:", first_sentence, "", sep="\n")

    # No closing sentence is supplied, so last_sentence is passed as None
    continuation = model.generate(
        first_sentence,
        last_sentence=None,
        tokenizer=tokenizer,
        max_length=max_tokens,
        teacher_forcing_ratio=0.0,
    )

    divider = "-" * 80
    print("Generated continuation:", divider, continuation, divider, sep="\n")
    return continuation

In [None]:
# Example continuations
# NOTE(review): custom_first_1 and custom_first_2 are rebound here, replacing the
# prompts of the same names from section 3; generated_story_1 / generated_story_2
# no longer match these variables once this cell has run.
custom_first_1 = "Once upon a time, there was a little girl named Lily who loved to dance in the rain."
continuation_1 = generate_continuation(custom_first_1)

custom_first_2 = "Tim got a new robot toy for his birthday and was very excited to show it to his friends."
continuation_2 = generate_continuation(custom_first_2)