In [None]:
# Cell 1: Install Required Libraries
!pip install -q transformers datasets torch accelerate evaluate rouge-score nltk kaggle


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone


In [None]:
# Cell 2: Import Libraries
import os
import torch
import pandas as pd
import numpy as np
import json
from datasets import Dataset, DatasetDict
from transformers import (
    GPT2Tokenizer,
    GPT2LMHeadModel,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from evaluate import load
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")


Using device: cuda
GPU: Tesla T4
Memory: 15.83 GB


In [None]:
# Cell 3: Check GPU Availability
# ----------------------------------------------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")
else:
    print("No GPU available. Training will be slower on CPU.")

Using device: cuda
GPU Name: Tesla T4
GPU Memory: 15.83 GB
CUDA Version: 12.6


In [None]:
# Cell 4: Load Dataset from Current Folder
import glob
# Find all CSV files in the current directory
csv_files = glob.glob('/content/drive/MyDrive/Colab/Task_02_Decoder/dataset.csv')
print(f"Found CSV files in Task_02_Decoder folder: {csv_files}")
# Load the recipe dataset
df = pd.read_csv(csv_files[0])
print(f"\nDataset loaded successfully!")
print(f"Dataset shape: {df.shape}")
print(f"Column names: {df.columns.tolist()}")

Found CSV files in Task_02_Decoder folder: ['/content/drive/MyDrive/Colab/Task_02_Decoder/dataset.csv']

Dataset loaded successfully!
Dataset shape: (2231143, 6)
Column names: ['title', 'NER', 'Extended_NER', 'genre', 'label', 'directions']


In [None]:
# Cell 5: Explore the Dataset
print("DATASET EXPLORATION")
# Display dataset info
print("\nDataset Info:")
print(df.info())
# Display first few rows
print("\nFirst 3 rows:")
print(df.head(3))
# Check for missing values
print(f"\nMissing values:")
print(df.isnull().sum())
# Display sample recipe
if len(df) > 0:
    print("\n" + "="*70)
    print("SAMPLE RECIPE:")
    print("="*70)
    sample = df.iloc[0]
    for col in df.columns:
        print(f"\n{col}:")
        print(str(sample[col])[:200] + ('...' if len(str(sample[col])) > 200 else ''))

DATASET EXPLORATION

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2231143 entries, 0 to 2231142
Data columns (total 6 columns):
 #   Column        Dtype 
---  ------        ----- 
 0   title         object
 1   NER           object
 2   Extended_NER  object
 3   genre         object
 4   label         int64 
 5   directions    object
dtypes: int64(1), object(5)
memory usage: 102.1+ MB
None

First 3 rows:
                            title  \
0    \t Arugula Pomegranate Salad   
1  \t Black Bean And Turkey Chili   
2  \t Finger Lickin' Tofu Nuggets   

                                                 NER  \
0  ["baby spinach", "baby arugula", "pomegranate ...   
1  ["olive oil", "yellow onion", "garlic", "groun...   
2  ["extra firm", "almond flour", "nutritional ye...   

                                        Extended_NER       genre  label  \
0  ['alfalfa sprouts', 'baby spinach', 'baby arug...  vegetables      4   
1  ['one', 'yellow onion', 'tomato paste', 'about

In [None]:
# Cell 6: Identify and Standardize Column Names
print("IDENTIFYING DATASET STRUCTURE")
# Find relevant columns
title_col = None
ingredients_col = None
directions_col = None
# Common column name patterns
for col in df.columns:
    col_lower = col.lower()
    if any(x in col_lower for x in ['title', 'name', 'recipe']):
        title_col = col
    elif any(x in col_lower for x in ['ingredient']):
        ingredients_col = col
    elif any(x in col_lower for x in ['direction', 'instruction', 'step', 'method']):
        directions_col = col
print(f"Identified columns:")
print(f"  Title column: {title_col}")
print(f"  Ingredients column: {ingredients_col}")
print(f"  Directions/Instructions column: {directions_col}")
# Verify we found all required columns
if not all([title_col, ingredients_col, directions_col]):
    print("\nWarning: Could not auto-detect all columns. Please verify manually.")
    print("Available columns:", df.columns.tolist())


IDENTIFYING DATASET STRUCTURE
Identified columns:
  Title column: title
  Ingredients column: None
  Directions/Instructions column: directions

Available columns: ['title', 'NER', 'Extended_NER', 'genre', 'label', 'directions']


In [None]:
# Cell 7: Data Cleaning and Preprocessing
print("DATA PREPROCESSING")
# Keep only required columns and remove missing values
required_cols = [title_col, 'NER', directions_col]
df_clean = df[required_cols].copy()
original_size = len(df_clean)
df_clean = df_clean.dropna()
print(f"Removed {original_size - len(df_clean)} rows with missing values")
# Remove duplicate recipes (based on title)
df_clean = df_clean.drop_duplicates(subset=[title_col])
print(f"Removed duplicates. Final dataset size: {len(df_clean)}")
# Rename columns for consistency
df_clean.columns = ['title', 'ingredients', 'directions']
# Convert all to string type
for col in df_clean.columns:
    df_clean[col] = df_clean[col].astype(str)

print(f"\nCleaned dataset shape: {df_clean.shape}")

DATA PREPROCESSING
Removed 1 rows with missing values
Removed duplicates. Final dataset size: 1312864

Cleaned dataset shape: (1312864, 3)


In [None]:
# # Cell 8: Limit Dataset Size (Optional for Faster Training)
# # ----------------------------------------------------------------------------
# # For demonstration and faster training, we'll use a subset
# # Remove this cell or adjust SAMPLE_SIZE for full dataset training
SAMPLE_SIZE = 5000  # Adjust based on your needs and GPU memory

if len(df_clean) > SAMPLE_SIZE:
    df_clean = df_clean.sample(n=SAMPLE_SIZE, random_state=42).reset_index(drop=True)
    print(f"Using sample of {SAMPLE_SIZE} recipes for training")
else:
    print(f"Using full dataset of {len(df_clean)} recipes")

# Analyze text lengths
df_clean['total_length'] = df_clean.apply(
    lambda row: len(row['title']) + len(row['ingredients']) + len(row['directions']),
    axis=1
)

print(f"\nText length statistics (characters):")
print(df_clean['total_length'].describe())


Using sample of 5000 recipes for training

Text length statistics (characters):
count    5000.000000
mean      744.024000
std       520.322948
min        67.000000
25%       386.000000
50%       603.000000
75%       951.000000
max      6032.000000
Name: total_length, dtype: float64


In [None]:
# Cell 9: Format Recipes for GPT-2 Training
# ----------------------------------------------------------------------------
print("FORMATTING RECIPES FOR GPT-2")
def format_recipe(row):
    """
    Format recipe into structured text for GPT-2
    Format: Recipe: [title] | Ingredients: [ingredients] | Instructions: [directions]
    """
    title = row['title'].strip()
    ingredients = row['ingredients'].strip()
    directions = row['directions'].strip()
    # Create structured format
    formatted = f"Recipe: {title} | Ingredients: {ingredients} | Instructions: {directions}"
    return formatted
# Apply formatting
print("Formatting all recipes...")
df_clean['formatted_text'] = df_clean.apply(format_recipe, axis=1)

# Display sample formatted recipes
print("\nSample Formatted Recipes:")
for i in range(min(3, len(df_clean))):
    print(f"\n{'='*70}")
    print(f"Recipe {i+1}:")
    print(df_clean['formatted_text'].iloc[i][:300] + "...")

# Analyze formatted text lengths
df_clean['formatted_length'] = df_clean['formatted_text'].str.len()
print(f"\nFormatted text length statistics:")
print(df_clean['formatted_length'].describe())


FORMATTING RECIPES FOR GPT-2
Formatting all recipes...

Sample Formatted Recipes:

Recipe 1:
Recipe: Ham Steaks With Jazzed-Up Gravy | Ingredients: ["butter", "brown sugar", "ham steaks", "pepper", "green onion", "mushrooms", "flour", "chicken broth", "coffee"] | Instructions: ["Melt the butter and brown sugar in a heavy skillet.", "Season the ham steaks with pepper.", "Cook the ham steaks ...

Recipe 2:
Recipe: Walnut Muffins | Ingredients: ["flour", "sugar", "baking powder", "butter", "eggs", "milk", "fruit", "walnuts"] | Instructions: Whisk all wet ingredients together. Mix all dry ingredients together. Add wet mixture to dry mixture. Next add fruit and nuts. Grease muffin tins. Bake at 350\u00b0...

Recipe 3:
Recipe: Pumpkin Chocolate Chip Waffles | Ingredients: ["flour", "baking powder", "baking soda", "pumpkin pie spice", "salt", "eggs", "brown sugar", "pumpkin", "milk", "chocolate chips", "margarine", "orange", "apples", "butter", "syrup"] | Instructions: ["Make waffle batter (a

In [None]:
# Cell 10: Train-Validation-Test Split
print("DATASET SPLITTING")
from sklearn.model_selection import train_test_split
# Split: 80% train, 10% validation, 10% test
# First split: 80% train, 20% temp
train_df, temp_df = train_test_split(
    df_clean[['formatted_text']],
    test_size=0.2,
    random_state=42
)
# Second split: split temp into 50-50 for validation and test
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42
)
print(f"Train set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")
print(f"Split ratio - Train:Val:Test = {len(train_df)}:{len(val_df)}:{len(test_df)}")
# Verify split percentages
total = len(train_df) + len(val_df) + len(test_df)
print(f"\nSplit percentages:")
print(f"  Train: {len(train_df)/total*100:.1f}%")
print(f"  Validation: {len(val_df)/total*100:.1f}%")
print(f"  Test: {len(test_df)/total*100:.1f}%")

DATASET SPLITTING
Train set size: 4000
Validation set size: 500
Test set size: 500
Split ratio - Train:Val:Test = 4000:500:500

Split percentages:
  Train: 80.0%
  Validation: 10.0%
  Test: 10.0%


In [None]:
# Cell 11: Convert to Hugging Face Dataset Format
# Create HuggingFace datasets for all three splits
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))
dataset_dict = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})
print("Dataset converted to HuggingFace format:")
print(dataset_dict)

Dataset converted to HuggingFace format:
DatasetDict({
    train: Dataset({
        features: ['formatted_text'],
        num_rows: 4000
    })
    validation: Dataset({
        features: ['formatted_text'],
        num_rows: 500
    })
    test: Dataset({
        features: ['formatted_text'],
        num_rows: 500
    })
})


In [None]:
# Cell 12: Load GPT-2 Tokenizer
print("LOADING GPT-2 TOKENIZER")
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# GPT-2 doesn't have a pad token by default, so we set it to eos_token
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'  # Padding on the left for generation
print(f"Tokenizer: {model_name}")
print(f"Vocabulary size: {tokenizer.vocab_size}")
print(f"EOS token: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
print(f"PAD token: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id})")
print(f"Max length: {tokenizer.model_max_length}")
# Test tokenization
sample_text = df_clean['formatted_text'].iloc[0][:200]
print(f"\nExample tokenization:")
print(f"Input: {sample_text}...")
encoded = tokenizer(sample_text, truncation=True, max_length=50)
print(f"Tokens: {len(encoded['input_ids'])} tokens")
print(f"First 15 token IDs: {encoded['input_ids'][:15]}")

LOADING GPT-2 TOKENIZER
Tokenizer: gpt2
Vocabulary size: 50257
EOS token: <|endoftext|> (ID: 50256)
PAD token: <|endoftext|> (ID: 50256)
Max length: 1024

Example tokenization:
Input: Recipe: Ham Steaks With Jazzed-Up Gravy | Ingredients: ["butter", "brown sugar", "ham steaks", "pepper", "green onion", "mushrooms", "flour", "chicken broth", "coffee"] | Instructions: ["Melt the butt...
Tokens: 50 tokens
First 15 token IDs: [37523, 25, 4345, 2441, 4730, 2080, 21406, 276, 12, 4933, 32599, 88, 930, 33474, 25]


In [None]:
# Cell 13: Tokenize Datasets
print("TOKENIZING DATASETS")
# Define tokenization function
def tokenize_function(examples):
    """
    Tokenize the formatted recipe texts
    Using max_length of 512 tokens for full recipes
    """
    return tokenizer(
        examples['formatted_text'],
        truncation=True,
        max_length=512,
        padding='max_length'
    )
# Apply tokenization
print("Tokenizing training set...")
tokenized_train = dataset_dict['train'].map(
    tokenize_function,
    batched=True,
    remove_columns=['formatted_text'],
    desc="Tokenizing train"
)
print("Tokenizing validation set...")
tokenized_val = dataset_dict['validation'].map(
    tokenize_function,
    batched=True,
    remove_columns=['formatted_text'],
    desc="Tokenizing validation"
)
# Create final tokenized dataset
tokenized_datasets = DatasetDict({
    'train': tokenized_train,
    'validation': tokenized_val
})
print("\nTokenization complete!")
print(tokenized_datasets)

TOKENIZING DATASETS
Tokenizing training set...


Tokenizing train:   0%|          | 0/4000 [00:00<?, ? examples/s]

Tokenizing validation set...


Tokenizing validation:   0%|          | 0/500 [00:00<?, ? examples/s]


Tokenization complete!
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 4000
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 500
    })
})


In [None]:
# Cell 14: Load GPT-2 Model
print("LOADING GPT-2 MODEL")
model = GPT2LMHeadModel.from_pretrained(model_name)
# Resize token embeddings to match tokenizer
model.resize_token_embeddings(len(tokenizer))
# Move model to GPU
model = model.to(device)
print(f"Model: {model_name}")
print(f"Total parameters: {model.num_parameters():,}")
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
print(f"Model device: {next(model.parameters()).device}")


LOADING GPT-2 MODEL
Model: gpt2
Total parameters: 124,439,808
Trainable parameters: 124,439,808
Model device: cuda:0


In [None]:
# Cell 15: Setup Data Collator for Language Modeling
# Data collator for causal language modeling
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # We're doing causal LM, not masked LM
)
print("Data Collator configured:")
print("  Task: Causal Language Modeling")
print("  MLM (Masked Language Modeling): False")
print("  GPT-2 will learn to predict the next token")

Data Collator configured:
  Task: Causal Language Modeling
  MLM (Masked Language Modeling): False
  GPT-2 will learn to predict the next token


In [None]:
# Cell 16: Configure Training Arguments
print("CONFIGURING TRAINING")
training_args = TrainingArguments(
    output_dir='./results',                    # Output directory
    eval_strategy='epoch',                # Evaluate each epoch
    save_strategy='epoch',                      # Save each epoch
    learning_rate=5e-5,                        # Learning rate
    per_device_train_batch_size=4,             # Batch size for training
    per_device_eval_batch_size=4,              # Batch size for evaluation
    num_train_epochs=3,                        # Number of epochs
    weight_decay=0.01,                         # Weight decay
    warmup_steps=100,                          # Warmup steps
    logging_dir='./logs',                      # Logging directory
    logging_steps=50,                          # Log every N steps
    load_best_model_at_end=True,              # Load best model
    save_total_limit=2,                        # Keep only 2 checkpoints
    fp16=torch.cuda.is_available(),            # Mixed precision
    gradient_accumulation_steps=4,             # Accumulate gradients
    report_to='none',                          # Don't report externally
    disable_tqdm=False,                        # Show progress bars
)
print("Training Arguments:")
print(f"  Learning Rate: {training_args.learning_rate}")
print(f"  Train Batch Size: {training_args.per_device_train_batch_size}")
print(f"  Gradient Accumulation: {training_args.gradient_accumulation_steps}")
print(f"  Effective Batch Size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"  Epochs: {training_args.num_train_epochs}")
print(f"  FP16: {training_args.fp16}")

CONFIGURING TRAINING
Training Arguments:
  Learning Rate: 5e-05
  Train Batch Size: 4
  Gradient Accumulation: 4
  Effective Batch Size: 16
  Epochs: 3
  FP16: True


In [None]:
# Cell 17: Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator
)
print("Trainer initialized successfully!")
print(f"Training samples: {len(tokenized_datasets['train'])}")
print(f"Validation samples: {len(tokenized_datasets['validation'])}")
steps_per_epoch = len(tokenized_datasets['train']) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps)
print(f"Steps per epoch: {steps_per_epoch}")


Trainer initialized successfully!
Training samples: 4000
Validation samples: 500
Steps per epoch: 250


In [None]:
# Cell 18: Train the Model
print("STARTING TRAINING")
print(f"Training on {device}")
print()
# Start training
train_result = trainer.train()
print("TRAINING COMPLETED!")
print(f"Training Loss: {train_result.training_loss:.4f}")
print(f"Training Runtime: {train_result.metrics['train_runtime']:.2f} seconds ({train_result.metrics['train_runtime']/60:.2f} minutes)")
print(f"Training Samples/Second: {train_result.metrics['train_samples_per_second']:.2f}")
print(f"Training Steps/Second: {train_result.metrics['train_steps_per_second']:.2f}")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


STARTING TRAINING
Training on cuda
This will take approximately 30-40 minutes on a T4 GPU...



Epoch,Training Loss,Validation Loss
1,2.4057,2.238946
2,2.261,2.172313
3,2.2051,2.155329


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].



TRAINING COMPLETED!
Training Loss: 2.5944
Training Runtime: 892.38 seconds (14.87 minutes)
Training Samples/Second: 13.45
Training Steps/Second: 0.84


In [None]:
# Cell 19: Evaluate on Validation Set
print("EVALUATING ON VALIDATION SET")
eval_results = trainer.evaluate()
print("\nValidation Results:")
for key, value in eval_results.items():
    if isinstance(value, float):
        print(f"  {key}: {value:.4f}")
    else:
        print(f"  {key}: {value}")

EVALUATING ON VALIDATION SET



Validation Results:
  eval_loss: 2.1553
  eval_runtime: 10.4329
  eval_samples_per_second: 47.9250
  eval_steps_per_second: 11.9810
  epoch: 3.0000


In [None]:
# Cell 20: Define Recipe Generation Function
def generate_recipe(prompt, max_length=400, temperature=0.8, top_k=50, top_p=0.95):
    """
    Generate a recipe based on a prompt

    Args:
        prompt (str): Starting text (e.g., "Recipe: Chocolate Cake | Ingredients:")
        max_length (int): Maximum length of generated text
        temperature (float): Controls randomness (0.1-1.5, higher = more creative)
        top_k (int): Limits to top k tokens
        top_p (float): Nucleus sampling parameter

    Returns:
        str: Generated recipe text
    """
    # Encode input
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    # Generate
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1
        )
    # Decode
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text
print("Recipe generation function defined!")
print("Usage: generate_recipe('Recipe: [name] | Ingredients:')")

Recipe generation function defined!
Usage: generate_recipe('Recipe: [name] | Ingredients:')


In [None]:
# Cell 21: Test Recipe Generation with Various Prompts
print("TESTING RECIPE GENERATION")
# Define test prompts
test_prompts = [
    "Recipe: Chocolate Chip Cookies | Ingredients:",
    "Recipe: Chicken Tikka Masala | Ingredients:",
    "Recipe: Vegetarian Pasta | Ingredients:",
    "Recipe: Banana Smoothie | Ingredients:",
    "Recipe: Grilled Salmon | Ingredients:"
]
generated_recipes = []
for i, prompt in enumerate(test_prompts, 1):
    print(f"\n{'='*70}")
    print(f"Test {i}/{len(test_prompts)}")
    print(f"{'='*70}")
    print(f"Prompt: {prompt}")
    print("-"*70)
    # Generate recipe
    recipe = generate_recipe(prompt, max_length=350, temperature=0.8)
    generated_recipes.append({
        'prompt': prompt,
        'generated_recipe': recipe
    })
    print(recipe)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


TESTING RECIPE GENERATION

Test 1/5
Prompt: Recipe: Chocolate Chip Cookies | Ingredients:
----------------------------------------------------------------------
Recipe: Chocolate Chip Cookies | Ingredients: ["baking soda", "water", "butter", "unsalted butter", "eggs", "milk", "salt", "unsalted butter", "vanilla", "milk", "brown sugar", "vanilla", "sugar", "flour", "salt", "egg", "sugar", "powdered sugar", "vanilla", "vanilla extract", "flour", "vanilla", "butter", "butter", "butter", "vanilla", "cream cheese", "sugar", "vanilla", "butter", "butter", "white sugar", "vanilla", "butter", "salt", "vanilla"] | Instructions: ["Preheat oven to 375.", "In a small bowl, combine the baking soda, water, butter, eggs, and milk. Add the butter and vanilla, mix well. Pour in the flour, salt, and sugar. Bring to a boil, then reduce heat to medium. Stir in the butter and whisk until smooth. When the mixture has reduced by half, add in the eggs. Stir in the vanilla. Stir well to coat the eggs. Transfer

In [None]:
# Cell 22: Generate Recipes from Custom Ingredients
print("GENERATING FROM INGREDIENTS")
def generate_from_ingredients(ingredients_list, recipe_name=""):
    """
    Generate a recipe given a list of ingredients

    Args:
        ingredients_list (list): List of ingredients
        recipe_name (str): Optional recipe name

    Returns:
        str: Generated recipe
    """
    ingredients_str = ", ".join(ingredients_list)
    if recipe_name:
        prompt = f"Recipe: {recipe_name} | Ingredients: {ingredients_str} | Instructions:"
    else:
        prompt = f"Recipe: Delicious Dish | Ingredients: {ingredients_str} | Instructions:"

    return generate_recipe(prompt, max_length=400, temperature=0.7)
# Test ingredient-based generation
ingredient_tests = [
    (["chicken breast", "tomatoes", "onions", "garlic", "olive oil"], "Italian Chicken"),
    (["eggs", "milk", "flour", "sugar", "butter"], "Pancakes"),
    (["pasta", "cheese", "cream", "black pepper"], "Cacio e Pepe")
]
print("\nGenerating recipes from ingredients:")
for ingredients, name in ingredient_tests:
    print(f"Ingredients: {', '.join(ingredients)}")
    print(f"Recipe Name: {name}")
    recipe = generate_from_ingredients(ingredients, name)
    print(recipe)
    print()

GENERATING FROM INGREDIENTS

Generating recipes from ingredients:
Ingredients: chicken breast, tomatoes, onions, garlic, olive oil
Recipe Name: Italian Chicken
Recipe: Italian Chicken | Ingredients: chicken breast, tomatoes, onions, garlic, olive oil | Instructions: ["Cut chicken into quarters and bring to a boil. Drain and cool.", "In a large saucepan, heat 1/4 cup of the chicken broth. Add garlic powder, onion powder and 1/2 cup of the water. Bring to a boil. Reduce heat to medium and simmer for 10 minutes. Remove chicken from heat, add 1/2 cup of the water and cook, stirring, until chicken is tender, about 5 minutes. Remove from heat and allow to cool.", "In a large saucepan, heat 1 cup of the chicken broth. Add 1/4 cup of the chicken broth and bring to a boil. Reduce heat to medium and simmer for 10 minutes. Remove from heat and allow to cool.", "In a large saucepan, heat 1 cup of the chicken broth. Add 1/4 cup of the water and bring to a boil. Reduce heat to medium and simmer for 

In [None]:
# Cell 23: Evaluate Using ROUGE Scores on Test Set
print("EVALUATING WITH ROUGE SCORES ON TEST SET")
# Load ROUGE metric
rouge = load('rouge')
def evaluate_with_rouge(num_samples=30):
    """
    Evaluate generated recipes using ROUGE scores
    Compare generated recipes with original recipes from TEST set
    """
    # Sample from TEST set (not validation)
    sample_size = min(num_samples, len(test_df))
    sample_df = test_df.sample(n=sample_size, random_state=42)
    references = []
    predictions = []
    print(f"Evaluating {sample_size} samples from TEST set...")
    for idx, row in sample_df.iterrows():
        original = row['formatted_text']
        # Extract prompt (recipe name and start of ingredients)
        parts = original.split('|')
        if len(parts) >= 2:
            prompt = parts[0] + '|' + parts[1][:50] + '...'
        else:
            prompt = original[:100]

        # Generate recipe
        try:
            generated = generate_recipe(prompt, max_length=400, temperature=0.7)
            references.append(original)
            predictions.append(generated)
        except Exception as e:
            print(f"Error generating for prompt: {prompt[:50]}... - {str(e)}")
            continue
    # Compute ROUGE scores
    if len(predictions) > 0:
        results = rouge.compute(predictions=predictions, references=references)
        return results
    else:
        return None
print("Computing ROUGE scores on test set (this may take a few minutes)...")
rouge_scores = evaluate_with_rouge(num_samples=30)
if rouge_scores:
    print("\nROUGE Scores (Test Set):")
    print("="*70)
    for key, value in rouge_scores.items():
        print(f"  {key}: {value:.4f}")
else:
    print("Could not compute ROUGE scores")

EVALUATING WITH ROUGE SCORES ON TEST SET


Downloading builder script: 0.00B [00:00, ?B/s]

Computing ROUGE scores on test set (this may take a few minutes)...
Evaluating 30 samples from TEST set...

ROUGE Scores (Test Set):
  rouge1: 0.2418
  rouge2: 0.0924
  rougeL: 0.1743
  rougeLsum: 0.1749


In [None]:
# Cell 24: Quality Assessment of Generated Recipes
print("QUALITY ASSESSMENT")
def assess_recipe_quality(recipe_text):
    """
    Automated quality checks for generated recipes
    """
    scores = {}
    # Check for key sections
    scores['has_recipe_name'] = 'recipe:' in recipe_text.lower()
    scores['has_ingredients'] = 'ingredients:' in recipe_text.lower()
    scores['has_instructions'] = 'instructions:' in recipe_text.lower()
    # Check length (reasonable recipe should be substantial)
    word_count = len(recipe_text.split())
    scores['adequate_length'] = word_count > 30
    # Check for cooking verbs
    cooking_verbs = ['mix', 'bake', 'cook', 'heat', 'add', 'stir', 'combine', 'serve', 'pour', 'chop', 'blend']
    scores['has_cooking_verbs'] = any(verb in recipe_text.lower() for verb in cooking_verbs)
    # Check for measurements
    measurements = ['cup', 'tablespoon', 'teaspoon', 'gram', 'ounce', 'pound', 'oz', 'ml', 'tbsp', 'tsp']
    scores['has_measurements'] = any(measure in recipe_text.lower() for measure in measurements)
    # Overall score
    scores['overall_score'] = sum(scores.values()) / len(scores)
    return scores
# Assess all generated recipes
print("Assessing quality of generated recipes...")
quality_results = []
for item in generated_recipes:
    scores = assess_recipe_quality(item['generated_recipe'])
    quality_results.append(scores)
# Calculate average scores
if quality_results:
    avg_scores = {
        key: np.mean([r[key] for r in quality_results])
        for key in quality_results[0].keys()
    }
    print("\nAverage Quality Scores:")
    for key, value in avg_scores.items():
        print(f"  {key}: {value:.2f}")

QUALITY ASSESSMENT
Assessing quality of generated recipes...

Average Quality Scores:
  has_recipe_name: 1.00
  has_ingredients: 1.00
  has_instructions: 1.00
  adequate_length: 1.00
  has_cooking_verbs: 1.00
  has_measurements: 0.20
  overall_score: 0.87


In [None]:
# Cell 25: Save Generated Recipes to CSV
print("SAVING GENERATED RECIPES")
# Create DataFrame with generated recipes
recipes_df = pd.DataFrame(generated_recipes)
# Add quality scores
recipes_df['quality_score'] = [assess_recipe_quality(r['generated_recipe'])['overall_score']
for r in generated_recipes]
# Save to CSV
recipes_df.to_csv('generated_recipes.csv', index=False)
print("Generated recipes saved to 'generated_recipes.csv'")
# Display summary
print(f"\nGenerated {len(recipes_df)} recipes")
print(f"Average quality score: {recipes_df['quality_score'].mean():.2f}")

SAVING GENERATED RECIPES
Generated recipes saved to 'generated_recipes.csv'

Generated 5 recipes
Average quality score: 0.87


In [None]:
# Cell 26: Save the Fine-tuned Model
print("SAVING FINE-TUNED MODEL")
# Create output directory
output_dir = './gpt2_recipe_model'
os.makedirs(output_dir, exist_ok=True)
# Save model and tokenizer
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to: {output_dir}")
print("\nSaved files:")
for file in os.listdir(output_dir):
    print(f"  - {file}")
print("\nTo load this model later:")
print(f"  tokenizer = GPT2Tokenizer.from_pretrained('{output_dir}')")
print(f"  model = GPT2LMHeadModel.from_pretrained('{output_dir}')")

SAVING FINE-TUNED MODEL
Model saved to: ./gpt2_recipe_model

Saved files:
  - config.json
  - model.safetensors
  - merges.txt
  - vocab.json
  - special_tokens_map.json
  - tokenizer_config.json
  - generation_config.json

To load this model later:
  tokenizer = GPT2Tokenizer.from_pretrained('./gpt2_recipe_model')
  model = GPT2LMHeadModel.from_pretrained('./gpt2_recipe_model')


In [None]:
# Cell 27: Create and Save Training Summary
print("CREATING TRAINING SUMMARY")
# Compile all metrics
summary = {
    'task': 'Recipe Generation',
    'model_architecture': 'GPT-2 (Decoder-only)',
    'model_name': model_name,
    'dataset_size': len(df_clean),
    'train_size': len(train_df),
    'validation_size': len(val_df),
    'test_size': len(test_df),
    'split_percentages': {
        'train': f"{len(train_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%",
        'validation': f"{len(val_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%",
        'test': f"{len(test_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%"
    },
    'training_config': {
        'learning_rate': training_args.learning_rate,
        'batch_size': training_args.per_device_train_batch_size,
        'gradient_accumulation': training_args.gradient_accumulation_steps,
        'effective_batch_size': training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps,
        'num_epochs': training_args.num_train_epochs,
        'max_length': 512,
    },
    'training_results': {
        'final_training_loss': float(train_result.training_loss),
        'training_time_seconds': float(train_result.metrics['train_runtime']),
        'training_time_minutes': float(train_result.metrics['train_runtime'] / 60),
    },
    'validation_results': {
        'loss': float(eval_results['eval_loss']),
    },
    'generation_quality': {
        'average_quality_score': float(recipes_df['quality_score'].mean()) if len(recipes_df) > 0 else 0,
        'num_recipes_generated': len(recipes_df),
    }
}
# Add ROUGE scores if available (these are from TEST set)
if rouge_scores:
    summary['test_rouge_scores'] = {k: float(v) for k, v in rouge_scores.items()}
# Save summary as JSON
with open('training_summary.json', 'w') as f:
    json.dump(summary, f, indent=4)
print("Training Summary:")
print(json.dumps(summary, indent=2))
print("\nSummary saved to 'training_summary.json'")

CREATING TRAINING SUMMARY
Training Summary:
{
  "task": "Recipe Generation",
  "model_architecture": "GPT-2 (Decoder-only)",
  "model_name": "gpt2",
  "dataset_size": 5000,
  "train_size": 4000,
  "validation_size": 500,
  "test_size": 500,
  "split_percentages": {
    "train": "80.0%",
    "validation": "10.0%",
    "test": "10.0%"
  },
  "training_config": {
    "learning_rate": 5e-05,
    "batch_size": 4,
    "gradient_accumulation": 4,
    "effective_batch_size": 16,
    "num_epochs": 3,
    "max_length": 512
  },
  "training_results": {
    "final_training_loss": 2.594430430094401,
    "training_time_seconds": 892.376,
    "training_time_minutes": 14.872933333333332
  },
  "validation_results": {
    "loss": 2.1553285121917725
  },
  "generation_quality": {
    "average_quality_score": 0.8666666666666668,
    "num_recipes_generated": 5
  },
  "test_rouge_scores": {
    "rouge1": 0.24180729908162427,
    "rouge2": 0.09237760047589866,
    "rougeL": 0.174345544124981,
    "rougeLsum

In [None]:
# Cell 28: Final Summary and Output Files
print("TASK 02 COMPLETED SUCCESSFULLY! ✓")
print("\n Generated Files:")
print("  1. generated_recipes.csv - Sample generated recipes with quality scores")
print("  2. training_summary.json - Complete training metrics and configuration")
print("  3. ./gpt2_recipe_model/ - Fine-tuned GPT-2 model checkpoint")
print("  4. ./results/ - Training checkpoints and logs")
print("\n Final Metrics:")
print(f"  Training Loss: {train_result.training_loss:.4f}")
print(f"  Validation Loss: {eval_results['eval_loss']:.4f}")
if rouge_scores:
    print(f"\n  Test Set ROUGE Scores:")
    print(f"    ROUGE-1: {rouge_scores.get('rouge1', 0):.4f}")
    print(f"    ROUGE-2: {rouge_scores.get('rouge2', 0):.4f}")
    print(f"    ROUGE-L: {rouge_scores.get('rougeL', 0):.4f}")
print(f"\n  Average Quality Score: {recipes_df['quality_score'].mean():.2f}/1.00")
print("\n Dataset Split:")
print(f"  Train: {len(train_df)} samples ({len(train_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%)")
print(f"  Validation: {len(val_df)} samples ({len(val_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%)")
print(f"  Test: {len(test_df)} samples ({len(test_df)/(len(train_df)+len(val_df)+len(test_df))*100:.1f}%)")
print("\n  Training Time:")
print(f"  Total: {train_result.metrics['train_runtime']:.2f} seconds ({train_result.metrics['train_runtime']/60:.2f} minutes)")

TASK 02 COMPLETED SUCCESSFULLY! ✓

 Generated Files:
  1. generated_recipes.csv - Sample generated recipes with quality scores
  2. training_summary.json - Complete training metrics and configuration
  3. ./gpt2_recipe_model/ - Fine-tuned GPT-2 model checkpoint
  4. ./results/ - Training checkpoints and logs

 Final Metrics:
  Training Loss: 2.5944
  Validation Loss: 2.1553

  Test Set ROUGE Scores:
    ROUGE-1: 0.2418
    ROUGE-2: 0.0924
    ROUGE-L: 0.1743

  Average Quality Score: 0.87/1.00

 Dataset Split:
  Train: 4000 samples (80.0%)
  Validation: 500 samples (10.0%)
  Test: 500 samples (10.0%)

  Training Time:
  Total: 892.38 seconds (14.87 minutes)


In [None]:
# Cell 29: Save Fine-tuned Model and Tokenizer
def save_fine_tuned_model(model, tokenizer, save_directory="./gpt2_recipe_fine_tuned"):
    """
    Save the fine-tuned model and tokenizer to the specified directory
    Args:
        model: The fine-tuned GPT-2 model
        tokenizer: The tokenizer used for training
        save_directory: Directory to save the model and tokenizer
    """
    # Create directory if it doesn't exist
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)
    # Save model and tokenizer
    model.save_pretrained(save_directory)
    tokenizer.save_pretrained(save_directory)
    print(f" Model and tokenizer successfully saved to: {save_directory}")
    print(f" Files saved:")
    print(f"   - {save_directory}/config.json")
    print(f"   - {save_directory}/pytorch_model.bin")
    print(f"   - {save_directory}/tokenizer.json")
    print(f"   - {save_directory}/vocab.json")
    print(f"   - {save_directory}/merges.txt")
    print(f"   - {save_directory}/special_tokens_map.json")
    return save_directory
# Save the fine-tuned model
model_save_path = save_fine_tuned_model(model, tokenizer)

# Additional: Save training configuration
training_config = {
    "model_name": "gpt2",
    "fine_tuned_on": "recipe_dataset",
    "dataset_size": len(df),
    "training_epochs": training_args.num_train_epochs,
    "learning_rate": training_args.learning_rate,
    "batch_size": training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps,
    "max_length": 512,
    "save_directory": model_save_path,
    "training_date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
}
config_save_path = os.path.join(model_save_path, "training_config.json")
with open(config_save_path, 'w') as f:
    json.dump(training_config, f, indent=4)
print(f" Training configuration saved to: {config_save_path}")

 Model and tokenizer successfully saved to: ./gpt2_recipe_fine_tuned
 Files saved:
   - ./gpt2_recipe_fine_tuned/config.json
   - ./gpt2_recipe_fine_tuned/pytorch_model.bin
   - ./gpt2_recipe_fine_tuned/tokenizer.json
   - ./gpt2_recipe_fine_tuned/vocab.json
   - ./gpt2_recipe_fine_tuned/merges.txt
   - ./gpt2_recipe_fine_tuned/special_tokens_map.json
 Training configuration saved to: ./gpt2_recipe_fine_tuned/training_config.json


In [None]:
# Cell 30: Model Loading Function (For Future Use)
def load_fine_tuned_model(model_directory="./gpt2_recipe_fine_tuned"):
    """
    Load the fine-tuned model and tokenizer from the specified directory

    Args:
        model_directory: Directory containing the saved model and tokenizer

    Returns:
        model: Loaded GPT-2 model
        tokenizer: Loaded tokenizer
    """
    print(f"Loading fine-tuned model from: {model_directory}")
    # Check if directory exists
    if not os.path.exists(model_directory):
        raise FileNotFoundError(f"Model directory '{model_directory}' not found!")
    # Load tokenizer and model
    tokenizer = GPT2Tokenizer.from_pretrained(model_directory)
    model = GPT2LMHeadModel.from_pretrained(model_directory)
    # Move model to appropriate device
    model = model.to(device)
    print(" Model and tokenizer loaded successfully!")
    print(f" Model parameters: {model.num_parameters():,}")
    print(f" Using device: {device}")
    return model, tokenizer
# Test loading the saved model
print("TESTING MODEL LOADING FUNCTION")
try:
    # Load the model we just saved
    loaded_model, loaded_tokenizer = load_fine_tuned_model(model_save_path)
    # Test generation with loaded model
    test_prompt = "Recipe: Test Chocolate Cake | Ingredients:"
    print(f"\nTesting generation with loaded model...")
    print(f"Prompt: {test_prompt}")
    # Generate with loaded model
    input_ids = loaded_tokenizer.encode(test_prompt, return_tensors='pt').to(device)
    with torch.no_grad():
        output = loaded_model.generate(
            input_ids,
            max_length=200,
            temperature=0.8,
            do_sample=True,
            pad_token_id=loaded_tokenizer.eos_token_id
        )
    generated_text = loaded_tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Generated: {generated_text[:200]}...")
    print("\n Model loading test successful!")
except Exception as e:
    print(f"Error loading model: {e}")

TESTING MODEL LOADING FUNCTION
Loading fine-tuned model from: ./gpt2_recipe_fine_tuned
 Model and tokenizer loaded successfully!
 Model parameters: 124,439,808
 Using device: cuda

Testing generation with loaded model...
Prompt: Recipe: Test Chocolate Cake | Ingredients:
Generated: Recipe: Test Chocolate Cake | Ingredients: ["chocolate", "butter", "mixed cream", "egg yolks", "ground cinnamon", "salt", "baking powder", "powdered sugar", "vanilla extract", "water", "ground nutmeg"...

 Model loading test successful!


In [None]:
# Cell 31: Export Model to Google Drive
try:
    from google.colab import drive
    # Mount Google Drive
    drive.mount('/content/drive')
    # Create backup directory in Drive
    drive_backup_path = "/content/drive/MyDrive/gpt2_recipe_model"
    if not os.path.exists(drive_backup_path):
        os.makedirs(drive_backup_path)
    # Copy model to Drive
    import shutil
    shutil.copytree(model_save_path, drive_backup_path, dirs_exist_ok=True)
    print(f" Model backed up to Google Drive: {drive_backup_path}")
except ImportError:
    print("  Not running in Google Colab - skipping Drive backup")
except Exception as e:
    print(f"  Could not backup to Google Drive: {e}")
print("Model Saved Successfully")
print("Fine-tuned model is now saved and can be loaded later using:")
print(f"model, tokenizer = load_fine_tuned_model('{model_save_path}')")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
  Could not backup to Google Drive: name 'os' is not defined
Model Saved Successfully
Fine-tuned model is now saved and can be loaded later using:


NameError: name 'model_save_path' is not defined