In [None]:
# Report the installed PyTorch build and whether this runtime exposes a CUDA GPU.
import torch

cuda_ready = torch.cuda.is_available()
print('Torch:', torch.__version__)
print('CUDA available:', cuda_ready)
if not cuda_ready:
    # No CUDA device visible: point the user at the Colab runtime setting.
    print('Enable GPU in Runtime > Change runtime type')
else:
    # Device index 0 is the first visible CUDA device.
    print('GPU name:', torch.cuda.get_device_name(0))


In [None]:
# Option A: Mount Google Drive and use existing project in Drive (edit the path)
# Run only ONE of the Option A / B / C cells; each leaves the kernel inside the project's Q2 folder.
from google.colab import drive
import os

# Mount Drive under /content/drive; force_remount=True remounts even if it is already mounted.
drive.mount('/content/drive', force_remount=True)

# PROJECT_DIR is a placeholder and must be edited to the real location of the project in Drive.
PROJECT_DIR = '/content/drive/MyDrive/your_project_path/genai_A1'  # <-- EDIT THIS
Q2_DIR = os.path.join(PROJECT_DIR, 'Q2')

# `%cd` changes the kernel's working directory; `$Q2_DIR` is expanded by IPython from the Python variable.
%cd $Q2_DIR
# PYTHONPATH is inherited by child processes started later with `!python ...`;
# assigning it here does not modify sys.path of the already-running kernel.
os.environ['PYTHONPATH'] = Q2_DIR
# Sanity check: print the current directory and list its contents.
!pwd && ls -la


In [None]:
# Option B: Clone from GitHub (replace with your repo URL)
# Run only ONE of the Option A / B / C cells; each leaves the kernel inside the project's Q2 folder.
import os

# Clone into /content/genai_A1. The URL below is a placeholder and must be replaced.
%cd /content
!git clone https://github.com/your/repo.git genai_A1
# Switch the kernel's working directory to the Q2 sub-folder of the fresh clone.
%cd /content/genai_A1/Q2
# PYTHONPATH is inherited by child processes started later with `!python ...`;
# assigning it here does not modify sys.path of the already-running kernel.
os.environ['PYTHONPATH'] = '/content/genai_A1/Q2'
# Sanity check: print the current directory and list its contents.
!pwd && ls -la


In [None]:
# Option C: Upload a ZIP and unzip to /content
from google.colab import files
import os

uploaded = files.upload()  # upload your project zip
zip_name = list(uploaded.keys())[0]
!unzip -o "$zip_name" -d /content
%cd /content/genai_A1/Q2
os.environ['PYTHONPATH'] = '/content/genai_A1/Q2'
!pwd && ls -la


In [None]:
# Install dependencies
%pip install -U pip
%pip install -r /content/genai_A1/requirements.txt

# Optional: Speed up HF datasets cache
import os
os.environ['HF_DATASETS_CACHE'] = '/content/hf_cache'
!mkdir -p /content/hf_cache


In [None]:
# Ensure we are in Q2 dir and outputs exists
import os, pathlib
Q2_DIR = os.getcwd()
print('Working dir:', Q2_DIR)
os.makedirs('outputs', exist_ok=True)
!ls -la


In [None]:
# 1) Run Ablation Study (adjust epochs for speed/quality)
# Runs the project's `src.ablation_study` module in a child Python process. `-m` resolves
# `src` relative to the current working directory / PYTHONPATH, so the kernel must be in
# the Q2 folder. The module is given `outputs` as its --outdir and 5 as its --epochs value.
!python -m src.ablation_study --outdir outputs --epochs 5

# Show ablation outputs
# `sed -n '1,120p'` caps the directory listing at its first 120 lines.
!ls -la outputs | sed -n '1,120p'


In [None]:
# 2) Train with Optimal Hyperparameters and compare with Baseline
# Runs the project's `src.train_optimal` module in a child Python process from the Q2 folder.
# NOTE(review): outputs/best_ablation_configs.json is presumably written by the ablation
# step above - confirm, and run that cell first.
!python -m src.train_optimal --outdir outputs --epochs 15 --best_config_file outputs/best_ablation_configs.json

# Preview comparison results
# Reads the comparison CSV from outputs/; the bare `cmp` on the last line makes the
# DataFrame the cell's displayed output.
import pandas as pd
cmp = pd.read_csv('outputs/model_comparison.csv')
cmp


In [None]:
# 3) Evaluate Optimal Model and Generate Text Samples
!python -m src.evaluate --model_path outputs/optimal_model.pt --outdir outputs --num_samples 10 --max_length 100

# Show metrics
import pandas as pd
metrics = pd.read_csv('outputs/evaluation_metrics.csv')
print("Evaluation Metrics:")
print(metrics.to_string(index=False))

# Show generated texts
print("\nGenerated Text Samples:")
with open('outputs/generated_texts.txt', 'r') as f:
    print(f.read())


In [None]:
# 4) Display Results and Visualizations
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd

# (heading, image path) pairs, shown in this order; each heading is printed
# immediately before its figure is rendered inline.
for heading, png_path in (
    ('Ablation Study Results:', 'outputs/ablation_study_results.png'),
    ('\nOptimal vs Baseline Comparison:', 'outputs/optimal_vs_baseline.png'),
    ('\nEvaluation Results:', 'outputs/evaluation_results.png'),
):
    print(heading)
    display(Image(png_path))

# Show training history
# Only the last rows of the history CSV are printed (DataFrame.tail()).
history = pd.read_csv('outputs/training_history.csv')
print("\nTraining History:", history.tail(), sep="\n")


In [None]:
# 5) Interactive Text Generation
import torch
from src.model import ShakespeareRNN
from src.utils import generate_text

# Load the optimal model checkpoint onto the CPU.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True; if
# vocab_info / model_info hold non-primitive objects this call may need adjusting - confirm.
checkpoint = torch.load('outputs/optimal_model.pt', map_location='cpu')
vocab_info, model_info = checkpoint['vocab_info'], checkpoint['model_info']

# Rebuild the network from the hyperparameters stored in the checkpoint.
# Dropout is fixed at 0.0 here rather than read from model_info.
arch_keys = ('vocab_size', 'embedding_dim', 'hidden_size', 'num_layers', 'rnn_type')
model = ShakespeareRNN(dropout=0.0, **{key: model_info[key] for key in arch_keys})

# Restore the trained weights and switch the module to evaluation mode.
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Interactive generation function
def generate_interactive(seed_text, max_length=50, temperature=1.0):
    """Generate text from ``seed_text`` using the model loaded in this notebook.

    Thin wrapper around ``generate_text`` that supplies the notebook-level
    ``model`` and ``vocab_info`` and always requests the CPU device.

    Args:
        seed_text: Prompt forwarded to ``generate_text`` as ``seed_text``.
        max_length: Forwarded to ``generate_text`` as ``max_length`` (default 50).
        temperature: Forwarded to ``generate_text`` as ``temperature`` (default 1.0).

    Returns:
        Whatever ``generate_text`` returns for these arguments.
    """
    cpu = torch.device('cpu')
    return generate_text(
        model=model,
        vocab_info=vocab_info,
        seed_text=seed_text,
        max_length=max_length,
        temperature=temperature,
        device=cpu,
    )

# Seed prompts used for the demo generations below
seeds = [
    "To be or not to",
    "Once upon a time",
    "The quick brown fox",
    "In the beginning",
    "All the world's a",
]

# Banner: title line followed by a 50-character rule
print("Interactive Text Generation Examples:", "=" * 50, sep="\n")

# For each seed: blank line, 1-based example number, the seed, then the generated text
for i, seed in enumerate(seeds):
    generated = generate_interactive(seed, max_length=60, temperature=0.8)
    print(f"\nExample {i+1}:", f"Seed: '{seed}'", f"Generated: '{generated}'", sep="\n")

# You can also try your own seeds:
# custom_seed = "Your custom seed here"
# custom_generated = generate_interactive(custom_seed, max_length=100, temperature=1.0)
# print(f"\nCustom Generation:")
# print(f"Seed: '{custom_seed}'")
# print(f"Generated: '{custom_generated}'")


In [None]:
# 6) Zip and Download all outputs
# Recursively archive the outputs/ folder (relative to the current working directory)
# into /content/Q2_outputs.zip.
!zip -r /content/Q2_outputs.zip outputs
# Hand the archive to Colab's download helper.
from google.colab import files
files.download('/content/Q2_outputs.zip')
