In [None]:
# benchmark_vs_baselines.ipynb

# -------------------------------
# 1. Setup and Imports
# -------------------------------
!pip install transformers datasets evaluate accelerate -q

import torch
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, 
    GPT2LMHeadModel, GPT2TokenizerFast
)
import time
import evaluate

# Choose the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# -------------------------------
# 2. Load Models
# -------------------------------

# Baseline: the pretrained "gpt2" checkpoint and its matching fast tokenizer.
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_model = gpt2_model.to(device)

# Custom model: a small, randomly initialised transformer defined below (swap in MiniLM or your own model here)
from torch import nn

class CustomMiniTransformer(nn.Module):
    """Small transformer stack that maps token ids to vocabulary logits.

    Pipeline: token embedding -> ``depth`` ``nn.TransformerEncoderLayer``
    blocks -> ``LayerNorm`` -> linear projection to ``vocab_size`` logits.

    The layers are built with ``batch_first=True``, so batched input is
    expected as ``(batch, seq)`` token ids and the output is
    ``(batch, seq, vocab_size)``.

    NOTE: only token embeddings are used; no positional signal is added to
    the input, so the model itself has no notion of token order.

    Args:
        vocab_size: Number of rows in the embedding table and size of the
            output logit dimension (default matches the GPT-2 tokenizer).
        dim: Embedding / model width.
        depth: Number of encoder layers.
        heads: Attention heads per layer (``dim`` must be divisible by it).
        ff_dim: Hidden width of each layer's feed-forward sub-block.
    """

    def __init__(self, vocab_size=50257, dim=256, depth=4, heads=4, ff_dim=1024):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dim)
        # Kept as nn.Sequential so the submodule names (blocks.0, blocks.1, ...)
        # and therefore the state_dict keys do not change. forward() iterates
        # over it manually because Sequential cannot pass an attention mask.
        self.blocks = nn.Sequential(*[
            nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=ff_dim, batch_first=True)
            for _ in range(depth)
        ])
        self.ln = nn.LayerNorm(dim)
        self.head = nn.Linear(dim, vocab_size)

    def forward(self, x, causal=False):
        """Return logits for every position of ``x``.

        Args:
            x: Integer tensor of token ids.
            causal: When ``True``, each position may only attend to itself and
                earlier positions (what autoregressive language modelling
                needs). The default ``False`` keeps the original unmasked,
                bidirectional behaviour.

        Returns:
            Float tensor of logits with a trailing dimension of ``vocab_size``.
        """
        x = self.embed(x)

        mask = None
        if causal:
            # After embedding the sequence axis is second-to-last for both
            # batched (batch, seq, dim) and unbatched (seq, dim) input.
            seq_len = x.size(-2)
            # Boolean mask: True marks key positions a query must NOT see,
            # i.e. everything strictly above the diagonal (the future).
            mask = torch.triu(
                torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device),
                diagonal=1,
            )

        for block in self.blocks:
            x = block(x, src_mask=mask)

        x = self.ln(x)
        return self.head(x)

# Build the custom model with its default hyperparameters (no weights are
# loaded, so parameters are freshly initialised) and move it to the device.
custom_model = CustomMiniTransformer()
custom_model = custom_model.to(device)
print("✅ Models loaded.")

# -------------------------------
# 3. Inference Benchmark
# -------------------------------
def benchmark_model(model, tokenizer, prompt, max_new_tokens=50):
    """Time a single ``model.generate`` call on ``prompt``.

    Args:
        model: Object providing ``eval()`` and
            ``generate(**inputs, max_new_tokens=...)``.
        tokenizer: Callable that tokenizes ``prompt`` with
            ``return_tensors="pt"`` (the result must support ``.to``) and
            provides ``decode``.
        prompt: Text to continue.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``generate``.

    Returns:
        Tuple ``(decoded_text, elapsed_seconds)`` where ``decoded_text`` is the
        first returned sequence decoded with special tokens skipped.
    """
    model.eval()

    # Send the inputs to wherever the model actually lives when it reports a
    # device; otherwise fall back to the notebook-wide ``device``.
    target = getattr(model, "device", device)
    inputs = tokenizer(prompt, return_tensors="pt").to(target)

    # CUDA kernels are launched asynchronously, so pending GPU work must be
    # drained before starting and before stopping the clock.
    on_cuda = torch.cuda.is_available() and torch.device(target).type == "cuda"

    with torch.no_grad():
        if on_cuda:
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
        if on_cuda:
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded, elapsed

prompt = "The rise of artificial general intelligence will"
print("🧪 Running GPT-2...")
gpt2_out, gpt2_time = benchmark_model(gpt2_model, gpt2_tokenizer, prompt)

print("🧪 Running Custom Transformer...")
tokenizer = gpt2_tokenizer  # Reuse GPT-2 tokenizer
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

# A freshly built nn.Module is in training mode, which keeps the encoder
# layers' dropout active. Switch to eval mode before timing inference.
custom_model.eval()

# CUDA work is asynchronous: drain the queue before reading the clock.
_sync_cuda = torch.cuda.is_available() and torch.device(device).type == "cuda"
with torch.no_grad():
    if _sync_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    logits = custom_model(inputs)
    if _sync_cuda:
        torch.cuda.synchronize()
    end = time.perf_counter()

# ``start`` / ``end`` stay defined at module level because the summary table
# further down computes ``end - start`` itself.
custom_time = end - start

# NOTE: the two numbers are not like-for-like. The GPT-2 figure covers a full
# ``generate`` call (up to 50 new tokens); the custom figure covers a single
# forward pass over the prompt.
print(f"📉 GPT-2 Time: {gpt2_time:.3f}s")
print(f"📉 Custom Model Time: {custom_time:.3f}s")

# -------------------------------
# 4. Evaluation (Perplexity)
# -------------------------------
ppl_eval = evaluate.load("perplexity", module_type="metric")

def calc_ppl(model, tokenizer, texts=None):
    """Compute perplexity of a HuggingFace causal LM with the ``evaluate`` metric.

    The metric receives a model *identifier* via ``model_id`` and is given the
    texts to score via ``predictions``.

    Args:
        model: Model id string (e.g. ``"gpt2"``), or a model object exposing a
            non-empty ``name_or_path`` attribute from which the id is taken.
        tokenizer: Unused; kept so existing call sites keep working.
        texts: A string or an iterable of strings to score. When omitted, a
            tiny built-in sample is used. That is enough for a smoke test
            only; pass a real held-out corpus for meaningful numbers.

    Returns:
        The metric's result dict. If it contains ``"mean_perplexity"`` but no
        ``"perplexity"`` key, the mean is also stored under ``"perplexity"``,
        which is the key the summary table in this notebook reads.

    Raises:
        TypeError: If no model id can be derived from ``model``.
        ValueError: If there is no non-blank text to score.
    """
    # The id is all the metric needs, so a plain string is accepted as-is
    # (calling .eval() on it, as before, would raise AttributeError).
    if isinstance(model, str):
        model_id = model
    else:
        model_id = getattr(model, "name_or_path", None)
        if not model_id:
            raise TypeError(
                "calc_ppl needs a model id string or a model with a non-empty "
                "`name_or_path` attribute"
            )

    if texts is None:
        texts = [
            "The rise of artificial general intelligence will",
            "The quick brown fox jumps over the lazy dog.",
        ]
    elif isinstance(texts, str):
        texts = [texts]

    # Blank entries contribute no tokens to score, so drop them up front.
    predictions = [text for text in texts if text and text.strip()]
    if not predictions:
        raise ValueError("calc_ppl needs at least one non-empty text to score")

    result = ppl_eval.compute(
        predictions=predictions,
        model_id=model_id,
        add_start_token=True,
        batch_size=4,
    )

    if "perplexity" not in result and "mean_perplexity" in result:
        result["perplexity"] = result["mean_perplexity"]
    return result

# calc_ppl forwards its first argument to the metric as `model_id`, so the
# model id string "gpt2" is passed here rather than the in-memory gpt2_model.
# This works only with HuggingFace models.
gpt2_ppl = calc_ppl("gpt2", gpt2_tokenizer)
print("🧠 GPT-2 Perplexity:", gpt2_ppl)

# -------------------------------
# 5. Summary Table
# -------------------------------
import pandas as pd

# The `evaluate` perplexity metric reports its aggregate under
# "mean_perplexity". Read either key so a missing "perplexity" entry does not
# raise KeyError (the cell shows None if neither is present).
gpt2_perplexity = gpt2_ppl.get("perplexity", gpt2_ppl.get("mean_perplexity"))

# One row per model. The custom model has no perplexity because the metric
# only works with HuggingFace models.
results = pd.DataFrame([
    {"Model": "GPT-2", "Time (s)": gpt2_time, "Perplexity": gpt2_perplexity},
    {"Model": "Custom", "Time (s)": end - start, "Perplexity": "N/A"},
])

# Bare expression so the notebook renders the table as the cell output.
results
