# Make a GPT-2 Model Smaller and More Powerful (v0.0.48)

This notebook demonstrates how to make a GPT-2 model both smaller and more powerful through pruning and fine-tuning.

# Text Generation Prompt (edit the value below to change the text the model is asked to continue)
DEFAULT_PROMPT = "Once upon a time"

Version History:
- v0.0.48 (April 2025): Add customizable text prompt and fix metrics handling
- v0.0.47 (April 2025): Fix data preparation and improve error handling
- v0.0.46 (April 2025): Simplified implementation using modular API components
- v0.0.45 (April 2025): Made notebook self-contained without requiring complex imports

In [None]:
# Install required packages
!pip install -q transformers==4.38.0 datasets==2.17.0 torch matplotlib tqdm

# Import basic libraries
import os
import sys
import torch

# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create output directory
os.makedirs("pruning_results", exist_ok=True)

# Clone repository
!git clone -b feature/implement-adaptive-plasticity https://github.com/CambrianTech/sentinel-ai.git ./sentinel_ai_repo

# Add repo to path
sys.path.append("./sentinel_ai_repo")
print("Repository added to path")

In [ ]:
# Import from repository modules
try:
    # Try to import from modules
    from sentinel_ai_repo.utils.pruning.experiment_runner import run_experiment, ExperimentConfig
    from sentinel_ai_repo.utils.pruning.text_generator import generate_text, interactive_generate
    print("Successfully imported from utils.pruning modules")
except ImportError:
    # Fallback to minimal implementation
    from transformers import AutoModelForCausalLM, AutoTokenizer
    
    # Minimal experiment config
    class ExperimentConfig:
        """Settings holder used by the minimal fallback experiment runner.

        Every constructor argument is stored unchanged as an attribute of the
        same name. The one exception is ``device``: a falsy value (such as the
        default ``None``) is replaced by CUDA when PyTorch reports it as
        available, and by the CPU otherwise.
        """

        def __init__(self, model_name="distilgpt2", pruning_percent=0.3, num_epochs=3,
                     batch_size=4, device=None, output_dir="pruning_results", prompt="Once upon a time"):
            # Pick a device automatically only when the caller did not supply one
            if not device:
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            # Model selection and pruning/training knobs
            self.model_name = model_name
            self.pruning_percent = pruning_percent
            self.num_epochs = num_epochs
            self.batch_size = batch_size

            # Runtime placement, output location and generation prompt
            self.device = device
            self.output_dir = output_dir
            self.prompt = prompt
    
    # Minimal experiment runner
    def run_experiment(config):
        """Load the configured model/tokenizer and return them with a placeholder summary.

        This is the fallback used when the repository's experiment runner cannot
        be imported. It performs NO pruning and NO fine-tuning: the model is
        returned exactly as loaded, and every metric in the summary is a zero
        placeholder that only preserves the keys of the result dictionary.

        As a side effect, ``generate_text`` and ``interactive_generate`` are
        published into the module globals so later notebook cells can call them
        the same way they would call the imported repository versions.

        Args:
            config: Object exposing ``model_name`` (passed to ``from_pretrained``),
                ``device`` (where the model is moved) and ``prompt`` (default
                prompt for ``interactive_generate``).

        Returns:
            tuple: ``(model, tokenizer, summary)``.
        """
        # Load model with caching enabled for better performance
        model = AutoModelForCausalLM.from_pretrained(config.model_name, use_cache=True).to(config.device)
        tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        # Reuse EOS as the padding token when the tokenizer does not define one
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        def generate_text(model, tokenizer, prompt, max_length=100):
            """Sample a continuation of ``prompt`` and return the decoded text.

            ``max_length`` is forwarded to ``model.generate`` unchanged.
            """
            # Calling the tokenizer (instead of ``encode``) also yields the
            # attention mask, so ``generate`` does not have to guess it.
            encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
            output = model.generate(
                **encoded,
                max_length=max_length,
                do_sample=True,
                # Pass the pad token explicitly instead of relying on the
                # implicit fallback inside ``generate``.
                pad_token_id=tokenizer.pad_token_id,
            )
            return tokenizer.decode(output[0], skip_special_tokens=True)

        def interactive_generate(model, tokenizer, prompt=None, max_length=100):
            """Generate text, print it prefixed with ``Generated:`` and return it."""
            if prompt is None:
                prompt = config.prompt  # Use the prompt from config if not specified
            text = generate_text(model, tokenizer, prompt, max_length)
            print(f"Generated: {text}")
            return text

        # Publish the helpers at module level; they only exist after this
        # function has been called at least once.
        globals()["generate_text"] = generate_text
        globals()["interactive_generate"] = interactive_generate

        # Placeholder summary: nothing is measured on this fallback path, so the
        # zeros below are NOT real perplexities or improvements.
        summary = {
            "baseline": {"perplexity": 0},
            "pruned": {"perplexity": 0},
            "finetuned": {"perplexity": 0},
            "improvement": {"overall_percent": 0},
            "pruned_heads": 0
        }

        return model, tokenizer, summary
    
    print("Using minimal implementation")

In [ ]:
# Experiment settings -- edit these values to change the run.
# Note: the minimal fallback runner only reads the model name, device and
# prompt; the pruning/training values are stored on the config but unused there.
MODEL_NAME = "distilgpt2"
PRUNING_PERCENT = 0.3
NUM_EPOCHS = 3
BATCH_SIZE = 4

# Bundle the settings, the device chosen during setup and the notebook-wide
# default prompt into a single configuration object
config = ExperimentConfig(
    model_name=MODEL_NAME,
    device=device,
    prompt=DEFAULT_PROMPT,
    pruning_percent=PRUNING_PERCENT,
    num_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
)

# Execute the experiment; the returned model and tokenizer are reused by the
# text-generation cell below
print("Running experiment...")
model, tokenizer, summary = run_experiment(config)
print("Experiment completed")

In [ ]:
# Generate text with the model and tokenizer returned by run_experiment.
# `prompt` is its own variable so this cell can be edited and re-run with a
# different string without touching DEFAULT_PROMPT.
prompt = DEFAULT_PROMPT  # Use the default prompt defined at the top
# The minimal fallback implementation prints "Generated: <text>" and returns the
# text. NOTE(review): the repository's interactive_generate may behave
# differently -- confirm against utils.pruning.text_generator.
interactive_generate(model, tokenizer, prompt)