# Upgrayedd Continuous Optimization

This notebook demonstrates the continuous optimization mode of Upgrayedd, which runs until manually interrupted and provides regular checkpoints and sample outputs.

In [None]:
# Install dependencies
!pip install torch transformers datasets tqdm matplotlib

In [None]:
# Clone the repository if running on Colab
import os
if not os.path.exists('sentinel-ai'):
    !git clone https://github.com/yourusername/sentinel-ai.git
    %cd sentinel-ai
else:
    %cd sentinel-ai
    !git pull

In [None]:
import torch
import logging
import sys

# Add the repository to the path (guarded so that re-running this cell does
# not keep appending duplicate entries)
if '.' not in sys.path:
    sys.path.append('.')

# Configure logging
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() does nothing when the root logger has been
# configured before this cell runs (some notebook environments do that),
# and the level/format below would be silently ignored.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ],
    force=True
)

In [None]:
# Check for GPU
def recommended_models(gpu_name=None):
    """Return suggested model descriptions for the given hardware.

    Args:
        gpu_name: Device name as reported by torch.cuda.get_device_name(),
            or None when no GPU is available.

    Returns:
        A list of "model (size)" strings. A GPU whose name matches none of
        the known families gets a conservative small-model list instead of
        no recommendation at all.
    """
    if gpu_name is None:
        return [
            "distilgpt2 (82M parameters)",
            "distilbert-base-uncased (66M parameters)",
        ]
    if any(tag in gpu_name for tag in ("T4", "K80", "P4")):
        return [
            "distilgpt2 (82M parameters)",
            "facebook/opt-350m (350M parameters)",
            "EleutherAI/pythia-410m (410M parameters)",
        ]
    if any(tag in gpu_name for tag in ("V100", "A100", "P100")):
        return [
            "gpt2 (124M parameters)",
            "facebook/opt-1.3b (1.3B parameters)",
            "EleutherAI/pythia-1.4b (1.4B parameters)",
        ]
    # Unrecognized GPU: fall back to the smallest models listed above.
    return [
        "distilgpt2 (82M parameters)",
        "gpt2 (124M parameters)",
    ]


device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    gpu_name = torch.cuda.get_device_name(0)
    print(f"🚀 {gpu_name} GPU detected!")
else:
    gpu_name = None
    print("⚠️ No GPU detected. Using CPU.")

print("Recommended models:")
for model_description in recommended_models(gpu_name):
    print(f"- {model_description}")

In [None]:
# Configuration
# All tunable settings for the run live in this one dictionary; it is passed
# to transform_model() as-is.
config = dict(
    # Model selection
    model_name="distilgpt2",  # Change this to your preferred model

    # Dataset selection
    dataset="wikitext",  # Options: wikitext, tiny_shakespeare, gutenberg

    # Optimization parameters
    pruning_ratio=0.3,        # Fraction of heads to prune
    growth_ratio=0.1,         # Fraction of pruned heads to regrow
    learning_rate=5e-5,       # Learning rate for fine-tuning
    batch_size=4,             # Training batch size
    gradient_accumulation=2,  # Gradient accumulation steps

    # Training parameters
    epochs_per_cycle=1,       # Training epochs per cycle

    # Checkpointing
    save_frequency=1,         # Save checkpoints every N cycles
    eval_frequency=1,         # Generate samples every N cycles
    compress_model=True,      # Apply compression after training
    compression_type="mask",  # Compression type: mask, remove, distill

    # Controller-plasticity integration
    controller_type="ann",    # Type of controller (ann, rule)

    # Visualization
    visualize=True,           # Create visualizations
    plot_frequency=1,         # Update plots every N cycles

    # Advanced
    use_differential_lr=True,  # Use different learning rates for different heads
)

# Display configuration: one template, filled in from the dictionary above
summary_template = "\n".join([
    "Upgrayedd Configuration:",
    "- Model: {model_name}",
    "- Dataset: {dataset}",
    "- Pruning level: {pruning_ratio}",
    "- Growth ratio: {growth_ratio}",
    "- Controller type: {controller_type}",
    "- Learning rate: {learning_rate}",
    "- Batch size: {batch_size} (gradient accumulation: {gradient_accumulation})",
    "- Compression: {compress_model} (type: {compression_type})",
])
print(summary_template.format(**config))

In [None]:
# Create output directory
import os
from datetime import datetime

# The folder name combines the part of the model name after the last '/'
# with the current time, so each run writes to its own directory.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
model_short_name = config["model_name"].rsplit('/', 1)[-1]
output_dir = "./upgrayedd_output/{}_{}".format(model_short_name, timestamp)

os.makedirs(output_dir, exist_ok=True)
print("Output will be saved to: " + output_dir)

In [None]:
# Import and run Upgrayedd
from upgrayedd.core import transform_model

# Optional: Specify a checkpoint to resume from
resume_checkpoint = None  # Set to a checkpoint path to resume

# Bind `results` up front: this cell is meant to be stopped by hand, and if
# the interrupt arrives before transform_model() returns, later cells still
# need `results` to exist so they can test it.
results = None

# Run the transformation in continuous mode
print("Starting continuous optimization (run this cell and press Stop when you want to interrupt)")
print("The process will save checkpoints regularly and can be resumed later")

try:
    results = transform_model(
        model_name=config["model_name"],
        output_dir=output_dir,
        device=device,
        config=config,
        mode="continuous",  # This is the key setting for continuous mode
        resume_checkpoint=resume_checkpoint,
        verbose=True
    )
except KeyboardInterrupt:
    # Pressing Stop is the documented way to end continuous mode, so report
    # it as a normal outcome instead of leaving a traceback in the output.
    print(f"Optimization interrupted. Look for saved checkpoints in: {output_dir}")

In [None]:
# Visualize results (only needed if interrupted before visualization)
import matplotlib.pyplot as plt
import json
import os
import glob
import re


def _checkpoint_sort_key(path):
    """Sort key that compares embedded numbers numerically.

    Plain string sorting places 'checkpoint-10' before 'checkpoint-2'. Splitting
    on digit runs (kept via the capturing group) yields alternating text/number
    pieces; the odd-indexed pieces are the digit runs and are compared as ints.
    """
    pieces = re.split(r'(\d+)', path)
    return [int(piece) if index % 2 else piece for index, piece in enumerate(pieces)]


# `results` may never have been bound if the run cell was interrupted or
# skipped; treat that the same as "no results".
results = globals().get("results")

# Load results from the latest checkpoint if available
checkpoint_dirs = sorted(glob.glob(f"{output_dir}/checkpoint-*"), key=_checkpoint_sort_key)
if checkpoint_dirs:
    latest_checkpoint = checkpoint_dirs[-1]
    results_path = os.path.join(latest_checkpoint, "results.json")

    if os.path.exists(results_path):
        with open(results_path, "r") as f:
            results = json.load(f)

# An empty "cycles" list is handled like a missing one, so the [0] lookup
# further down cannot fail.
cycle_records = results["cycles"] if results and "cycles" in results else []

# Plot perplexity over cycles
if cycle_records:
    cycles = list(range(1, len(cycle_records) + 1))

    # Extract perplexities
    perplexities = [cycle.get("perplexity", 0) for cycle in cycle_records]

    plt.figure(figsize=(10, 6))
    plt.plot(cycles, perplexities, marker='o')
    # Draw the baseline (and its legend) only when one was recorded; a
    # made-up line at 0 would misrepresent the comparison.
    baseline_perplexity = results.get("baseline_perplexity")
    if baseline_perplexity is not None:
        plt.axhline(y=baseline_perplexity, color='r', linestyle='--', label='Baseline')
        plt.legend()
    plt.title("Perplexity over Optimization Cycles")
    plt.xlabel("Cycle")
    plt.ylabel("Perplexity")
    plt.grid(True)
    plt.savefig(os.path.join(output_dir, "perplexity_plot.png"))
    plt.show()

    # Plot pruned heads percentage
    if "pruned_heads_percent" in cycle_records[0]:
        pruned_percents = [cycle.get("pruned_heads_percent", 0) * 100 for cycle in cycle_records]

        plt.figure(figsize=(10, 6))
        plt.plot(cycles, pruned_percents, marker='o', color='g')
        plt.title("Pruned Heads Percentage over Optimization Cycles")
        plt.xlabel("Cycle")
        plt.ylabel("Pruned Heads (%)")
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, "pruning_plot.png"))
        plt.show()
else:
    print("No cycle results available for visualization")

In [None]:
# Generate text with the optimized model
import glob
import os
import re

from transformers import AutoModelForCausalLM, AutoTokenizer

# Discover checkpoints here instead of relying on the optional visualization
# cell having been run. Digit runs are compared as integers so that
# 'checkpoint-10' sorts after 'checkpoint-2' and [-1] is the newest one.
checkpoint_dirs = sorted(
    glob.glob(f"{output_dir}/checkpoint-*"),
    key=lambda path: [
        int(piece) if index % 2 else piece
        for index, piece in enumerate(re.split(r'(\d+)', path))
    ],
)

# Load the final model if available, otherwise use the latest checkpoint
final_model_path = os.path.join(output_dir, "final_checkpoint")
if not os.path.exists(final_model_path) and checkpoint_dirs:
    final_model_path = checkpoint_dirs[-1]

if os.path.exists(final_model_path):
    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(final_model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(final_model_path)

    # Some tokenizers define no pad token; fall back to the EOS token so that
    # generate() gets an explicit pad id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Define prompts
    prompts = [
        "The future of artificial intelligence is",
        "In recent scientific discoveries,",
        "The most important thing to remember about learning is"
    ]

    # Generate text
    for prompt in prompts:
        print(f"\nPrompt: {prompt}")

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Generate (max_length counts the prompt tokens as well)
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                pad_token_id=pad_token_id,
                max_length=100,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )

        # Decode
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Generated: {generated_text}")
else:
    print("No optimized model available for text generation")