# Pruning and Healing Results Analysis

This notebook visualizes training and fine-tuning information for the models produced by the two pruning approaches (iterative pruning and window pruning) and compares their quality metrics, training curves, and compute/time costs.

In [None]:
import os
import torch

# Device selection
DEVICE_ID = 0  # Change this to select GPU device (0, 1, 2, etc.) or -1 for CPU

# NOTE: CUDA_VISIBLE_DEVICES is intentionally not modified here. Masking the
# visible devices down to a single GPU renumbers that GPU to index 0, so
# combining such a mask with `cuda:{DEVICE_ID}` would address a non-existent
# device for any DEVICE_ID > 0. The explicit device index alone selects the GPU.
if torch.cuda.is_available() and 0 <= DEVICE_ID < torch.cuda.device_count():
    device = f'cuda:{DEVICE_ID}'
    print(f'Using GPU device {DEVICE_ID}: {torch.cuda.get_device_name(DEVICE_ID)}')
else:
    if DEVICE_ID >= 0:
        # A GPU was requested but cannot be used; say so instead of failing
        # later with an invalid device ordinal.
        print(f'GPU device {DEVICE_ID} is not available; falling back to CPU')
    device = 'cpu'
    print('Using CPU')

import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../..')

from src.pruninghealing.logger import Logger
from src.pruninghealing.utils import calculate_perplexity, load_model_and_tokenizer

## Load Experiment Results

In [None]:
# Gather the saved outputs of the two pruning experiments
workspace_dir = "../../workspace"

# Iterative pruning: log records retrieved through the project Logger
iterative_logger = Logger(workspace_dir)
iterative_logs = iterative_logger.load_logs()

# Window pruning: results stored as a JSON file inside the workspace
window_results_file = f"{workspace_dir}/unimportant_layers.json"
with open(window_results_file, 'r') as results_fh:
    window_results = json.load(results_fh)

# Short report of what was loaded
loaded_step_count = len(iterative_logs)
print(f"Loaded {loaded_step_count} iterative pruning steps")
print(f"Window pruning results: {window_results}")

## Perplexity Comparison

In [None]:
# Extract perplexity data for iterative approach.
# Only log records that carry a "step" key are plotted.
def _ppl_or_nan(value):
    """Return *value* unchanged, or NaN when the perplexity is missing (None)."""
    return float('nan') if value is None else value


iterative_steps = []
iterative_pre_ppl = []
iterative_post_ppl = []

for log in iterative_logs:
    if "step" in log:
        iterative_steps.append(log["step"])
        # A missing value becomes NaN rather than 0: matplotlib leaves a gap
        # for NaN, whereas 0 would be drawn as a real (and impossible)
        # perplexity measurement.
        iterative_pre_ppl.append(_ppl_or_nan(log.get("pre_train_perplexity")))
        iterative_post_ppl.append(_ppl_or_nan(log.get("post_train_perplexity")))

# Plot perplexity comparison (two panels side by side; a 2x2 grid would leave
# the bottom half of the figure empty)
plt.figure(figsize=(12, 4))

# Iterative approach
plt.subplot(1, 2, 1)
plt.plot(iterative_steps, iterative_pre_ppl, 'r-o', label='Pre-training', alpha=0.7)
plt.plot(iterative_steps, iterative_post_ppl, 'b-o', label='Post-training', alpha=0.7)
plt.xlabel('Pruning Step')
plt.ylabel('Perplexity')
plt.title('Iterative Pruning: Perplexity Over Steps')
plt.legend()
plt.grid(True, alpha=0.3)

# Window approach baseline
# NOTE(review): no measured post-pruning perplexity is read from
# `window_results` here; the second bar is a fixed 1.2x multiple of the
# baseline. The label says so explicitly so the bar is not mistaken for a
# measured result. Replace it with the real value once it is available.
plt.subplot(1, 2, 2)
baseline_ppl = window_results.get('baseline_perplexity', 0)
plt.bar(['Baseline', 'After Window Pruning\n(placeholder: 1.2x baseline)'],
        [baseline_ppl, baseline_ppl * 1.2],
        color=['blue', 'red'], alpha=0.7)
plt.ylabel('Perplexity')
plt.title('Window Pruning: Before/After Comparison')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Model Quality Metrics

In [None]:
# Calculate quality metrics
def calculate_quality_metrics(model_path, tokenizer_path=None):
    """Calculate quality and size metrics for the model stored at *model_path*.

    Args:
        model_path: Location handed to ``load_model_and_tokenizer``.
        tokenizer_path: Currently unused; the tokenizer is obtained together
            with the model from ``model_path``. Kept so existing callers that
            pass it keep working.

    Returns:
        A dict with the keys ``'perplexity'``, ``'num_parameters'``,
        ``'num_layers'`` and ``'model_size_mb'``, or ``None`` when loading or
        evaluating the model raised (the error is printed).
    """
    try:
        model, tokenizer = load_model_and_tokenizer(model_path)

        # Walk the parameters once, accumulating both the element count and
        # the storage footprint in bytes.
        num_parameters = 0
        size_bytes = 0
        for param in model.parameters():
            count = param.numel()
            num_parameters += count
            size_bytes += count * param.element_size()

        # Layers are looked up at ``model.model.layers``. Report 0 when either
        # attribute is absent instead of failing and discarding every metric.
        layers = getattr(getattr(model, 'model', None), 'layers', None)
        num_layers = len(layers) if layers is not None else 0

        return {
            'perplexity': calculate_perplexity(model, tokenizer),
            'num_parameters': num_parameters,
            'num_layers': num_layers,
            'model_size_mb': size_bytes / (1024**2),
        }
    except Exception as e:
        # Best-effort by design: one broken checkpoint must not abort the
        # whole comparison, so report the problem and signal it with None.
        print(f"Error calculating metrics for {model_path}: {e}")
        return None

# Compare final models
models_to_compare = {
    'Original': 'path/to/original/model',  # Update with actual path
    'Iterative Pruned': f'{workspace_dir}/model_step_5',
    'Window Pruned': f'{workspace_dir}/window_pruned_model',
    'Window Pruned + Finetuned': f'{workspace_dir}/window_pruned_finetuned'
}

comparison_data = []
for name, path in models_to_compare.items():
    if not Path(path).exists():
        # Report the omission; otherwise a missing checkpoint (or the
        # placeholder path above) silently yields an empty or partial table.
        print(f"Skipping '{name}': path not found ({path})")
        continue
    metrics = calculate_quality_metrics(path)
    if metrics:
        # Tag the metrics with the display name used by the tables and plots.
        metrics['model_name'] = name
        comparison_data.append(metrics)

# Create comparison DataFrame (one row per model that was found and evaluated)
df_comparison = pd.DataFrame(comparison_data)
print("Model Comparison:")
print(df_comparison)

## Training Curves Visualization

In [None]:
# Load training logs from tensorboard logs if available
import glob
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

def load_tensorboard_data(log_dir):
    """Load the ``'train/loss'`` scalar series from TensorBoard logs in *log_dir*.

    Args:
        log_dir: Directory expected to contain ``events.out.tfevents.*`` files.

    Returns:
        ``{'steps': [...], 'losses': [...]}`` with one entry per logged scalar,
        or ``None`` when the directory holds no event files or the
        ``'train/loss'`` tag was never logged.
    """
    # Nothing to read: avoid building an accumulator for an empty directory.
    if not glob.glob(f"{log_dir}/events.out.tfevents.*"):
        return None

    ea = EventAccumulator(log_dir)
    ea.Reload()

    # Extract training loss
    try:
        loss_data = ea.Scalars('train/loss')
    except KeyError:
        # Scalars raises KeyError when the tag is absent. Only that case is
        # treated as "no data"; any other failure is a real error and
        # propagates instead of being hidden.
        return None

    return {
        'steps': [event.step for event in loss_data],
        'losses': [event.value for event in loss_data],
    }

# Plot training curves
plt.figure(figsize=(15, 5))

# Training loss curves
plt.subplot(1, 3, 1)
# glob returns matches in arbitrary order; sort them so "Training Run N"
# refers to the same directory on every execution.
log_dirs = sorted(glob.glob(f"{workspace_dir}/logs/*/"))
plotted_runs = 0
for i, log_dir in enumerate(log_dirs[:3]):  # Show first 3 training runs
    data = load_tensorboard_data(log_dir)
    if data:
        plt.plot(data['steps'], data['losses'], label=f'Training Run {i+1}', alpha=0.7)
        plotted_runs += 1

plt.xlabel('Training Steps')
plt.ylabel('Loss')
plt.title('Training Loss Curves')
if plotted_runs:
    # A legend without labelled curves only triggers a matplotlib warning.
    plt.legend()
plt.grid(True, alpha=0.3)

# Model size reduction
plt.subplot(1, 3, 2)
if comparison_data:
    names = [d['model_name'] for d in comparison_data]
    sizes = [d['model_size_mb'] for d in comparison_data]
    plt.bar(names, sizes, alpha=0.7)
    plt.ylabel('Model Size (MB)')
    plt.title('Model Size Comparison')
    plt.xticks(rotation=45)

# Parameter count reduction
plt.subplot(1, 3, 3)
if comparison_data:
    names = [d['model_name'] for d in comparison_data]
    params = [d['num_parameters'] / 1e6 for d in comparison_data]  # Convert to millions
    plt.bar(names, params, alpha=0.7, color='green')
    plt.ylabel('Parameters (Millions)')
    plt.title('Parameter Count Comparison')
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

## Compute and Time Cost Analysis

In [None]:
# Analyze compute costs
def estimate_compute_cost(model_metrics, training_steps=500):
    """Return a relative compute-cost figure for one model.

    The figure is ``model_metrics['num_parameters']`` multiplied by
    ``training_steps`` and then divided by 1e9 to keep the number small.
    """
    parameter_count = model_metrics['num_parameters']
    return (parameter_count * training_steps) / 1e9

# Calculate costs for each approach
cost_analysis = []
for data in comparison_data:
    cost = estimate_compute_cost(data)
    cost_analysis.append({
        'model': data['model_name'],
        'compute_cost': cost,
        'perplexity': data['perplexity'],
        # NOTE(review): dividing by cost means that, at equal perplexity, the
        # more expensive model gets the lower ("better") score -- confirm this
        # is the intended definition of efficiency.
        'efficiency': data['perplexity'] / cost  # Lower is better
    })

# Name the columns explicitly so the frame has them even when no model was
# evaluated; a column-less frame makes every df_costs[...] lookup raise KeyError.
df_costs = pd.DataFrame(
    cost_analysis,
    columns=['model', 'compute_cost', 'perplexity', 'efficiency'],
)

if df_costs.empty:
    print("No evaluated models available; skipping cost/performance plots.")
else:
    # Visualize cost vs performance trade-off
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.scatter(df_costs['compute_cost'], df_costs['perplexity'],
                s=100, alpha=0.7, c=range(len(df_costs)), cmap='viridis')
    # Label every point with its model name, offset slightly from the marker.
    for _, row in df_costs.iterrows():
        plt.annotate(row['model'], (row['compute_cost'], row['perplexity']),
                     xytext=(5, 5), textcoords='offset points', fontsize=8)
    plt.xlabel('Relative Compute Cost')
    plt.ylabel('Perplexity')
    plt.title('Compute Cost vs Performance Trade-off')
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.bar(df_costs['model'], df_costs['efficiency'], alpha=0.7, color='orange')
    plt.ylabel('Efficiency (Perplexity/Cost)')
    plt.title('Model Efficiency Comparison')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

print("\nCost Analysis:")
print(df_costs)

## Summary and Conclusions

In [None]:
# Generate summary report
def _format_metric(value, spec=".3f"):
    """Format *value* with *spec*; return 'N/A' when it is missing or not numeric.

    Applying a float format spec to ``None`` raises TypeError and to a string
    raises ValueError, so both are mapped to the 'N/A' placeholder.
    """
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return "N/A"


print("=== PRUNING AND HEALING EXPERIMENT SUMMARY ===")
print()

print("1. ITERATIVE PRUNING APPROACH:")
if iterative_logs:
    # The highest step number identifies the final pruning iteration.
    final_step = max(log.get('step', 0) for log in iterative_logs)
    final_ppl = [log.get('post_train_perplexity') for log in iterative_logs if log.get('step') == final_step]
    if final_ppl:
        # The record may exist without a post-training perplexity (None).
        print(f"   - Final perplexity: {_format_metric(final_ppl[0])}")
    print(f"   - Total pruning steps: {final_step}")
    print(f"   - Layers removed: {final_step}")

print()
print("2. WINDOW PRUNING APPROACH:")
# A missing key prints 'N/A' instead of crashing on the ':.3f' format spec.
print(f"   - Baseline perplexity: {_format_metric(window_results.get('baseline_perplexity'))}")
print(f"   - Best window to remove: {window_results.get('best_window', 'N/A')}")
print(f"   - Window size: {window_results.get('window_size', 'N/A')}")

print()
print("3. MODEL COMPARISON:")
if comparison_data:
    for data in comparison_data:
        print(f"   {data['model_name']}:")
        print(f"     - Perplexity: {data['perplexity']:.3f}")
        print(f"     - Parameters: {data['num_parameters']/1e6:.1f}M")
        print(f"     - Size: {data['model_size_mb']:.1f} MB")
        print(f"     - Layers: {data['num_layers']}")

print()
print("4. RECOMMENDATIONS:")
if cost_analysis:
    # The efficiency score is treated as lower-is-better, hence min().
    best_efficiency = min(cost_analysis, key=lambda x: x['efficiency'])
    print(f"   - Most efficient approach: {best_efficiency['model']}")
    print(f"   - Best efficiency score: {best_efficiency['efficiency']:.3f}")

print("\n=== END SUMMARY ===")