# SketchRNN Model Benchmarking System
## Comprehensive Model Evaluation with Hardware-Aware Optimization

In [None]:
# Setup and Imports
import os
import sys
import numpy as np
import json
import pickle
import matplotlib.pyplot as plt
from pathlib import Path

# Add benchmark system to path
sys.path.append('./benchmark_system')

# Import benchmark components
from benchmark_system.core.benchmark_engine import BenchmarkEngine, BenchmarkConfig
from benchmark_system.core.model_registry import get_global_registry, register_model
from benchmark_system.utils.hardware_detector import create_hardware_detector
from benchmark_system.utils.visualization import create_visualizer

print("✅ Benchmark system imported successfully!")

In [None]:
# Hardware Detection
# Build the detector once and ask it for a summary of the host it is running on.
hardware_detector = create_hardware_detector()
hardware_summary = hardware_detector.get_device_summary()

print("🖥️ HARDWARE DETECTION:")
print(f"Environment: {hardware_summary['environment']}")

# Fetch the primary-device entry a single time rather than re-indexing the
# summary for every field that gets printed.
primary_device = hardware_summary['primary_device']
print(f"Primary Device: {primary_device['name']}")
print(f"Device Type: {primary_device['type']}")
print(f"Memory: {primary_device['memory_gb']} GB")

# Report the batch-size / worker limits the detector recommends.
opt_config = hardware_detector.get_optimization_config()
optimization_line = (
    f"\n⚙️ OPTIMIZATION: Batch Size: {opt_config.max_batch_size}, "
    f"Workers: {opt_config.max_workers}"
)
print(optimization_line)

In [None]:
# Load Test Data
def load_test_data(categories_path='data/categories.json',
                   data_path='data/quickdraw_data.pkl',
                   samples_per_category=20):
    """Load the category list and assemble a labelled test set.

    The categories are read from a JSON file. Sample data is read from a
    pickle file if it exists; otherwise random 28x28 uint8 images (100 per
    category) are generated so the rest of the notebook can still run.

    Args:
        categories_path: Path to the JSON file holding the category list.
        data_path: Path to the pickled data. The unpickled object is indexed
            as ``data[category]`` and sliced, so it is expected to map a
            category name to a sequence of samples.
        samples_per_category: How many trailing samples of each category to
            use for testing. Must be at least 1.

    Returns:
        A tuple ``(test_data, test_labels, categories)`` where the first two
        are NumPy arrays and each label is the index of the sample's category
        within ``categories``. Categories missing from the data are skipped.

    Raises:
        ValueError: If ``samples_per_category`` is smaller than 1.
    """
    # A count of 0 would turn the slice below into ``[-0:]``, which silently
    # selects *every* sample instead of none.
    if samples_per_category < 1:
        raise ValueError("samples_per_category must be a positive integer")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(categories_path, 'r', encoding='utf-8') as f:
        categories = json.load(f)

    # Try to load existing data or create sample data
    if Path(data_path).exists():
        # SECURITY: unpickling can execute arbitrary code. Only point
        # data_path at files produced by a trusted source.
        with open(data_path, 'rb') as f:
            data = pickle.load(f)
    else:
        print("Creating sample data for demonstration...")
        # randint's upper bound is exclusive, so 256 is required for the
        # generated pixels to cover the full uint8 range 0..255.
        data = {
            category: np.random.randint(0, 256, (100, 28, 28), dtype=np.uint8)
            for category in categories
        }

    # Prepare test set: the trailing samples of every known category.
    test_data, test_labels = [], []
    for i, category in enumerate(categories):
        if category in data:
            samples = data[category][-samples_per_category:]
            test_data.extend(samples)
            test_labels.extend([i] * len(samples))

    return np.array(test_data), np.array(test_labels), categories

# Build the test set once at cell level; the arrays and the category list
# are module-level names that the following cells read.
test_data, test_labels, categories = load_test_data()
print(f"📊 Test data: {test_data.shape}, Labels: {test_labels.shape}, Categories: {len(categories)}")

In [None]:
# Model Registration
def create_demo_model(num_classes=None):
    """Create a demonstration CNN model.

    Builds and compiles a small Keras ``Sequential`` network (two
    Conv2D/MaxPooling2D stages followed by two Dense layers) that takes
    28x28x1 inputs and ends in a softmax layer.

    Args:
        num_classes: Width of the final softmax layer. When omitted, the
            length of the module-level ``categories`` list is used.

    Returns:
        The compiled model, or ``None`` when TensorFlow cannot be imported.
    """
    # Keep the try body to the import alone so that a genuine error while
    # building the model is not mistaken for a missing dependency.
    try:
        from tensorflow.keras import layers, models
    except ImportError as exc:
        # Say why no model is produced instead of returning None silently.
        print(f"⚠️ TensorFlow is not available, demo model skipped: {exc}")
        return None

    if num_classes is None:
        num_classes = len(categories)

    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])

    # Sparse categorical cross-entropy is configured, i.e. integer labels.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Register models
registry = get_global_registry()
model_ids = []

# Look for existing models
models_dir = Path('models')
models_dir.mkdir(exist_ok=True)

# Path.glob yields entries in arbitrary order; sort so that the registration
# order (and therefore the order of model_ids) is reproducible between runs.
for model_file in sorted(models_dir.glob('*.h5')):
    model_id = f"sketchrnn_{model_file.stem}"
    # A falsy return from register_model is treated as "not registered".
    if register_model(model_id=model_id, name=model_file.stem, model_path=str(model_file), categories=categories):
        model_ids.append(model_id)
        print(f"✅ Registered: {model_id}")
    else:
        # Report the failure instead of dropping the model without a trace.
        print(f"⚠️ Failed to register: {model_id}")

# Create demo model if none found
if not model_ids:
    demo_model = create_demo_model()
    # create_demo_model signals "unavailable" with None; test for that
    # explicitly instead of relying on the truthiness of a model object.
    if demo_model is not None:
        demo_path = models_dir / 'demo_sketchrnn.h5'
        # Pass a plain string: not every Keras/h5py version accepts Path.
        demo_model.save(str(demo_path))
        if register_model(model_id="demo_sketchrnn", name="Demo SketchRNN", model_path=str(demo_path), categories=categories):
            model_ids.append("demo_sketchrnn")
            print("✅ Created and registered demo model")
        else:
            print("⚠️ Failed to register: demo_sketchrnn")

print(f"📋 Total registered models: {len(model_ids)}")

In [None]:
# Run Benchmark
# Handle the "nothing registered" case first, then the real run.
if not model_ids:
    print("⚠️ No models to benchmark")
    benchmark_summary = None
else:
    config = BenchmarkConfig(
        batch_size=32,
        max_samples=200,  # Limited for demo
        warmup_runs=2,
        parallel_models=True,
        output_dir="benchmark_results_notebook",
    )

    print("🚀 Starting benchmark...")
    benchmark_engine = BenchmarkEngine(hardware_detector=hardware_detector)

    try:
        summary = benchmark_engine.benchmark_models(model_ids, test_data, test_labels, config)

        print("\n✅ BENCHMARK COMPLETED!")
        print(
            f"Total: {summary.total_models}, "
            f"Successful: {summary.successful_models}, "
            f"Failed: {summary.failed_models}"
        )
        print(f"Total Time: {summary.total_execution_time:.2f}s")

        # Keep only the runs that finished without an error and carry metrics.
        successful_results = []
        for candidate in summary.results:
            if candidate.error is None and candidate.metrics:
                successful_results.append(candidate)

        if successful_results:
            print("\n📊 RESULTS:")
            for result in successful_results:
                print(f"• {result.model_name}: Accuracy={result.metrics.accuracy:.4f}, Time={result.metrics.inference_time_mean:.4f}s")
    except Exception as e:
        # Any failure, including one while printing, leaves no summary for
        # the visualization cell.
        print(f"❌ Benchmark failed: {e}")
        benchmark_summary = None
    else:
        benchmark_summary = summary

In [None]:
# Visualize Results
if benchmark_summary and benchmark_summary.successful_models > 0:
    # Single pass: keep error-free results that carry metrics and pull out
    # the three series that get plotted.
    successful_results = []
    model_names = []
    accuracies = []
    inference_times = []
    for entry in benchmark_summary.results:
        if entry.error is not None or not entry.metrics:
            continue
        successful_results.append(entry)
        model_names.append(entry.model_name)
        accuracies.append(entry.metrics.accuracy)
        inference_times.append(entry.metrics.inference_time_mean)

    # Two side-by-side bar charts sharing the same styling.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    chart_specs = (
        (ax1, accuracies, 'skyblue', 'Model Accuracy Comparison', 'Accuracy'),
        (ax2, inference_times, 'lightcoral', 'Inference Time Comparison', 'Time (seconds)'),
    )
    for axis, values, bar_color, title, y_label in chart_specs:
        axis.bar(model_names, values, color=bar_color, alpha=0.7)
        axis.set_title(title)
        axis.set_ylabel(y_label)
        axis.tick_params(axis='x', rotation=45)
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Hand the summary to the visualizer, which writes into the benchmark
    # output directory and returns where the report ended up.
    print("📄 Generating comprehensive report...")
    visualizer = create_visualizer(config.output_dir)
    report_dir = visualizer.generate_comprehensive_report(benchmark_summary)
    print(f"✅ Report generated: {report_dir}")

else:
    print("⚠️ No results to visualize")

## Usage Examples

### Command Line Usage:
```bash
# Discover and benchmark all models
python benchmark_models.py benchmark --discover-models --generate-reports

# Benchmark specific models
python benchmark_models.py benchmark --model-ids model1 model2 --parallel

# Register a new model
python benchmark_models.py register my_model_id /path/to/model.h5

# List all registered models
python benchmark_models.py list
```

### Python API Usage:
```python
from benchmark_system.core.benchmark_engine import quick_benchmark

# Quick benchmark
summary = quick_benchmark(
    model_ids=['model1', 'model2'],
    test_data=test_data,
    test_labels=test_labels,
    batch_size=64,
    parallel=True
)
```