# HQDE Multi-Dataset Benchmark

This notebook benchmarks the HQDE (Hierarchical Quantum-Distributed Ensemble) framework on multiple datasets.

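## Datasets:
1. MNIST (Grayscale, 10 classes)
2. CIFAR-10 (RGB, 10 classes)
3. Cats vs Dogs (RGB, 2 classes) — listed for reference; no loader is defined in this notebook
4. Fashion MNIST (Grayscale, 10 classes)
5. STL-10 (RGB, 10 classes) — optional; its benchmark cell is disabled by default
6. Oxford Flowers 17 (RGB, 17 classes) — listed for reference; no loader is defined in this notebook
7. SVHN (RGB, 10 classes)
8. CIFAR-100 (RGB, 100 classes)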

## Hardware Configuration:
- 2x T4 GPUs
- 4 CPU Cores
- Ray for distributed training

## Installation and Setup

In [None]:
# Install HQDE and dependencies
!pip install hqde torch torchvision torchaudio --quiet
!pip install ray[rllib] tensorboard --quiet
!pip install kaggle --quiet

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import numpy as np
import time
import logging
import json
from pathlib import Path
import ray
from ray.util.actor_pool import ActorPool

# Import HQDE
from hqde import create_hqde_system, PerformanceMonitor

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Check available GPUs
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_GPUS = torch.cuda.device_count()
logger.info(f'Using device: {DEVICE}')
logger.info(f'Number of GPUs available: {NUM_GPUS}')

# Initialize Ray with GPU support
if NUM_GPUS >= 2:
    # Pin the cluster to the documented 2-GPU / 4-CPU configuration.
    ray.init(ignore_reinit_error=True, num_cpus=4, num_gpus=2)
    logger.info('Ray initialized with 2 GPUs')
else:
    # No explicit resources: Ray detects whatever CPUs/GPUs are present,
    # so a single GPU is still made available to workers.
    ray.init(ignore_reinit_error=True)
    if NUM_GPUS == 1:
        logger.info('Ray initialized with 1 GPU (auto-detected resources)')
    else:
        logger.info('Ray initialized in CPU mode')

## Model Architectures for Different Datasets

In [None]:
class SimpleCNN(nn.Module):
    """Small two-stage CNN for single-channel images (MNIST, Fashion MNIST).

    Two conv/ReLU/max-pool stages halve the spatial size twice; the first
    linear layer expects 64 * 7 * 7 features, i.e. 28x28 inputs.

    Args:
        num_classes: Width of the final linear layer (number of logits).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Layers are kept in the same order so Sequential indices
        # (and therefore state_dict keys) stay stable.
        conv_stack = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps)


class StandardCNN(nn.Module):
    """Three-stage CNN for 3-channel images (CIFAR-10, SVHN, CIFAR-100).

    Each stage is conv -> ReLU -> 2x2 max-pool. The first linear layer
    expects 256 * 4 * 4 features, i.e. 32x32 inputs.

    Args:
        num_classes: Width of the final linear layer (number of logits).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Build the stages in order so conv layers land at Sequential
        # indices 0, 3 and 6, exactly as a hand-written list would.
        stages = []
        for in_channels, out_channels in ((3, 64), (64, 128), (128, 256)):
            stages.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ])
        self.features = nn.Sequential(*stages)

        head = [
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps)


class DeepCNN(nn.Module):
    """Four-stage CNN with batch norm for larger RGB images (STL-10, Flowers, Cats vs Dogs).

    The first three stages end in a 2x2 max-pool; the fourth does not.
    An adaptive average pool then fixes the feature map at 4x4, so the
    classifier input size does not depend on the image resolution.

    Args:
        num_classes: Width of the final linear layer (number of logits).
        input_size: Stored on the instance as ``input_size``; the layers
            defined here do not otherwise read it.
    """

    def __init__(self, num_classes=10, input_size=96):
        super().__init__()
        self.input_size = input_size

        def conv_stage(in_channels, out_channels, with_pool):
            # conv -> batch norm -> ReLU, optionally followed by a 2x2 max-pool
            stage = [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if with_pool:
                stage.append(nn.MaxPool2d(2))
            return stage

        stage_specs = (
            (3, 64, True),
            (64, 128, True),
            (128, 256, True),
            (256, 512, False),
        )
        layers = []
        for in_channels, out_channels, with_pool in stage_specs:
            layers.extend(conv_stage(in_channels, out_channels, with_pool))
        self.features = nn.Sequential(*layers)

        # Pool every feature map down (or up) to a fixed 4x4 grid
        self.pool = nn.AdaptiveAvgPool2d((4, 4))

        head = [
            nn.Flatten(),
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits for a batch of images."""
        pooled = self.pool(self.features(x))
        return self.classifier(pooled)


print("Models defined successfully!")

## Dataset Loaders

In [None]:
class DatasetConfig:
    """Plain record of the settings used to benchmark one dataset.

    Every constructor argument is stored unchanged under an attribute of
    the same name: ``name``, ``model_class``, ``model_kwargs``,
    ``transform_train``, ``transform_test``, ``num_classes``,
    ``batch_size`` (default 64) and ``subset_size`` (default 5000).
    """

    def __init__(self, name, model_class, model_kwargs,
                 transform_train, transform_test,
                 num_classes, batch_size=64, subset_size=5000):
        # Which dataset this is and how to build a model for it
        self.name, self.model_class, self.model_kwargs = (
            name, model_class, model_kwargs)
        # Preprocessing pipelines for the two splits
        self.transform_train, self.transform_test = (
            transform_train, transform_test)
        # Label-space size and loading parameters
        self.num_classes, self.batch_size, self.subset_size = (
            num_classes, batch_size, subset_size)


def _make_loaders(train_dataset, test_dataset, subset_size, batch_size,
                  test_subset_size=1000, num_workers=2):
    """Build train/test DataLoaders over leading-prefix subsets of two datasets.

    Args:
        train_dataset: Full training dataset.
        test_dataset: Full test dataset.
        subset_size: Maximum number of training samples; the first
            ``subset_size`` indices are used.
        batch_size: Batch size for both loaders.
        test_subset_size: Maximum number of test samples (first N indices).
        num_workers: Worker processes per DataLoader.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    train_subset = Subset(train_dataset, range(min(subset_size, len(train_dataset))))
    test_subset = Subset(test_dataset, range(min(test_subset_size, len(test_dataset))))

    train_loader = DataLoader(train_subset, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_subset, batch_size=batch_size,
                             shuffle=False, num_workers=num_workers)
    return train_loader, test_loader


def load_mnist(subset_size=5000, batch_size=64):
    """Load MNIST dataset.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                               download=True, transform=transform)
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False,
                                              download=True, transform=transform)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='MNIST',
        model_class=SimpleCNN,
        model_kwargs={'num_classes': 10},
        transform_train=transform,
        transform_test=transform,
        num_classes=10,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


def load_fashion_mnist(subset_size=5000, batch_size=64):
    """Load Fashion MNIST dataset.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.2860,), (0.3530,)),
    ])

    train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True,
                                                      download=True, transform=transform)
    test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False,
                                                     download=True, transform=transform)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='Fashion-MNIST',
        model_class=SimpleCNN,
        model_kwargs={'num_classes': 10},
        transform_train=transform,
        transform_test=transform,
        num_classes=10,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


def load_cifar10(subset_size=5000, batch_size=64):
    """Load CIFAR-10 dataset.

    Training images get a random horizontal flip and a padded random crop;
    test images are only converted and normalized.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                 download=True, transform=transform_train)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                                download=True, transform=transform_test)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='CIFAR-10',
        model_class=StandardCNN,
        model_kwargs={'num_classes': 10},
        transform_train=transform_train,
        transform_test=transform_test,
        num_classes=10,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


def load_cifar100(subset_size=5000, batch_size=64):
    """Load CIFAR-100 dataset.

    Training images get a random horizontal flip and a padded random crop;
    test images are only converted and normalized.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])

    train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                                  download=True, transform=transform_train)
    test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                                 download=True, transform=transform_test)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='CIFAR-100',
        model_class=StandardCNN,
        model_kwargs={'num_classes': 100},
        transform_train=transform_train,
        transform_test=transform_test,
        num_classes=100,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


def load_svhn(subset_size=5000, batch_size=64):
    """Load SVHN dataset.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)),
    ])

    train_dataset = torchvision.datasets.SVHN(root='./data', split='train',
                                              download=True, transform=transform_train)
    test_dataset = torchvision.datasets.SVHN(root='./data', split='test',
                                             download=True, transform=transform_test)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='SVHN',
        model_class=StandardCNN,
        model_kwargs={'num_classes': 10},
        transform_train=transform_train,
        transform_test=transform_test,
        num_classes=10,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


def load_stl10(subset_size=5000, batch_size=32):
    """Load STL-10 dataset.

    Images are resized to 96; training images additionally get a random
    horizontal flip and a padded random crop.

    Returns:
        ``(train_loader, test_loader, config)`` where ``config`` is a
        ``DatasetConfig`` recording the model, transforms and sizes used.
    """
    transform_train = transforms.Compose([
        transforms.Resize(96),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(96, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4467, 0.4390, 0.4066), (0.2603, 0.2566, 0.2713)),
    ])

    transform_test = transforms.Compose([
        transforms.Resize(96),
        transforms.ToTensor(),
        transforms.Normalize((0.4467, 0.4390, 0.4066), (0.2603, 0.2566, 0.2713)),
    ])

    train_dataset = torchvision.datasets.STL10(root='./data', split='train',
                                               download=True, transform=transform_train)
    test_dataset = torchvision.datasets.STL10(root='./data', split='test',
                                              download=True, transform=transform_test)

    train_loader, test_loader = _make_loaders(train_dataset, test_dataset,
                                              subset_size, batch_size)

    config = DatasetConfig(
        name='STL-10',
        model_class=DeepCNN,
        model_kwargs={'num_classes': 10, 'input_size': 96},
        transform_train=transform_train,
        transform_test=transform_test,
        num_classes=10,
        batch_size=batch_size,
        subset_size=subset_size,  # record the size actually requested
    )

    return train_loader, test_loader, config


print("Dataset loaders defined successfully!")

## HQDE Benchmark Function

In [None]:
def _evaluate_hqde(hqde_system, test_loader, criterion, device):
    """Score the trained ensemble on ``test_loader``.

    Args:
        hqde_system: Object exposing ``predict(list_of_batches)``.
        test_loader: Iterable of ``(data, targets)`` batches.
        criterion: Loss callable applied to ``(predictions, targets)``.
        device: Device the input batches are moved to before prediction.

    Returns:
        ``(total_correct, total_samples, total_loss)``. ``total_loss`` is the
        sum of per-batch losses weighted by batch size. Batches for which
        ``predict`` returns an empty tensor are not counted.
    """
    total_correct = 0
    total_samples = 0
    total_loss = 0.0
    empty_batches = 0

    # Evaluation needs no gradients; avoid autograd bookkeeping in this process.
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)

            predictions = hqde_system.predict([data])

            if predictions.numel() == 0:
                empty_batches += 1
                continue

            # NOTE(review): predict() may return tensors on a device other
            # than `device`; align targets so loss/comparison cannot mismatch.
            targets = targets.to(predictions.device)

            loss = criterion(predictions, targets).item()
            _, predicted_classes = torch.max(predictions, dim=1)

            batch_size = data.size(0)
            total_correct += (predicted_classes == targets).sum().item()
            total_loss += loss * batch_size
            total_samples += batch_size

    if empty_batches:
        # Surface skipped batches instead of silently shrinking the test set.
        logger.warning(f"{empty_batches} test batch(es) returned no predictions and were skipped")

    return total_correct, total_samples, total_loss


def benchmark_hqde(train_loader, test_loader, config, num_epochs=5, num_workers=2):
    """Benchmark HQDE on a dataset.

    Creates an HQDE system for ``config.model_class``, trains it on
    ``train_loader``, evaluates on ``test_loader``, saves the model and
    always releases the system via ``cleanup()``, even if a step fails.

    Args:
        train_loader: DataLoader with training batches.
        test_loader: DataLoader with ``(data, targets)`` test batches.
        config: ``DatasetConfig`` supplying name, model class/kwargs and
            class count.
        num_epochs: Number of training epochs.
        num_workers: Number of HQDE workers.

    Returns:
        dict with keys ``dataset``, ``num_classes``, ``num_workers``,
        ``num_epochs``, ``train_samples``, ``test_samples``,
        ``training_time``, ``test_accuracy``, ``test_loss``,
        ``correct_predictions``, ``hqde_metrics`` and ``model_path``.
    """
    logger.info(f"\n{'='*60}")
    logger.info(f"Benchmarking HQDE on {config.name}")
    logger.info(f"{'='*60}")

    results = {
        'dataset': config.name,
        'num_classes': config.num_classes,
        'num_workers': num_workers,
        'num_epochs': num_epochs,
        'train_samples': len(train_loader.dataset),
        'test_samples': len(test_loader.dataset)
    }

    # Create HQDE system
    logger.info(f"Creating HQDE system with {num_workers} workers...")
    hqde_system = create_hqde_system(
        model_class=config.model_class,
        model_kwargs=config.model_kwargs,
        num_workers=num_workers,
        quantization_config={'base_bits': 8, 'min_bits': 4, 'max_bits': 16},
        aggregation_config={'noise_scale': 0.005, 'exploration_factor': 0.1}
    )

    try:
        # Training
        logger.info(f"Training for {num_epochs} epochs...")
        start_time = time.time()

        hqde_system.train(train_loader, num_epochs=num_epochs)

        training_time = time.time() - start_time
        results['training_time'] = training_time
        logger.info(f"Training completed in {training_time:.2f} seconds")

        # Evaluation
        logger.info("Evaluating on test set...")
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        criterion = nn.CrossEntropyLoss()

        total_correct, total_samples, total_loss = _evaluate_hqde(
            hqde_system, test_loader, criterion, device)

        accuracy = total_correct / total_samples if total_samples > 0 else 0.0
        avg_loss = total_loss / total_samples if total_samples > 0 else 0.0

        results['test_accuracy'] = accuracy
        results['test_loss'] = avg_loss
        results['correct_predictions'] = total_correct

        logger.info(f"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
        logger.info(f"Test Loss: {avg_loss:.4f}")

        # Get HQDE performance metrics
        hqde_metrics = hqde_system.get_performance_metrics()
        results['hqde_metrics'] = hqde_metrics

        # Save model
        model_path = f"hqde_{config.name.lower().replace('-', '_')}_model.pth"
        hqde_system.save_model(model_path)
        results['model_path'] = model_path
    finally:
        # Cleanup runs on success and on failure so a failed run does not
        # leave the system's resources allocated.
        hqde_system.cleanup()

    logger.info(f"Benchmark completed for {config.name}\n")

    return results


print("Benchmark function defined successfully!")

## Run Benchmarks on All Datasets

In [None]:
# Benchmark settings shared by the dataset cells below
NUM_EPOCHS = 5
SUBSET_SIZE = 5000  # Use subset for faster benchmarking
NUM_WORKERS = 2  # Adjust based on your GPU count

# Per-dataset result dicts are collected here
all_results = []

# Dataset name -> loader function
dataset_loaders = {
    'MNIST': load_mnist,
    'Fashion-MNIST': load_fashion_mnist,
    'CIFAR-10': load_cifar10,
    'SVHN': load_svhn,
    'STL-10': load_stl10,
    'CIFAR-100': load_cifar100,
}

# Echo the run configuration (the trailing "\n" leaves a blank line after it)
print(
    f"Starting benchmarks with {NUM_WORKERS} workers, {NUM_EPOCHS} epochs\n"
    f"Subset size: {SUBSET_SIZE} samples per dataset\n"
    f"Running on {NUM_GPUS} GPUs\n"
)

### 1. MNIST

In [None]:
# Load the MNIST subset and benchmark HQDE on it
train_loader, test_loader, config = load_mnist(subset_size=SUBSET_SIZE, batch_size=64)
results_mnist = benchmark_hqde(
    train_loader,
    test_loader,
    config,
    num_epochs=NUM_EPOCHS,
    num_workers=NUM_WORKERS,
)
all_results.append(results_mnist)

# Display results
print(
    f"\nResults Summary for {config.name}:\n"
    f"  Training Time: {results_mnist['training_time']:.2f}s\n"
    f"  Test Accuracy: {results_mnist['test_accuracy']*100:.2f}%\n"
    f"  Test Loss: {results_mnist['test_loss']:.4f}"
)

### 2. Fashion MNIST

In [None]:
# Load the Fashion MNIST subset and benchmark HQDE on it
train_loader, test_loader, config = load_fashion_mnist(subset_size=SUBSET_SIZE, batch_size=64)
results_fashion = benchmark_hqde(
    train_loader,
    test_loader,
    config,
    num_epochs=NUM_EPOCHS,
    num_workers=NUM_WORKERS,
)
all_results.append(results_fashion)

# Display results
print(
    f"\nResults Summary for {config.name}:\n"
    f"  Training Time: {results_fashion['training_time']:.2f}s\n"
    f"  Test Accuracy: {results_fashion['test_accuracy']*100:.2f}%\n"
    f"  Test Loss: {results_fashion['test_loss']:.4f}"
)

### 3. CIFAR-10

In [None]:
# Load the CIFAR-10 subset and benchmark HQDE on it
train_loader, test_loader, config = load_cifar10(subset_size=SUBSET_SIZE, batch_size=64)
results_cifar10 = benchmark_hqde(
    train_loader,
    test_loader,
    config,
    num_epochs=NUM_EPOCHS,
    num_workers=NUM_WORKERS,
)
all_results.append(results_cifar10)

# Display results
print(
    f"\nResults Summary for {config.name}:\n"
    f"  Training Time: {results_cifar10['training_time']:.2f}s\n"
    f"  Test Accuracy: {results_cifar10['test_accuracy']*100:.2f}%\n"
    f"  Test Loss: {results_cifar10['test_loss']:.4f}"
)

### 4. SVHN

In [None]:
# Load the SVHN subset and benchmark HQDE on it
train_loader, test_loader, config = load_svhn(subset_size=SUBSET_SIZE, batch_size=64)
results_svhn = benchmark_hqde(
    train_loader,
    test_loader,
    config,
    num_epochs=NUM_EPOCHS,
    num_workers=NUM_WORKERS,
)
all_results.append(results_svhn)

# Display results
print(
    f"\nResults Summary for {config.name}:\n"
    f"  Training Time: {results_svhn['training_time']:.2f}s\n"
    f"  Test Accuracy: {results_svhn['test_accuracy']*100:.2f}%\n"
    f"  Test Loss: {results_svhn['test_loss']:.4f}"
)

### 5. CIFAR-100

In [None]:
# Load the CIFAR-100 subset and benchmark HQDE on it
train_loader, test_loader, config = load_cifar100(subset_size=SUBSET_SIZE, batch_size=64)
results_cifar100 = benchmark_hqde(
    train_loader,
    test_loader,
    config,
    num_epochs=NUM_EPOCHS,
    num_workers=NUM_WORKERS,
)
all_results.append(results_cifar100)

# Display results
print(
    f"\nResults Summary for {config.name}:\n"
    f"  Training Time: {results_cifar100['training_time']:.2f}s\n"
    f"  Test Accuracy: {results_cifar100['test_accuracy']*100:.2f}%\n"
    f"  Test Loss: {results_cifar100['test_loss']:.4f}"
)

### 6. STL-10 (Optional - Larger Dataset)

In [None]:
# Set RUN_STL10 = True to include STL-10 (takes longer due to larger images).
# A flag replaces the commented-out code so the "skipped" message is only
# printed when the benchmark really is skipped.
RUN_STL10 = False

if RUN_STL10:
    train_loader, test_loader, config = load_stl10(subset_size=SUBSET_SIZE, batch_size=32)
    results_stl10 = benchmark_hqde(train_loader, test_loader, config,
                                   num_epochs=NUM_EPOCHS, num_workers=NUM_WORKERS)
    all_results.append(results_stl10)

    print(f"\nResults Summary for {config.name}:")
    print(f"  Training Time: {results_stl10['training_time']:.2f}s")
    print(f"  Test Accuracy: {results_stl10['test_accuracy']*100:.2f}%")
    print(f"  Test Loss: {results_stl10['test_loss']:.4f}")
else:
    print("STL-10 benchmark skipped (set RUN_STL10 = True to run)")

## Results Summary and Visualization

In [None]:
# Create summary DataFrame
import pandas as pd


def _json_default(obj):
    """Convert tensor/NumPy values that ``json`` cannot encode natively.

    Raises:
        TypeError: For any other unsupported type, so unexpected objects
            are reported instead of being silently stringified.
    """
    if isinstance(obj, torch.Tensor):
        return obj.detach().cpu().tolist()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


# One table row per benchmarked dataset; numeric columns are pre-formatted
summary_data = [
    {
        'Dataset': result['dataset'],
        'Classes': result['num_classes'],
        'Train Samples': result['train_samples'],
        'Test Samples': result['test_samples'],
        'Training Time (s)': f"{result['training_time']:.2f}",
        'Test Accuracy (%)': f"{result['test_accuracy']*100:.2f}",
        'Test Loss': f"{result['test_loss']:.4f}",
        'Correct Predictions': result['correct_predictions'],
    }
    for result in all_results
]

df_summary = pd.DataFrame(summary_data)
print("\n" + "="*100)
print("HQDE MULTI-DATASET BENCHMARK RESULTS")
print("="*100)
print(df_summary.to_string(index=False))
print("="*100)

# Save results to JSON.
# Serialize fully before opening the file so a serialization error cannot
# leave a truncated results file behind.
# NOTE(review): 'hqde_metrics' comes straight from the HQDE system and may
# hold tensor/NumPy values; _json_default covers those cases.
results_json = json.dumps(all_results, indent=2, default=_json_default)
with open('hqde_benchmark_results.json', 'w', encoding='utf-8') as f:
    f.write(results_json)

print("\nResults saved to 'hqde_benchmark_results.json'")

## Performance Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (15, 10)

# Pull the plotted series out of the collected results
datasets = [r['dataset'] for r in all_results]
accuracies = [r['test_accuracy']*100 for r in all_results]
training_times = [r['training_time'] for r in all_results]
losses = [r['test_loss'] for r in all_results]

# 2x2 grid: three bar charts plus one scatter plot
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Bar panels 1-3, described as data:
# (axis, values, color, y-label, title, label offset, label format, y-limits)
bar_panels = [
    (axes[0, 0], accuracies, 'steelblue', 'Accuracy (%)',
     'Test Accuracy by Dataset', 1, '{:.1f}%', [0, 100]),
    (axes[0, 1], training_times, 'coral', 'Training Time (seconds)',
     'Training Time by Dataset', 1, '{:.1f}s', None),
    (axes[1, 0], losses, 'lightgreen', 'Test Loss',
     'Test Loss by Dataset', 0.05, '{:.3f}', None),
]
for ax, values, color, ylabel, title, offset, label_fmt, ylim in bar_panels:
    ax.bar(datasets, values, color=color, alpha=0.8)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    if ylim is not None:
        ax.set_ylim(ylim)
    ax.tick_params(axis='x', rotation=45)
    # Value label just above each bar
    for i, v in enumerate(values):
        ax.text(i, v + offset, label_fmt.format(v), ha='center', fontsize=10)

# 4. Accuracy vs Training Time Scatter
scatter_ax = axes[1, 1]
scatter_ax.scatter(training_times, accuracies, s=200, alpha=0.6, c='purple')
scatter_ax.set_xlabel('Training Time (seconds)', fontsize=12)
scatter_ax.set_ylabel('Test Accuracy (%)', fontsize=12)
scatter_ax.set_title('Accuracy vs Training Time', fontsize=14, fontweight='bold')
scatter_ax.grid(True, alpha=0.3)

# Name each scatter point after its dataset
for dataset, x_time, y_acc in zip(datasets, training_times, accuracies):
    scatter_ax.annotate(dataset, (x_time, y_acc),
                        xytext=(5, 5), textcoords='offset points', fontsize=9)

plt.tight_layout()
plt.savefig('hqde_benchmark_plots.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nVisualization saved to 'hqde_benchmark_plots.png'")

## Ray Cluster Cleanup

In [None]:
# Shutdown Ray cluster (the runtime started by ray.init() in the setup cell).
# Run this cell last: the benchmark cells above need Ray to be running.
ray.shutdown()
print("\nRay cluster shutdown complete.")
print("Benchmark finished successfully!")

## Summary and Key Findings

This notebook benchmarked the HQDE framework on MNIST, Fashion-MNIST, CIFAR-10, SVHN, and CIFAR-100, with STL-10 available as an optional run.

### Key Features Tested:
1. **Distributed Training**: Multi-GPU support via Ray
2. **Adaptive Quantization**: Memory-efficient training
3. **Quantum-Inspired Aggregation**: Ensemble learning with noise injection
4. **Scalability**: From simple (MNIST) to complex (CIFAR-100) datasets

### Hardware Configuration:
- 2x T4 GPUs
- 4 CPU Cores  
- Ray distributed framework

### Files Generated:
- `hqde_benchmark_results.json` - Detailed results
- `hqde_benchmark_plots.png` - Visualization
- `hqde_*_model.pth` - Trained models for each dataset

### Next Steps:
- Experiment with different hyperparameters
- Try larger subset sizes for better accuracy
- Test on custom datasets
- Compare with baseline models