# Benchmark Preparation

In [None]:
import csv
import numpy as np

import torch
import torchvision
from torchvision import datasets, transforms

from time import perf_counter
from datetime import datetime
from pathlib import Path

# CIFAR-10 Preparation

## Data transformation
To get reliable results, the data needs some preprocessing first (augmentation for training, normalisation for both training and testing).

In [None]:
# Per-channel (R, G, B) mean and standard deviation calculated from the
# CIFAR-10 dataset; both pipelines normalise with the same values.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2470, 0.2435, 0.2616]

# Training pipeline: augmentations for data variability, then conversion to a
# tensor and normalisation for the model.
train_steps = [
    # Mirror the image horizontally half of the time.
    transforms.RandomHorizontalFlip(),
    # Pad by 4 pixels on each side (32x32 -> 40x40) and crop back to 32x32:
    # adds variability without losing information.
    transforms.RandomCrop(32, padding=4),
    # Rotate by up to 15 degrees, a common rule of thumb that keeps images
    # consistent with the real world.
    transforms.RandomRotation(15),
    # Image -> PyTorch tensor (uint8 data becomes float32).
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
]
transform_train = transforms.Compose(train_steps)

# Test pipeline: no augmentation, only tensor conversion and normalisation.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
]
transform_test = transforms.Compose(test_steps)

## Load data

In [None]:
# Batch sizes: mini-batches for training, a single image per batch for the
# latency benchmark.
batch_size_train = 128
batch_size_test = 1

# Both splits are stored under (and downloaded to, if missing) the same folder.
cifar10_root = './data'

# Training split, with the augmenting transform.
cifar10_train = datasets.CIFAR10(
    root=cifar10_root, train=True, download=True, transform=transform_train
)

# Test split, with the normalisation-only transform.
cifar10_test = datasets.CIFAR10(
    root=cifar10_root, train=False, download=True, transform=transform_test
)

In [None]:
# Training loader: shuffled to limit overfitting, loaded by 4 worker
# processes, with pinned memory for faster CPU -> GPU copies.
train_loader_options = dict(
    batch_size=batch_size_train,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
train_loader = torch.utils.data.DataLoader(cifar10_train, **train_loader_options)

# Test loader: fixed order for fair comparisons, and no worker processes so a
# single pass carries no parallelism overhead.
test_loader_options = dict(
    batch_size=batch_size_test,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
test_loader = torch.utils.data.DataLoader(cifar10_test, **test_loader_options)

## Summary

In [None]:

# Sanity check: report the size of both splits and the class names.
class_names = cifar10_train.classes
overview_lines = [
    f"- Train dataset: {len(cifar10_train)} images",
    f"- Test dataset: {len(cifar10_test)} images",
    f"- Nombre de classes: {len(class_names)}",
    f"- Classes: {class_names}",
]
for overview_line in overview_lines:
    print(overview_line)

# Inspect the first training sample (after the training transform is applied).
sample_img, sample_label = cifar10_train[0]
sample_class = class_names[sample_label]
print(f"Shape image train: {sample_img.shape}")
print(f"Label sample: {sample_label} ({sample_class})")

# Benchmark Latency

In [None]:
WARM_UP_ITERS = 50
MEASURE_ITERS = 500

def benchmark_latency(model, dataloader, warmup_iters=WARM_UP_ITERS, measure_iters=MEASURE_ITERS, device='cuda'):
    """
    Measure per-batch inference latency of a model on the given device.

    The model is switched to eval mode and moved to ``device``. Batches are
    copied to the device up front so that no host-to-device transfer happens
    inside the timed region. A warm-up phase runs first without timing; each
    measured forward pass is then timed with ``perf_counter``, with a device
    synchronisation before and after when running on CUDA.

    Args:
        model: PyTorch model (it is put in eval mode and moved to ``device``).
        dataloader: iterable yielding ``(images, labels)`` pairs; intended to
            be used with batch_size=1 so one measurement is one image.
        warmup_iters: number of untimed warm-up iterations (default 50).
        measure_iters: number of timed iterations (default 500).
        device: 'cuda' (GPU) or 'cpu'; any value accepted by ``torch.device``.

    Returns:
        tuple ``(stats, times)`` where ``stats`` is a dict with the keys
        'mean', 'p95', 'p50', 'std', 'min', 'max' (all in ms) and 'count'
        (number of measurements actually taken), and ``times`` is a NumPy
        array holding every individual latency in ms.

    Raises:
        ValueError: if no measurement can be taken (``measure_iters`` <= 0 or
            the dataloader yields no batch).
    """
    target_device = torch.device(device)
    # Synchronising is only meaningful (and only possible) on CUDA; calling it
    # unconditionally would crash CPU runs on machines without CUDA.
    on_cuda = target_device.type == 'cuda'

    def _sync():
        if on_cuda:
            torch.cuda.synchronize(target_device)

    # Eval mode and device placement
    model.eval()
    model.to(device)

    # Preload the batches on the device so transfers are not part of the timing
    print(f"[Benchmark] Préparation des données sur {device}...")
    batches_needed = max(warmup_iters, measure_iters)
    preloaded = []
    for i, (images, _labels) in enumerate(dataloader):
        # Labels are not needed to time a forward pass, so only images are kept.
        preloaded.append(images.to(device))
        if i + 1 >= batches_needed:
            break

    print(f"[Benchmark] {len(preloaded)} images préchargées sur {device}")

    # The dataloader may hold fewer batches than requested: work with what exists.
    n_warmup = max(0, min(warmup_iters, len(preloaded)))
    n_measure = min(measure_iters, len(preloaded))
    if n_measure <= 0:
        raise ValueError(
            "benchmark_latency needs at least one measured iteration "
            f"(measure_iters={measure_iters}, batches available={len(preloaded)})"
        )

    # WARM-UP (not measured) so one-off start-up costs do not bias the timings
    print(f"[Benchmark] Warm-up: {n_warmup} itérations...")
    with torch.no_grad():
        for images in preloaded[:n_warmup]:
            model(images)

    _sync()  # Make sure all warm-up work has finished before timing starts
    print("[Benchmark] Warm-up terminé")

    # MEASUREMENT (timed)
    print(f"[Benchmark] Mesure: {n_measure} itérations...")
    times = []

    with torch.no_grad():
        for images in preloaded[:n_measure]:
            _sync()  # Nothing pending on the device when the clock starts
            t0 = perf_counter()

            model(images)

            _sync()  # Wait for the forward pass to actually complete
            t1 = perf_counter()

            # Elapsed time in ms
            times.append((t1 - t0) * 1000)

    # STATISTICS
    times = np.array(times)
    stats = {
        'mean': float(np.mean(times)),
        'p95': float(np.percentile(times, 95)),
        'p50': float(np.percentile(times, 50)),
        'std': float(np.std(times)),
        'min': float(np.min(times)),
        'max': float(np.max(times)),
        'count': len(times)
    }

    print("[Benchmark] Résultats Latence:")
    print(f"  - Mean: {stats['mean']:.4f} ms")
    print(f"  - P95:  {stats['p95']:.4f} ms")
    print(f"  - P50:  {stats['p50']:.4f} ms")
    print(f"  - Std:  {stats['std']:.4f} ms")
    print(f"  - Min:  {stats['min']:.4f} ms")
    print(f"  - Max:  {stats['max']:.4f} ms")

    return stats, times

In [None]:
def save_benchmark_results(results_list, filename='benchmark_results.csv'):
    """
    Write benchmark results to a CSV file (the file is overwritten).

    Args:
        results_list: list of dicts, one per benchmark run, keyed by the
            column names listed in ``fieldnames`` below. Missing keys are
            written as empty cells; unknown keys make ``csv.DictWriter``
            raise ``ValueError``.
        filename: path of the CSV file to write. Missing parent directories
            are created.
    """
    filepath = Path(filename)
    # Allow targets such as 'results/run1.csv' without a prior mkdir.
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # Column order of the CSV
    fieldnames = ['timestamp', 'variant', 'accuracy', 'lat_mean_ms', 'lat_p95_ms',
                  'lat_p50_ms', 'lat_std_ms', 'lat_min_ms', 'lat_max_ms', 'measure_iters']

    # Explicit UTF-8 so the file does not depend on the platform's default
    # encoding and can be read back by tools that assume UTF-8.
    with filepath.open('w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results_list)

    print(f"\nRésultats stockées dans {filepath}")

## Usage Example

In [None]:
# ========================
# Example
# ========================

N_RUNS = 3              # Number of repeated benchmark runs used to judge stability
CV_THRESHOLD_PCT = 20   # Highest accepted coefficient of variation of P95, in %

# Small dummy model, only used to exercise the benchmark.
# With 32x32 inputs: conv keeps 32x32 (16 channels), pooling halves it to
# 16x16, hence the 16 * 16 * 16 input features of the linear layer.
dummy_model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3, padding=1),
    torch.nn.ReLU(),
    torch.nn.AvgPool2d(2),
    torch.nn.Flatten(),
    torch.nn.Linear(16 * 16 * 16, 10)
).cuda()

# Run the benchmark several times to validate its stability
print("="*60)
print("VALIDATION STABILITÉ DU BENCHMARK")
print("="*60)

results_list = []
for run in range(N_RUNS):
    print(f"\n[Run {run + 1}/{N_RUNS}]")
    print("-" * 60)

    stats, times = benchmark_latency(
        dummy_model,
        test_loader,  # From data preparation
        warmup_iters=WARM_UP_ITERS,
        measure_iters=MEASURE_ITERS,
        device='cuda'
    )

    # Store results
    results_list.append({
        'timestamp': datetime.now().isoformat(),
        'variant': 'dummy_model_fp32',
        'accuracy': 0.0,  # Placeholder
        'lat_mean_ms': stats['mean'],
        'lat_p95_ms': stats['p95'],
        'lat_p50_ms': stats['p50'],
        'lat_std_ms': stats['std'],
        'lat_min_ms': stats['min'],
        'lat_max_ms': stats['max'],
        'measure_iters': stats['count']
    })

# Store in CSV file
save_benchmark_results(results_list, 'results_j1_validation.csv')

# Run-to-run variance of the P95 latency.
# The heading and the verdict below share CV_THRESHOLD_PCT so the threshold
# that is printed is the one that is actually applied.
print("\n" + "="*60)
print(f"RÉSUMÉ VARIANCE (< {CV_THRESHOLD_PCT}% acceptable)")
print("="*60)
p95_values = [r['lat_p95_ms'] for r in results_list]
p95_mean = np.mean(p95_values)
p95_std = np.std(p95_values)
p95_cv = (p95_std / p95_mean) * 100  # Coefficient of variation, in %

print(f"P95 Mean: {p95_mean:.4f} ms")
print(f"P95 Std:  {p95_std:.4f} ms")
print(f"P95 CV:   {p95_cv:.2f}% {'approuvé' if p95_cv < CV_THRESHOLD_PCT else 'X'}")

# Conditions for reproducibility

In [None]:
import json

separator = "=" * 60

print("\n" + separator)
print("CONDITIONS FIXES DE MESURE (reproductibilité)")
print(separator)

# Record the software / hardware environment the measurements were taken in
cuda_ok = torch.cuda.is_available()
benchmark_conditions = {
    "timestamp": datetime.now().isoformat(),
    "pytorch_version": torch.__version__,
    "torchvision_version": torchvision.__version__,
    "cuda_available": cuda_ok,
    "cuda_version": torch.version.cuda,
    "gpu_name": torch.cuda.get_device_name(0) if cuda_ok else "N/A",
    "gpu_count": torch.cuda.device_count(),
    "cudnn_version": torch.backends.cudnn.version(),
    "cudnn_enabled": torch.backends.cudnn.enabled,
    "cudnn_deterministic": torch.backends.cudnn.deterministic,
}

# Benchmark settings
benchmark_params = {
    "batch_size": batch_size_test,
    "input_shape": (1, 3, 32, 32),  # (batch, channels, height, width)
    "warmup_iterations": WARM_UP_ITERS,
    "measure_iterations": MEASURE_ITERS,
    "model_mode": "eval",
    "grad_disabled": "torch.no_grad()",
    "synchronization": "torch.cuda.synchronize()",
    "precision": "FP32 (float32)",
    "data_preloaded_gpu": True,  # No transfers during the measurement
}

# Print both dictionaries, one "key: value" line per entry
report_sections = (
    ("\nEnvironnement:", benchmark_conditions),
    ("\nParamètres de benchmark:", benchmark_params),
)
for section_title, section_values in report_sections:
    print(section_title)
    for entry_name, entry_value in section_values.items():
        print(f"  {entry_name}: {entry_value}")

# Save the conditions to a file for reproducibility.
# Values that are not plain JSON scalars are stored as their string form.
json_scalar_types = (str, int, float, bool, type(None))
benchmark_conditions_serializable = {}
for entry_name, entry_value in benchmark_conditions.items():
    if isinstance(entry_value, json_scalar_types):
        benchmark_conditions_serializable[entry_name] = entry_value
    else:
        benchmark_conditions_serializable[entry_name] = str(entry_value)

conditions_file = Path('benchmark_conditions.json')
conditions_payload = {
    "conditions": benchmark_conditions_serializable,
    "params": benchmark_params,
}
with open(conditions_file, 'w') as f:
    json.dump(conditions_payload, f, indent=2)

print(f"\nConditions sauvegardées dans {conditions_file}")

print("\n" + separator)
print("Résumé")
print(separator)

# Latency figures of the last benchmark run
print("\nBenchmark produit mean + p95 lisibles")
print(f"  - Mean: {stats['mean']:.4f} ms")
print(f"  - P95:  {stats['p95']:.4f} ms")

# Verdict on the run-to-run variation of P95
print("\nVariance acceptable (< 20% entre runs)")
cv_verdict = "ACCEPTÉ" if p95_cv < 20 else "ATTENTION (> 20%)"
print(f"  - CV: {p95_cv:.2f}% {cv_verdict}")

# Read the results CSV back to check that it is well formed
print("\nCSV bien formé (colonnes header)")
import pandas as pd
results_csv = 'results_j1_validation.csv'
df = pd.read_csv(results_csv)
print(f"  - Fichier: {results_csv}")
print(f"  - Colonnes: {list(df.columns)}")
print(f"  - Rows: {len(df)}")
print("  - Aperçu:")
print(df.to_string(index=False))

print("\n" + separator)
print(separator)