# FL-EHDS Paper Experiments — Colab GPU Runner

Run all imaging experiments on Colab GPU (T4/A100) with optimized config:
- **FedBN**: skip norm layers during aggregation
- **Partial freeze** (level 1): only conv1+bn1 frozen
- **Cosine LR** scheduling
- **3 local epochs**, **20 rounds**
- **Algorithms**: FedAvg, FedLC, FedSAM, FedDecorr, FedExP

**Estimated time**: 3-4h on T4, 1.5-2h on A100

## 1. Setup

In [None]:
# Mount Google Drive at /content/drive. The framework copy step in the next
# cell reads its source tree from a folder under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy framework code from Drive to the local Colab disk, excluding the
# top-level data/ folder (~4GB); data/ is exposed through a symlink instead.
import os
import shutil

DRIVE_FW = '/content/drive/MyDrive/FL-EHDS-FLICS2026/fl-ehds-framework'
FRAMEWORK_DIR = '/content/fl-ehds-framework'


def _ignore_top_level_data(src_dir, names):
    """Return the entries copytree should skip inside *src_dir*.

    Only the ``data`` entry directly under DRIVE_FW is skipped.
    ``shutil.ignore_patterns('data')`` would also drop every nested file or
    folder named ``data``, which could silently remove framework code.
    """
    at_root = os.path.abspath(src_dir) == os.path.abspath(DRIVE_FW)
    if at_root and 'data' in names:
        return ['data']
    return []


local_data = os.path.join(FRAMEWORK_DIR, 'data')
# A local copy without a reachable data/ entry is treated as stale/broken.
needs_copy = not (os.path.exists(FRAMEWORK_DIR) and os.path.exists(local_data))

if needs_copy:
    # Check the Drive source BEFORE deleting anything: if Drive is not mounted
    # the data/ symlink looks broken, and removing the local copy would leave
    # nothing behind while copytree fails.
    if not os.path.isdir(DRIVE_FW):
        raise FileNotFoundError(
            f'Drive source not found: {DRIVE_FW} (is Google Drive mounted?)'
        )

    # If an old/broken copy exists, remove it
    if os.path.exists(FRAMEWORK_DIR):
        print('Rimuovo copia incompleta precedente...')
        shutil.rmtree(FRAMEWORK_DIR)

    print('Copio codice framework (senza data/)...')
    shutil.copytree(DRIVE_FW, FRAMEWORK_DIR, ignore=_ignore_top_level_data)
    # Symlink to data/ on Drive (no copy, direct access)
    os.symlink(os.path.join(DRIVE_FW, 'data'), local_data)
    print('Symlink data/ -> Drive (nessuna copia dei dataset)')
else:
    print('Framework già presente')

# Sanity checks: both the code tree and the data/ entry must be reachable.
assert os.path.exists(FRAMEWORK_DIR), f'Framework non trovato in {FRAMEWORK_DIR}'
assert os.path.exists(os.path.join(FRAMEWORK_DIR, 'data')), 'data/ non accessibile'
print(f'Framework OK: {FRAMEWORK_DIR}')

In [None]:
# Install dependencies into the Colab runtime with pip (-q reduces pip's output).
# No versions are pinned, so pip resolves whatever is current at run time.
!pip install -q torch torchvision scipy opacus tqdm rich \
    structlog scikit-learn cryptography pyyaml pydantic numpy matplotlib

In [None]:
# Verify GPU: report the PyTorch version and, when CUDA is usable, the name
# and memory size of device 0.
import torch

has_cuda = torch.cuda.is_available()
print(f'PyTorch: {torch.__version__}')
print(f'CUDA available: {has_cuda}')

if not has_cuda:
    print('WARNING: No GPU detected. Go to Runtime > Change runtime type > GPU')
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f'GPU: {gpu_name}')
    props = torch.cuda.get_device_properties(0)
    # Prefer `total_memory`; fall back to `total_mem` (default 0) when the
    # first attribute is missing or falsy.
    vram = getattr(props, 'total_memory', None)
    if not vram:
        vram = getattr(props, 'total_mem', 0)
    print(f'VRAM: {vram / 1e9:.1f} GB')

In [None]:
# Verify datasets: for every expected dataset folder under data/, print the
# number of files found recursively, or where to upload it if it is missing.
# NOTE(review): this list spells 'Skin Cancer' with a space, while the
# checkpoint-cleaning cell and the --dataset flag use 'Skin_Cancer' - confirm
# which spelling matches the folder on Drive.
data_dir = os.path.join(FRAMEWORK_DIR, 'data')
datasets = ['Brain_Tumor', 'chest_xray', 'Skin Cancer']
for ds_name in datasets:
    ds_path = os.path.join(data_dir, ds_name)
    if not os.path.exists(ds_path):
        print(f'  {ds_name}: NOT FOUND at {ds_path}')
        print(f'    Upload dataset to {ds_path}')
        continue
    # Walk the whole tree and accumulate the per-directory file counts.
    file_count = 0
    for _root, _subdirs, filenames in os.walk(ds_path):
        file_count += len(filenames)
    print(f'  {ds_name}: {file_count} files')

## 2. Run Experiments (Colab profile)

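Experiments are organised in micro-batches (1 dataset x 1 algorithm x 3 seeds); the cells below run either the full imaging sweep or one dataset at a time.
Run the cells sequentially — a checkpoint is saved after each experiment, and every command passes `--resume`.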

In [None]:
# Clean imaging checkpoint (fresh start with Colab config): drop every
# completed entry whose key starts with an imaging dataset name and write the
# checkpoint back in place. All other entries are kept.
import json

ckpt_path = os.path.join(FRAMEWORK_DIR, 'benchmarks', 'paper_results', 'checkpoint_p12_multidataset.json')
if os.path.exists(ckpt_path):
    with open(ckpt_path, encoding='utf-8') as f:
        ckpt = json.load(f)
    imaging_ds = ['Brain_Tumor', 'chest_xray', 'Skin_Cancer']
    # Use one tolerant lookup for both the filtering and the final count, so a
    # checkpoint without a 'completed' section does not raise KeyError.
    completed = ckpt.get('completed', {})
    # str.startswith accepts a tuple of prefixes.
    to_rm = [k for k in completed if k.startswith(tuple(imaging_ds))]
    for k in to_rm:
        del completed[k]
    with open(ckpt_path, 'w', encoding='utf-8') as f:
        json.dump(ckpt, f, indent=2, default=str)
    print(f'Removed {len(to_rm)} old imaging results, kept {len(completed)} tabular results')
else:
    print('No checkpoint found, starting fresh')

In [None]:
# Run ALL imaging experiments with Colab profile in a single invocation.
# This runs: 3 datasets x 5 algorithms x 3 seeds = 45 experiments
# Estimated: ~3-4h on T4, ~1.5-2h on A100
# %cd switches the working directory to the framework copy first, so that
# `python -m benchmarks...` is launched from the framework root.
%cd {FRAMEWORK_DIR}
!python -m benchmarks.run_paper_experiments --colab --resume --only p12

### Alternative: run one dataset at a time (if timeout concerns)

In [None]:
# Brain_Tumor only (~1h on T4).
# Same command as the full run, restricted with --dataset Brain_Tumor.
%cd {FRAMEWORK_DIR}
!python -m benchmarks.run_paper_experiments --colab --resume --only p12 --dataset Brain_Tumor

In [None]:
# chest_xray only (~1.5h on T4).
# Same command as the full run, restricted with --dataset chest_xray.
%cd {FRAMEWORK_DIR}
!python -m benchmarks.run_paper_experiments --colab --resume --only p12 --dataset chest_xray

In [None]:
# Skin_Cancer only (~1h on T4).
# Same command as the full run, restricted with --dataset Skin_Cancer.
%cd {FRAMEWORK_DIR}
!python -m benchmarks.run_paper_experiments --colab --resume --only p12 --dataset Skin_Cancer

## 3. Verify Results

In [None]:
# Verify results: load the checkpoint, count successful vs failed entries,
# print per-dataset accuracy statistics and list any recorded errors.
import json
from collections import defaultdict

import numpy as np

ckpt_path = os.path.join(FRAMEWORK_DIR, 'benchmarks', 'paper_results', 'checkpoint_p12_multidataset.json')
if not os.path.exists(ckpt_path):
    # Same exception type open() would raise, but with an actionable message.
    raise FileNotFoundError(
        f'No checkpoint at {ckpt_path} - run the experiments in section 2 first'
    )
with open(ckpt_path, encoding='utf-8') as f:
    results = json.load(f)

completed = results.get('completed', {})
# An entry counts as failed when it carries an 'error' field.
errors = {k: v for k, v in completed.items() if 'error' in v}
good = {k: v for k, v in completed.items() if 'error' not in v}

print(f'Total: {len(completed)} | OK: {len(good)} | Errors: {len(errors)}')
print('Target: 75 (30 tabular + 45 imaging)')
print()

# Summary by dataset
by_ds = defaultdict(list)
for k, v in good.items():
    # Approximate grouping: drop the last two '_'-separated fields of the key.
    # Presumably these are algorithm and seed - verify against the runner's key format.
    ds = k.rsplit('_', 2)[0]
    acc = v.get('final_metrics', {}).get('accuracy', 0)
    best = v.get('best_metrics', {}).get('accuracy', acc)
    # Report the better of the final and best accuracy for each run.
    by_ds[ds].append(max(acc, best))

for ds, accs in sorted(by_ds.items()):
    print(f'{ds:20s}: n={len(accs):2d}  acc={np.mean(accs):.3f} ± {np.std(accs):.3f}  '
          f'[{np.min(accs):.3f} - {np.max(accs):.3f}]')

if errors:
    print('\nErrors:')
    for k, v in errors.items():
        # str() keeps the truncation safe when the stored error is not a string.
        print(f'  {k}: {str(v["error"])[:80]}')

## 4. Download Results

In [None]:
# Copy every .json file from the local paper_results folder to a results
# folder on Google Drive, then print the follow-up command to run locally.
import shutil

results_dir = os.path.join(FRAMEWORK_DIR, 'benchmarks', 'paper_results')
drive_dest = '/content/drive/MyDrive/FL-EHDS-results'
os.makedirs(drive_dest, exist_ok=True)

json_names = [name for name in os.listdir(results_dir) if name.endswith('.json')]
for name in json_names:
    src_file = os.path.join(results_dir, name)
    # copy2 also tries to preserve file metadata.
    shutil.copy2(src_file, drive_dest)
    print(f'Copied: {name}')

print(f'\nResults saved to: {drive_dest}')
print('Download from Google Drive to local machine, then run:')
print('  python -m benchmarks.run_paper_experiments --only output')