# FL-EHDS — HealthcareCNN Imaging Comparison

Confronto HealthcareCNN (~500K params) vs ResNet-18 (11.2M params) su dataset imaging.

- **27 esperimenti**: 3 algos × 3 datasets × 3 seeds
- **Tempo stimato**: ~1.5-2.5 ore su GPU T4/A100
- **Checkpoint**: salvataggio automatico ogni esperimento

**IMPORTANTE**: Seleziona GPU runtime: Runtime → Change runtime type → GPU

In [1]:
# === CELLA 1: Setup ===
!git clone https://github.com/FabioLiberti/FL-EHDS-FLICS2026.git
%cd FL-EHDS-FLICS2026/fl-ehds-framework
!pip install -q opacus>=1.4.0 scikit-learn scipy tqdm rich pydantic pyyaml

import torch
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("ATTENZIONE: Nessuna GPU! Vai su Runtime -> Change runtime type -> GPU")

Cloning into 'FL-EHDS-FLICS2026'...
remote: Enumerating objects: 1709, done.
remote: Counting objects: 100% (25/25), done.
remote: Compressing objects: 100% (21/21), done.
remote: Total 1709 (delta 8), reused 6 (delta 4), pack-reused 1684 (from 1)
Receiving objects: 100% (1709/1709), 54.68 MiB | 22.86 MiB/s, done.
Resolving deltas: 100% (959/959), done.
/content/FL-EHDS-FLICS2026/fl-ehds-framework
GPU: Tesla T4


AttributeError: 'torch._C._CudaDeviceProperties' object has no attribute 'total_mem'

In [2]:
# === CELL 2: Upload Dataset ===
# Mount Google Drive to access the datasets
from google.colab import drive
drive.mount('/content/drive')

# === EDIT THESE PATHS to match where your datasets live on Drive ===
import os, shutil

DRIVE_DATASETS = {
    "chest_xray": "/content/drive/MyDrive/FL-EHDS-FLICS2026/fl-ehds-framework/data/chest_xray",
    "Brain_Tumor": "/content/drive/MyDrive/FL-EHDS-FLICS2026/fl-ehds-framework/data/Brain_Tumor",
    "Skin Cancer": "/content/drive/MyDrive/FL-EHDS-FLICS2026/fl-ehds-framework/data/Skin Cancer",
}

IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def count_images(root):
    """Return the number of image files (by extension) found recursively under ``root``."""
    return sum(
        1
        for _dirpath, _dirnames, filenames in os.walk(root)
        for filename in filenames
        if filename.lower().endswith(IMAGE_EXTENSIONS)
    )


for name, drive_path in DRIVE_DATASETS.items():
    local_path = f"data/{name}"
    if os.path.exists(local_path):
        print(f"{name}: gia presente ({count_images(local_path)} immagini)")
    elif os.path.exists(drive_path):
        print(f"{name}: copio da Drive...")
        # Copy into a staging directory and rename it only once the copy has
        # finished. If the cell is interrupted mid-copy, data/<name> does not
        # exist yet, so the next run copies again instead of reporting a
        # half-copied dataset as already present.
        staging_path = f"{local_path}.partial"
        shutil.rmtree(staging_path, ignore_errors=True)
        shutil.copytree(drive_path, staging_path)
        os.rename(staging_path, local_path)
        print(f"{name}: OK ({count_images(local_path)} immagini)")
    else:
        print(f"{name}: NON TROVATO su Drive! Modifica DRIVE_DATASETS con il path corretto.")
        print(f"  Path cercato: {drive_path}")

Mounted at /content/drive
chest_xray: NON TROVATO su Drive! Modifica DRIVE_DATASETS con il path corretto.
  Path cercato: /content/drive/MyDrive/FL-EHDS-FLICS2026/fl-ehds-framework/data/chest-xray
Brain_Tumor: copio da Drive...
Brain_Tumor: OK (7023 immagini)
Skin Cancer: copio da Drive...


KeyboardInterrupt: 

In [3]:
# === CELLA 2 BIS: scarica correttamente e velocemente i db da kaggle ===

# Download ALL datasets from Kaggle (fast, single archive each)
!pip install -q kagglehub
import kagglehub, shutil, os, glob

os.environ['KAGGLE_API_TOKEN'] = 'KGAT_edd561c1bc682c9ad06930bacd164431'

# 1. Chest X-Ray (~2.3 GB)
if not os.path.exists('data/chest_xray/train'):
    print('Downloading Chest X-Ray...')
    cache_path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
    os.makedirs('data/chest_xray', exist_ok=True)
    for item in ['train', 'test', 'val']:
        src = os.path.join(cache_path, 'chest_xray', item)
        if not os.path.exists(src):
            src = os.path.join(cache_path, item)
        dst = f'data/chest_xray/{item}'
        if os.path.exists(src) and not os.path.exists(dst):
            shutil.copytree(src, dst)
    shutil.rmtree('data/chest_xray/__MACOSX', ignore_errors=True)
else:
    print('Chest X-Ray: gia presente')

# 2. Skin Cancer (~325 MB)
if not os.path.exists('data/Skin Cancer'):
    print('Downloading Skin Cancer...')
    cache_path = kagglehub.dataset_download("fanconic/skin-cancer-malignant-vs-benign")
    shutil.copytree(cache_path, 'data/Skin Cancer')
else:
    print('Skin Cancer: gia presente')

# 3. Brain Tumor (~250 MB)
if not os.path.exists('data/Brain_Tumor') or len(os.listdir('data/Brain_Tumor')) < 3:
    print('Downloading Brain Tumor...')
    cache_path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")
    os.makedirs('data/Brain_Tumor', exist_ok=True)
    for root, dirs, files in os.walk(cache_path):
        for d in dirs:
            d_lower = d.lower()
            if d_lower in ['glioma', 'meningioma', 'pituitary', 'notumor', 'no_tumor', 'healthy']:
                target = 'healthy' if d_lower in ['notumor', 'no_tumor'] else d_lower
                src = os.path.join(root, d)
                dst_dir = f'data/Brain_Tumor/{target}'
                if not os.path.exists(dst_dir):
                    shutil.copytree(src, dst_dir)
                else:
                    for f in os.listdir(src):
                        src_f, dst_f = os.path.join(src, f), os.path.join(dst_dir, f)
                        if os.path.isfile(src_f) and not os.path.exists(dst_f):
                            shutil.copy2(src_f, dst_f)
else:
    print('Brain Tumor: gia presente')

# Cleanup + verify
import subprocess
subprocess.run(['find', 'data/', '-name', '._*', '-delete'], capture_output=True)

print('\n=== Dataset Summary ===')
for name, path in [('Chest X-Ray', 'data/chest_xray'), ('Skin Cancer', 'data/Skin Cancer'), ('Brain Tumor', 'data/Brain_Tumor')]:
    if os.path.exists(path):
        count = sum(1 for _ in glob.iglob(f'{path}/**/*.*', recursive=True) if _.lower().endswith(('.jpg','.jpeg','.png')))
        print(f'  {name:15s}: {count:5d} images')
    else:
        print(f'  {name:15s}: MANCANTE!')



Downloading Chest X-Ray...
Using Colab cache for faster access to the 'chest-xray-pneumonia' dataset.
Skin Cancer: gia presente
Brain Tumor: gia presente

=== Dataset Summary ===
  Chest X-Ray    :  5856 images
  Skin Cancer    :  5410 images
  Brain Tumor    :  7023 images


In [4]:
# === CELL 3: Remove macOS ._ files (avoids warnings) ===
import subprocess
result = subprocess.run(['find', 'data/', '-name', '._*', '-delete'], capture_output=True, text=True)
# Only claim success when find actually succeeded (e.g. data/ may be missing).
if result.returncode == 0:
    print("File ._ macOS rimossi (evita warning durante training)")
else:
    print(f"Pulizia file ._ fallita (exit {result.returncode}): {result.stderr.strip()}")



In [5]:
# === CELLA 4: Script HealthcareCNN ===
%%writefile benchmarks/run_imaging_cnn_comparison.py
"""HealthcareCNN (~500K params) comparison on imaging datasets."""
import sys, json, time, gc, traceback
from pathlib import Path
from datetime import datetime

import numpy as np, torch

FRAMEWORK_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(FRAMEWORK_DIR))
from terminal.fl_trainer import ImageFederatedTrainer, _detect_device

# Experiment grid: every algorithm is run on every dataset with every seed.
ALGORITHMS = ["FedAvg", "Ditto", "HPFL"]
SEEDS = [42, 123, 456]

# Experiment key -> dataset folder and number of classes.
# Rows are (key, folder name under data/, num_classes).
DATASETS = {
    key: {"data_dir": str(FRAMEWORK_DIR / "data" / folder), "num_classes": n_classes}
    for key, folder, n_classes in (
        ("chest_xray", "chest_xray", 2),
        ("Brain_Tumor", "Brain_Tumor", 4),
        ("Skin_Cancer", "Skin Cancer", 2),
    )
}

# Keyword arguments shared by all trainer instances.
CONFIG = {
    "num_clients": 5,
    "num_rounds": 20,
    "local_epochs": 2,
    "batch_size": 32,
    "learning_rate": 0.001,
    "model_type": "cnn",
    "is_iid": False,
    "alpha": 0.5,
    "freeze_backbone": False,
    "freeze_level": 0,
    "use_fedbn": True,
    "use_class_weights": True,
    "use_amp": True,
    "mu": 0.1,
}

# Early-stopping settings handed to the trainer.
EARLY_STOPPING = {
    "enabled": True,
    "patience": 4,
    "min_delta": 0.003,
    "min_rounds": 8,
    "metric": "accuracy",
}

# Results directory (created on import) and the checkpoint file inside it.
OUTPUT = FRAMEWORK_DIR.joinpath("benchmarks", "paper_results_delta")
OUTPUT.mkdir(parents=True, exist_ok=True)
CKPT_FILE = OUTPUT.joinpath("checkpoint_imaging_cnn.json")

def load_checkpoint():
    """Return the checkpoint stored in CKPT_FILE, or a fresh one if the file is absent.

    A fresh checkpoint has an empty ``"completed"`` mapping and a ``"meta"``
    entry recording the start time and the model name.
    """
    if not CKPT_FILE.exists():
        fresh_meta = {"started": str(datetime.now()), "model": "HealthcareCNN"}
        return {"completed": {}, "meta": fresh_meta}
    with open(CKPT_FILE) as handle:
        stored = json.load(handle)
    return stored

def save_checkpoint(ckpt):
    """Serialize ``ckpt`` as JSON into CKPT_FILE via a temporary file.

    The data is written to a sibling ``.tmp`` file first and moved over
    CKPT_FILE only after the write has completed. Values that the JSON encoder
    does not support are converted with ``str`` (``default=str``).
    """
    tmp = CKPT_FILE.with_suffix('.tmp')
    with open(tmp, 'w') as f:
        json.dump(ckpt, f, indent=2, default=str)
    # Path.replace overwrites an existing target on every platform, whereas
    # Path.rename raises FileExistsError on Windows once a checkpoint exists.
    tmp.replace(CKPT_FILE)

def main():
    """Run every (dataset, algorithm, seed) experiment, checkpointing after each.

    Experiments already recorded in the checkpoint without an ``"error"`` entry
    are skipped, so re-running the script resumes where it stopped; experiments
    recorded with an error are retried. A per-dataset/algorithm accuracy table
    (mean and standard deviation over seeds) is printed at the end.
    """
    device = _detect_device()
    ckpt = load_checkpoint()
    experiments = [(ds, algo, seed) for ds in DATASETS for algo in ALGORITHMS for seed in SEEDS]
    total = len(experiments)
    done = sum(1 for v in ckpt["completed"].values() if "error" not in v)
    print("=== HealthcareCNN Imaging Comparison ===")
    print(f"Total: {total}, Already done: {done}, Remaining: {total - done}")
    print(f"Device: {device}\n")

    for ds, algo, seed in experiments:
        key = f"{ds}_{algo}_s{seed}"
        if key in ckpt["completed"] and "error" not in ckpt["completed"][key]:
            continue
        print(f"\n[{done+1}/{total}] {key}")
        t0 = time.time()
        trainer = None
        try:
            ds_info = DATASETS[ds]
            cfg = {**CONFIG, "num_classes": ds_info["num_classes"]}
            if ds == "Brain_Tumor":
                cfg["learning_rate"] = 0.0005
            np.random.seed(seed)
            torch.manual_seed(seed)
            # NOTE(review): the later revision of this script in this notebook
            # no longer passes num_rounds, num_classes, freeze_level or
            # early_stopping_config to the constructor -- confirm these kwargs
            # against ImageFederatedTrainer.__init__.
            trainer = ImageFederatedTrainer(
                data_dir=ds_info["data_dir"], algorithm=algo, seed=seed,
                early_stopping_config=EARLY_STOPPING, **cfg
            )
            result = trainer.train()
            elapsed = time.time() - t0
            ckpt["completed"][key] = {
                "accuracy": result.get("final_accuracy", 0),
                "best_accuracy": result.get("best_accuracy", 0),
                "rounds": result.get("rounds_completed", 0),
                "time_s": round(elapsed, 1),
                "model": "HealthcareCNN",
                "algorithm": algo,
                "dataset": ds,
                "seed": seed,
            }
            done += 1
            print(f"  Done: acc={ckpt['completed'][key]['best_accuracy']:.4f}, {elapsed:.0f}s")
        except Exception as e:
            # Record the failure and move on; the experiment is retried on the next run.
            ckpt["completed"][key] = {"error": str(e), "traceback": traceback.format_exc()}
            print(f"  ERROR: {e}")
        finally:
            # Drop the trainer and cached GPU memory after failures too, so a
            # failed experiment does not keep memory away from the next one.
            trainer = None
            gc.collect()
            torch.cuda.empty_cache()
        save_checkpoint(ckpt)

    print(f"\n=== COMPLETED: {done}/{total} ===")
    # Summary table
    print(f"\n{'Dataset':<15} {'Algorithm':<10} {'Acc (mean±std)'}")
    for ds in DATASETS:
        for algo in ALGORITHMS:
            accs = [ckpt['completed'].get(f'{ds}_{algo}_s{s}', {}).get('best_accuracy', 0) for s in SEEDS]
            # Missing or failed runs come back as 0 and are left out of the statistics.
            accs = [a for a in accs if a > 0]
            if accs:
                print(f"{ds:<15} {algo:<10} {np.mean(accs)*100:.1f} ± {np.std(accs)*100:.1f}%")

if __name__ == "__main__":
    main()

Writing benchmarks/run_imaging_cnn_comparison.py


In [7]:
# === sistema dipendenza
!pip install -q structlog cryptography grpcio aiohttp pydantic pyyaml


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/72.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.5/72.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
# Prima cancella il checkpoint con errori
!rm -f benchmarks/paper_results_delta/checkpoint_imaging_cnn.json


In [10]:
%%writefile benchmarks/run_imaging_cnn_comparison.py
"""HealthcareCNN (~500K params) comparison on imaging datasets."""
import sys, json, time, gc, traceback
from pathlib import Path
from datetime import datetime

import numpy as np, torch

FRAMEWORK_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(FRAMEWORK_DIR))
from terminal.fl_trainer import ImageFederatedTrainer, _detect_device

# Algorithms and seeds that make up the experiment grid.
ALGORITHMS = ["FedAvg", "Ditto", "HPFL"]
SEEDS = [42, 123, 456]


def _dataset_entry(folder, num_classes):
    """Build one DATASETS value from a folder name under data/ and its class count."""
    return {"data_dir": str(FRAMEWORK_DIR / "data" / folder), "num_classes": num_classes}


# Experiment key -> data directory and number of classes.
DATASETS = {
    "chest_xray": _dataset_entry("chest_xray", 2),
    "Brain_Tumor": _dataset_entry("Brain_Tumor", 4),
    "Skin_Cancer": _dataset_entry("Skin Cancer", 2),
}

# Results directory (created on import) and the checkpoint file inside it.
OUTPUT = FRAMEWORK_DIR.joinpath("benchmarks", "paper_results_delta")
OUTPUT.mkdir(parents=True, exist_ok=True)
CKPT_FILE = OUTPUT.joinpath("checkpoint_imaging_cnn.json")

def load_checkpoint():
    """Load the checkpoint from CKPT_FILE, falling back to a new empty checkpoint.

    The fallback contains no completed experiments and a ``"meta"`` entry with
    the current timestamp and the model name.
    """
    if CKPT_FILE.exists():
        with open(CKPT_FILE) as ckpt_fh:
            loaded = json.load(ckpt_fh)
        return loaded
    started_at = str(datetime.now())
    return {"completed": {}, "meta": {"started": started_at, "model": "HealthcareCNN"}}

def save_checkpoint(ckpt):
    """Write ``ckpt`` to CKPT_FILE as indented JSON, going through a ``.tmp`` file.

    The temporary file is moved over CKPT_FILE only after the dump has
    finished. Objects the JSON encoder cannot handle are stringified via
    ``default=str``.
    """
    tmp = CKPT_FILE.with_suffix('.tmp')
    with open(tmp, 'w') as f:
        json.dump(ckpt, f, indent=2, default=str)
    # Use replace rather than rename: rename fails with FileExistsError on
    # Windows when CKPT_FILE already exists, replace overwrites everywhere.
    tmp.replace(CKPT_FILE)

def _print_summary(ckpt):
    """Print mean and standard deviation of best accuracy over seeds per dataset/algorithm."""
    print(f"\n{'Dataset':<15} {'Algorithm':<10} {'Acc (mean +/- std)'}")
    print('-' * 45)
    for ds in DATASETS:
        for algo in ALGORITHMS:
            accs = [ckpt['completed'].get(f'{ds}_{algo}_s{s}', {}).get('best_accuracy', 0) for s in SEEDS]
            # Missing or failed runs come back as 0 and are left out of the statistics.
            accs = [a for a in accs if a > 0]
            if accs:
                print(f"{ds:<15} {algo:<10} {np.mean(accs)*100:.1f} +/- {np.std(accs)*100:.1f}%")


def main():
    """Run every (dataset, algorithm, seed) experiment, checkpointing after each.

    Experiments already stored in the checkpoint without an ``"error"`` entry
    are skipped, which makes the script resumable; experiments stored with an
    error are run again. A summary table is printed once the loop finishes.
    """
    device = _detect_device()
    ckpt = load_checkpoint()
    experiments = [(ds, algo, seed) for ds in DATASETS for algo in ALGORITHMS for seed in SEEDS]
    total = len(experiments)
    done = sum(1 for v in ckpt["completed"].values() if "error" not in v)
    print("=== HealthcareCNN Imaging Comparison ===")
    print(f"Total: {total}, Already done: {done}, Remaining: {total - done}")
    print(f"Device: {device}\n")

    for ds, algo, seed in experiments:
        key = f"{ds}_{algo}_s{seed}"
        if key in ckpt["completed"] and "error" not in ckpt["completed"][key]:
            continue
        print(f"\n[{done+1}/{total}] {key}")
        t0 = time.time()
        trainer = None
        try:
            ds_info = DATASETS[ds]
            # Brain_Tumor is trained with a lower learning rate than the other datasets.
            lr = 0.0005 if ds == "Brain_Tumor" else 0.001
            np.random.seed(seed)
            torch.manual_seed(seed)
            trainer = ImageFederatedTrainer(
                data_dir=ds_info["data_dir"],
                num_clients=5,
                algorithm=algo,
                local_epochs=2,
                batch_size=32,
                learning_rate=lr,
                model_type="cnn",
                is_iid=False,
                alpha=0.5,
                freeze_backbone=False,
                use_fedbn=True,
                use_class_weights=True,
                use_amp=True,
                mu=0.1,
                seed=seed,
            )
            # The round count is set on the instance rather than passed to the constructor.
            trainer.num_rounds = 20
            result = trainer.train()
            elapsed = time.time() - t0
            ckpt["completed"][key] = {
                "accuracy": result.get("final_accuracy", 0),
                "best_accuracy": result.get("best_accuracy", 0),
                "rounds": result.get("rounds_completed", 0),
                "time_s": round(elapsed, 1),
                "model": "HealthcareCNN",
                "algorithm": algo,
                "dataset": ds,
                "seed": seed,
            }
            done += 1
            print(f"  Done: acc={ckpt['completed'][key]['best_accuracy']:.4f}, {elapsed:.0f}s")
        except Exception as e:
            # Record the failure and continue; the experiment is retried on the next run.
            ckpt["completed"][key] = {"error": str(e), "traceback": traceback.format_exc()}
            print(f"  ERROR: {e}")
        finally:
            # Release the trainer and cached GPU memory on failure as well, so
            # one failed experiment does not starve the following ones.
            trainer = None
            gc.collect()
            torch.cuda.empty_cache()
        save_checkpoint(ckpt)

    print(f"\n=== COMPLETED: {done}/{total} ===")
    _print_summary(ckpt)

if __name__ == "__main__":
    main()


Overwriting benchmarks/run_imaging_cnn_comparison.py


In [None]:
# === CELL 5: Launch experiments ===
# Runs the benchmark script written by the %%writefile cell.
!python benchmarks/run_imaging_cnn_comparison.py


=== HealthcareCNN Imaging Comparison ===
Total: 27, Already done: 0, Remaining: 27
Device: cuda


[1/27] chest_xray_FedAvg_s42
Loading dataset from: /content/FL-EHDS-FLICS2026/fl-ehds-framework/data/chest_xray
Found 2 classes: ['NORMAL', 'PNEUMONIA']
  Found 5856 images to load


In [None]:
# === CELL 6: Download results ===
# Send the checkpoint JSON to the browser as a file download.
import google.colab.files
google.colab.files.download(
    'benchmarks/paper_results_delta/checkpoint_imaging_cnn.json'
)

In [None]:
# === CELL 7: Also save to Drive (backup) ===
# Copies the checkpoint JSON into the root of the mounted Google Drive.
from shutil import copy as _copy_file
_copy_file('benchmarks/paper_results_delta/checkpoint_imaging_cnn.json',
           '/content/drive/MyDrive/checkpoint_imaging_cnn.json')
print('Salvato su Google Drive!')