In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local fl_g13 package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:

from pathlib import Path

import flwr
import torch
from flwr.simulation import run_simulation
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
from torchvision import datasets

from fl_g13.architectures import BaseDino
from fl_g13.config import RAW_DATA_DIR
from fl_g13.editing import SparseSGDM
from fl_g13.fl_pytorch.client_app import get_client_app
from fl_g13.fl_pytorch.datasets import get_eval_transforms
from fl_g13.fl_pytorch.server_app import get_server_app
from fl_g13.modeling.eval import eval

[32m2025-06-04 00:12:39.228[0m | [1mINFO    [0m | [36mfl_g13.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13[0m


In [4]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
# (Assign DEVICE = "cpu" by hand to force CPU execution.)
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Report the selected device and the library versions used for this run.
print(f"Training on {DEVICE}")
print(f"Flower {flwr.__version__} / PyTorch {torch.__version__}")

Training on cuda
Flower 1.17.0 / PyTorch 2.6.0+cu118


# Log in to wandb

In [5]:
!pip install wandb




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
## Read the .env file (the cell output is the return value of load_dotenv())
import dotenv

dotenv.load_dotenv()


True

In [7]:
import os

import wandb

# Log in with the API key. The .env file is consulted first (as before); if the key
# is not listed there, fall back to the process environment so a key exported in the
# shell also works. Fail with an explicit message instead of a bare KeyError.
WANDB_API_KEY = dotenv.dotenv_values().get("WANDB_API_KEY") or os.environ.get("WANDB_API_KEY")
if not WANDB_API_KEY:
    raise RuntimeError(
        "WANDB_API_KEY is not set: add it to the .env file or export it in the environment."
    )
wandb.login(key=WANDB_API_KEY)

  return LooseVersion(v) >= LooseVersion(check)
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\ADMIN\_netrc
wandb: Currently logged in as: thanhnv-it23 (stefano-gamba-social-politecnico-di-torino) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

## Install the local package

Install the project as an editable package so that the ClientApp can import it.

In [8]:
!pip install -e ..

Obtaining file:///C:/Users/ADMIN/Desktop/BACKUP/study/Italy/polito/classes/20242/deep%20learning/project/source_code/fl-g13









  Installing build dependencies: started









  Installing build dependencies: finished with status 'done'









  Checking if build backend supports build_editable: started



[notice] A new release of pip is available: 25.0.1 -> 25.1.1

  Checking if build backend supports build_editable: finished with status 'done'





  Getting requirements to build editable: started

[notice] To update, run: python.exe -m pip install --upgrade pip



  Getting requirements to build editable: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: fl_g13
  Building editable for fl_g13 (pyproject.toml): started
  Building editable for fl_g13 (pyproject.toml): finished with status 'done'


### Download missing modules for clients

The DINO model, which is serialized and sent to the clients by the server, requires some modules that must be downloaded from the DINO source code.


In [9]:
import os
import urllib.request


def download_if_not_exists(file_path: str, file_url: str):
    """
    Ensure a file exists at `file_path`, downloading it from `file_url` if it is missing.

    The download is written to a temporary sibling file (`<file_path>.part`) and only
    moved to `file_path` once it has completed. An interrupted or failed download
    therefore never leaves a truncated file at `file_path` that a later call would
    report as already existing. Missing parent directories of `file_path` are created.

    Failures are reported on stdout and are not raised (best-effort behaviour).

    Parameters:
    - file_path (str): The local path to check and save the file.
    - file_url (str): The URL from which to download the file.
    """
    if os.path.exists(file_path):
        print(f"'{file_path}' already exists.")
        return

    print(f"'{file_path}' not found. Downloading from {file_url}...")
    partial_path = f"{file_path}.part"
    try:
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        urllib.request.urlretrieve(file_url, partial_path)
        # Publish the file only after the transfer finished successfully.
        os.replace(partial_path, file_path)
        print("Download complete.")
    except Exception as e:
        print(f"Failed to download file: {e}")
    finally:
        # Remove leftovers of an unsuccessful attempt; after a successful
        # os.replace the temporary file no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [10]:
# Fetch the two source files from the facebookresearch/dino repository
# (each one is skipped when it is already present in the working directory).
DINO_RAW_BASE_URL = "https://raw.githubusercontent.com/facebookresearch/dino/refs/heads/main"
for dino_file in ("vision_transformer.py", "utils.py"):
    download_if_not_exists(dino_file, f"{DINO_RAW_BASE_URL}/{dino_file}")


'vision_transformer.py' already exists.
'utils.py' already exists.


# FL

## Configs

In [11]:
# Debug switch: when True, the configuration cell turns off wandb logging and
# overrides `num_rounds` and `J` with its debug values.
DEBUG = True

In [12]:
# Model config

## Model Hyper-parameters
head_layers = 3
head_hidden_size = 512
dropout_rate = 0.0
unfreeze_blocks = 1

## Training Hyper-parameters
batch_size = 128
lr = 1e-3
momentum = 0.9
weight_decay = 1e-5
T_max = 8  # passed to CosineAnnealingLR
eta_min = 1e-5  # passed to CosineAnnealingLR

# FL config
K = 100  # number of simulated clients (also used as NUM_CLIENTS below)
C = 0.1  # fraction of clients sampled for training each round (fraction_fit)
J = 4  # local epochs per client
num_rounds = 30
partition_type = 'iid'

## Debug overrides: applied before anything is derived from these values, so that
## everything below (e.g. wandb_config) sees the effective settings.
if DEBUG:
    num_rounds = 4
    J = 4

## only for partition_type = 'shard'
num_shards_per_partition = 10

## Server App config
save_every = 1
fraction_fit = C  # Fraction of available clients sampled for training
fraction_evaluate = 0.1  # Sample 10% of available clients for evaluation
min_fit_clients = 10  # Never sample less than 10 clients for training
min_evaluate_clients = 5  # Never sample less than 5 clients for evaluation
min_available_clients = 10  # Wait until at least 10 clients are available
device = DEVICE
## checkpoints directory
current_path = Path.cwd()
model_save_path = current_path / f"../models/fl_dino_baseline/{partition_type}"
checkpoint_dir = model_save_path.resolve()
checkpoint_dir.mkdir(parents=True, exist_ok=True)

## Wandb config
use_wandb = not DEBUG  # debug runs are never logged to wandb
wandb_config = {
    # wandb param
    # The run name follows partition_type so runs are not mislabelled when it changes.
    'name': f'FL_Dino_Baseline_{partition_type}',
    'project_name': "FL_test_chart",
    # model config param
    "fraction_fit": fraction_fit,
    "lr": lr,
    "momentum": momentum,
    'partition_type': partition_type,
    'K': K,
    'C': C,
    'J': J,
}

# model editing config
model_editing = True
mask_type = 'global'
sparsity = 0.8
mask = None
model_editing_batch_size = 16

## simulation run config
NUM_CLIENTS = K  # single source of truth for the number of clients
MAX_PARALLEL_CLIENTS = 10


## Define model, optimizer, and loss function

In [13]:
from fl_g13.modeling import load_or_create

# Model: load the latest checkpoint found in `checkpoint_dir`.
# NOTE(review): presumably a fresh BaseDino is created when no checkpoint exists — confirm in load_or_create.
model, start_epoch = load_or_create(
    path=checkpoint_dir,
    model_class=BaseDino,
    model_config=None,
    optimizer=None,
    scheduler=None,
    device=device,
    verbose=True,
)

model.to(DEVICE)

# optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

# Create a dummy (all-ones) mask for SparseSGDM.
# Must be done AFTER the model is moved to the device so the mask tensors live on the same device.
init_mask = [torch.ones_like(p, device=p.device) for p in model.parameters()]

# Optimizer, scheduler, and loss function.
# Hyper-parameters come from the config cell instead of repeating the literals here,
# so editing the config actually changes the optimizer.
optimizer = SparseSGDM(
    model.parameters(),
    mask=init_mask,
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
criterion = torch.nn.CrossEntropyLoss()
scheduler = CosineAnnealingLR(
    optimizer=optimizer,
    T_max=T_max,
    eta_min=eta_min,
)

🔍 Loading checkpoint from C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13\models\fl_dino_baseline\iid\fl_fl_baseline_BaseDino_epoch_207.pth
📦 Model class in checkpoint: BaseDino
🔧 Model configuration: {'variant': 'dino_vits16', 'dropout_rate': 0.0, 'head_hidden_size': 512, 'head_layers': 3, 'num_classes': 100, 'unfreeze_blocks': 0, 'activation_fn': 'GELU', 'pretrained': True}


Using cache found in C:\Users\ADMIN/.cache\torch\hub\facebookresearch_dino_main
Using cache found in C:\Users\ADMIN/.cache\torch\hub\facebookresearch_dino_main


➡️ Moved model to device: cuda
✅ Loaded checkpoint from C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13\models\fl_dino_baseline\iid\fl_fl_baseline_BaseDino_epoch_207.pth, resuming at epoch 208


In [14]:
## Unfreeze blocks of the model for training
# NOTE(review): the config cell defines `unfreeze_blocks = 1`, which is not used here — confirm which value is intended.
num_blocks = 3 
model.unfreeze_blocks(num_blocks)

## Define the Client, Server Apps

In [15]:
# Build the Flower ClientApp from the objects and settings defined above.
client = get_client_app(
    # Training objects
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=DEVICE,
    # Local training schedule
    local_epochs=J,
    batch_size=batch_size,
    # Data partitioning
    partition_type=partition_type,
    num_shards_per_partition=num_shards_per_partition,
    # Model editing options
    model_editing=model_editing,
    mask_type=mask_type,
    sparsity=sparsity,
    mask=mask,
    # Logging
    verbose=0,
)

In [16]:
# Build the Flower ServerApp from the objects and settings defined above.
server = get_server_app(
    # Checkpointing
    checkpoint_dir=checkpoint_dir,
    prefix='fl_baseline',
    save_every=save_every,
    # Training objects
    # NOTE(review): `model_class` receives the model instance here, not a class — confirm this is what get_server_app expects.
    model_class=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    # Federated schedule and client sampling
    num_rounds=num_rounds,
    fraction_fit=fraction_fit,
    fraction_evaluate=fraction_evaluate,
    min_fit_clients=min_fit_clients,
    min_evaluate_clients=min_evaluate_clients,
    min_available_clients=min_available_clients,
    evaluate_each=2,
    # Experiment tracking
    use_wandb=use_wandb,
    wandb_config=wandb_config,
)

🔍 Loading checkpoint from C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13\models\fl_dino_baseline\iid\fl_fl_baseline_BaseDino_epoch_207.pth
📦 Model class in checkpoint: BaseDino
🔧 Model configuration: {'variant': 'dino_vits16', 'dropout_rate': 0.0, 'head_hidden_size': 512, 'head_layers': 3, 'num_classes': 100, 'unfreeze_blocks': 0, 'activation_fn': 'GELU', 'pretrained': True}


Using cache found in C:\Users\ADMIN/.cache\torch\hub\facebookresearch_dino_main


➡️ Moved model to device: cuda
✅ Loaded checkpoint from C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13\models\fl_dino_baseline\iid\fl_fl_baseline_BaseDino_epoch_207.pth, resuming at epoch 208


## Before training

Test the model's performance before fine-tuning

In [17]:
# testset = datasets.CIFAR100(RAW_DATA_DIR, train=False, download=True, transform=get_eval_transforms())
# testloader = DataLoader(testset, batch_size=32)

In [18]:
# test_loss, test_accuracy, _ = eval(testloader, model, criterion)
# test_loss, test_accuracy

## Run the training


In [17]:
# Resources reserved for each simulated client: always one CPU, plus one whole GPU
# when running on CUDA (no GPU otherwise).
# Refer to the Flower framework documentation for more details about simulations
# and how to set up the `backend_config`.
# NOTE(review): MAX_PARALLEL_CLIENTS from the config cell is not used here — confirm
# whether a fractional `num_gpus` was intended to let several clients share the GPU.
gpus_per_client = 1 if DEVICE == "cuda" else 0.0
backend_config = {"client_resources": {"num_cpus": 1, "num_gpus": gpus_per_client}}

In [None]:
# Run simulation: start the Flower simulation with the server and client apps
# built above, using NUM_CLIENTS supernodes and the per-client resources in backend_config.
run_simulation(
    server_app=server,
    client_app=client,
    num_supernodes=NUM_CLIENTS,
    backend_config=backend_config
)

[Server] Server on device: cuda:0
[Server] CUDA available in client: True
Using strategy 'CustomFedAvg' (default option)


[92mINFO [0m:      Starting Flower ServerApp, config: num_rounds=4, no round_timeout
[92mINFO [0m:      
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


[Server Eval Round 0] Model device: cuda:0
[Server Eval Round 0] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:39<00:00,  7.83batch/s]
[92mINFO [0m:      [Round 0] Centralized Evaluation - Loss: 1.0220, Metrics: {'centralized_accuracy': 0.7014}
[92mINFO [0m:      initial parameters (loss, other metrics): 1.0220128186404134, {'centralized_accuracy': 0.7014}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 100)
(ClientAppActor pid=35976) 2025-06-04 00:16:34.506 | INFO     | fl_g13.config:<module>:11 - PROJ_ROOT path is: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13


(ClientAppActor pid=35976) [Client] Client on device: cuda:0
(ClientAppActor pid=35976) [Client] CUDA available in client: True


Fisher Score:   0%|          | 0/25 [00:00<?, ?batch/s]


(ClientAppActor pid=35976) Computing simple global mask with target sparsity (0.80).
(ClientAppActor pid=35976) Round 1/1.
(ClientAppActor pid=35976) 	Current round density 0.20%
(ClientAppActor pid=35976) 	Computing the masked fisher score


Fisher Score:   4%|▍         | 1/25 [00:00<00:11,  2.18batch/s]
Fisher Score:  12%|█▏        | 3/25 [00:00<00:03,  6.35batch/s]
Fisher Score:  20%|██        | 5/25 [00:00<00:02,  9.63batch/s]
Fisher Score:  40%|████      | 10/25 [00:00<00:01, 14.65batch/s]
Fisher Score:  52%|█████▏    | 13/25 [00:01<00:00, 16.56batch/s]
Fisher Score:  64%|██████▍   | 16/25 [00:01<00:00, 17.58batch/s]
Fisher Score:  72%|███████▏  | 18/25 [00:01<00:00, 17.70batch/s]
Fisher Score:  80%|████████  | 20/25 [00:01<00:00, 18.21batch/s]
Fisher Score:  88%|████████▊ | 22/25 [00:01<00:00, 18.62batch/s]
Fisher Score: 100%|██████████| 25/25 [00:01<00:00, 14.78batch/s]


(ClientAppActor pid=35976) 	Updating the mask
(ClientAppActor pid=35976) No prefix/name for the model was provided, choosen prefix/name: bubbly_nidorina_19
(ClientAppActor pid=35976) 
(ClientAppActor pid=35976) Step 1/4 | Total batches: 4
(ClientAppActor pid=35976) Step 2/4 | Total batches: 4
(ClientAppActor pid=35976) Step 3/4 | Total batches: 4




(ClientAppActor pid=35976) Step 4/4 | Total batches: 4
(ClientAppActor pid=35976) 🚀 Epoch 1/4 (25.00%) Completed
(ClientAppActor pid=35976) 	📊 Training Loss: 0.7216
(ClientAppActor pid=35976) 	✅ Training Accuracy: 77.50%
(ClientAppActor pid=35976) 	⏳ Elapsed Time: 1.69s | ETA: 5.08s
(ClientAppActor pid=35976) 	🕒 Completed At: 00:16
(ClientAppActor pid=35976) 
(ClientAppActor pid=35976) Step 1/4 | Total batches: 4
(ClientAppActor pid=35976) Step 2/4 | Total batches: 4
(ClientAppActor pid=35976) Step 3/4 | Total batches: 4
(ClientAppActor pid=35976) Step 4/4 | Total batches: 4
(ClientAppActor pid=35976) 🚀 Epoch 2/4 (50.00%) Completed
(ClientAppActor pid=35976) 	📊 Training Loss: 0.9145
(ClientAppActor pid=35976) 	✅ Training Accuracy: 78.00%
(ClientAppActor pid=35976) 	⏳ Elapsed Time: 1.71s | ETA: 3.42s
(ClientAppActor pid=35976) 	🕒 Completed At: 00:16
(ClientAppActor pid=35976) 
(ClientAppActor pid=35976) Step 1/4 | Total batches: 4
(ClientAppActor pid=35976) Step 2/4 | Total batches: 4
(

Fisher Score:   0%|          | 0/25 [00:00<?, ?batch/s]


(ClientAppActor pid=35976) Computing simple global mask with target sparsity (0.80).
(ClientAppActor pid=35976) Round 1/1.
(ClientAppActor pid=35976) 	Current round density 0.20%
(ClientAppActor pid=35976) 	Computing the masked fisher score


Fisher Score:   8%|▊         | 2/25 [00:00<00:02,  9.66batch/s]
Fisher Score:  12%|█▏        | 3/25 [00:00<00:02,  9.62batch/s]
Fisher Score:  16%|█▌        | 4/25 [00:00<00:02,  9.62batch/s]
Fisher Score:  28%|██▊       | 7/25 [00:00<00:01, 14.25batch/s]
Fisher Score:  40%|████      | 10/25 [00:00<00:00, 16.50batch/s]
Fisher Score:  56%|█████▌    | 14/25 [00:00<00:00, 17.92batch/s]
Fisher Score:  68%|██████▊   | 17/25 [00:01<00:00, 18.75batch/s]
Fisher Score:  88%|████████▊ | 22/25 [00:01<00:00, 19.25batch/s]
Fisher Score: 100%|██████████| 25/25 [00:01<00:00, 17.02batch/s]


(ClientAppActor pid=35976) 	Updating the mask
(ClientAppActor pid=35976) No prefix/name for the model was provided, choosen prefix/name: happy_arbok_52
(ClientAppActor pid=35976) 
(ClientAppActor pid=35976) Step 1/4 | Total batches: 4
(ClientAppActor pid=35976) Step 2/4 | Total batches: 4
(ClientAppActor pid=35976) Step 3/4 | Total batches: 4
