In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell executes (no kernel restart needed
# while iterating on the project package).
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from fl_g13.editing.sparseSGDM import SparseSGDM
from torch.nn import CrossEntropyLoss

import flwr
from flwr.simulation import run_simulation
from fl_g13.architectures import BaseDino
from fl_g13.fl_pytorch import get_client_app, get_server_app
from fl_g13.fl_pytorch import build_fl_dependencies


# Record the Flower and PyTorch versions this run was executed with.
print(f"Flower {flwr.__version__} / PyTorch {torch.__version__}")

# Keep this call in every notebook: it downloads the DINO dependencies that the
# clients need (the output reports files that are already present).
build_fl_dependencies()

[32m2025-05-28 11:26:47.763[0m | [1mINFO    [0m | [36mfl_g13.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/massimiliano/Projects/fl-g13[0m


Flower 1.17.0 / PyTorch 2.6.0+cu124
'vision_transformer.py' already exists.
'utils.py' already exists.


In [3]:
# Checkpoint saving settings
import os  # cell-local import: only needed to make the checkpoint location overridable

# Directory handed to the server app for checkpoints. Set the
# FL_G13_CHECKPOINT_DIR environment variable to run on another machine without
# editing the notebook; an unset or empty variable falls back to the path used
# for the recorded run.
# NOTE(review): fl_g13.config logs a PROJ_ROOT path on import - deriving this
# directory from it would remove the user-specific default; confirm that the
# constant is importable before switching.
CHECKPOINT_DIR = (
    os.environ.get("FL_G13_CHECKPOINT_DIR")
    or "/home/massimiliano/Projects/fl-g13/checkpoints"
)
name = 'aron'  # passed to the server app as the checkpoint `prefix`
save_with_model_dir = False
save_every = 1

# Model hyper-parameters (forwarded to the BaseDino constructor)
head_layers = 3
head_hidden_size = 512
dropout_rate = 0.0
unfreeze_blocks = 1

# Training hyper-parameters
starting_lr = 1e-3
momentum = 0.9
weight_decay = 1e-5
# NOTE(review): T_max and eta_min are not referenced by the cells visible in
# this notebook (no scheduler object is built) - presumably kept for a cosine
# schedule; confirm whether they are still needed.
T_max = 8
eta_min = 1e-5

# Federated Training settings
batch_size = 64  # Batch size for training #! Let's stick to 64 to make training fit also on RTX 3070
local_epochs = 1  # Number of local epochs per client
number_of_rounds = 5  # Total number of federated learning rounds
fraction_fit = 1  # Fraction of clients participating in training per round
fraction_evaluate = 0.1  # Fraction of clients participating in evaluation per round
number_of_clients = 2  # Total number of clients in the simulation
min_num_clients = 2  # Minimum number of clients required for training and evaluation
partition_type = "iid"  # Partitioning strategy for the dataset (e.g., "iid" or "shard")
num_shards_per_partition = 6  # Number of shards per partition (used when partition_type is "shard")
use_wandb = False  # Whether to use Weights & Biases (wandb) for experiment tracking
wandb_config = None

# Device settings
device = "cuda" if torch.cuda.is_available() else "cpu"

# Per-client resources for the Flower simulation backend.
# When running on GPU, assign an entire GPU for each client; on CPU no GPU is
# requested. Refer to the Flower framework documentation for more details about
# Flower simulations and how to set up the `backend_config`.
backend_config = {
    "client_resources": {
        "num_cpus": 1,
        "num_gpus": 1 if device == "cuda" else 0,
    }
}

print(f"Training on {device}")

Training on cuda


In [4]:
# Model: instantiate BaseDino from the configured hyper-parameters and move it
# to the selected device.
model = BaseDino(
    head_layers=head_layers,
    head_hidden_size=head_hidden_size,
    dropout_rate=dropout_rate,
    unfreeze_blocks=unfreeze_blocks,
)
model.to(device)

# One all-ones mask tensor per model parameter. `ones_like` places each mask on
# its parameter's device, so this must be done AFTER the model is moved to CUDA.
mask = [torch.ones_like(param) for param in model.parameters()]

optimizer = SparseSGDM(
    params=model.parameters(),
    mask=mask,
    lr=starting_lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
scheduler = None
criterion = CrossEntropyLoss()

# Arguments that the client and the server app must agree on; defined once so
# the two calls below cannot drift apart.
shared_app_kwargs = dict(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    strategy='scheduling-lr',
)

# Client side: data partitioning, local training and model-editing options.
client_app = get_client_app(
    model=model,
    partition_type=partition_type,
    batch_size=batch_size,
    num_shards_per_partition=num_shards_per_partition,
    local_epochs=local_epochs,
    model_editing=True,
    mask_type='global',
    sparsity=0.1,
    **shared_app_kwargs,
)

# Server side: checkpointing, client-sampling thresholds and experiment tracking.
server_app = get_server_app(
    checkpoint_dir=CHECKPOINT_DIR,
    prefix=name,
    model_class=type(model),
    model_config=model.get_config(),
    save_every=save_every,
    save_with_model_dir=save_with_model_dir,
    num_rounds=number_of_rounds,
    fraction_fit=fraction_fit,
    fraction_evaluate=fraction_evaluate,
    min_fit_clients=min_num_clients,
    min_evaluate_clients=min_num_clients,
    min_available_clients=number_of_clients,
    use_wandb=use_wandb,
    wandb_config=wandb_config,
    **shared_app_kwargs,
)

Using cache found in /home/massimiliano/.cache/torch/hub/facebookresearch_dino_main


⚠️ No checkpoint found at /home/massimiliano/Projects/fl-g13/checkpoints. Creating a new model.


Using cache found in /home/massimiliano/.cache/torch/hub/facebookresearch_dino_main


### Pre-train the model (head)

In [5]:
# Launch the Flower simulation with one supernode per configured client, using
# the apps and per-client resource settings defined in the cells above.
run_simulation(
    server_app=server_app,
    client_app=client_app,
    backend_config=backend_config,
    num_supernodes=number_of_clients,
)

[Server] Server on device: cuda:0
[Server] CUDA available in client: True


[92mINFO [0m:      Starting Flower ServerApp, config: num_rounds=5, no round_timeout
[92mINFO [0m:      
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Using strategy 'LRUpdateFedAvg'
[Server Eval Round 0] Model device: cuda:0
[Server Eval Round 0] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:22<00:00, 13.67batch/s]
[92mINFO [0m:      [Round 0] Centralized Evaluation - Loss: 6.5382, Metrics: {'centralized_accuracy': 0.01}
[92mINFO [0m:      initial parameters (loss, other metrics): 6.538191083901987, {'centralized_accuracy': 0.01}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 2 clients (out of 2)


Entered server configure_fit()


[36m(ClientAppActor pid=39972)[0m 2025-05-28 11:27:15.104 | INFO     | fl_g13.config:<module>:11 - PROJ_ROOT path is: /home/massimiliano/Projects/fl-g13


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.001
[36m(ClientAppActor pid=39972)[0m First time participating in sparse training
[36m(ClientAppActor pid=39972)[0m Fine-tuning classification head
[36m(ClientAppActor pid=39972)[0m ⚠️ No checkpoint found at . Creating a new model.


[36m(ClientAppActor pid=39972)[0m Using cache found in /home/massimiliano/.cache/torch/hub/facebookresearch_dino_main


[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: cheeky_nidorino_26
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 2.7973
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 33.49%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 52.15s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:28
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: sleepy_weedle_81
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 1.4062
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 60.16%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 49.35s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:29


Fisher Score:   0%|          | 0/1250 [00:00<?, ?batch/s]
Fisher Score:   0%|          | 3/1250 [00:00<00:42, 29.49batch/s]
Fisher Score:   1%|          | 7/1250 [00:00<00:41, 29.88batch/s]
Fisher Score:   1%|          | 10/1250 [00:00<00:41, 29.90batch/s]
Fisher Score:   1%|          | 14/1250 [00:00<00:40, 30.18batch/s]
Fisher Score:   1%|▏         | 18/1250 [00:00<00:40, 30.51batch/s]
Fisher Score:   2%|▏         | 22/1250 [00:00<00:40, 30.56batch/s]
Fisher Score:   2%|▏         | 26/1250 [00:00<00:39, 30.64batch/s]
Fisher Score:   2%|▏         | 30/1250 [00:00<00:39, 30.57batch/s]
Fisher Score:   3%|▎         | 34/1250 [00:01<00:39, 30.68batch/s]
Fisher Score:   3%|▎         | 38/1250 [00:01<00:39, 30.45batch/s]
Fisher Score:   3%|▎         | 42/1250 [00:01<00:39, 30.28batch/s]
Fisher Score:   4%|▎         | 46/1250 [00:01<00:39, 30.11batch/s]
Fisher Score:   4%|▍         | 50/1250 [00:01<00:39, 30.12batch/s]
Fisher Score:   4%|▍         | 54/1250 [00:01<00:39, 30.02batch/s]
Fisher

[36m(ClientAppActor pid=39972)[0m 	Updating the mask
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: jazzy_kakuna_59
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 2.5035
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 40.94%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 53.07s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:30
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.001
[36m(ClientAppActor pid=39972)[0m First time participating in sparse training
[36m(ClientAppActor pid=39972)[0m Fine-tuning classification head
[36m(ClientAppActor pid=39972)[0m ⚠️ No checkpoint found at . Crea

[36m(ClientAppActor pid=39972)[0m Using cache found in /home/massimiliano/.cache/torch/hub/facebookresearch_dino_main


[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: giddy_nidorino_44
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 2.8124
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 33.66%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 47.85s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:31
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: nutty_sandshrew_63
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 1.4079
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 60.48%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 49.09s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:32


Fisher Score:   0%|          | 0/1250 [00:00<?, ?batch/s]
Fisher Score:   0%|          | 4/1250 [00:00<00:34, 36.00batch/s]
Fisher Score:   1%|          | 8/1250 [00:00<00:36, 33.60batch/s]
Fisher Score:   1%|          | 12/1250 [00:00<00:38, 32.53batch/s]
Fisher Score:   1%|▏         | 16/1250 [00:00<00:37, 32.64batch/s]
Fisher Score:   2%|▏         | 20/1250 [00:00<00:37, 32.43batch/s]
Fisher Score:   2%|▏         | 24/1250 [00:00<00:37, 32.46batch/s]
Fisher Score:   2%|▏         | 28/1250 [00:00<00:37, 32.57batch/s]
Fisher Score:   3%|▎         | 32/1250 [00:00<00:37, 32.31batch/s]
Fisher Score:   3%|▎         | 36/1250 [00:01<00:37, 32.50batch/s]
Fisher Score:   3%|▎         | 40/1250 [00:01<00:37, 32.51batch/s]
Fisher Score:   4%|▎         | 44/1250 [00:01<00:37, 32.34batch/s]
Fisher Score:   4%|▍         | 48/1250 [00:01<00:37, 32.49batch/s]
Fisher Score:   4%|▍         | 52/1250 [00:01<00:36, 32.58batch/s]
Fisher Score:   4%|▍         | 56/1250 [00:01<00:36, 32.46batch/s]
Fisher

[36m(ClientAppActor pid=39972)[0m 	Updating the mask
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: sassy_sandshrew_71
[36m(ClientAppActor pid=39972)[0m 


Fisher Score: 100%|██████████| 1250/1250 [00:38<00:00, 32.32batch/s]


[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 2.5193
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 40.78%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 52.32s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:33
[36m(ClientAppActor pid=39972)[0m 


[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures
[92mINFO [0m:      [Round 1] Avg Drift: 0.0001 | Relative Drift: 0.0000
[92mINFO [0m:      [Round 1] Saving aggregated model at epoch 1...


💾 Saved checkpoint at: /home/massimiliano/Projects/fl-g13/checkpoints/fl_aron_BaseDino_epoch_1.pth
[Server Eval Round 1] Model device: cuda:0
[Server Eval Round 1] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:22<00:00, 14.13batch/s]
[92mINFO [0m:      [Round 1] Centralized Evaluation - Loss: 1.3333, Metrics: {'centralized_accuracy': 0.6272}
[92mINFO [0m:      fit progress: (1, 1.3333409038214639, {'centralized_accuracy': 0.6272}, 426.93273506199876)
[92mINFO [0m:      configure_evaluate: strategy sampled 2 clients (out of 2)


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:11,  7.04batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:11,  6.69batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.77batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:11,  6.71batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:10,  6.84batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:10,  6.77batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:10,  6.84batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.81batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.87batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.80batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:09,  6.86batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:09,  6.78batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:09,  6.86batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.81batch/s]
Eval progress:  19%|█▉      

[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:11,  6.62batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:11,  6.52batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.58batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:11,  6.58batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:11,  6.60batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:11,  6.59batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:10,  6.59batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.58batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.58batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.59batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:10,  6.59batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:10,  6.59batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:10,  6.59batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.62batch/s]
Eval progress:  19%|█▉      

Entered server configure_fit()
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.0009998303758629009
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: perky_sandslash_65
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 1.1848
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 65.94%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 52.67s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:35
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.000999830375

[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures
[92mINFO [0m:      [Round 2] Avg Drift: 0.0001 | Relative Drift: 0.0000
[92mINFO [0m:      [Round 2] Saving aggregated model at epoch 2...


💾 Saved checkpoint at: /home/massimiliano/Projects/fl-g13/checkpoints/fl_aron_BaseDino_epoch_2.pth
[Server Eval Round 2] Model device: cuda:0
[Server Eval Round 2] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:23<00:00, 13.50batch/s]
[92mINFO [0m:      [Round 2] Centralized Evaluation - Loss: 1.0122, Metrics: {'centralized_accuracy': 0.7061}
[92mINFO [0m:      fit progress: (2, 1.012247834247522, {'centralized_accuracy': 0.7061}, 582.1679644669985)
[92mINFO [0m:      configure_evaluate: strategy sampled 2 clients (out of 2)


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:13,  5.95batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:13,  5.87batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:12,  5.89batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:12,  6.01batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:12,  6.14batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:11,  6.18batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:11,  6.26batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:11,  6.30batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:11,  6.34batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.36batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:10,  6.36batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:10,  6.36batch/s]
Eval progress:  16%|█▋        | 13/79 [00:02<00:10,  6.36batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:10,  6.37batch/s]
Eval progress:  19%|█▉      

[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:11,  6.53batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:11,  6.59batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.57batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:11,  6.64batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:11,  6.61batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:11,  6.63batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:10,  6.62batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.64batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.64batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.63batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:10,  6.62batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:10,  6.56batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:10,  6.58batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.58batch/s]
Eval progress:  19%|█▉      

Entered server configure_fit()
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.000999321619703514
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: giddy_wartortle_32
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 0.8991
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 73.43%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 52.66s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:38
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.0009993216197

[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures


[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 0.8915
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 73.83%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 52.71s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:39
[36m(ClientAppActor pid=39972)[0m 


[92mINFO [0m:      [Round 3] Avg Drift: 0.0001 | Relative Drift: 0.0000
[92mINFO [0m:      [Round 3] Saving aggregated model at epoch 3...


💾 Saved checkpoint at: /home/massimiliano/Projects/fl-g13/checkpoints/fl_aron_BaseDino_epoch_3.pth
[Server Eval Round 3] Model device: cuda:0
[Server Eval Round 3] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:23<00:00, 13.26batch/s]
[92mINFO [0m:      [Round 3] Centralized Evaluation - Loss: 0.9112, Metrics: {'centralized_accuracy': 0.741}
[92mINFO [0m:      fit progress: (3, 0.9111603402291624, {'centralized_accuracy': 0.741}, 738.2744826549988)
[92mINFO [0m:      configure_evaluate: strategy sampled 2 clients (out of 2)


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:13,  5.86batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:13,  5.91batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:12,  5.87batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:12,  5.87batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:12,  5.88batch/s]
Eval progress:   8%|▊         | 6/79 [00:01<00:12,  5.89batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:12,  5.91batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:12,  5.90batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:11,  5.89batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:11,  5.95batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:11,  5.96batch/s]
Eval progress:  15%|█▌        | 12/79 [00:02<00:11,  5.92batch/s]
Eval progress:  16%|█▋        | 13/79 [00:02<00:11,  5.92batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:10,  5.94batch/s]
Eval progress:  19%|█▉      

[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:13,  5.94batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:13,  5.85batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:12,  5.85batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:12,  5.84batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:12,  5.90batch/s]
Eval progress:   8%|▊         | 6/79 [00:01<00:12,  5.88batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:12,  5.90batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:12,  5.90batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:11,  5.89batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:11,  5.92batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:11,  5.90batch/s]
Eval progress:  15%|█▌        | 12/79 [00:02<00:11,  5.89batch/s]
Eval progress:  16%|█▋        | 13/79 [00:02<00:11,  5.91batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:10,  5.91batch/s]
Eval progress:  19%|█▉      

Entered server configure_fit()
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.0009984740801978985
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: peppy_nidoqueen_78
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 0.7319
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 78.06%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 58.02s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:40
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.000998474080

[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures
[92mINFO [0m:      [Round 4] Avg Drift: 0.0001 | Relative Drift: 0.0000
[92mINFO [0m:      [Round 4] Saving aggregated model at epoch 4...


💾 Saved checkpoint at: /home/massimiliano/Projects/fl-g13/checkpoints/fl_aron_BaseDino_epoch_4.pth
[Server Eval Round 4] Model device: cuda:0
[Server Eval Round 4] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:21<00:00, 14.24batch/s]
[92mINFO [0m:      [Round 4] Centralized Evaluation - Loss: 0.8773, Metrics: {'centralized_accuracy': 0.7527}
[92mINFO [0m:      fit progress: (4, 0.877304599022332, {'centralized_accuracy': 0.7527}, 900.7928903489992)
[92mINFO [0m:      configure_evaluate: strategy sampled 2 clients (out of 2)


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:12,  6.45batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:12,  6.38batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.45batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:11,  6.45batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:11,  6.48batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:11,  6.52batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:10,  6.56batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.59batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.60batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.54batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:10,  6.57batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:10,  6.57batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:10,  6.58batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.62batch/s]
Eval progress:  19%|█▉      

[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:11,  6.69batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:11,  6.58batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.51batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:11,  6.50batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:11,  6.52batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:11,  6.54batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:11,  6.45batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.53batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.48batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.50batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:10,  6.51batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:10,  6.51batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:10,  6.52batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.54batch/s]
Eval progress:  19%|█▉      

Entered server configure_fit()
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.0009972883382072953
[36m(ClientAppActor pid=39972)[0m No prefix/name for the model was provided, choosen prefix/name: happy_pidgey_71
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 0.6059
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 81.41%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 53.12s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:43
[36m(ClientAppActor pid=39972)[0m 
[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True
[36m(ClientAppActor pid=39972)[0m Updated learning rate to 0.000997288338207

[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures


[36m(ClientAppActor pid=39972)[0m 🚀 Epoch 1/1 (100.00%) Completed
[36m(ClientAppActor pid=39972)[0m 	📊 Training Loss: 0.6025
[36m(ClientAppActor pid=39972)[0m 	✅ Training Accuracy: 81.49%
[36m(ClientAppActor pid=39972)[0m 	⏳ Elapsed Time: 51.68s | ETA: 0.00s
[36m(ClientAppActor pid=39972)[0m 	🕒 Completed At: 11:44
[36m(ClientAppActor pid=39972)[0m 


[92mINFO [0m:      [Round 5] Avg Drift: 0.0001 | Relative Drift: 0.0000
[92mINFO [0m:      [Round 5] Saving aggregated model at epoch 5...


💾 Saved checkpoint at: /home/massimiliano/Projects/fl-g13/checkpoints/fl_aron_BaseDino_epoch_5.pth
[Server Eval Round 5] Model device: cuda:0
[Server Eval Round 5] CUDA available in server eval: True


Eval progress: 100%|██████████| 313/313 [00:20<00:00, 14.92batch/s]
[92mINFO [0m:      [Round 5] Centralized Evaluation - Loss: 0.8623, Metrics: {'centralized_accuracy': 0.7602}
[92mINFO [0m:      fit progress: (5, 0.8622801780415038, {'centralized_accuracy': 0.7602}, 1053.5594696509997)
[92mINFO [0m:      configure_evaluate: strategy sampled 2 clients (out of 2)


[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:10,  7.37batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:10,  7.11batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:10,  7.19batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:10,  7.21batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:10,  7.20batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:10,  7.25batch/s]
Eval progress:   9%|▉         | 7/79 [00:00<00:09,  7.24batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:09,  7.23batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:09,  7.26batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:09,  7.30batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:09,  7.29batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:09,  7.29batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:09,  7.24batch/s]
Eval progress:  18%|█▊        | 14/79 [00:01<00:08,  7.27batch/s]
Eval progress:  19%|█▉      

[36m(ClientAppActor pid=39972)[0m [Client] Client on device: cuda:0
[36m(ClientAppActor pid=39972)[0m [Client] CUDA available in client: True


Eval progress:   0%|          | 0/79 [00:00<?, ?batch/s]
Eval progress:   1%|▏         | 1/79 [00:00<00:10,  7.29batch/s]
Eval progress:   3%|▎         | 2/79 [00:00<00:11,  6.91batch/s]
Eval progress:   4%|▍         | 3/79 [00:00<00:11,  6.81batch/s]
Eval progress:   5%|▌         | 4/79 [00:00<00:10,  6.82batch/s]
Eval progress:   6%|▋         | 5/79 [00:00<00:10,  6.84batch/s]
Eval progress:   8%|▊         | 6/79 [00:00<00:10,  6.82batch/s]
Eval progress:   9%|▉         | 7/79 [00:01<00:10,  6.84batch/s]
Eval progress:  10%|█         | 8/79 [00:01<00:10,  6.86batch/s]
Eval progress:  11%|█▏        | 9/79 [00:01<00:10,  6.86batch/s]
Eval progress:  13%|█▎        | 10/79 [00:01<00:10,  6.84batch/s]
Eval progress:  14%|█▍        | 11/79 [00:01<00:09,  6.86batch/s]
Eval progress:  15%|█▌        | 12/79 [00:01<00:09,  6.86batch/s]
Eval progress:  16%|█▋        | 13/79 [00:01<00:09,  6.85batch/s]
Eval progress:  18%|█▊        | 14/79 [00:02<00:09,  6.87batch/s]
Eval progress:  19%|█▉      