In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before executing each cell, so edits to the local project package
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from fl_g13.config import RAW_DATA_DIR


from torchvision import datasets, transforms

from fl_g13.base_experimentation import dataset_handler

import torch
import torch.nn as nn
import torch.nn.functional as F


import flwr
from flwr.common import Context

from flwr.simulation import run_simulation


[32m2025-04-17 16:15:27.766[0m | [1mINFO    [0m | [36mfl_g13.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13[0m


In [4]:
# Use the GPU only when PyTorch reports a usable CUDA device; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Report the chosen device and the library versions in use.
print(f"Training on {DEVICE}")
print(f"Flower {flwr.__version__} / PyTorch {torch.__version__}")

Training on cpu
Flower 1.17.0 / PyTorch 2.6.0+cu118


# Load data

In [5]:
# The only preprocessing step is the conversion of each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Build the train split first, then the test split; both are stored under
# RAW_DATA_DIR and downloaded there if missing (download=True).
cifar100_train, cifar100_test = (
    datasets.CIFAR100(root=RAW_DATA_DIR, train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [6]:
# Train / validation split of the CIFAR-100 training set.
TRAIN_RATIO = 0.8  # value passed to train_test_split as `train_ratio`
train_dataset, val_dataset = dataset_handler.train_test_split(
    cifar100_train,
    train_ratio=TRAIN_RATIO,
)

In [7]:
# I.I.D. sharding: split the training set, then the validation set, across k clients.
k = 10  # number of clients
clients_dataset_train, clients_dataset_val = (
    dataset_handler.iid_sharding(split, k)
    for split in (train_dataset, val_dataset)
)

## Tiny model

In [8]:
class TinyCNN(nn.Module):
    """Small CNN classifier: two conv + ReLU + 2x2 max-pool stages and one linear layer.

    The linear layer expects 32 * 8 * 8 input features, i.e. 3-channel 32x32
    images (each pooling stage halves the spatial size: 32 -> 16 -> 8).

    Args:
        num_classes: Number of output logits (defaults to 100).
    """

    def __init__(self, num_classes=100):
        super().__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=32 * 8 * 8, out_features=num_classes)

    def forward(self, x):
        """Map a batch of images [B, 3, 32, 32] to class logits [B, num_classes]."""
        features = x
        # Stage 1: [B, 3, 32, 32] -> [B, 16, 16, 16]; stage 2: -> [B, 32, 8, 8].
        for conv in (self.conv1, self.conv2):
            activated = F.relu(conv(features))
            features = F.max_pool2d(activated, kernel_size=2)
        # Flatten everything except the batch dimension: [B, 32*8*8].
        batch_size = features.size(0)
        flat = features.view(batch_size, -1)
        return self.fc1(flat)

## Init model, optimizer and loss function

In [9]:
# Model placed on the selected device.
net = TinyCNN().to(DEVICE)

# AdamW over all model parameters, and cross-entropy as the classification loss.
optimizer = torch.optim.AdamW(
    params=net.parameters(),
    lr=1e-4,
    weight_decay=0.04,
)
criterion = nn.CrossEntropyLoss()

# Define the ClientApp

## Build the local module

Install the local module (in editable mode) so that the ClientApp can import it

In [10]:
!pip install -e ..

Obtaining file:///C:/Users/ADMIN/Desktop/BACKUP/study/Italy/polito/classes/20242/deep%20learning/project/source_code/fl-g13
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: fl_g13
  Building editable for fl_g13 (pyproject.toml): started
  Building editable for fl_g13 (pyproject.toml): finished with status 'done'
  Created wheel for fl_g13: filename=fl_g13-0.0.1-py3-none-any.whl size=2920 sha256=58cad1ba858085057c04f166298cea48e57db9492fa24b427227ecd360d31dcf
  Stored in directory: C:\Users\ADMIN\AppData\Loca


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Create FlowerClient instances

In [11]:
def load_data_client(context: Context):
    """Build the train and validation DataLoaders for one simulated client.

    This simulates the distribution of data across clients: the client's
    ``partition-id`` (read from ``context.node_config``) selects its shard of
    ``clients_dataset_train`` and ``clients_dataset_val``. In a real deployment
    each client would have its own dataset instead.

    Args:
        context: Flower context of the client; ``node_config["partition-id"]``
            is used as the index into the per-client shards.

    Returns:
        A ``(trainloader, valloader)`` tuple of DataLoaders over the client's
        training and validation shards.
    """
    # Imported here so the function does not rely on a later notebook cell
    # having already imported DataLoader into the global namespace.
    from torch.utils.data import DataLoader

    partition_id = context.node_config["partition-id"]
    print(f"Client {partition_id} is ready to train")
    # NOTE(review): no batch_size / shuffle is passed, so DataLoader's defaults
    # apply (batch_size=1, shuffle=False) — confirm this is intended.
    trainloader = DataLoader(clients_dataset_train[partition_id])
    valloader = DataLoader(clients_dataset_val[partition_id])
    return trainloader, valloader

### Create an instance of ClientApp

In [12]:
from fl_g13.fl_pytorch.client_app import get_client_app

# Client-side settings handed to the ClientApp factory.
config = {'local-epochs': 1}

# The ClientApp receives the data-loading function together with the shared
# model, optimizer, loss function and device defined in the cells above.
client = get_client_app(
    load_data_client,
    model=net,
    optimizer=optimizer,
    criterion=criterion,
    device=DEVICE,
    config=config,
)

# Define the Flower ServerApp

Customize Flower's built-in Federated Averaging (FedAvg) strategy to handle the server-side hyperparameters and to save the model every k epochs.

The strategy also supports incremental training, i.e. continuing from a previously saved checkpoint.

## Create an instance of ServerApp

In [13]:

from pathlib import Path
from torch.utils.data import DataLoader
from fl_g13.fl_pytorch.server_app import get_server_app

def get_datatest_fn(context: Context):
    """Return a DataLoader over the CIFAR-100 test split.

    Args:
        context: Flower context; accepted but not used here.

    Returns:
        A DataLoader wrapping ``cifar100_test`` with default loader settings.
    """
    test_loader = DataLoader(cifar100_test)
    return test_loader

## checkpoints directory
# Resolve the path once, at assignment. Previously `.resolve()` was called as a
# bare statement (result discarded) and then again when building the server.
current_path = Path.cwd()
model_test_path = (current_path / "../models/model_test").resolve()


# Server / strategy hyperparameters
num_rounds = 2  # Number of federated rounds to run
save_every = 2  # Checkpoint interval passed to get_server_app
fraction_fit = 1.0  # Sample 100% of available clients for training
fraction_evaluate = 0.5  # Sample 50% of available clients for evaluation
min_fit_clients = 10  # Never sample less than 10 clients for training
min_evaluate_clients = 5  # Never sample less than 5 clients for evaluation
min_available_clients = 10  # Wait until all 10 clients are available
device = DEVICE
use_wandb = False


# Build the ServerApp from the shared model/optimizer/criterion, the
# centralized test-data provider and the settings above.
server = get_server_app(
    checkpoint_dir=model_test_path,
    model=net,
    optimizer=optimizer,
    criterion=criterion,
    get_datatest_fn=get_datatest_fn,
    num_rounds=num_rounds,
    fraction_fit=fraction_fit,
    fraction_evaluate=fraction_evaluate,
    min_fit_clients=min_fit_clients,
    min_evaluate_clients=min_evaluate_clients,
    min_available_clients=min_available_clients,
    device=device,
    use_wandb=use_wandb,
    save_every=save_every,
)

No checkpoint found, initializing new model from scratch.


# Run the training


In [14]:
# Resources reserved for each simulated client.
# Every client gets 1 CPU. When running on CUDA each client is additionally
# given a quarter of a GPU (num_gpus=0.25); on CPU-only machines it gets none.
# Refer to the Flower framework documentation for more details about Flower
# simulations and how to set up the `backend_config`.
gpu_share_per_client = 0.25 if DEVICE == "cuda" else 0.0
backend_config = {
    "client_resources": {"num_cpus": 1, "num_gpus": gpu_share_per_client},
}

In [15]:
# Number of simulated clients, passed to run_simulation as `num_supernodes`.
# NOTE(review): presumably this must not exceed k, the number of data shards
# that load_data_client indexes by partition-id — confirm.
NUM_CLIENTS =10

In [17]:
# Run simulation
# Launches the federated run with the ServerApp and ClientApp built above,
# NUM_CLIENTS simulated supernodes and the per-client resources in backend_config.
run_simulation(
    server_app=server,
    client_app=client,
    num_supernodes=NUM_CLIENTS,
    backend_config=backend_config,
)

[92mINFO [0m:      Starting Flower ServerApp, config: num_rounds=2, no round_timeout
[92mINFO [0m:      
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters


Continue train model from epoch 1


[92mINFO [0m:      💡 New best global model found: 0.090300
[92mINFO [0m:      initial parameters (loss, other metrics): 4.152868543231487, {'centralized_accuracy': 0.0903}
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 10)


Test Loss: 4.1529, Test Accuracy: 9.03%


[36m(ClientAppActor pid=19896)[0m 2025-04-17 16:20:25.814 | INFO     | fl_g13.config:<module>:11 - PROJ_ROOT path is: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13


[36m(ClientAppActor pid=20364)[0m Client 7 is ready to train
[36m(ClientAppActor pid=22276)[0m Training Loss: 4.0446, Training Accuracy: 9.93%
[36m(ClientAppActor pid=22276)[0m 📘 Epoch [1/1] - Avg Loss: 4.0446, Accuracy: 9.93%
[36m(ClientAppActor pid=25344)[0m Client 8 is ready to train[32m [repeated 9x across cluster][0m


[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      💡 New best global model found: 0.138600
[92mINFO [0m:      fit progress: (1, 3.897378060948849, {'centralized_accuracy': 0.1386}, 73.15738400000009)
[92mINFO [0m:      configure_evaluate: strategy sampled 5 clients (out of 10)


Test Loss: 3.8974, Test Accuracy: 13.86%
[36m(ClientAppActor pid=23668)[0m Training Loss: 4.0704, Training Accuracy: 9.12%[32m [repeated 9x across cluster][0m
[36m(ClientAppActor pid=23668)[0m 📘 Epoch [1/1] - Avg Loss: 4.0704, Accuracy: 9.12%[32m [repeated 9x across cluster][0m
[36m(ClientAppActor pid=20364)[0m Client 5 is ready to train
[36m(ClientAppActor pid=25356)[0m Client 7 is ready to train


[92mINFO [0m:      aggregate_evaluate: received 5 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 10)


[36m(ClientAppActor pid=4160)[0m Test Loss: 3.9103, Test Accuracy: 13.10%
[36m(ClientAppActor pid=23668)[0m Client 2 is ready to train[32m [repeated 4x across cluster][0m
[36m(ClientAppActor pid=25344)[0m Test Loss: 3.9039, Test Accuracy: 12.10%[32m [repeated 4x across cluster][0m
[36m(ClientAppActor pid=25356)[0m Training Loss: 3.8980, Training Accuracy: 13.15%
[36m(ClientAppActor pid=25356)[0m 📘 Epoch [1/1] - Avg Loss: 3.8980, Accuracy: 13.15%
[36m(ClientAppActor pid=19896)[0m Client 8 is ready to train[32m [repeated 9x across cluster][0m


[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures


Saving centralized model epoch 2 aggregated_parameters...
💾 Saved checkpoint at: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13\models\model_test\TinyCNN_epoch_2.pth


[92mINFO [0m:      💡 New best global model found: 0.162600
[92mINFO [0m:      fit progress: (2, 3.7512905917793513, {'centralized_accuracy': 0.1626}, 124.51516420000007)
[92mINFO [0m:      configure_evaluate: strategy sampled 5 clients (out of 10)


Test Loss: 3.7513, Test Accuracy: 16.26%
[36m(ClientAppActor pid=4160)[0m Training Loss: 3.8647, Training Accuracy: 12.97%[32m [repeated 9x across cluster][0m
[36m(ClientAppActor pid=4160)[0m 📘 Epoch [1/1] - Avg Loss: 3.8647, Accuracy: 12.97%[32m [repeated 9x across cluster][0m
[36m(ClientAppActor pid=18356)[0m Client 3 is ready to train
[36m(ClientAppActor pid=4160)[0m Client 1 is ready to train


[92mINFO [0m:      aggregate_evaluate: received 5 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 2 round(s) in 131.92s
[92mINFO [0m:      	History (loss, distributed):
[92mINFO [0m:      		round 1: 3.9517598749428986
[92mINFO [0m:      		round 2: 3.823694650053978
[92mINFO [0m:      	History (loss, centralized):
[92mINFO [0m:      		round 0: 4.152868543231487
[92mINFO [0m:      		round 1: 3.897378060948849
[92mINFO [0m:      		round 2: 3.7512905917793513
[92mINFO [0m:      	History (metrics, distributed, evaluate):
[92mINFO [0m:      	{'federated_evaluate_accuracy': [(1, 0.1154), (2, 0.1378)]}
[92mINFO [0m:      	History (metrics, centralized):
[92mINFO [0m:      	{'centralized_accuracy': [(0, 0.0903), (1, 0.1386), (2, 0.1626)]}
[92mINFO [0m:      


[36m(ClientAppActor pid=4160)[0m Test Loss: 3.7862, Test Accuracy: 14.20%
[36m(ClientAppActor pid=18456)[0m Client 7 is ready to train[32m [repeated 3x across cluster][0m
[36m(ClientAppActor pid=20364)[0m Test Loss: 3.7581, Test Accuracy: 16.60%[32m [repeated 4x across cluster][0m


[36m(ClientAppActor pid=25344)[0m 2025-04-17 16:20:29.989 | INFO     | fl_g13.config:<module>:11 - PROJ_ROOT path is: C:\Users\ADMIN\Desktop\BACKUP\study\Italy\polito\classes\20242\deep learning\project\source_code\fl-g13[32m [repeated 9x across cluster][0m
