In [1]:
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import flwr
from flwr.client import Client, ClientApp, NumPyClient
from flwr.common import Context
from flwr.server import ServerApp, ServerConfig, ServerAppComponents
from flwr.server.strategy import Strategy
from flwr.simulation import run_simulation
from flwr_datasets import FederatedDataset
from typing import Union
from flwr.server.client_proxy import ClientProxy  # Correctly import ClientProxy
from flwr.common import FitRes, Parameters
import glob
import os
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU
print(f"Training on {DEVICE}")
print(f"Flower {flwr.__version__} / PyTorch {torch.__version__}")

  from .autonotebook import tqdm as notebook_tqdm
2025-06-03 10:01:31,030	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Training on cpu
Flower 1.17.0 / PyTorch 2.6.0+cu124


In [None]:
from flwr.common import NDArrays
MAX_ROUND = 1
NUM_CLIENTS = 10 # nombre de client participant
NUM_PARTITIONS = NUM_CLIENTS

In [3]:

def load_datasets(partition_id, num_partitions: int):
    # Définir les chemins d'accès
    base_dir = f"utkface_partitions/partition_{partition_id}"
    train_dir = os.path.join(base_dir, "train")
    test_dir = os.path.join(base_dir, "test")

    # Transformations pour images RGB (UTKFace est en couleur)
    pytorch_transforms = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # pour images RGB
    ])

    # Charger les datasets
    train_dataset = datasets.ImageFolder(root=train_dir, transform=pytorch_transforms)
    test_dataset = datasets.ImageFolder(root=test_dir, transform=pytorch_transforms)

    # Créer des DataLoaders
    trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    valloader = DataLoader(test_dataset, batch_size=32)  # ou un vrai split validation/test si disponible
    testloader = DataLoader(test_dataset, batch_size=32)

    return trainloader, valloader, testloader


In [4]:
from torchvision import transforms, datasets
from torch.utils.data import Dataset, random_split, DataLoader
import os
from PIL import Image

class UTKFaceDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = [img for img in os.listdir(root_dir) if img.endswith('.jpg')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        # Format: [age]_[gender]_[race]_[date&time].jpg
        age, gender, race, _ = img_name.split('_')
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = int(age)  # tu peux changer pour `int(gender)` ou `int(race)`
        return image, label

In [5]:
class Net(nn.Module):
    def __init__(self, input_channel=3, num_classes=117):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channel, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 128, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(128*6*6, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )


    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def get_parameters(net) -> List[np.ndarray]:
    print("net state_dict keys:", net.state_dict().keys())  # Imprime les clés
    return [val.cpu().numpy() for _, val in net.state_dict().items()]


def set_parameters(net, parameters: List[np.ndarray]):
    params_dict = zip(net.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)


def train(net, trainloader, epochs: int):
    """Train the network on the training set."""
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images,labels in trainloader:
            print(f"Input shape: {images.shape}")
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(net(images), labels)
            loss.backward()
            optimizer.step()
            # Metrics
            epoch_loss += loss
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total
        print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")


def test(net, testloader):
    """Evaluate the network on the entire test set."""
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    net.eval()
    with torch.no_grad():
        for images,labels in testloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    loss /= len(testloader.dataset)
    accuracy = correct / total
    return loss, accuracy

In [6]:
class FlowerClient(NumPyClient):
    def __init__(self, partition_id, net, trainloader, valloader):
        self.partition_id = partition_id
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader

    def get_parameters(self, config):
        print(f"[Client {self.partition_id}] get_parameters")
        return get_parameters(self.net)

    def fit(self, parameters, config):
        print(f"[Client {self.partition_id}] fit, config: {config}")
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs=1)
        return get_parameters(self.net), len(self.trainloader), {}

    def evaluate(self, parameters, config):
        print(f"[Client {self.partition_id}] evaluate, config: {config}")
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.valloader)
        return float(loss), len(self.valloader), {"accuracy": float(accuracy)}


def client_fn(context: Context) -> Client:
    net = Net().to(DEVICE)
    partition_id = context.node_config["partition-id"]
    num_partitions = context.node_config["num-partitions"]
    trainloader, valloader, _ = load_datasets(partition_id, num_partitions)
    return FlowerClient(partition_id, net, trainloader, valloader).to_client()


# Create the ClientApp
client = ClientApp(client_fn=client_fn)

In [7]:
from typing import Union

from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    Parameters,
    Scalar,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg

 

In [8]:
net = Net().to(DEVICE)

In [9]:
import flwr as fl

class SaveModelStrategy(fl.server.strategy.FedAvg):
    def aggregate_fit(
        self,
        server_round: int,
        results: list[tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]],
        failures: list[Union[tuple[ClientProxy, FitRes], BaseException]],
    ) -> tuple[Optional[Parameters], dict[str, Scalar]]:
        """Aggregate model weights using weighted average and store checkpoint"""

        # Call aggregate_fit from base class (FedAvg) to aggregate parameters and metrics
        aggregated_parameters, aggregated_metrics = super().aggregate_fit(
            server_round, results, failures
        )

        if server_round == MAX_ROUND:

            if aggregated_parameters is not None:
                print(f"Saving round {server_round} aggregated_parameters...")

                # Convert `Parameters` to `list[np.ndarray]`
                aggregated_ndarrays: list[np.ndarray] = fl.common.parameters_to_ndarrays(
                    aggregated_parameters
                )


                # Convert `list[np.ndarray]` to PyTorch `state_dict`
                params_dict = zip(net.state_dict().keys(), aggregated_ndarrays)
                state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
                net.load_state_dict(state_dict, strict=True)

                # Save the model to disk
                torch.save(net.state_dict(), f"model_round_{server_round}.pth")

        return aggregated_parameters, aggregated_metrics

In [10]:

def evaluate(
    server_round: int,
    parameters: NDArrays,
    config: Dict[str, Scalar],
) -> Optional[Tuple[float, Dict[str, Scalar]]]:
    net = Net().to(DEVICE)
    _, _, testloader = load_datasets(0, NUM_PARTITIONS)
    set_parameters(net, parameters)  # Update model with the latest parameters
    loss, accuracy = test(net, testloader)
    print(f"Server-side evaluation loss {loss} / accuracy {accuracy}")
    return loss, {"accuracy": accuracy}

def server_fn(context):
    strategy = SaveModelStrategy(
        fraction_fit=1.0,  # Utiliser 100% des clients pour l'entraînement
        fraction_evaluate=1.0,  # Utiliser 10% des clients pour l'évaluation
        min_fit_clients=1,  # Minimum de 10 clients pour l'entraînement
        min_evaluate_clients=1,  # Minimum de 5 clients pour l'évaluation
        min_available_clients=1,
        evaluate_fn=evaluate
    )
    
    config = ServerConfig(num_rounds=MAX_ROUND)
    return ServerAppComponents(strategy=strategy, config=config)


app = ServerApp(server_fn=server_fn)

server = ServerApp(server_fn=server_fn)
backend_config = {"client_resources": None}
if DEVICE.type == "cuda":
    backend_config = {"client_resources": {"num_gpus": 1}}


run_simulation(
    server_app=server,
    client_app=client,
    num_supernodes=NUM_PARTITIONS,
    backend_config=backend_config,
)


NameError: name 'NUM_PARTITIONS' is not defined

# Adapter le dataset en fonction du nombre de client


In [None]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split

# Dossiers d'entrée
input_dirs = ["./dataset/UTKFace/part1", "./dataset/UTKFace/part2", "./dataset/UTKFace/part3"]
all_files = []

# Collecte de tous les fichiers .jpg
for part in input_dirs:
    for file in os.listdir(part):
        if file.endswith(".jpg"):
            all_files.append(os.path.join(part, file))

print(f"Nombre total de fichiers trouvés : {len(all_files)}")

# Mélange aléatoire des fichiers
random.shuffle(all_files)

# Nombre de partitions
num_partitions = NUM_PARTITIONS

# Taille approximative d'une partition
partition_size = len(all_files) // num_partitions

# Dossier de base pour les partitions
output_base = "./utkface_partitions"

# Nettoyer puis recréer le dossier de sortie s'il existe
if os.path.exists(output_base):
    shutil.rmtree(output_base)
os.makedirs(output_base, exist_ok=True)

# Fonction pour copier les fichiers en fonction du split et du genre
def process_files_partition(partition_files, partition_idx):
    partition_dir = os.path.join(output_base, f"partition_{partition_idx}")
    
    # split interne en 80% train, 20% test
    train_files, test_files = train_test_split(partition_files, test_size=0.2, random_state=42)
    
    for split, files in [("train", train_files), ("test", test_files)]:
        for filepath in files:
            filename = os.path.basename(filepath)
            try:
                gender = filename.split("_")[1]
                if gender not in ["0", "1"]:
                    continue  # ignore fichier étrange
                target_dir = os.path.join(partition_dir, split, gender)
                os.makedirs(target_dir, exist_ok=True)
                shutil.copy(filepath, os.path.join(target_dir, filename))
            except Exception as e:
                print(f"Erreur sur {filename} : {e}")

# Répartition des fichiers dans les partitions
for i in range(num_partitions):
    start_idx = i * partition_size
    end_idx = (i + 1) * partition_size if i != num_partitions - 1 else len(all_files)
    
    partition_files = all_files[start_idx:end_idx]
    process_files_partition(partition_files, i)

print("✅ 15 partitions créées avec succès avec structure train/test et labels 0/1.")


Nombre total de fichiers trouvés : 24106
✅ 15 partitions créées avec succès avec structure train/test et labels 0/1.
