# Federated Learning Pipeline

> **Mini Glossary (FL)**
> - **Client**: A device/site (e.g., hospital, phone) that trains locally on private data.
> - **Server**: The coordinator that distributes the starting model, communicates with clients, collects client updates, and distributes new global models.
> - **Aggregator/Aggregation Method**:  Method chosen to aggregate all client models and create the new global model.
> - **Round**: One cycle of local training → sending updates → aggregation.
> - **Aggregation frequency**: Number of epochs between sending weights to the server.
> - **Federated Averaging (FedAvg)**: Standard method to average/aggregate client model weights.
> - **IID Data**: Client data follows the same distribution across all clients (homogeneity).
> - **Non‑IID Data**: Client data may follow different distributions (heterogeneity).
> - **Global model**: Model sent by the server to all clients
> - **Local model**: Model trained by the client on its data

# Import module
**Environment setup and library imports.**

This section imports all required libraries:
 - Data handling: `pandas`, `numpy`, `sklearn`
 - Model building and training: `torch`, `torch.nn`
 - Visualization: `matplotlib`

In [1]:
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from copy import deepcopy
import matplotlib.pyplot as plt # New import
import numpy as np # New import

# Definition of the model, train function, and evaluation.

This part defines:
 - a simple **MLP** model;
 - two helper functions:
   - `train()` → performs one epoch of training;
   - `evaluate()` → computes accuracy and loss on validation/test sets. 

In [2]:
# Model definition
class MLP(nn.Module):
    """Feed-forward network with two hidden ReLU layers and a linear output layer."""

    def __init__(self, input_dim, output_dim, hidden_dim=64):
        """Create the three linear layers.

        Args:
            input_dim: Number of input features.
            output_dim: Number of output units (one per class).
            hidden_dim: Width of both hidden layers.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the raw (un-normalized) output scores for the batch ``x``."""
        first_hidden = F.relu(self.fc1(x))
        second_hidden = F.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Training and evaluation loops
def train(model, loader, optimizer, criterion):
    """Run one training epoch over ``loader``, updating ``model`` in place.

    Args:
        model: Network to optimize; it is switched to training mode.
        loader: Iterable of ``(features, labels)`` batches that also supports
            ``len()`` and exposes a ``dataset`` attribute.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Loss callable applied to ``(outputs, labels)``.

    Returns:
        ``(mean_batch_loss, accuracy)``: the sum of the per-batch loss values
        divided by the number of batches, and the number of correct
        predictions divided by ``len(loader.dataset)``.
    """
    model.train()
    batch_losses = []
    n_correct = 0
    for batch_x, batch_y in loader:
        # Batches are moved to the module-level DEVICE before the forward pass.
        batch_x = batch_x.to(DEVICE)
        batch_y = batch_y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
        n_correct += (logits.argmax(1) == batch_y).sum().item()
    return sum(batch_losses) / len(loader), n_correct / len(loader.dataset)

# Evaluation function
def evaluate(model, loader, criterion):
    """Compute loss and accuracy of ``model`` over ``loader`` without gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable of ``(features, labels)`` batches that also supports
            ``len()`` and exposes a ``dataset`` attribute.
        criterion: Loss callable applied to ``(outputs, labels)``.

    Returns:
        ``(mean_batch_loss, accuracy)``: the sum of the per-batch loss values
        divided by the number of batches, and the number of correct
        predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    batch_losses = []
    n_correct = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            # Batches are moved to the module-level DEVICE before the forward pass.
            batch_x = batch_x.to(DEVICE)
            batch_y = batch_y.to(DEVICE)
            logits = model(batch_x)
            batch_losses.append(criterion(logits, batch_y).item())
            n_correct += (logits.argmax(1) == batch_y).sum().item()
    return sum(batch_losses) / len(loader), n_correct / len(loader.dataset)

# Data loading and preprocessing


This section performs:
  - Loading train and test data from CSV files;
  - Encoding class labels into integers;
  - Splitting the training data into training and validation sets;
  - Normalizing features (mean 0, std 1);
  - Converting NumPy arrays into PyTorch tensors;
  - Creating DataLoader objects for efficient batch training.



In [3]:
def load_dataset(train_file, test_file):
    """Load the train/test CSV files and prepare integer-encoded splits.

    Both files must contain a ``"Class"`` label column; every other column of
    the training file is treated as a feature, regardless of where ``"Class"``
    sits in the file.

    Args:
        train_file: Path of the training CSV.
        test_file: Path of the test CSV.

    Returns:
        A 10-tuple ``(X_train, y_train, X_val, y_val, X_test, y_test_int,
        input_dim, output_dim, X_full_train_unsplit, y_full_train_int)``.
        The first four come from a stratified 80/20 split of the training
        file (``random_state=42``); the last two are the complete, unscaled
        training features and integer labels before that split.

    Raises:
        ValueError: If the test file contains a class label that never
            appears in the training file.
    """
    # Load data
    train_data = pd.read_csv(train_file)
    test_data = pd.read_csv(test_file)
    class_names = train_data["Class"].unique()
    print(f"Classes: {class_names}")

    # Select features by name rather than by position so the label column can
    # never leak into the feature matrix.
    feature_names = train_data.columns.drop("Class")
    print(f"Features: {feature_names}")

    # Capture the full training data before the split (for client partitioning)
    X_full_train_unsplit = train_data[feature_names].values
    y_full_train_unsplit = train_data["Class"].values

    X_test = test_data[feature_names].values
    y_test = test_data["Class"].values

    # Class indices follow the order of first appearance in the training file.
    class_map = {label: idx for idx, label in enumerate(class_names)}
    unknown_labels = set(y_test) - set(class_map)
    if unknown_labels:
        raise ValueError(
            "Test set contains labels absent from the training set: "
            f"{sorted(map(str, unknown_labels))}"
        )
    y_full_train_int = np.array([class_map[label] for label in y_full_train_unsplit])
    y_test_int = np.array([class_map[label] for label in y_test])

    # Split full training data into server training and validation
    X_train, X_val, y_train, y_val = train_test_split(
        X_full_train_unsplit, y_full_train_int, test_size=0.2, random_state=42, stratify=y_full_train_int
    )
    print(f"Training samples: {len(X_train)}, Validation samples: {len(X_val)}, Test samples: {len(X_test)}")

    input_dim = X_train.shape[1]
    output_dim = len(class_names)

    # 10 values: 8 server splits/dims + 2 full unscaled arrays for clients
    return X_train, y_train, X_val, y_val, X_test, y_test_int, input_dim, output_dim, X_full_train_unsplit, y_full_train_int

# Normalize features with mean 0 and std 1
def features_scaling(X_train, X_val, X_test):
    """Standardize the three feature matrices with statistics from ``X_train``.

    The scaler is fitted on ``X_train`` only and then applied to all three
    inputs.

    Returns:
        ``(X_train_scaled, X_val_scaled, X_test_scaled, scaler)`` where
        ``scaler`` is the fitted ``StandardScaler``.
    """
    scaler = StandardScaler().fit(X_train)
    scaled_train = scaler.transform(X_train)
    scaled_val = scaler.transform(X_val)
    scaled_test = scaler.transform(X_test)
    print("Features scaled successfully.")
    return scaled_train, scaled_val, scaled_test, scaler

# Convert numpy arrays to PyTorch tensors.
def convert_to_tensors(X_train, y_train, X_val, y_val, X_test, y_test):
    """Convert feature/label arrays into PyTorch tensors.

    Feature inputs become ``torch.float32`` tensors and label inputs become
    ``torch.long`` tensors.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)`` as tensors, in
        the same order as the arguments.
    """
    def _as_features(values):
        return torch.tensor(values, dtype=torch.float32)

    def _as_labels(values):
        return torch.tensor(values, dtype=torch.long)

    return (
        _as_features(X_train),
        _as_labels(y_train),
        _as_features(X_val),
        _as_labels(y_val),
        _as_features(X_test),
        _as_labels(y_test),
    )

def create_dataloader(X_tensor, y_tensor, batch_size, shuffle=True):
    """Wrap a feature tensor and a label tensor in a batched ``DataLoader``.

    Args:
        X_tensor: Feature tensor; its first dimension indexes samples.
        y_tensor: Label tensor with the same first dimension as ``X_tensor``.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples every epoch. Defaults to
            ``True`` (the previous hard-coded behavior); pass ``False`` for
            validation/test loaders when a fixed order is wanted.

    Returns:
        A ``DataLoader`` over ``TensorDataset(X_tensor, y_tensor)``.
    """
    dataset = TensorDataset(X_tensor, y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

#### Hyperparameters

This section defines key training parameters:
  - HIDDEN_DIM: number of hidden units in the MLP;
  - BATCH_SIZE: number of samples per training batch;
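  - EPOCHS: maximum number of training epochs for the centralized (server) model; it is also divided by the aggregation frequency to obtain the number of FL rounds;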
  - LEARNING_RATE: optimizer learning rate;
  - DEVICE: automatically selects GPU if available, otherwise CPU;
  - PATIENCE: number of epochs without improvement before early stopping.

In [4]:
# Width of the hidden layers passed to the MLP constructor.
HIDDEN_DIM = 64
# Batch size used when building the DataLoaders.
BATCH_SIZE = 32
# Upper bound on the number of centralized training epochs; also divided by
# the aggregation frequency to derive the number of FL rounds.
EPOCHS = 100
# Learning rate handed to the Adam optimizers.
LEARNING_RATE = 1e-3
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
PATIENCE = 5  # Early stopping patience

### Pipeline execution

Data loading and data loader creation.

In [5]:
# Load and preprocess server data.
# The last two return values hold the complete, unscaled training set (features
# and integer labels) taken before the train/validation split.
# NOTE(review): those arrays therefore also contain the rows that end up in the
# server validation split — confirm this overlap is acceptable when they are
# later handed to the clients.
X_train_server, y_train_server, X_val_server, y_val_server, X_test_server, y_test_server, INPUT_DIM, OUTPUT_DIM, X_full_unscaled, y_full_unscaled = load_dataset(
    "../data_fl/train_health_data.csv", "../data_fl/test_health_data.csv")

# Scale the three splits; the 4th return value is the scaler, which is fitted
# on the server training split only.
X_train_server_scaled, X_val_server_scaled, X_test_server_scaled, scaler = features_scaling(X_train_server, X_val_server, X_test_server)

# Convert scaled features and integer labels to tensors.
X_train_tensor_server, y_train_tensor_server, X_val_tensor_server, y_val_tensor_server, X_test_tensor_server, y_test_tensor_server = convert_to_tensors(
    X_train_server_scaled, y_train_server, X_val_server_scaled, y_val_server, X_test_server_scaled, y_test_server)

# Build one DataLoader per split (create_dataloader shuffles all of them).
train_loader_server = create_dataloader(X_train_tensor_server, y_train_tensor_server, BATCH_SIZE)
val_loader_server = create_dataloader(X_val_tensor_server, y_val_tensor_server, BATCH_SIZE)
test_loader_server = create_dataloader(X_test_tensor_server, y_test_tensor_server, BATCH_SIZE)

print(f"Device: {DEVICE}, Input dim: {INPUT_DIM}, Output dim: {OUTPUT_DIM}")

Classes: ['Healthy' 'Disease' 'At Risk']
Features: Index(['Age', 'BMI', 'Cholesterol', 'BloodPressure', 'HeartRate', 'Hemoglobin',
       'Glucose', 'SmokingStatus', 'PhysicalActivity', 'FamilyHistory', 'LDL',
       'HDL', 'Triglycerides', 'CRP', 'WhiteBloodCellCount', 'Platelets',
       'VitaminD', 'Calcium', 'KidneyFunction', 'LiverEnzymes'],
      dtype='object')
Training samples: 640, Validation samples: 160, Test samples: 200
Features scaled successfully.
Device: cpu, Input dim: 20, Output dim: 3


#### Server training

This section performs:
  1. Defines the server MLP model, optimizer (Adam), and loss function (CrossEntropyLoss);
  2. Trains the model with early stopping based on validation loss to prevent overfitting;
  3. After training, evaluates the model on the test set to compute test loss and accuracy;
  4. Collects predictions for further metrics calculation;
  5. Computes and prints the confusion matrix, weighted F1 score, precision, and recall.


In [13]:
####### Train server

# Model, optimizer, and loss
server_model = MLP(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM).to(DEVICE)
optimizer = torch.optim.Adam(server_model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# Early stopping setup
best_val_loss = float('inf')
# Snapshot of the weights that achieved the best validation loss so far.
# Without it, the model evaluated afterwards would be the one from the last
# epoch, i.e. after PATIENCE epochs without improvement.
best_state_dict = None
patience_counter = 0


# Training loop with early stopping
print("Starting training...")
for epoch in range(EPOCHS):
    train_loss, train_acc = train(server_model, train_loader_server, optimizer, criterion)
    val_loss, val_acc = evaluate(server_model, val_loader_server, criterion)
    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Early stopping check (avoid overfitting)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # state_dict() returns references to the live parameters, so a deep
        # copy is needed to freeze the checkpoint.
        best_state_dict = deepcopy(server_model.state_dict())
        patience_counter = 0
        print("New best validation loss. Resetting patience counter.")
    else:
        patience_counter += 1
        print(f"No improvement. Patience counter: {patience_counter}/{PATIENCE}")
        if patience_counter >= PATIENCE:
            print("Early stopping triggered.")
            break

# Roll back to the best checkpoint before any evaluation or reuse of the model.
if best_state_dict is not None:
    server_model.load_state_dict(best_state_dict)


# -----Test the model------
test_loss, test_acc = evaluate(server_model, test_loader_server, criterion)
server_test_accuracy = test_acc
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# Collect predictions for metrics
server_model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for features, labels in test_loader_server:
        features, labels = features.to(DEVICE), labels.to(DEVICE)
        outputs = server_model(features)
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Confusion matrix and other metrics
conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# zero_division=0 matches the metric calls in test_model_and_metrics and avoids
# warnings when a class is never predicted.
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Starting training...
Epoch 1/100 | Train Loss: 0.6462 | Train Acc: 0.6890 | Val Loss: 0.5934 | Val Acc: 0.7968
New best validation loss. Resetting patience counter.
Epoch 2/100 | Train Loss: 0.5423 | Train Acc: 0.7560 | Val Loss: 0.4891 | Val Acc: 0.7914
New best validation loss. Resetting patience counter.
Epoch 3/100 | Train Loss: 0.4882 | Train Acc: 0.7627 | Val Loss: 0.4541 | Val Acc: 0.7861
New best validation loss. Resetting patience counter.
Epoch 4/100 | Train Loss: 0.4712 | Train Acc: 0.7668 | Val Loss: 0.4487 | Val Acc: 0.7968
New best validation loss. Resetting patience counter.
Epoch 5/100 | Train Loss: 0.4562 | Train Acc: 0.7761 | Val Loss: 0.4467 | Val Acc: 0.7914
New best validation loss. Resetting patience counter.
Epoch 6/100 | Train Loss: 0.4436 | Train Acc: 0.7895 | Val Loss: 0.4469 | Val Acc: 0.7861
No improvement. Patience counter: 1/5
Epoch 7/100 | Train Loss: 0.4453 | Train Acc: 0.7882 | Val Loss: 0.4494 | Val Acc: 0.7861
No improvement. Patience counter: 2/5
Epo

#### Definition of Federated Learning parameters.

This section initializes parameters and structures for FL:
  - aggregation_freq: number of local epochs between global model aggregations
  - aggregation_round: computed number of aggregation steps based on total epochs
  - clients_number: total number of simulated FL clients
  - train_loader_clients, val_loader_clients, test_loader_clients: lists to store DataLoaders for each client
  - models: list to hold each client's local model during training



In [14]:
# FL parameters
# Number of local epochs each client trains between two aggregations.
aggregation_freq = 5
# Number of communication rounds.
# NOTE(review): round() rounds to the nearest integer (ties go to the even
# value), so rounds * aggregation_freq can differ from EPOCHS when EPOCHS is
# not a multiple of aggregation_freq.
aggregation_round = round(EPOCHS / aggregation_freq)

print(f"Aggregation_round: {aggregation_round}")
clients_number = 5

# FL clients data
# Placeholders; train_loader_clients is reassigned once the client loaders are
# built. The validation/test lists are not populated in the visible code.
train_loader_clients = []
val_loader_clients = []
test_loader_clients = []

# Placeholder for the per-client models; reassigned when the clients are cloned.
models = []

Aggregation_round: 20


 --- Federated Averaging functions (Task 3.1 B & C) ---

This section defines two aggregation strategies for FL:
  1. fed_avg: standard FedAvg with equal weighting for all clients;
  2. fed_avg_weighted: weighted FedAvg where each client's contribution
     is proportional to its dataset size, giving more influence to larger clients.


In [16]:
# --- Standard and weighted FedAvg implementation (Task 3.1 B & C) ---
def fed_avg(models_weights):
    """Standard FedAvg: plain (equal-weight) mean of the client weights.

    Args:
        models_weights: Non-empty sequence of state dicts sharing the same
            keys; the keys of the first one drive the aggregation.

    Returns:
        A new dict mapping each key to the element-wise mean over all clients.
    """
    client_count = len(models_weights)
    reference_keys = models_weights[0].keys()
    # Average every entry across clients, one key at a time.
    return {
        name: sum([state[name] for state in models_weights]) / client_count
        for name in reference_keys
    }

def fed_avg_weighted(models_weights, client_data_sizes):
    """Weighted FedAvg: mean of client weights weighted by client data size.

    Args:
        models_weights: Sequence of client state dicts sharing the same keys.
        client_data_sizes: Number of samples held by each client, in the same
            order as ``models_weights``.

    Returns:
        A new dict mapping each key to ``sum_i(w_i * state_i[key])`` where
        ``w_i = size_i / sum(sizes)``. The input state dicts are not modified.

    Raises:
        ValueError: If the two sequences differ in length (extra sizes would
            otherwise silently make the weights sum to less than 1), or if the
            total data size is not positive.
    """
    if len(models_weights) != len(client_data_sizes):
        raise ValueError(
            f"Got {len(models_weights)} client models but "
            f"{len(client_data_sizes)} client data sizes."
        )

    total_size = sum(client_data_sizes)
    if total_size <= 0:
        raise ValueError("Total client data size must be positive.")

    # Per-client contribution; sums to 1 over all clients.
    weights = [size / total_size for size in client_data_sizes]

    # Build the weighted sum directly instead of deep-copying a full state
    # dict only to zero it out.
    return {
        key: sum(state[key] * weight for state, weight in zip(models_weights, weights))
        for key in models_weights[0]
    }

#### Data loading and preprocessing (Clients)


This section performs:
  1. Non-IID partitioning of the full training data by size for each client;
     here, each client receives a different number of samples (ratios: 15%, 20%, 10%, 30%, 25%);
  2. Scaling client data using the global scaler, which was fitted on the server training split (the 80% of the training data left after the validation split);
  3. Creating PyTorch DataLoaders for each client;
  4. Initializing a global model and cloning it for each client;
  5. Preparing client sizes for weighted FedAvg aggregation.


In [18]:
# --- Client data partitioning (Task 3.1 A) ---

def split_data_non_iid_by_size(X_full, y_full, num_clients, ratios=None):
    """Split the dataset into contiguous client shards of unequal size.

    Samples are assigned in their existing order (no shuffling): the first
    client gets the first block of rows, the second the next block, and so on.

    Args:
        X_full: Feature array; its first dimension indexes samples.
        y_full: Label array with the same number of samples as ``X_full``.
        num_clients: Number of clients to create.
        ratios: Optional share of the data per client (one non-negative value
            per client; normalized to sum to 1). When omitted, the default
            15% / 20% / 10% / 30% / 25% split is used, which is only defined
            for five clients.

    Returns:
        ``(client_data_splits, client_sizes, client_names)`` where
        ``client_data_splits`` is a list of ``(X_client, y_client)`` copies,
        ``client_sizes`` the number of samples per client, and
        ``client_names`` the labels ``"Client 1"`` ... ``"Client N"``.

    Raises:
        ValueError: If the number of ratios does not match ``num_clients``,
            if the ratios are negative or sum to zero, or if ``X_full`` and
            ``y_full`` differ in length.
    """
    if ratios is None:
        if num_clients != 5:
            raise ValueError(
                "The default ratios are defined for 5 clients; pass `ratios` "
                f"explicitly for num_clients={num_clients}."
            )
        # Non-uniform default shares: 15%, 20%, 10%, 30%, 25% (total 100%)
        ratios = [0.15, 0.20, 0.10, 0.30, 0.25]

    ratios = np.asarray(ratios, dtype=float)
    if ratios.ndim != 1 or len(ratios) != num_clients:
        raise ValueError(
            f"Expected {num_clients} ratios, got an array of shape {ratios.shape}."
        )
    if np.any(ratios < 0) or ratios.sum() <= 0:
        raise ValueError("Ratios must be non-negative and sum to a positive value.")
    ratios = ratios / ratios.sum()

    total_samples = len(X_full)
    if len(y_full) != total_samples:
        raise ValueError("X_full and y_full must contain the same number of samples.")

    sizes = (ratios * total_samples).astype(int)
    # Truncation can lose a few samples; give the remainder to the first client.
    sizes[0] += total_samples - sizes.sum()

    client_data_splits = []
    current_idx = 0
    for size in sizes:
        # Copy the slices so clients never share memory with the source arrays.
        X_client = X_full[current_idx:current_idx + size].copy()
        y_client = y_full[current_idx:current_idx + size].copy()
        client_data_splits.append((X_client, y_client))
        current_idx += size

    client_sizes = sizes.tolist()
    client_names = [f"Client {i+1}" for i in range(num_clients)]

    print(f"Client data sizes (samples): {client_sizes}")
    return client_data_splits, client_sizes, client_names

def create_client_dataloaders(client_data_splits, scaler):
    """Build one training DataLoader per client using an already-fitted scaler.

    Args:
        client_data_splits: Iterable of ``(X_client, y_client)`` pairs; the
            labels must support ``.tolist()``.
        scaler: Fitted object exposing ``transform``; applied to every
            client's features.

    Returns:
        ``(client_train_loaders, client_sizes)``: the DataLoaders and the
        number of samples of each client, in input order.
    """
    loaders = []
    sample_counts = []

    for raw_features, raw_labels in client_data_splits:
        # Every client is scaled with the same shared scaler.
        scaled_features = scaler.transform(raw_features)

        feature_tensor = torch.tensor(scaled_features, dtype=torch.float32)
        label_tensor = torch.tensor(raw_labels.tolist(), dtype=torch.long)

        loaders.append(create_dataloader(feature_tensor, label_tensor, BATCH_SIZE))
        sample_counts.append(len(raw_features))

    return loaders, sample_counts

# Model initialization and data partitioning
# NOTE(review): global_model starts as a copy of the already trained
# server_model, so the FL rounds begin from the centralized weights rather than
# from a fresh initialization — confirm this warm start is intended.
global_model = deepcopy(server_model).to(DEVICE)

# Data partitioning (unequal shard sizes per client)
# Use the full unscaled train set captured from the pipeline execution block
client_data_splits, client_sizes_total, client_names = split_data_non_iid_by_size(
    X_full_unscaled, y_full_unscaled, clients_number)

# Create the client DataLoaders with the scaler returned by features_scaling
# (it was fitted on the server training split).
train_loader_clients, client_train_sizes = create_client_dataloaders(
    client_data_splits, scaler)
clients_number = len(client_names)

# Client sizes used as aggregation weights
client_sizes_for_agg = client_sizes_total 

# Create one model per client, initialized with the global model weights.
# NOTE(review): `models` is not passed to run_fl_simulation in the visible
# code, which clones its own client models — verify whether this list is needed.
models = [deepcopy(global_model).to(DEVICE) for _ in range(clients_number)]
print(f"Global model initialized and {clients_number} client models cloned.")
print(f"Total training samples across clients (for aggregation weighting): {sum(client_sizes_for_agg)}")


--- Starting FL Simulation: Standard FedAvg ---

--- Starting FL Simulation: Weighted FedAvg ---


#### Core of the FL, the aggregation and training cycles.

 --- Federated Learning Training and Aggregation Loop (Task 3.1 D) ---

This section defines and runs the FL simulation for both FedAvg variants:
  1. run_fl_simulation function:
     - Performs local training for each client over a number of local epochs;
     - Aggregates client models using standard or weighted FedAvg;
     - Evaluates the updated global model on the server test set after each round;
     - Records global test accuracy and loss histories;
     - Computes final per-client accuracies after FL training.
  2. Executes the FL simulation:
     - First with standard (unweighted) FedAvg;
     - Then with weighted FedAvg (proportional to client dataset sizes).


In [None]:
# --- FL training and aggregation loop (Task 3.1 D) ---

def run_fl_simulation(global_model, train_loader_clients, client_data_sizes, test_loader_server, 
                     num_rounds, local_epochs, criterion, lr, weighted=False):
    """Simulate federated training with standard or weighted FedAvg.

    Args:
        global_model: Starting model; it is deep-copied, not modified.
        train_loader_clients: One training DataLoader per client.
        client_data_sizes: Sample count per client; only used when
            ``weighted`` is true.
        test_loader_server: Loader used to evaluate the global model after
            every round.
        num_rounds: Number of communication rounds.
        local_epochs: Training epochs each client runs per round.
        criterion: Loss callable used for training and evaluation.
        lr: Learning rate of the per-client Adam optimizers.
        weighted: Aggregate with ``fed_avg_weighted`` when true, otherwise
            with ``fed_avg``.

    Returns:
        ``(final_global_model, accuracy_history, loss_history,
        final_client_accuracies)``. The histories hold one test-set value per
        round; the client accuracies are those of the final global model on
        each client's own training loader.
    """
    server_copy = deepcopy(global_model).to(DEVICE)
    local_models = [deepcopy(server_copy).to(DEVICE) for _ in train_loader_clients]

    accuracy_per_round = []
    loss_per_round = []

    print(f"\n--- Starting FL Simulation: {'Weighted' if weighted else 'Standard'} FedAvg ---")

    for round_num in range(1, num_rounds + 1):

        # 1. Local training
        collected_states = []
        for local_model, loader in zip(local_models, train_loader_clients):
            # Synchronize the client with the current global weights; a fresh
            # optimizer is created for every client in every round.
            local_model.load_state_dict(server_copy.state_dict())
            local_optimizer = torch.optim.Adam(local_model.parameters(), lr=lr)

            for _ in range(local_epochs):
                train(local_model, loader, local_optimizer, criterion)

            collected_states.append(local_model.state_dict())

        # 2. Global aggregation
        aggregated_state = (
            fed_avg_weighted(collected_states, client_data_sizes)
            if weighted
            else fed_avg(collected_states)
        )
        server_copy.load_state_dict(aggregated_state)

        # 3. Global evaluation on the server test loader
        test_loss, test_acc = evaluate(server_copy, test_loader_server, criterion)
        accuracy_per_round.append(test_acc)
        loss_per_round.append(test_loss)

        print(f"Round {round_num}/{num_rounds} | Global Test Loss: {test_loss:.4f} | Global Test Acc: {test_acc:.4f}")

    # 4. Final per-client accuracy of the global model on each client's local
    # training data.
    final_client_accuracies = [
        evaluate(server_copy, loader, criterion)[1]
        for loader in train_loader_clients
    ]

    return server_copy, accuracy_per_round, loss_per_round, final_client_accuracies


# --- Run the FL simulation ---
criterion = nn.CrossEntropyLoss()

# Standard FedAvg
# Both runs receive the same global_model; run_fl_simulation deep-copies it, so
# each variant starts from identical weights.
final_global_model_standard, std_acc, std_loss, std_client_acc = run_fl_simulation(
    global_model, train_loader_clients, client_sizes_for_agg, test_loader_server, 
    aggregation_round, aggregation_freq, criterion, LEARNING_RATE, weighted=False)

# Weighted FedAvg
final_global_model_weighted, wtd_acc, wtd_loss, wtd_client_acc = run_fl_simulation(
    global_model, train_loader_clients, client_sizes_for_agg, test_loader_server, 
    aggregation_round, aggregation_freq, criterion, LEARNING_RATE, weighted=True)

# Model validation

Test the model and print the metrics.

In [20]:
# Helper that tests a model and computes all metrics
def test_model_and_metrics(model, test_loader, model_name):
    """Evaluate ``model`` on ``test_loader`` and print a metrics report.

    The loader is traversed twice: once to collect predictions and once
    through ``evaluate`` to obtain loss and accuracy.

    Args:
        model: Model to evaluate; it is switched to evaluation mode.
        test_loader: Loader yielding ``(features, labels)`` batches.
        model_name: Label printed in the report header.

    Returns:
        ``(test_acc, f1, precision, recall)``; the last three are weighted
        averages computed with ``zero_division=0``.
    """
    model.eval()
    loss_fn = nn.CrossEntropyLoss()

    targets = []
    predictions = []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(DEVICE)
            batch_y = batch_y.to(DEVICE)
            predicted = torch.max(model(batch_x), 1)[1]
            targets.extend(batch_y.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())

    # Final loss and accuracy
    test_loss, test_acc = evaluate(model, test_loader, loss_fn)

    metric_options = {"average": "weighted", "zero_division": 0}
    conf_matrix = confusion_matrix(targets, predictions)
    f1 = f1_score(targets, predictions, **metric_options)
    precision = precision_score(targets, predictions, **metric_options)
    recall = recall_score(targets, predictions, **metric_options)

    print(f"\n--- Metrics for {model_name} ---")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

    return test_acc, f1, precision, recall

# Evaluate the Standard FedAvg global model
test_model_and_metrics(final_global_model_standard, test_loader_server, "Standard FedAvg Global Model")

# Evaluate the Weighted FedAvg global model
test_model_and_metrics(final_global_model_weighted, test_loader_server, "Weighted FedAvg Global Model")

# Build a DataFrame of the final per-client accuracies (used by the plots)
final_client_acc_data = {
    'Client': client_names,
    'Standard FedAvg Acc': std_client_acc,
    'Weighted FedAvg Acc': wtd_client_acc,
    'Size': client_sizes_for_agg
}
final_client_acc_df = pd.DataFrame(final_client_acc_data)

# Test accuracy of the centralized model (from the server training cell).
# NOTE(review): this self-assignment is a no-op; it only fails with a NameError
# if the server training cell has not been run.
server_test_accuracy = server_test_accuracy # already computed in the previous section


=== Federated Learning Models Validation ===
Standard FedAvg - Global Test Accuracy History:
[0.8082191780821918, 0.8116438356164384, 0.815068493150685, 0.8082191780821918, 0.8184931506849316, 0.815068493150685, 0.8184931506849316, 0.815068493150685, 0.8082191780821918, 0.791095890410959, 0.8013698630136986, 0.7945205479452054, 0.7808219178082192, 0.7773972602739726, 0.7808219178082192, 0.7808219178082192, 0.7842465753424658, 0.773972602739726, 0.7773972602739726, 0.7773972602739726]
Final Per-client Accuracies: [0.8590604026845637, 0.8518518518518519, 0.7997587454764777]

Weighted FedAvg - Global Test Accuracy History:
[0.8082191780821918, 0.8082191780821918, 0.797945205479452, 0.8013698630136986, 0.7945205479452054, 0.8047945205479452, 0.791095890410959, 0.7876712328767124, 0.7773972602739726, 0.7773972602739726, 0.7876712328767124, 0.7671232876712328, 0.791095890410959, 0.797945205479452, 0.797945205479452, 0.797945205479452, 0.7876712328767124, 0.7808219178082192, 0.77397260273972

# Graphs

 --- Visualization of Federated Learning Results (Task 3.2) ---

This section generates the required plots for FL analysis:
  1. Global accuracy vs. communication rounds for both FedAvg variants, with a baseline line for the centralized model;
  2. Global loss vs. communication rounds;
  3. Per-client final accuracy comparison (bar chart), sorted by client dataset size;
  4. Weighted vs. unweighted FedAvg convergence (overlayed in global accuracy plot);
  5. Histogram showing data distribution (number of samples) per client.


In [None]:
# ---- Plot generation (Task 3.2) ---
# Each figure is saved to a PNG file in the working directory and then closed;
# nothing is displayed inline.
rounds = range(1, aggregation_round + 1)
client_labels = final_client_acc_df['Client'].tolist()

# 1. Global accuracy vs. rounds & 4. Weighted vs. Unweighted convergence
plt.figure(figsize=(10, 6))
plt.plot(rounds, std_acc, label='Standard FedAvg', marker='o')
plt.plot(rounds, wtd_acc, label='Weighted FedAvg', marker='x')
# Horizontal reference line for the centralized model
plt.axhline(y=server_test_accuracy, color='r', linestyle='--', label=f'Centralisé (Baseline) Acc: {server_test_accuracy:.4f}')
plt.title('Précision (Accuracy) Globale de Test vs. Rounds de Communication')
plt.xlabel('Round de Communication')
plt.ylabel('Précision Globale de Test')
plt.legend()
plt.grid(True)
plt.xticks(rounds)
plt.savefig('fl_global_accuracy_vs_rounds.png')
plt.close()

# 2. Global loss vs. rounds
plt.figure(figsize=(10, 6))
plt.plot(rounds, std_loss, label='Standard FedAvg', marker='o')
plt.plot(rounds, wtd_loss, label='Weighted FedAvg', marker='x')
plt.title('Perte (Loss) Globale de Test vs. Rounds de Communication')
plt.xlabel('Round de Communication')
plt.ylabel('Perte Globale de Test')
plt.legend()
plt.grid(True)
plt.xticks(rounds)
plt.savefig('fl_global_loss_vs_rounds.png')
plt.close()


# 3. Per-client accuracy (bar chart)
# Sort clients by dataset size (ascending)
final_client_acc_df_sorted = final_client_acc_df.sort_values(by='Size', ascending=True)

# One group of two bars per client
x = np.arange(clients_number)
width = 0.35

fig, ax = plt.subplots(figsize=(12, 6))
# Bars for Standard FedAvg
rects1 = ax.bar(x - width/2, final_client_acc_df_sorted['Standard FedAvg Acc'], width, label='Standard FedAvg')
# Bars for Weighted FedAvg
rects2 = ax.bar(x + width/2, final_client_acc_df_sorted['Weighted FedAvg Acc'], width, label='Weighted FedAvg')

ax.set_ylabel('Précision Finale du Client (sur données locales)')
ax.set_xlabel('Client (Trié par Taille des Données)')
ax.set_title('Comparaison de la Précision Finale par Client')
ax.set_xticks(x)
ax.set_xticklabels([f"{c} ({s} samples)" for c, s in zip(final_client_acc_df_sorted['Client'], final_client_acc_df_sorted['Size'])], rotation=45, ha="right")
ax.legend()
ax.grid(axis='y')
plt.tight_layout()
plt.savefig("fl_per_client_accuracy.png")
plt.close()

# 5. Data distribution histogram (Data size per client)
# Uses the unsorted client order from final_client_acc_df.
plt.figure(figsize=(8, 6))
plt.bar(client_labels, client_sizes_for_agg, color='skyblue')
plt.title('Distribution des données par client (Taille des échantillons)')
plt.xlabel('Client ID')
plt.ylabel('Nombre d\'échantillons')
plt.grid(axis='y', linestyle='--')
plt.savefig('fl_client_data_distribution.png')
plt.close()

print("All required graphs have been generated.")

All required graphs have been generated.


##  Interpretation paragraph


 #### Comparison behavior
- Standard FedAvg produced the better global model (higher accuracy and lower loss on the server test set) and was fairer to clients with less data.
- Weighted FedAvg produced a global model that is more accurate for the larger clients, which is expected because their influence on the aggregate is increased.
