In [42]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn 
from torch.utils.data import random_split, DataLoader, TensorDataset 
import flwr as fl
from flwr.common import Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from collections import OrderedDict
from typing import List, Tuple
import sys
import time
import warnings
warnings.simplefilter('ignore')

In [43]:
print("Python version:", sys.version)
print("Version info:", sys.version_info)

Python version: 3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]
Version info: sys.version_info(major=3, minor=10, micro=11, releaselevel='final', serial=0)


In [44]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}")

Training on cpu using PyTorch 2.1.0+cpu and Flower 1.5.0


In [45]:
NUM_CLIENTS = 3
EPOCHS = 10
ROUNDS = 5

BATCH_SIZE = 32
IN_FEATURES = 3
HIDDEN_LAYERS = 80
OUT_FEATURES = 2

weight = torch.tensor([0.8608, 0.1392])

In [46]:
def load_datasets():
    df = pd.read_csv('./datasets/label_data.csv')
    
    feature = df.iloc[:, :-1]
    target = df.loc[:, 'label']
    feature = torch.Tensor(feature.to_numpy())
    target = torch.tensor(target.to_numpy())
    tensor_data = TensorDataset(feature, target)
    
    split_ratio = 0.8
    train_split = int(len(feature) * split_ratio)
    test_split = len(feature) - train_split
    while train_split % NUM_CLIENTS != 0:
        train_split -= 1
        test_split += 1
    train_set, test_set = random_split(tensor_data, [train_split, test_split])  

    # split_index = int(len(df) * split_ratio)
    # train_set = tensor_data.iloc[:split_index, :]
    # test_set = tensor_data.iloc[split_index:, :]
    
    part_size = len(train_set) // NUM_CLIENTS
    length = [part_size] * NUM_CLIENTS  # lengths for each client
    
    # Split the test set evenly into thirds, removing the remainders
    # random_choose = np.random.choice(train_set.index, (len(train_set) % NUM_CLIENTS), replace=False)
    # train_set = train_set.drop(random_choose)
    
    datasets = random_split(train_set, length, generator=torch.Generator().manual_seed(42))
    
    train_loader = []
    val_loader = []
    
    for data in datasets:
        val_length = len(data) // 10  # 10% for validation 
        
        train_length = len(data) - val_length
        length = [train_length, val_length]
        train_data, val_data = random_split(data, length, generator=torch.Generator().manual_seed(42))
        
        train_loader.append(DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True))
        val_loader.append(DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True))
    
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)
    return train_loader, val_loader, test_loader
    
train_loader, val_loader, test_loader = load_datasets()

In [47]:
class Network(nn.Module):
    
    def __init__(self, IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES):
        super().__init__()
        self.flatten = nn.Flatten()
        
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(IN_FEATURES, HIDDEN_LAYERS), 
            nn.ReLU(),
            nn.Linear(HIDDEN_LAYERS, HIDDEN_LAYERS), 
            nn.ReLU(),
            nn.Linear(HIDDEN_LAYERS, OUT_FEATURES), 
            nn.Softmax(dim=1)
        )
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [48]:
def train(model, train_loader, epochs):
    criterion = torch.nn.CrossEntropyLoss(weight=weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    
    for epoch in range(1, epochs + 1): 
        correct, total, epoch_loss = 0, 0, 0.0
        
        for feature, target in train_loader:
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            optimizer.zero_grad()
            output = model(feature)
            train_loss = criterion(output, target)
            train_loss.backward()
            optimizer.step()
            
            epoch_loss += train_loss
            total += target.size(0)
            correct += (torch.max(output.data, 1)[1] == target).sum().item()
            
        epoch_loss /= len(train_loader.dataset)
        epoch_accuracy = correct / total
        
        print(f"Epoch {epoch}/{EPOCHS}: train loss is {epoch_loss:.4f}, accuracy is {epoch_accuracy:.4f}.")

In [49]:
def test(model, test_loader):
    criterion = torch.nn.CrossEntropyLoss(weight=weight)
    model.eval()
    
    correct, total, loss = 0, 0, 0.0
    actual_labels = []
    predicted_labels = []
 
    with torch.no_grad():
        for feature, target in test_loader:
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            output = model(feature)
            _, predicted = torch.max(output.data, 1)
            
            loss += criterion(output, target).item()
            # total += target.size(0)
            # correct += (predicted == target).sum().item()
            
            actual_labels.extend(target.cpu().numpy())
            predicted_labels.extend(predicted.cpu().numpy())
            
        loss /= len(test_loader.dataset)
        # accuracy = correct / total
        # return loss, accuracy
        
    accuracy = accuracy_score(actual_labels, predicted_labels)
    precision = precision_score(actual_labels, predicted_labels, average='weighted')
    recall = recall_score(actual_labels, predicted_labels, average='weighted')
    f1 = f1_score(actual_labels, predicted_labels, average='weighted')
    
    print("Testing Result:"
          "\n----------------------------------------------------------------------------------------")
    print(f"Loss: {loss:.4f}   Accuracy: {accuracy:.4f}   Precision: {precision:.4f}   Recall: {recall:.4f}   F1-Score: {f1:.4f}"
          f"\n----------------------------------------------------------------------------------------")

In [50]:
# PyTorch model testing

train_loader = train_loader[1]
val_loader = val_loader[1]
model = Network(IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES).to(DEVICE)

train_start = time.time()
train(model, train_loader, EPOCHS)
train_end = time.time()
print(f"\nTraining time: {train_end - train_start:.4f} secs.")

test(model, val_loader)

Epoch 1/10: train loss is 0.0109, accuracy is 0.8590.
Epoch 2/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 3/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 4/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 5/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 6/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 7/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 8/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 9/10: train loss is 0.0106, accuracy is 0.8606.
Epoch 10/10: train loss is 0.0106, accuracy is 0.8606.

Training time: 81.0174 secs.
Testing Result:
----------------------------------------------------------------------------------------
Loss: 0.0106   Accuracy: 0.8633   Precision: 0.7453   Recall: 0.8633   F1-Score: 0.8000
----------------------------------------------------------------------------------------


In [51]:
def get_parameters(model) -> List[np.ndarray]:
    return [val.cpu().numpy() for _, val in model.state_dict().items()]

def set_parameters(model, parameters: List[np.ndarray]):
    params_dict = zip(model.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    model.load_state_dict(state_dict, strict=True)

In [52]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, cid, model, train_loader, val_loader):
        self.cid = cid
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader

    def get_parameters(self, config):
        print(f"Client {self.cid} received the parameters.")
        return get_parameters(self.model)

    def fit(self, parameters, config):
        print(f"Client {self.cid} fit, config: {config}.")
        set_parameters(self.model, parameters)
        train(self.model, self.train_loader, epochs=EPOCHS)
        return get_parameters(self.model), len(self.train_loader), {}

    def evaluate(self, parameters, config):
        print(f"Client {self.cid} evaluate, config: {config}.")
        set_parameters(self.model, parameters)
        loss, accuracy = test(self.model, self.val_loader)
        return float(loss), len(self.val_loader), {'accuracy: ': float(accuracy)}

In [53]:
def client_fn(cid: str) -> FlowerClient:
    print(f"Client: {cid} -------------------------------------------------------------------------")
    model = Network(IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES).to(DEVICE)
    
    train_loader, val_loader, test_loader = load_datasets()
    train_loader = train_loader[int(cid)]
    val_loader = val_loader[int(cid)]
    
    return FlowerClient(cid, model, train_loader, val_loader)

In [54]:
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    accuracies = [num_examples * m['accuracy'] for num_examples, m in metrics]
    examples = [num_examples for num_examples, _ in metrics]
    return {'accuracy': sum(accuracies) / sum(examples)}

In [56]:
params = get_parameters(Network(IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES))

strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,
    fraction_evaluate=1.0,
    min_fit_clients=NUM_CLIENTS,
    min_evaluate_clients=NUM_CLIENTS,
    min_available_clients=NUM_CLIENTS,
    initial_parameters=fl.common.ndarrays_to_parameters(params),
)

fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=ROUNDS),
    strategy=strategy,
)

INFO flwr 2023-11-09 17:15:32,395 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=5, round_timeout=None)


FileNotFoundError: [WinError 2] 系统找不到指定的文件。

Citation:   
https://flower.dev/docs/framework/tutorial-series-get-started-with-flower-pytorch.html
https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html