In [52]:
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch import optim
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
import numpy as np
from copy import deepcopy




In [53]:
class Net(nn.Module):
    def __init__(self, num_features, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x



# Performing Federated Learning Where each client has the entire dataset

In [44]:
myGPU = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def read_data():
    # i want to read the data that every column are features from the csv file, and store the features in a tensor
    # and the labels in a tensor
    # i want to return a list of tensors, each tensor is a feature, and a tensor of labels
    
    # read the csv file
    df = pd.read_csv('dataset/covid_x_csv_classification.csv')
    label_df = pd.read_csv('dataset/covid_y_classification.csv')
    
    
    
    df_sampled = df.sample(frac=0.1, random_state=42)
    label_df_sampled = label_df.loc[df_sampled.index]
    
    encoder = LabelEncoder()
    label_df_sampled = encoder.fit_transform(label_df_sampled.values.ravel())

    features_tensor = torch.tensor(df_sampled.values, dtype=torch.float32)


    labels_tensor = torch.tensor(label_df_sampled, dtype=torch.long)
    
    
    return features_tensor, labels_tensor

X, y = read_data()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_dataset = TensorDataset(X_train.to(myGPU), y_train.to(myGPU))
test_dataset = TensorDataset(X_test.to(myGPU), y_test.to(myGPU))

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)

def client_training(global_model):
    
    local_model = deepcopy(global_model).to(myGPU)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(local_model.parameters(), lr=0.01)
    
    for epoch in range(10):  # loop over the dataset multiple times
        loss = 0.0
        local_model.train()
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(myGPU), data[1].to(myGPU)
            optimizer.zero_grad()
            outputs = local_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
        local_model.eval()
        # Validation loop
        with torch.no_grad():
            correct = 0
            total = 0
            for data in test_loader:
                inputs, labels = data
                outputs = local_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    
        print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}, Accuracy: {(correct / total) * 100:.2f}%')
    return local_model

def model_aggregate(clients_models):
    aggregated_state = {}
    clients_model_list = []
    for models in clients_models:
        clients_model_list.append(models.state_dict())
        
    for key in global_model.state_dict().keys():
        tensors_to_aggregate = []
        for client_state in clients_model_list:
            client_tensor = client_state[key]
            tensors_to_aggregate.append(client_tensor)

        ### Performing FedAvg ###
        stacked_tensors = torch.stack(tensors_to_aggregate, dim=0)
        mean_tensor = torch.mean(stacked_tensors, dim=0)

        aggregated_state[key] = mean_tensor

    # Update the global model's weights with the aggregated weights
    global_model.load_state_dict(aggregated_state)
    return global_model

global_model = Net(30, 4).to(myGPU)

total_training_rounds = 5

clients_number = 5



for i in range(total_training_rounds):
    print(f"=========== Training round {i+1} ===========")
    clients_models = []
    for i in range(clients_number):
        print(f"Training client {i+1}")
        clients_models.append(client_training(global_model))
        print("\n")
    # aggregate the models
    global_model = model_aggregate(clients_models)
print("End of Training")
    



Training client 1
Epoch 1, Loss: 0.5856, Accuracy: 64.86%
Epoch 2, Loss: 0.7232, Accuracy: 66.14%
Epoch 3, Loss: 0.6062, Accuracy: 66.31%
Epoch 4, Loss: 0.6141, Accuracy: 66.51%
Epoch 5, Loss: 0.7048, Accuracy: 66.56%
Epoch 6, Loss: 0.7150, Accuracy: 66.58%
Epoch 7, Loss: 0.6249, Accuracy: 66.70%
Epoch 8, Loss: 0.6362, Accuracy: 66.61%
Epoch 9, Loss: 0.6280, Accuracy: 66.77%
Epoch 10, Loss: 0.6466, Accuracy: 66.75%


Training client 2
Epoch 1, Loss: 0.6115, Accuracy: 64.57%
Epoch 2, Loss: 0.5295, Accuracy: 66.17%
Epoch 3, Loss: 0.6913, Accuracy: 66.39%
Epoch 4, Loss: 0.6543, Accuracy: 66.39%
Epoch 5, Loss: 0.6540, Accuracy: 66.51%
Epoch 6, Loss: 0.8747, Accuracy: 66.55%
Epoch 7, Loss: 0.7932, Accuracy: 66.65%
Epoch 8, Loss: 0.7982, Accuracy: 66.77%
Epoch 9, Loss: 0.6679, Accuracy: 66.75%
Epoch 10, Loss: 0.7038, Accuracy: 66.81%


Training client 3
Epoch 1, Loss: 0.6132, Accuracy: 65.33%
Epoch 2, Loss: 0.8327, Accuracy: 66.11%
Epoch 3, Loss: 0.7514, Accuracy: 66.40%
Epoch 4, Loss: 0.625

# Performing Federated Training Where each Client has its own part of data

In [48]:
myGPU = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def read_data():
    # Read the CSV files
    df = pd.read_csv('dataset/covid_x_csv_classification.csv')
    label_df = pd.read_csv('dataset/covid_y_classification.csv')

    # Sample 10% of the data
    df_sampled = df.sample(frac=1, random_state=42)
    label_df_sampled = label_df.loc[df_sampled.index]

    # Label encoding
    encoder = LabelEncoder()
    label_df_sampled = encoder.fit_transform(label_df_sampled.values.ravel())

    # Concatenate features and labels
    combined_df = pd.concat([df_sampled, pd.DataFrame(label_df_sampled, index=df_sampled.index, columns=['Label'])], axis=1)

    # Shuffle the combined DataFrame
    combined_df_shuffled = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Split the DataFrame into 5 chunks
    chunks = np.array_split(combined_df_shuffled, 5)

    # Separate features and labels for each chunk
    features_chunks = [torch.tensor(chunk.iloc[:, :-1].values, dtype=torch.float32) for chunk in chunks]
    labels_chunks = [torch.tensor(chunk.iloc[:, -1].values, dtype=torch.long) for chunk in chunks]

    return features_chunks, labels_chunks

# Usage
X, y = read_data()


  return bound(*args, **kwds)


Data partitioning to let each client has its own distinctive data

In [49]:
client_train_dataloader = []
client_test_dataloader = []

for index in range(len(X)):
    X_train, X_test, y_train, y_test = train_test_split(X[index], y[index], test_size=0.2, random_state=42)
    
    train_dataset = TensorDataset(X_train.to(myGPU), y_train.to(myGPU))
    test_dataset = TensorDataset(X_test.to(myGPU), y_test.to(myGPU))
    
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)
    client_train_dataloader.append(train_loader)
    client_test_dataloader.append(test_loader)

In [50]:
def client_training(global_model,train_loader,test_loader):
    
    local_model = deepcopy(global_model).to(myGPU)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(local_model.parameters(), lr=0.01)
    
    for epoch in range(10):  # loop over the dataset multiple times
        loss = 0.0
        local_model.train()
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(myGPU), data[1].to(myGPU)
            optimizer.zero_grad()
            outputs = local_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
        local_model.eval()
        # Validation loop
        with torch.no_grad():
            correct = 0
            total = 0
            for data in test_loader:
                inputs, labels = data
                outputs = local_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    
        print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}, Accuracy: {(correct / total) * 100:.2f}%')
    return local_model

def model_aggregate(clients_models):
    aggregated_state = {}
    clients_model_list = []
    for models in clients_models:
        clients_model_list.append(models.state_dict())
        
    for key in global_model.state_dict().keys():
        tensors_to_aggregate = []
        for client_state in clients_model_list:
            client_tensor = client_state[key]
            tensors_to_aggregate.append(client_tensor)

        ### Performing FedAvg ###
        stacked_tensors = torch.stack(tensors_to_aggregate, dim=0)
        mean_tensor = torch.mean(stacked_tensors, dim=0)

        aggregated_state[key] = mean_tensor

    # Update the global model's weights with the aggregated weights
    global_model.load_state_dict(aggregated_state)
    return global_model


global_model = Net(30, 4).to(myGPU)

total_training_rounds = 5

clients_number = 5

for i in range(total_training_rounds):
    print(f"=========== Training round {i + 1} ===========")
    clients_models = []
    for i in range(clients_number):
        print(f"Training client {i + 1}")
        clients_models.append(client_training(global_model, client_train_dataloader[i],client_test_dataloader[i]))
        print("\n")
    # aggregate the models
    global_model = model_aggregate(clients_models)

print("End of training")



Training client 1
Epoch 1, Loss: 0.7867, Accuracy: 65.88%
Epoch 2, Loss: 0.7690, Accuracy: 66.31%
Epoch 3, Loss: 0.9238, Accuracy: 66.33%
Epoch 4, Loss: 0.9973, Accuracy: 66.43%
Epoch 5, Loss: 0.9041, Accuracy: 66.49%
Epoch 6, Loss: 0.6180, Accuracy: 66.50%
Epoch 7, Loss: 0.6846, Accuracy: 66.67%
Epoch 8, Loss: 0.5848, Accuracy: 66.60%
Epoch 9, Loss: 0.5273, Accuracy: 66.62%
Epoch 10, Loss: 0.7130, Accuracy: 66.71%


Training client 2
Epoch 1, Loss: 0.6233, Accuracy: 66.09%
Epoch 2, Loss: 0.5985, Accuracy: 66.52%
Epoch 3, Loss: 0.5688, Accuracy: 66.65%
Epoch 4, Loss: 0.6054, Accuracy: 66.75%
Epoch 5, Loss: 0.5648, Accuracy: 66.72%
Epoch 6, Loss: 0.8392, Accuracy: 66.83%
Epoch 7, Loss: 0.6011, Accuracy: 66.85%
Epoch 8, Loss: 0.6707, Accuracy: 66.94%
Epoch 9, Loss: 0.6771, Accuracy: 66.90%
Epoch 10, Loss: 0.8407, Accuracy: 66.95%


Training client 3
Epoch 1, Loss: 0.5803, Accuracy: 65.63%
Epoch 2, Loss: 0.5773, Accuracy: 66.20%
Epoch 3, Loss: 0.5929, Accuracy: 66.33%
Epoch 4, Loss: 0.572

# Performing Federated Training using Label "Died"

In [None]:
myGPU = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def read_data():
    # Read the CSV files
    df = pd.read_csv('dataset/covid_x_died.csv')
    label_df = pd.read_csv('dataset/covid_y_died.csv')

    # Sample 10% of the data
    df_sampled = df.sample(frac=1, random_state=42)
    label_df_sampled = label_df.loc[df_sampled.index]

    # Label encoding
    encoder = LabelEncoder()
    label_df_sampled = encoder.fit_transform(label_df_sampled.values.ravel())

    # Concatenate features and labels
    combined_df = pd.concat([df_sampled, pd.DataFrame(label_df_sampled, index=df_sampled.index, columns=['Label'])], axis=1)

    # Shuffle the combined DataFrame
    combined_df_shuffled = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Split the DataFrame into 5 chunks
    chunks = np.array_split(combined_df_shuffled, 5)

    # Separate features and labels for each chunk
    features_chunks = [torch.tensor(chunk.iloc[:, :-1].values, dtype=torch.float32) for chunk in chunks]
    labels_chunks = [torch.tensor(chunk.iloc[:, -1].values, dtype=torch.long) for chunk in chunks]

    return features_chunks, labels_chunks

# Usage
X, y = read_data()

client_train_dataloader = []
client_test_dataloader = []

for index in range(len(X)):
    X_train, X_test, y_train, y_test = train_test_split(X[index], y[index], test_size=0.2, random_state=42)
    
    train_dataset = TensorDataset(X_train.to(myGPU), y_train.to(myGPU))
    test_dataset = TensorDataset(X_test.to(myGPU), y_test.to(myGPU))
    
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)
    client_train_dataloader.append(train_loader)
    client_test_dataloader.append(test_loader)
    
def client_training(global_model,train_loader,test_loader):
    
    local_model = deepcopy(global_model).to(myGPU)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(local_model.parameters(), lr=0.01)
    
    for epoch in range(10):  # loop over the dataset multiple times
        loss = 0.0
        local_model.train()
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(myGPU), data[1].to(myGPU)
            optimizer.zero_grad()
            outputs = local_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
        local_model.eval()
        # Validation loop
        with torch.no_grad():
            correct = 0
            total = 0
            for data in test_loader:
                inputs, labels = data
                outputs = local_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    
        print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}, Accuracy: {(correct / total) * 100:.2f}%')
    return local_model

def model_aggregate(clients_models):
    aggregated_state = {}
    clients_model_list = []
    for models in clients_models:
        clients_model_list.append(models.state_dict())
        
    for key in global_model.state_dict().keys():
        tensors_to_aggregate = []
        for client_state in clients_model_list:
            client_tensor = client_state[key]
            tensors_to_aggregate.append(client_tensor)

        ### Performing FedAvg ###
        stacked_tensors = torch.stack(tensors_to_aggregate, dim=0)
        mean_tensor = torch.mean(stacked_tensors, dim=0)

        aggregated_state[key] = mean_tensor

    # Update the global model's weights with the aggregated weights
    global_model.load_state_dict(aggregated_state)
    return global_model


global_model = Net(36, 2).to(myGPU)

total_training_rounds = 5

clients_number = 5

for i in range(total_training_rounds):
    print(f"=========== Training round {i + 1} ===========")
    clients_models = []
    for i in range(clients_number):
        print(f"Training client {i + 1}")
        clients_models.append(client_training(global_model, client_train_dataloader[i],client_test_dataloader[i]))
        print("\n")
    # aggregate the models
    global_model = model_aggregate(clients_models)

print("End of training")


  return bound(*args, **kwds)


Training client 1
Epoch 1, Loss: 0.1164, Accuracy: 93.89%
Epoch 2, Loss: 0.1018, Accuracy: 93.97%
Epoch 3, Loss: 0.0793, Accuracy: 94.05%
Epoch 4, Loss: 0.1374, Accuracy: 94.10%
Epoch 5, Loss: 0.1199, Accuracy: 94.15%
Epoch 6, Loss: 0.1081, Accuracy: 94.18%
Epoch 7, Loss: 0.0924, Accuracy: 94.19%
Epoch 8, Loss: 0.1831, Accuracy: 94.18%
Epoch 9, Loss: 0.3064, Accuracy: 94.19%
Epoch 10, Loss: 0.0657, Accuracy: 94.18%


Training client 2
Epoch 1, Loss: 0.3073, Accuracy: 93.66%
Epoch 2, Loss: 0.0643, Accuracy: 93.77%
Epoch 3, Loss: 0.1372, Accuracy: 93.82%
Epoch 4, Loss: 0.0940, Accuracy: 93.85%
Epoch 5, Loss: 0.0814, Accuracy: 93.88%
Epoch 6, Loss: 0.0542, Accuracy: 93.91%
Epoch 7, Loss: 0.1137, Accuracy: 93.88%
Epoch 8, Loss: 0.0166, Accuracy: 93.91%
Epoch 9, Loss: 0.1832, Accuracy: 93.85%
Epoch 10, Loss: 0.0896, Accuracy: 93.93%


Training client 3
Epoch 1, Loss: 0.1530, Accuracy: 93.75%
Epoch 2, Loss: 0.0299, Accuracy: 93.74%
Epoch 3, Loss: 0.2142, Accuracy: 93.91%
Epoch 4, Loss: 0.182