In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold

# Load and preprocess data
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv('/content/heart.csv')

# Split the features and the target variable.
# X and y deliberately keep ALL rows: the evaluation code further down takes
# its test set from the tail of these arrays (X[-100:], y[-100:]).
X = df.drop('target', axis=1).values
y = df['target'].values

# Define the number of clients
num_clients = 10

# The last `num_test` rows are held out for evaluation and must never be
# handed to a client.
num_test = 100
num_train = len(df) - num_test
if num_train < num_clients:
    raise ValueError(
        f"Need at least {num_test + num_clients} rows to hold out {num_test} "
        f"test rows and give each of {num_clients} clients data; got {len(df)}"
    )

# Drop the held-out rows FIRST, then size the client shards from what is
# left. Sizing the shards from the full frame makes them too large: the last
# client ends up with a tiny or empty slice and, on small files, earlier
# clients reach into the test rows.
df = df[:num_train]
split_size = len(df) // num_clients

# Create a dictionary to store client data
client_data = {}

# Split the training rows into contiguous, non-overlapping shards
for i in range(num_clients):
    start_idx = i * split_size
    # The last client absorbs the remainder left by the integer division.
    end_idx = len(df) if i == num_clients - 1 else start_idx + split_size

    X_train, y_train = X[start_idx:end_idx], y[start_idx:end_idx]

    # client_data[i]['train'] is the (features, labels) pair for client i
    client_data[i] = {'train': (X_train, y_train)}

# Now client_data[i] holds the training data for the i-th client
# Define the model architecture
class SimpleNN(nn.Module):
    """Small fully connected classifier that outputs two-class log-probabilities.

    Layers: ``input_dim -> 50 -> 20 -> 2`` with ReLU between the linear
    layers and ``log_softmax`` on the output, so the result is suitable for
    ``nn.NLLLoss``.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            module-level feature matrix ``X`` (``X.shape[1]``) is used, which
            keeps existing ``SimpleNN()`` calls working.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # globally loaded feature matrix.
            input_dim = X.shape[1]
        self.fc1 = nn.Linear(input_dim, 50)
        self.fc2 = nn.Linear(50, 20)
        self.fc3 = nn.Linear(20, 2)  # two output classes

    def forward(self, x):
        """Return per-class log-probabilities for a ``(batch, input_dim)`` tensor."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # dim=1 normalises across the class axis of the (batch, 2) logits.
        return torch.log_softmax(self.fc3(x), dim=1)

# Training function for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over every batch yielded by ``train_loader``.

    Args:
        model: Module to train; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Callable ``criterion(outputs, targets)`` returning a
            scalar loss tensor.

    Returns:
        None. The model is updated in place.
    """
    model.train()
    for batch_inputs, batch_targets in train_loader:
        # Gradients accumulate across backward() calls, so reset per batch.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

# Federated Averaging
def federated_averaging(models):
    """Build a new model whose state is the element-wise mean of ``models``.

    Every entry of the state dict (parameters and buffers) is averaged with
    equal weight per model. The input models are not modified.

    Args:
        models: Non-empty sequence of ``nn.Module`` instances that share the
            same architecture (identical state-dict keys and shapes).

    Returns:
        A new module of the same type as ``models[0]`` carrying the averaged
        state.

    Raises:
        ValueError: If ``models`` is empty.
    """
    import copy

    if not models:
        raise ValueError("federated_averaging requires at least one model")

    # Materialise each state dict once instead of once per key per model.
    client_states = [client.state_dict() for client in models]

    averaged_state = {}
    for key, reference in client_states[0].items():
        stacked = torch.stack([state[key] for state in client_states])
        if stacked.is_floating_point() or stacked.is_complex():
            averaged_state[key] = stacked.mean(dim=0)
        else:
            # torch.mean rejects integer/bool tensors (e.g. BatchNorm's
            # num_batches_tracked): average in float64, then cast back to
            # the entry's own dtype (integers truncate toward zero).
            averaged_state[key] = (
                stacked.to(torch.float64).mean(dim=0).to(reference.dtype)
            )

    # Clone the first client as the container so this works for any
    # architecture, not just one specific class.
    global_model = copy.deepcopy(models[0])
    for param in global_model.parameters():
        # Do not carry over gradients copied from the template client.
        param.grad = None
    global_model.load_state_dict(averaged_state)
    return global_model

# Test the global model
def test_model(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and report loss, accuracy and ROC AUC.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        test_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a
            scalar loss tensor.

    Returns:
        Tuple ``(mean_batch_loss, accuracy, auc)`` where ``mean_batch_loss``
        is the sum of per-batch losses divided by the number of batches,
        ``accuracy`` is the fraction of correctly predicted samples and
        ``auc`` is ``roc_auc_score`` of the class-1 scores.
    """
    model.eval()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    all_targets = []
    class1_scores = []

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_outputs = model(batch_inputs)
            running_loss += criterion(batch_outputs, batch_targets).item()

            # Predicted class = index of the largest output per row.
            predicted = torch.max(batch_outputs, 1)[1]
            n_seen += batch_targets.size(0)
            n_correct += (predicted == batch_targets).sum().item()

            all_targets.extend(batch_targets.numpy())
            # Assumes the model emits log-probabilities and column 1 is the
            # positive class, so exp() turns it back into a probability.
            class1_scores.extend(torch.exp(batch_outputs[:, 1]).numpy())

    accuracy = n_correct / n_seen
    auc_value = roc_auc_score(all_targets, class1_scores)
    mean_batch_loss = running_loss / len(test_loader)
    return mean_batch_loss, accuracy, auc_value

# Main loop for federated training
NUM_ROUNDS = 500

# One independently initialised local model per client in client_data.
clients_models = [SimpleNN() for _ in range(len(client_data))]

# The loss function is stateless, so a single instance is shared. It stays
# at module level because the evaluation code below reuses it.
criterion = nn.NLLLoss()

# Each client's data never changes between rounds, so build the tensors and
# loaders once instead of on every round. shuffle=True still reshuffles each
# time a loader is iterated.
client_loaders = []
for client_id in range(len(clients_models)):
    client_features, client_labels = client_data[client_id]['train']
    train_dataset = TensorDataset(
        torch.FloatTensor(client_features),
        torch.LongTensor(client_labels),
    )
    client_loaders.append(DataLoader(train_dataset, batch_size=32, shuffle=True))

# `round_idx` rather than `round`, so the builtin round() is not shadowed
# for the rest of the session.
for round_idx in range(NUM_ROUNDS):
    # Local step: every client trains for one epoch on its own shard.
    for client_model, train_loader in zip(clients_models, client_loaders):
        # A new Adam instance is created each round (as before), so its
        # moment estimates restart from zero after the weights are replaced.
        optimizer = optim.Adam(client_model.parameters(), lr=0.01)
        train_one_epoch(client_model, train_loader, optimizer, criterion)

    # Perform federated averaging
    global_model = federated_averaging(clients_models)

    # Update local models so the next round starts from the shared weights
    for client_model in clients_models:
        client_model.load_state_dict(global_model.state_dict())

# Evaluate the global model on the last 100 rows of the feature/label arrays
X_test = X[-100:]
y_test = y[-100:]

# Keep the test split in the same {'split': (features, labels)} layout that
# the per-client training data uses.
client_data_test = {'test': (X_test, y_test)}

# Independent copies of the held-out features and labels.
test_data = client_data_test['test'][0].copy()
test_labels = client_data_test['test'][1].copy()

test_dataset = TensorDataset(
    torch.FloatTensor(test_data),
    torch.LongTensor(test_labels),
)
# Order does not matter for evaluation, so no shuffling.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# `criterion` is the loss object created by the training code above.
loss, accuracy, auc_score = test_model(global_model, test_loader, criterion)
print(f'Final AUC Score: {auc_score}')


Final AUC Score: 0.8850644122383253
