In [None]:
# Install packages #
# !pip install numpy
# !pip install matplotlib
# !pip install torch
# !pip install xlrd
# !pip install pandas
# !pip install torchvision

In [9]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import MNIST
from torch import Tensor
from torch.optim.optimizer import (Optimizer, required, _use_grad_for_differentiable, _default_to_fused_or_foreach,
                        _differentiable_doc, _foreach_doc, _maximize_doc)
from typing import List, Optional

In [2]:
# Loading training data
# Gender is encoded explicitly with .map so the column becomes numeric directly,
# instead of relying on replace() implicitly downcasting an object column.
gender_codes = {'Female': 0, 'Male': 1}
dataset = pd.read_csv("bmi_train.csv")
dataset['Gender'] = dataset['Gender'].map(gender_codes)  # Gender -> 0/1
dataset = dataset.to_numpy()

# Splitting off 80% of data for training, 20% for validation
# Columns 0-2 are used as features, column 3 as the target.
train_split = int(0.8 * len(dataset))
X_train = dataset[:train_split, [0,1,2]]
y_train = dataset[:train_split, 3]
X_test = dataset[train_split:, [0,1,2]]
y_test = dataset[train_split:, 3]

print(X_train)
print(y_train)

# Loading prediction data
prediction_dataset = pd.read_csv("bmi_validation.csv")
prediction_dataset['Gender'] = prediction_dataset['Gender'].map(gender_codes)  # Gender -> 0/1
X_prediction = prediction_dataset.to_numpy()

# Normalize data set
# Min-max statistics come from the TRAINING features only and are re-used for the
# held-out and prediction features, so every split goes through the same transform
# the model was fitted on.
feature_min = X_train.min(0)
feature_range = X_train.max(0) - feature_min
feature_range[feature_range == 0] = 1  # constant feature: avoid dividing by zero
X_train_normalized = (X_train - feature_min) / feature_range
X_test_normalized = (X_test - feature_min) / feature_range
# NOTE(review): assumes bmi_validation.csv holds the same three feature columns,
# in the same order, as the training features - TODO confirm.
X_prediction_normalized = (X_prediction - feature_min) / feature_range

# Turn data to tensor
X_train_tensor = torch.from_numpy(X_train_normalized)
y_train_tensor = torch.from_numpy(y_train)
X_test_tensor = torch.from_numpy(X_test_normalized)
y_test_tensor = torch.from_numpy(y_test)
X_prediction_tensor = torch.from_numpy(X_prediction_normalized)

[[  1 161  89]
 [  1 179 127]
 [  1 172 139]
 [  1 153 104]
 [  1 165  68]
 [  1 172  92]
 [  1 182 108]
 [  1 179 130]
 [  1 142  71]
 [  0 158 153]
 [  1 194 108]
 [  0 178 107]
 [  1 155  57]
 [  0 151  64]
 [  0 181  80]
 [  0 147 126]
 [  0 142 159]
 [  1 165 155]
 [  0 146 104]
 [  1 157  56]
 [  0 173  82]
 [  0 170 102]
 [  0 190 118]
 [  0 168 140]
 [  0 153  78]
 [  1 188 123]
 [  0 162  64]
 [  1 182 104]
 [  1 194 115]
 [  0 185 102]
 [  1 178  52]
 [  0 192  90]
 [  1 147 142]
 [  1 152 103]
 [  0 169  54]
 [  1 178 127]
 [  1 165 105]
 [  0 172  67]
 [  1 140 129]
 [  1 141  86]
 [  0 172  96]
 [  1 191  96]
 [  0 147 107]
 [  1 197 119]
 [  0 162 159]
 [  1 173 139]
 [  1 185 139]
 [  0 152  90]
 [  1 163 131]
 [  1 188 141]
 [  1 189 104]
 [  0 165  57]
 [  1 185  76]
 [  0 157 110]
 [  1 179  56]
 [  0 186 143]
 [  0 195 104]
 [  0 141 126]
 [  1 148 141]
 [  0 191  54]
 [  0 150  70]
 [  1 170  95]
 [  0 153  77]
 [  1 179 152]
 [  1 184 157]
 [  1 154 112]
 [  1 190 

In [17]:
# Read the first 1000 rows of the space-separated epsilon file into an array
dataset = pd.read_csv("epsilon_normalized_testing.txt", sep=' ', header=None, nrows=1000).to_numpy()

# Feature cells that are strings look like "<index>:<value>"; keep only the value.
# The last column is left untouched here and removed immediately afterwards.
last_column = dataset.shape[1] - 1
for column in range(1, last_column):
    parsed_column = []
    for cell in dataset[:, column]:
        if isinstance(cell, str):
            parsed_column.append(float(cell.split(':')[1]))
        else:
            parsed_column.append(cell)
    dataset[:, column] = parsed_column
dataset = dataset[:, :-1]
print(dataset)


# First 80% of the rows are used for training, the rest for validation.
# Column 0 is the label, the remaining columns are the features.
train_split = int(0.8 * len(dataset))
train_rows = dataset[:train_split]
test_rows = dataset[train_split:]
X_train = train_rows[:, 1:].astype(np.float32)
y_train = train_rows[:, 0].astype(np.float32)
X_test = test_rows[:, 1:].astype(np.float32)
y_test = test_rows[:, 0].astype(np.float32)
#print(X_train)
#print(y_train)

# Normalize data set
#X_train_normalized = (X_train - X_train.min(0)) / (X_train.max(0) - X_train.min(0))
#X_test_normalized = (X_test - X_test.min(0)) / (X_test.max(0) - X_test.min(0))

# Wrap the arrays as tensors and report their sizes
X_train_tensor = torch.from_numpy(X_train)
y_train_tensor = torch.from_numpy(y_train)
X_test_tensor = torch.from_numpy(X_test)
y_test_tensor = torch.from_numpy(y_test)
for split_tensor in (X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor):
    print(split_tensor.size())

[[-1 0.00543878 0.013331 ... -0.022238 -0.00945892 0.0275439]
 [1 0.00144174 0.0338203 ... 0.023466 0.0234591 0.0364969]
 [1 0.00459705 -0.0427841 ... -0.00280583 -0.00405907 0.0245653]
 ...
 [-1 -0.00806419 -0.0144029 ... -0.0133173 -0.0401071 0.00589705]
 [-1 -0.0175411 -0.0501311 ... 0.0183344 -0.0016165 -0.0097232]
 [-1 -0.00874117 0.0151158 ... -0.0409596 -0.0260277 -0.00208328]]
torch.Size([800, 1999])
torch.Size([800])
torch.Size([200, 1999])
torch.Size([200])


In [None]:
# Train and test error rate analysis function
def calculate_error_rate(X, y, w, b):
    """Return the fraction of samples whose rounded linear prediction differs from y.

    Predictions are computed as ``np.dot(X, w) + b`` and rounded to the nearest
    integer before being compared with the labels.

    Args:
        X: Feature matrix with one row per sample.
        y: Labels, one per sample; a NumPy array or a torch tensor.
        w: Weight vector with one entry per feature.
        b: Bias (scalar or one-element array).

    Returns:
        A 0-dim torch tensor holding ``mismatches / number_of_samples``.
    """
    num_samples = X.shape[0]
    y_pred = torch.round(torch.as_tensor(np.dot(X, w) + b))
    # Bring the labels into the same tensor type as the predictions so the
    # subtraction is tensor-vs-tensor whether the caller passed an ndarray or
    # a tensor; torch.count_nonzero requires a tensor input.
    y_true = torch.as_tensor(y, dtype=y_pred.dtype)
    error_count = torch.count_nonzero(y_pred - y_true)
    error_rate = error_count / num_samples
    return error_rate

Custom SGD

In [None]:
# Vanilla (full-batch) gradient descent
def gradient_descent(X, y, learning_rate, num_iterations):
    """Fit a linear model ``X @ w + b`` with full-batch gradient descent.

    Args:
        X: 2-D feature array (samples x features).
        y: Target values, one per sample.
        learning_rate: Step size applied to each gradient.
        num_iterations: Number of update steps to run.

    Returns:
        Tuple ``(w, b, cost_history)``: the weight vector, the bias, and the
        mean squared error measured at every iteration before that
        iteration's update.
    """
    n_rows, n_cols = X.shape
    scale = 1/n_rows

    # Parameters start at zero
    w = np.zeros(n_cols)
    b = 0
    cost_history = []

    for _step in range(num_iterations):
        # Residual of the current predictions
        residual = (np.dot(X, w) + b) - y

        # Mean squared error of the predictions made with the current parameters
        cost_history.append(np.mean(np.square(residual)))

        # Gradients with respect to the weights and the bias
        grad_w = scale * np.dot(X.T, residual)
        grad_b = scale * np.sum(residual)

        # Move against the gradient
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

    return w, b, cost_history

# Hyper-parameters for the full-batch run
learning_rate = 0.01
num_iterations = 10000

# Fit the linear model on the normalized training features
w, b, cost_history = gradient_descent(X_train_normalized, y_train, learning_rate, num_iterations)

# Report the fitted weights and bias
print("Learned parameters:")

for i in range(len(w)):
    print(f"w{i} =", w[i])
print("b =", b)

# Cost per iteration
plt.plot(cost_history)
plt.title("Cost History")
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.show()

# Error rate on the training split
train_error_rate = calculate_error_rate(X_train_normalized,  y_train, w, b)
print("Train error rate:", train_error_rate)

# Error rate on the held-out split, when one is available
if not (X_test is None or y_test is None):
    test_error_rate = calculate_error_rate(X_test_normalized, y_test, w, b)
    print("Test error rate:", test_error_rate)

In [None]:
# Stochastic (mini-batch) Gradient Descent Algorithm
def stochastic_gradient_descent(X, y, learning_rate, num_epochs, batch_size):
    """Fit a linear model ``X @ w + b`` with mini-batch gradient descent.

    The data is reshuffled (via ``np.random.permutation``) at the start of
    every epoch and processed in consecutive mini-batches. The last batch of
    an epoch may be smaller than ``batch_size``; it is still used, so every
    sample contributes in every epoch and a ``batch_size`` larger than the
    data set degrades to full-batch updates instead of performing none.

    Args:
        X: 2-D feature array (samples x features).
        y: Target values, one per sample.
        learning_rate: Step size applied to each gradient.
        num_epochs: Number of passes over the data.
        batch_size: Maximum number of samples per update; must be >= 1.

    Returns:
        Tuple ``(w, b, cost_history)``: the weight vector, the bias, and the
        mean squared error of every mini-batch, measured before its update.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"Invalid batch_size: {batch_size}")

    num_samples, num_features = X.shape

    # Initialize weights and bias
    w = np.zeros(num_features)
    b = 0
    cost_history = []

    for _ in range(num_epochs):
        # Shuffle the data for each epoch
        permutation = np.random.permutation(num_samples)
        X_shuffled = X[permutation]
        y_shuffled = y[permutation]

        for start in range(0, num_samples, batch_size):
            # Select the current batch (the final one may be shorter)
            X_batch = X_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            current_size = X_batch.shape[0]

            # Difference between predictions and actual values
            error = np.dot(X_batch, w) + b - y_batch

            # Average the gradients over the samples actually in this batch
            w_gradient = (1 / current_size) * np.dot(X_batch.T, error)
            b_gradient = (1 / current_size) * np.sum(error)

            # Update weights and bias
            w -= learning_rate * w_gradient
            b -= learning_rate * b_gradient

            # Record the cost (mean squared error) of this batch
            cost_history.append(np.mean(np.square(error)))

    return w, b, cost_history

# Hyper-parameters for the mini-batch run
learning_rate = 0.01
num_epochs = 1000
batch_size = 10

# Fit the linear model on the normalized training features
w, b, cost_history = stochastic_gradient_descent(X_train_normalized, y_train, learning_rate, num_epochs, batch_size)

# Report the fitted weights and bias
print("Learned parameters:")
for i in range(len(w)):
    print(f"w{i} =", w[i])
print("b =", b)

# Cost per mini-batch update
plt.plot(cost_history)
plt.title("Cost History")
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.show()

# Error rate on the training split
train_error_rate = calculate_error_rate(X_train_normalized,  y_train, w, b)
print("Train error rate:", train_error_rate)

# Error rate on the held-out split, when one is available
if not (X_test is None or y_test is None):
    test_error_rate = calculate_error_rate(X_test_normalized, y_test, w, b)
    print("Test error rate:", test_error_rate)

PyTorch SGD

PyTorch SGD test (written by Chris for testing purposes)

In [None]:
# Hyper-parameters
learning_rate = 0.01
num_epochs = 5000

# Number of input features = number of columns of the training tensor
num_features = X_train_tensor.shape[1]

# Trainable weights and bias, both starting at zero
w = torch.zeros(num_features, dtype=torch.float, requires_grad=True)
# w = torch.tensor([1., 1., 1.], requires_grad=True)
b = torch.zeros(1, dtype=torch.float, requires_grad=True)
# b = torch.tensor([1.], requires_grad=True)
cost_history = []

# Mean squared error loss
criterion = torch.nn.MSELoss()

# Built-in SGD over the two parameter tensors, fed the full training set every step
optimizer = torch.optim.SGD([w, b], lr=learning_rate, weight_decay=0)

# Training loop
for epoch in range(num_epochs):
    # Clear the gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass: linear prediction and its loss
    outputs = torch.matmul(X_train_tensor.float(), w) + b
    loss = criterion(outputs, y_train_tensor.float())

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Keep the loss of this epoch
    cost_history.append(loss.detach().numpy())

    # Report the first epoch and every 10th epoch after that
    if epoch == 0 or (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.8f}')


# Show the fitted parameters
print('Trained weights:', w)
print('Trained bias:', b)

# Loss per epoch
plt.plot(cost_history)
plt.title("Cost History")
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.show()

# Error rate on the training split
train_error_rate = calculate_error_rate(X_train_normalized,  y_train, w.detach().numpy(), b.detach().numpy())
print("Train error rate:", train_error_rate)

# Error rate on the held-out split, when one is available
if not (X_test is None or y_test is None):
    test_error_rate = calculate_error_rate(X_test_normalized, y_test, w.detach().numpy(), b.detach().numpy())
    print("Test error rate:", test_error_rate)

Custom SGD Class

In [None]:
class custom_optimizer_SGD(Optimizer):
    """Minimal stochastic gradient descent optimizer with optional weight decay.

    Each step applies ``param -= lr * grad`` and then, when ``weight_decay`` is
    non-zero, shrinks the updated parameter by ``lr * weight_decay * param``.

    Args:
        params: Iterable of parameters (or parameter-group dicts) to optimize.
        lr: Learning rate; must be non-negative.
        weight_decay: Weight-decay coefficient; must be non-negative.

    Raises:
        ValueError: If ``lr`` or ``weight_decay`` is negative.
    """

    def __init__(self, params, lr=required, weight_decay=0 ):
        if lr is not required and lr < 0.0:
            raise ValueError(f"Invalid learning rate: {lr}")
        if weight_decay < 0.0:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        defaults = dict(lr=lr, weight_decay=weight_decay)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns
                the loss. It is run with gradients enabled before the update.

        Returns:
            The value returned by ``closure``, or ``None`` if none was given.
        """
        loss = None
        if closure is not None:
            # The step itself runs under no_grad; the closure needs autograd.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            weight_decay = group['weight_decay']
            for param in group['params']:
                if param.grad is None:
                    continue
                # Keyword `alpha` replaces the deprecated add_(scalar, tensor) overload.
                param.add_(param.grad, alpha=-lr)
                if weight_decay != 0:
                    # Decay is applied to the already-updated parameter.
                    param.add_(param, alpha=-lr * weight_decay)
        return loss

Neural Network

In [None]:
# Define a custom neural network class
class NeuralNetwork(nn.Module):
    """Single linear layer mapping ``num_features`` inputs to one output.

    Args:
        num_features: Number of input features after flattening. Defaults to 3.
    """

    def __init__(self, num_features=3):
        super().__init__()
        self.flatten = nn.Flatten()
        self.activation_stack = nn.Sequential(
            nn.Linear(num_features, 1),
        )

    def forward(self, x):
        """Return one prediction per sample as a 1-D tensor of shape (batch,)."""
        x = self.flatten(x)
        logits = self.activation_stack(x)
        # Drop only the trailing size-1 output dimension; a bare squeeze() would
        # also remove the batch dimension when the batch holds a single sample.
        return torch.squeeze(logits, dim=-1)
    
# Hyper-parameters for training
learning_rate = 0.01
num_epochs = 5000

# Loss value of every epoch
cost_history = []

# Loss criterion, model and the custom SGD optimizer
criterion = torch.nn.MSELoss()
NeuralNetwork_model = NeuralNetwork()
print(NeuralNetwork_model)
optimizer = custom_optimizer_SGD(NeuralNetwork_model.parameters(), lr=learning_rate, weight_decay = 0)

#for name, param in NeuralNetwork_model.named_parameters():
#    print( name )
#    values = torch.ones( param.shape )
#    param.data = values

# Training loop over the full training tensor
NeuralNetwork_model.train()
for epoch in range(num_epochs):
    # Clear the gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass and loss
    outputs = NeuralNetwork_model(X_train_tensor.float())
    loss = criterion(outputs, y_train_tensor.float())

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Keep the loss of this epoch
    cost_history.append(loss.item())

    # Report the first epoch and every 10th epoch after that
    if epoch == 0 or (epoch + 1) % 10 == 0:
        print(outputs[1])
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.8f}')

# Show every trainable parameter
for name, param in NeuralNetwork_model.named_parameters():
    if not param.requires_grad:
        continue
    print(f'{name}: {param.data}')


# Loss per epoch
plt.plot(cost_history)
plt.title("Cost History")
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.show()

# Calculate train error rate
# train_error_rate = calculate_error_rate(X_train_normalized,  y_train, w.T.detach().numpy(), b.detach().numpy())
# print("Train error rate:", train_error_rate)

# Calculate test error rate if test data is provided
# if X_test is not None and y_test is not None:
#    test_error_rate = calculate_error_rate(X_test_normalized, y_test, w.T.detach().numpy(), b.detach().numpy())
#    print("Test error rate:", test_error_rate)

Federated Learning Algorithm

In [None]:
# Per-client trainer, so that every client can run its own local updates
class ClientUpdate:
    """Bundles a model, loss, optimizer and data loader for local training."""

    def __init__(self, model, criterion, optimizer, train_data_loader):
        self.model, self.criterion = model, criterion
        self.optimizer, self.train_data_loader = optimizer, train_data_loader

    def update_weights(self, num_epochs):
        """Train for ``num_epochs`` passes over the loader and return the model's state dict."""
        net = self.model
        loss_fn = self.criterion
        opt = self.optimizer

        net.train()
        for _ in range(num_epochs):
            for batch_inputs, batch_targets in self.train_data_loader:
                opt.zero_grad()
                batch_loss = loss_fn(net(batch_inputs), batch_targets)
                batch_loss.backward()
                opt.step()

        return net.state_dict()

def send_client_weights(weights):
    """Placeholder for sending a client's weights; currently it only prints "ok".

    Args:
        weights: The client's weights. Not used by the current implementation.
    """
    print("ok")
    
def aggregate_weights_on_server(client_weights_list):
    """Average a list of client state dicts key by key.

    Args:
        client_weights_list: Non-empty list of state dicts that share the same keys.

    Returns:
        Dict mapping every key of the first state dict to the element-wise mean
        of that entry across all clients, in the entry's original dtype.

    Raises:
        ValueError: If ``client_weights_list`` is empty.
    """
    if not client_weights_list:
        raise ValueError("client_weights_list must contain at least one state dict")

    print("Aggregating client weights on the server...")
    aggregated_weights = {}
    # Aggregate the client weights
    for key in client_weights_list[0].keys():
        stacked = torch.stack([weights[key] for weights in client_weights_list])
        if stacked.is_floating_point() or stacked.is_complex():
            aggregated_weights[key] = stacked.mean(dim=0)
        else:
            # mean() is not defined for integer tensors; average in float and
            # cast back to the original dtype.
            aggregated_weights[key] = stacked.float().mean(dim=0).to(stacked.dtype)
    print("Client weights aggregated successfully.")
    return aggregated_weights

def federated_learning(model, criterion, optimizer, train_data, num_rounds, batch_size, num_epochs, client_fraction=1.0):
    """Run federated averaging over the given clients.

    In every round a random subset of clients is chosen. Each chosen client
    starts from the current global weights, trains locally through
    ``ClientUpdate``, and its resulting weights are averaged on the server and
    loaded back into ``model``.

    Args:
        model: Model shared by the server and (sequentially) by the clients.
        criterion: Loss function handed to each ``ClientUpdate``.
        optimizer: Optimizer handed to each ``ClientUpdate``.
            NOTE(review): presumably bound to ``model``'s parameters - confirm at the call site.
        train_data: Sequence with one dataset per client.
        num_rounds: Number of communication rounds.
        batch_size: Batch size of each client's ``DataLoader``.
        num_epochs: Local epochs per client per round.
        client_fraction: Fraction of clients selected per round; at least one
            client is always selected. Defaults to all clients.

    Returns:
        The global weights (a state dict) after the last round.

    Raises:
        ValueError: If ``train_data`` is empty.
    """
    num_clients = len(train_data)
    if num_clients == 0:
        raise ValueError("train_data must contain at least one client dataset")

    # Snapshot the weights: state_dict() tensors share storage with the live
    # parameters and would otherwise change while the clients train.
    global_weights = {key: value.detach().clone() for key, value in model.state_dict().items()}

    for _ in range(num_rounds):
        # Number of clients taking part in this round, kept within [1, num_clients]
        m = min(max(int(client_fraction * num_clients), 1), num_clients)
        selected_clients = torch.randperm(num_clients)[:m].tolist()

        client_weights_list = []
        for client in selected_clients:
            # Every client starts from the same global weights
            model.load_state_dict(global_weights)

            client_data = train_data[client]
            client_loader = DataLoader(client_data, batch_size=batch_size)
            client_update = ClientUpdate(model, criterion, optimizer, client_loader)
            trained_state = client_update.update_weights(num_epochs)
            # Copy the result so that the next client's training does not overwrite it
            client_weights = {key: value.detach().clone() for key, value in trained_state.items()}

            # Send client weights to the server
            send_client_weights(client_weights)

            # Collect client weights for aggregation on the server
            client_weights_list.append(client_weights)

        # Aggregate client weights on the server
        aggregated_weights = aggregate_weights_on_server(client_weights_list)

        # Update global weights with aggregated weights
        model.load_state_dict(aggregated_weights)
        global_weights = aggregated_weights

    return global_weights

# Define the learning rate and number of epochs
learning_rate = 0.01
num_rounds = 1
num_epochs = 5000

# Define the model parameters
cost_history = []

# Define neural network model, loss criterion and optimizer
# NOTE(review): NeuralNetwork is defined around nn.Linear(3, 1), while the clients
# below are fed MNIST images and integer class labels; the model width and the
# MSE criterion presumably do not fit that data - confirm before running.
model = NeuralNetwork()
criterion = torch.nn.MSELoss()
optimizer = custom_optimizer_SGD(model.parameters(), lr=learning_rate)

# Define your training data for each client
#train_data = [
#    DataLoader(client1_data, batch_size=32),
#    DataLoader(client2_data, batch_size=32),
#    # ...
#]

# Define the training data for each client
train_data = []

# Client 1 data
# Normalize takes (mean, std); a third positional argument is interpreted as the
# `inplace` flag, so only the two statistics are passed.
client1_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
client1_dataset = MNIST(root='./data', train=True, download=True, transform=client1_transform)
client1_data = client1_dataset

# Client 2 data
# NOTE(review): both clients load the same MNIST training split - confirm whether
# each client was meant to receive a distinct shard.
client2_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
client2_dataset = MNIST(root='./data', train=True, download=True, transform=client2_transform)
client2_data = client2_dataset

# Add the client data to the train_data list
train_data.append(client1_data)
train_data.append(client2_data)

# Run federated learning
global_weights = federated_learning(model, criterion, optimizer, train_data, num_rounds=num_rounds, batch_size=1, num_epochs=num_epochs)