In [220]:
import pandas as pd
import numpy as np 
from matplotlib import pyplot as plt
from sklearn.preprocessing import Normalizer
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.model_selection import train_test_split
import torch 
import torch.nn as nn 
from torch.utils.data import random_split, DataLoader, TensorDataset 
import torch.nn.functional as F
import torch.optim as optim
import flwr as fl
from flwr.common import Metrics
import sys

In [221]:
# import platform, cpuinfo, GPUtil, psutil
# print(f"OS: {platform.uname().system} {platform.uname().release}")
# print(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}")
# print(f"GPU: {GPUtil.getGPUs()[0].name}")
# print(f"Memory: {psutil.virtual_memory().total / (1024 ** 3):.2f} GB")

In [222]:
print("Python version:", sys.version)
print("Version info:", sys.version_info)

Python version: 3.11.4 (tags/v3.11.4:d2340ef, Jun  7 2023, 05:45:37) [MSC v.1934 64 bit (AMD64)]
Version info: sys.version_info(major=3, minor=11, micro=4, releaselevel='final', serial=0)


In [223]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}")

Training on cpu using PyTorch 2.0.1+cpu and Flower 1.5.0


In [224]:
NUM_CLIENTS = 3

EPOCHS = 10
BATCH_SIZE = 100

IN_FEATURES = 3
HIDDEN_LAYERS = 128
OUT_FEATURES = 2

In [225]:
def load_datasets():
    df = pd.read_csv('./datasets/label_data.csv')
    df = df.rename(columns={'label': 'target'})
    
    feature = df.iloc[:, :-1]
    target = df.loc[:, 'target']
    feature = torch.Tensor(feature.to_numpy())
    target = torch.tensor(target.to_numpy())
    tensor_data = TensorDataset(feature, target)
    
    number_rows = len(feature)
    test_split = int(number_rows * 0.2)
    train_split = number_rows - test_split
    train_set, test_set = random_split(tensor_data, [train_split, test_split])  

    # split_ratio = 0.8
    # split_index = int(len(df) * split_ratio)
    # 
    # train_set = tensor_data.iloc[:split_index, :]
    # test_set = tensor_data.iloc[split_index:, :]
    
    part_size = len(train_set) // NUM_CLIENTS
    length = [part_size] * NUM_CLIENTS  # lengths for each client
    
    # Split the test set evenly into thirds, removing the remainders
    # random_choose = np.random.choice(train_set.index, (len(train_set) % NUM_CLIENTS), replace=False)
    # train_set = train_set.drop(random_choose)
    
    datasets = random_split(train_set, length, generator=torch.Generator().manual_seed(42))
    
    train_loader = []
    val_loader = []
    
    for data in datasets:
        val_length = len(data) // 10
        train_length = len(data) - val_length
        length = [train_length, val_length]
        train_data, val_data = random_split(data, length, generator=torch.Generator().manual_seed(42))
        
        train_loader.append(DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True))
        val_loader.append(DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True))
    
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)
    return train_loader, val_loader, test_loader
    
train_loader, val_loader, test_loader = load_datasets()

In [226]:
class Network(nn.Module):
    def __init__(self, IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(IN_FEATURES, HIDDEN_LAYERS), 
            nn.ReLU(),
            nn.Linear(HIDDEN_LAYERS, HIDDEN_LAYERS), 
            nn.ReLU(),
            nn.Linear(HIDDEN_LAYERS, OUT_FEATURES), 
            nn.Softmax(dim=1)
        )
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [227]:
def train(model, train_loader, epochs):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    
    for epoch in range(1, epochs + 1): 
        correct, total, epoch_loss = 0, 0, 0.0
        
        for feature, target in train_loader:
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            optimizer.zero_grad()
            output = model(feature)
            train_loss = criterion(output, target)
            train_loss.backward()
            optimizer.step()
            
            epoch_loss += train_loss
            total += target.size(0)
            correct += (torch.max(output.data, 1)[1] == target).sum().item()
            
        epoch_loss /= len(train_loader.dataset)
        epoch_accuracy = correct / total
        
        print(f"Epoch {epoch}: train loss: {epoch_loss:.8f}, accuracy: {epoch_accuracy:.8f}.")

In [228]:
def test(model, test_loader):
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    model.eval()
 
    with torch.no_grad():
        for feature, target in test_loader:
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            output = model(feature)
            loss += criterion(output, target).item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            
        loss /= len(test_loader.dataset)
        accuracy = correct / total
        return loss, accuracy

In [229]:
train_loader = train_loader[1]
val_loader = val_loader[1]
model = Network(IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES).to(DEVICE)

train(model, train_loader, EPOCHS)

loss, accuracy = test(model, val_loader)
print(f"Final test set performance: \n\tloss: {loss:.8f}, accuracy: {accuracy:.8f}")

Epoch 1: train loss: 0.00454832, accuracy: 0.85926477.
Epoch 2: train loss: 0.00453187, accuracy: 0.86018749.
Epoch 3: train loss: 0.00453205, accuracy: 0.86018749.
Epoch 4: train loss: 0.00453197, accuracy: 0.86018749.
Epoch 5: train loss: 0.00453197, accuracy: 0.86018749.
Epoch 6: train loss: 0.00453214, accuracy: 0.86018749.
Epoch 7: train loss: 0.00453184, accuracy: 0.86018749.
Epoch 8: train loss: 0.00453201, accuracy: 0.86018749.
Epoch 9: train loss: 0.00453183, accuracy: 0.86018749.
Epoch 10: train loss: 0.00453188, accuracy: 0.86018749.
Final test set performance: loss: 0.00451649, accuracy: 0.86325624
