In [28]:
import pandas as pd
import numpy as np 
from matplotlib import pyplot as plt
from sklearn.preprocessing import Normalizer
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.model_selection import train_test_split
import torch 
import torch.nn as nn 
from torch.utils.data import random_split, DataLoader, TensorDataset 
import torch.nn.functional as F
import torch.optim as optim
import flwr as fl
from flwr.common import Metrics
import sys

In [29]:
# import platform, cpuinfo, GPUtil, psutil
# print(f"OS: {platform.uname().system} {platform.uname().release}")
# print(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}")
# print(f"GPU: {GPUtil.getGPUs()[0].name}")
# print(f"Memory: {psutil.virtual_memory().total / (1024 ** 3):.2f} GB")

In [30]:
# Record the interpreter this notebook ran under: the full build string first,
# then the structured version tuple.
for caption, detail in (
    ("Python version:", sys.version),
    ("Version info:", sys.version_info),
):
    print(caption, detail)

Python version: 3.11.4 (tags/v3.11.4:d2340ef, Jun  7 2023, 05:45:37) [MSC v.1934 64 bit (AMD64)]
Version info: sys.version_info(major=3, minor=11, micro=4, releaselevel='final', serial=0)


In [31]:
# Torch device used by the notebook; fixed to the CPU here.
DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU

# One-line banner with the device and the PyTorch / Flower versions in use.
print(f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}")

Training on cpu using PyTorch 2.0.1+cpu and Flower 1.5.0


In [32]:
# Number of equally sized partitions the training split is divided into
# (one partition per client) by load_datasets().
NUM_CLIENTS = 3

# EPOCHS: presumably the number of training epochs -- its use is not visible
#         in this part of the notebook; confirm against the training code.
# BATCH_SIZE: batch size handed to every DataLoader built by load_datasets().
EPOCHS, BATCH_SIZE = 10, 100

# Presumably the model dimensions (input width, hidden width, number of
# output classes) -- the model definition is not visible here; confirm.
IN_FEATURES, HIDDEN_LAYERS, OUT_FEATURES = 3, 128, 8

In [33]:
def _frame_to_dataset(frame):
    """Convert a DataFrame with a 'target' column into a TensorDataset.

    Every column other than 'target' is treated as a numeric feature and cast
    to float32; 'target' is cast to int64 class indices.
    NOTE(review): assumes the CSV holds only numeric feature columns plus the
    label column -- confirm against the real schema of label_data.csv.
    """
    features = torch.tensor(frame.drop(columns='target').to_numpy(dtype=np.float32))
    labels = torch.tensor(frame['target'].to_numpy(dtype=np.int64))
    return TensorDataset(features, labels)


def load_datasets(csv_path='./datasets/label_data.csv', split_ratio=0.8,
                  num_clients=None, batch_size=None, seed=42):
    """Build per-client train/validation loaders and one shared test loader.

    The CSV is read, its 'label' column is renamed to 'target', and the rows
    are split positionally: the first ``split_ratio`` fraction is the training
    pool and the rest is the test set. The training pool is trimmed to a
    multiple of ``num_clients`` and divided into equally sized random
    partitions, one per client. Each partition is split again, with one tenth
    (rounded down) held out for validation.

    The DataFrames are wrapped in ``TensorDataset`` objects before being given
    to ``random_split`` / ``DataLoader``. Integer indexing on a DataFrame is a
    column lookup, so a loader built directly on a DataFrame fails as soon as
    it is iterated.

    Args:
        csv_path: Path of the CSV file to load.
        split_ratio: Fraction of rows (taken from the top of the file) used as
            the training pool.
        num_clients: Number of client partitions; defaults to ``NUM_CLIENTS``.
        batch_size: Batch size for every loader; defaults to ``BATCH_SIZE``.
        seed: Seed for both the remainder drop and the random splits, so the
            partitioning is reproducible across runs.

    Returns:
        ``(train_loader, val_loader, test_loader)`` where the first two are
        lists holding one ``DataLoader`` per client and the last is a single
        ``DataLoader``. Every batch is a ``(features, target)`` pair of
        float32 / int64 tensors.

    Raises:
        KeyError: If the file has neither a 'label' nor a 'target' column.
        ValueError: If a feature or target column cannot be cast to a number.
    """
    if num_clients is None:
        num_clients = NUM_CLIENTS
    if batch_size is None:
        batch_size = BATCH_SIZE

    df = pd.read_csv(csv_path)
    df = df.rename(columns={'label': 'target'})

    split_index = int(len(df) * split_ratio)
    train_set = df.iloc[:split_index, :]
    test_set = df.iloc[split_index:, :]

    # Split the train set evenly between the clients: randomly discard the
    # remainder rows so every partition has exactly `part_size` samples.
    part_size = len(train_set) // num_clients
    remainder = len(train_set) % num_clients
    if remainder:
        rng = np.random.default_rng(seed)
        dropped = rng.choice(train_set.index, remainder, replace=False)
        train_set = train_set.drop(dropped)

    partitions = random_split(
        _frame_to_dataset(train_set),
        [part_size] * num_clients,
        generator=torch.Generator().manual_seed(seed),
    )

    train_loader = []
    val_loader = []

    for partition in partitions:
        val_length = len(partition) // 10
        train_length = len(partition) - val_length
        train_data, val_data = random_split(
            partition,
            [train_length, val_length],
            generator=torch.Generator().manual_seed(seed),
        )

        train_loader.append(DataLoader(train_data, batch_size=batch_size, shuffle=True))
        # Evaluation does not depend on sample order, so keep it deterministic.
        val_loader.append(DataLoader(val_data, batch_size=batch_size, shuffle=False))

    test_loader = DataLoader(_frame_to_dataset(test_set), batch_size=batch_size, shuffle=False)
    return train_loader, val_loader, test_loader
    
# Build the loaders once for the rest of the notebook: train_loader and
# val_loader are lists with one DataLoader per client, test_loader is a
# single DataLoader over the held-out rows.
(train_loader, val_loader, test_loader) = load_datasets()