In [1]:
# This is a K-NN model for a federated learning setting.
# 10 virtual clients are available to train the models at each round, subject to a limited client drop rate,
# hence we should expect possibly fewer than 10 clients, chosen at random, to train in any given round.
# The clients communicate through a virtually simulated orchestrating server.

In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import copy
import random
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

import time


from sklearn.neighbors import NearestNeighbors
import numpy as np
from collections import Counter

import sys
sys.path.append('../')
from FLDataset.FLDataset import load_dataset
from FLDataset.FLDataset import getActualImgs

import warnings
warnings.filterwarnings('ignore')


In [3]:

# Define Arguments class
class Arguments:
    """Run configuration for the federated KNN experiment.

    Every setting has a default and can be replaced with a keyword argument,
    e.g. ``Arguments(rounds=2, clients=4)``, which makes quick experiments
    possible without editing this class.

    Attributes:
        images: Number of images in the training set.
        clients: Number of clients.
        rounds: Number of rounds of federated learning.
        local_batches: Local batch size for each client.
        k_neighbors: Number of neighbors for KNN.
        C: Fraction of clients to select per round.
        drop_rate: Client drop rate setting (stored only; this class does not
            act on it).
        torch_seed: Seed value for random number generators.
        log_interval: Number of batches between progress log lines.
        iid: Label of the data-distribution setting.
        split_size: Number of samples per client; derived as
            ``images // clients`` unless passed explicitly.
        use_cuda: Whether CUDA is available (``torch.cuda.is_available()``).
        save_model: Whether to save the global model.

    Raises:
        TypeError: If an unknown setting name is passed.
        ValueError: If ``clients`` is smaller than 1.
    """

    def __init__(self, **overrides):
        self.images = 60000  # Number of images in the training set
        self.clients = 10  # Number of clients
        self.rounds = 6  # Number of rounds of federated learning
        self.local_batches = 64  # Local batch size for each client
        self.k_neighbors = 5  # Number of neighbors for KNN
        self.C = 0.8  # Fraction of clients to select per round
        self.drop_rate = 0.2
        self.torch_seed = 0
        self.log_interval = 10  # Interval for logging
        self.iid = 'iid'  # IID setting
        self.split_size = None  # Derived below, once overrides are known
        self.use_cuda = torch.cuda.is_available()  # Use CUDA if available
        self.save_model = False  # Whether to save the global model

        # Reject misspelled names instead of silently creating new attributes.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError(f"Unknown argument(s): {', '.join(unknown)}")
        for name, value in overrides.items():
            setattr(self, name, value)

        if self.clients < 1:
            raise ValueError("clients must be at least 1")

        # Derive the per-client share from the (possibly overridden) totals
        # unless the caller pinned it explicitly.
        if 'split_size' not in overrides:
            self.split_size = self.images // self.clients  # Number of samples per client


In [4]:
args = Arguments()
device = torch.device("cuda" if args.use_cuda else "cpu")

# Set random seed for reproducibility. All three generators are seeded from
# the configured value so that changing args.torch_seed actually takes effect.
torch.manual_seed(args.torch_seed)
np.random.seed(args.torch_seed)
random.seed(args.torch_seed)

# Load dataset and split into clients
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
global_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
global_test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

# Split the global dataset into client datasets
client_lengths = [args.split_size] * args.clients
leftover = len(global_dataset) - sum(client_lengths)
if leftover > 0:
    # random_split needs the lengths to add up to the dataset size. When the
    # per-client shares do not cover every sample, split the remainder off as
    # an extra chunk and discard it.
    client_datasets = random_split(global_dataset, client_lengths + [leftover])[:args.clients]
else:
    client_datasets = random_split(global_dataset, client_lengths)


In [5]:
# Define the KNN class
class KNN:
    """Majority-vote k-nearest-neighbours classifier.

    The neighbour search is delegated to scikit-learn's ``NearestNeighbors``
    with the Euclidean metric; the class stores the training labels and turns
    the neighbour indices into a predicted label by majority vote.

    Attributes:
        k: Number of neighbours consulted for each prediction.
        X_train: Training features as a numpy array (``None`` before ``fit``).
        y_train: Training labels as a numpy array (``None`` before ``fit``).
        nn_model: Fitted ``NearestNeighbors`` index (``None`` before ``fit``).
    """

    def __init__(self, k=5):
        self.k = k
        self.X_train = None
        self.y_train = None
        self.nn_model = None

    def fit(self, X_train, y_train):
        """Store the training data and build the neighbour index.

        Args:
            X_train: Sequence of feature rows.
            y_train: Sequence of labels, one per feature row.

        Raises:
            ValueError: If the number of feature rows and labels differ.
        """
        features = np.array(X_train)
        labels = np.array(y_train)
        # Validate before touching the instance, so a failed fit does not
        # leave the model half-updated.
        if features.shape[:1] != labels.shape[:1]:
            raise ValueError(
                f"X_train and y_train must have the same number of samples "
                f"(got {features.shape[:1]} and {labels.shape[:1]})."
            )

        self.X_train = features
        self.y_train = labels
        self.nn_model = NearestNeighbors(n_neighbors=self.k, metric='euclidean')
        self.nn_model.fit(self.X_train)

    def predict(self, X_test, batch_size=100):
        """Predict a label for every row of ``X_test``.

        Rows are sent to the neighbour index in chunks of ``batch_size``. For
        each row the most frequent label among its neighbours wins; on a tie
        the label that occurs first in the returned neighbour list is chosen.

        Args:
            X_test: Sequence of feature rows.
            batch_size: Number of rows queried per call to the index.

        Returns:
            numpy array with one predicted label per input row.

        Raises:
            ValueError: If the model has not been fitted, or if ``batch_size``
                is smaller than 1.
        """
        # Checked once up front instead of on every batch.
        if self.nn_model is None:
            raise ValueError("The nn_model attribute has not been initialized properly.")
        # A negative step would make range() empty and silently return no
        # predictions at all.
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1.")

        X_test = np.array(X_test)
        num_samples = X_test.shape[0]
        predictions = []

        for start in range(0, num_samples, batch_size):
            batch = X_test[start:start + batch_size]
            _, neighbor_indices = self.nn_model.kneighbors(batch)

            for index_list in neighbor_indices:
                votes = Counter(self.y_train[index_list])
                predictions.append(votes.most_common(1)[0][0])

        return np.array(predictions)


In [6]:
# Initialize clients with datasets and models
# One record per virtual client: an identifier, a shuffled loader over that
# client's split of the training data, and a fresh local KNN model.
clients = [
    {
        'id': f'client{position + 1}',
        'trainset': DataLoader(client_datasets[position], batch_size=args.local_batches, shuffle=True),
        'model': KNN(k=args.k_neighbors),
    }
    for position in range(args.clients)
]

# Loader over the held-out test set, read in its stored order
global_test_loader = DataLoader(global_test_dataset, batch_size=args.local_batches, shuffle=False)

In [7]:
# Client update function
def client_update(client, round_number):
    """Fit a client's local KNN model on its own data and log progress.

    The client's loader is read twice: once to collect every batch for
    fitting, and once more to score the fitted model batch by batch.

    Args:
        client: Dict with the keys 'id' (name used in log lines), 'trainset'
            (loader yielding ``(data, target)`` batches and exposing
            ``.dataset``) and 'model' (object with ``fit`` and ``predict``).
        round_number: Round label shown in the progress log.

    Returns:
        Percentage of the client's training samples that the fitted model
        labels correctly, or 0.0 if the loader yields no samples.
    """
    model = client['model']
    train_loader = client['trainset']

    # Collect training data, flattening every sample to a 1-D feature row
    X_train = []
    y_train = []
    for data, target in train_loader:
        X_train.extend(data.view(data.shape[0], -1).numpy())
        y_train.extend(target.numpy())

    # Fit the local KNN model
    model.fit(X_train, y_train)
    print(f"{client['id']} is selected to take part in training the local KNN model\n")

    # Track progress of training
    total_samples = len(train_loader.dataset)
    samples_seen = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(data.shape[0], -1).numpy()
        target = target.numpy()
        predictions = model.predict(data)

        # Accumulate the hits so the accuracy is reported to the caller
        # rather than computed and thrown away.
        correct += int(np.sum(predictions == target))
        samples_seen += len(target)

        # Log progress
        if (batch_idx + 1) % args.log_interval == 0:
            # Count the samples actually processed; multiplying the batch
            # index by the nominal batch size overshoots on a short batch.
            progress = samples_seen
            percentage = 100.0 * progress / total_samples
            print(f"Model {client['id']} Train round: {round_number} [{progress}/{total_samples} ({percentage:.0f}%)]")

    return 100.0 * correct / samples_seen if samples_seen else 0.0

In [8]:
# Function to aggregate client models and update the global model
def aggregate_models(clients):
    """Build a global KNN model from the training data of the given clients.

    Every batch of every client's 'trainset' loader is flattened and pooled
    into one feature list and one label list, and a new KNN model is fitted
    on the pooled data.

    Args:
        clients: Iterable of client dicts, each with a 'trainset' loader
            yielding ``(data, target)`` batches.

    Returns:
        The newly fitted global KNN model.
    """
    print("\nAggregating models from clients:")

    pooled_features, pooled_labels = [], []
    for participant in clients:
        # Append this client's samples straight onto the pooled lists
        for batch, labels in participant['trainset']:
            pooled_features.extend(batch.view(batch.shape[0], -1).numpy())
            pooled_labels.extend(labels.numpy())

    print(f"Aggregated data size: {len(pooled_features)} samples")

    # Fit a fresh model on everything that was collected
    merged_model = KNN(k=args.k_neighbors)
    merged_model.fit(pooled_features, pooled_labels)
    print("Global model updated with aggregated data.")
    print("Computing the global model prediction accuracy:")
    return merged_model

# Function to test the global model
def test_global_model(model, test_loader, name):
    correct = 0
    total = 0
    X_test = []
    y_test = []
    for data, target in test_loader:
        data_np = data.view(data.shape[0], -1).numpy()
        X_test.extend(data_np)
        y_test.extend(target.numpy())
    print(f"Testing {name} model with {len(X_test)} samples")
    
    # Measure prediction time
    start_time = time.time()

    # Make predictions
    predictions = model.predict(X_test)
    prediction_time = time.time() - start_time
    print(f"Prediction time: {prediction_time:.2f} seconds")
    
    # Calculate accuracy
    accuracy = np.sum(predictions == y_test) / len(y_test) * 100
    error_rate = 100 - accuracy
    print(f"{name} Model Prediction Accuracy: {accuracy:.2f}%") 
    print(f"{name} Model Prediction Error: {error_rate:.2f}%\n")


In [9]:
# Main training loop
global_model = KNN(k=args.k_neighbors)
for fed_round in range(args.rounds):
    print(f"FEDERATED LEARNING MODEL ROUND: {fed_round + 1}")

    # Select random subset of clients for the current round. The count is
    # clamped so that at least one client trains (aggregation needs data) and
    # random.sample is never asked for more clients than exist.
    num_selected_clients = min(len(clients), max(1, int(args.C * args.clients)))
    selected_clients = random.sample(clients, num_selected_clients)

    # Train each selected client concurrently
    with ThreadPoolExecutor() as executor:
        # executor.map only re-raises a worker's exception when its result is
        # retrieved; materialising the results makes a failed client update
        # stop the round instead of passing unnoticed.
        results = list(executor.map(lambda client: client_update(client, fed_round + 1), selected_clients))

    # Aggregate models from clients and update the global model
    global_model = aggregate_models(selected_clients)

    # Test the global model
    test_global_model(global_model, global_test_loader, "Global")

    # Share the updated global model with all clients
    for client in clients:
        client['model'] = copy.deepcopy(global_model)

if args.save_model:
    torch.save(global_model, "KNN.pt")

FEDERATED LEARNING MODEL ROUND: 1
client1 is selected to take part in training the local KNN model

client2 is selected to take part in training the local KNN model

client4 is selected to take part in training the local KNN model

client10 is selected to take part in training the local KNN model

client5 is selected to take part in training the local KNN model

client7 is selected to take part in training the local KNN model

client3 is selected to take part in training the local KNN model

client6 is selected to take part in training the local KNN model


Aggregating models from clients:
Aggregated data size: 48000 samples
Global model updated with aggregated data.
Computing the global model prediction accuracy:
Testing Global model with 10000 samples
Prediction time: 50.54 seconds
Global Model Prediction Accuracy: 96.82%
Global Model Prediction Error: 3.18%

FEDERATED LEARNING MODEL ROUND: 2
client6 is selected to take part in training the local KNN model

client5 is selected to tak