In [3]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [11]:
import random

# Set random seed for reproducibility
random.seed(42)

# Client Update Function
def client_update(X_train, y_train, X_test, k=5):
    # Initialize kNN classifier
    clf = KNeighborsClassifier(n_neighbors=k)
    # Shuffle training data
    X_train_shuffled, y_train_shuffled = shuffle_data(X_train, y_train)
    # Train the classifier
    clf.fit(X_train_shuffled, y_train_shuffled)
    # Predict labels for test data
    y_pred = clf.predict(X_test)
    return y_pred

# Shuffle Data Function
def shuffle_data(X, y):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X_shuffled, y_shuffled = zip(*combined)
    return np.array(X_shuffled), np.array(y_shuffled)

# Server Aggregation Function
def server_aggregate(predictions):
    # Simply return the mode of the predictions
    aggregated_predictions = np.array(predictions).T
    aggregated_labels = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=aggregated_predictions)
    return aggregated_labels

# Evaluate Function
def evaluate(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

# Load CSV data
def load_data(file_path):
    df = pd.read_csv(file_path)
    X = df.drop(columns=['risk', 'Risk', 'state_ut', 'district']).values
    y = df['risk'].values
    return X, y

# Federated Learning Loop
def federated_learning(X_train_clients, y_train_clients, X_test, y_test, k=5, num_rounds=10):
    global_predictions = []

    for round in range(num_rounds):
        local_predictions = []
        # Set different random seed for each round
        random.seed(42 + round)
        for i, (X_train, y_train) in enumerate(zip(X_train_clients, y_train_clients)):
            # Perform client update
            y_pred = client_update(X_train, y_train, X_test, k=k)
            local_predictions.append(y_pred)
            # Compute client accuracy
            client_accuracy = evaluate(y_test, y_pred)
            print("Round {}, Client {}: Accuracy: {:.2f}%".format(round, i, client_accuracy * 100))
        
        # Server aggregates local predictions
        aggregated_labels = server_aggregate(local_predictions)
        global_predictions.append(aggregated_labels)

        # Evaluate global model
        accuracy = evaluate(y_test, aggregated_labels)
        print("Round {}, Test Accuracy: {:.2f}%".format(round, accuracy * 100))

# Load CSV data for federated learning
def load_federated_data(file_path, num_clients):
    X, y = load_data(file_path)
    X_train_clients = np.array_split(X, num_clients)
    y_train_clients = np.array_split(y, num_clients)
    return X_train_clients, y_train_clients

# Example usage
if __name__ == "__main__":
    # Path to the CSV file
    csv_file_path = "crime.csv"

    # Number of clients
    num_clients = 10

    # Load federated data
    X_train_clients, y_train_clients = load_federated_data(csv_file_path, num_clients)

    # Load test data
    X_test, y_test = load_data(csv_file_path)

    # Run federated learning
    federated_learning(X_train_clients, y_train_clients, X_test, y_test)

Round 0, Client 0: Accuracy: 98.56%
Round 0, Client 1: Accuracy: 98.99%
Round 0, Client 2: Accuracy: 98.71%
Round 0, Client 3: Accuracy: 98.48%
Round 0, Client 4: Accuracy: 98.39%
Round 0, Client 5: Accuracy: 98.81%
Round 0, Client 6: Accuracy: 98.81%
Round 0, Client 7: Accuracy: 98.85%
Round 0, Client 8: Accuracy: 98.58%
Round 0, Client 9: Accuracy: 98.51%
Round 0, Test Accuracy: 99.19%
Round 1, Client 0: Accuracy: 98.56%
Round 1, Client 1: Accuracy: 98.99%
Round 1, Client 2: Accuracy: 98.71%
Round 1, Client 3: Accuracy: 98.48%
Round 1, Client 4: Accuracy: 98.39%
Round 1, Client 5: Accuracy: 98.81%
Round 1, Client 6: Accuracy: 98.81%
Round 1, Client 7: Accuracy: 98.85%
Round 1, Client 8: Accuracy: 98.58%
Round 1, Client 9: Accuracy: 98.51%
Round 1, Test Accuracy: 99.19%
Round 2, Client 0: Accuracy: 98.56%
Round 2, Client 1: Accuracy: 98.99%
Round 2, Client 2: Accuracy: 98.71%
Round 2, Client 3: Accuracy: 98.48%
Round 2, Client 4: Accuracy: 98.39%
Round 2, Client 5: Accuracy: 98.81%
Ro