In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
import random

In [2]:
def get_second_data():
    """Load MNIST, shuffle the training split, and prepare both splits for a CNN.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)``. Images are reshaped to
        ``(-1, 28, 28, 1)``, cast to float32 and divided by 255; labels are
        one-hot encoded over 10 classes. Only the training split is shuffled.
    """
    (train_images, train_digits), (test_images, test_digits) = tf.keras.datasets.mnist.load_data()

    # One permutation drawn from the global NumPy RNG, applied to images and
    # labels alike so that pairs stay aligned.
    order = np.arange(train_images.shape[0])
    np.random.shuffle(order)
    train_images, train_digits = train_images[order], train_digits[order]

    def as_unit_images(images):
        # Add the channel axis and scale pixel values by 1/255.
        return images.reshape(-1, 28, 28, 1).astype('float32') / 255.0

    def one_hot(digits):
        return tf.keras.utils.to_categorical(digits, 10)

    return (
        as_unit_images(train_images),
        one_hot(train_digits),
        as_unit_images(test_images),
        one_hot(test_digits),
    )

In [3]:
def get_second_data_non_iid():
    """Load MNIST and build a class-grouped ("non-IID") training set.

    The training samples are split into three class groups (digits 0-3, 4-6
    and 7-9). Half of each group is kept (``train_test_split`` with
    ``test_size=0.5`` and ``random_state=42``) and the kept halves are
    concatenated in group order. The test split is used unchanged.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)`` with images reshaped to
        ``(-1, 28, 28, 1)`` float32 divided by 255 and labels one-hot encoded
        over 10 classes.
    """

    def create_non_iid_data(x, y):
        # Digit classes assigned to each of the three partitions.
        class_distribution = [
            [0, 1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]

        image_shards, label_shards = [], []
        for class_group in class_distribution:
            in_group = np.isin(y, class_group)
            member_idx = np.where(in_group)[0]
            # Keep the first half of a seeded split; the other half is discarded.
            kept_x, _, kept_y, _ = train_test_split(
                x[member_idx], y[member_idx], test_size=0.5, random_state=42
            )
            image_shards.append(kept_x)
            label_shards.append(kept_y)

        return image_shards, label_shards

    def as_unit_images(images):
        # Add the channel axis and scale pixel values by 1/255.
        return images.reshape(-1, 28, 28, 1).astype('float32') / 255.0

    (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = tf.keras.datasets.mnist.load_data()

    shards_x, shards_y = create_non_iid_data(raw_train_x, raw_train_y)
    grouped_x = np.concatenate(shards_x)
    grouped_y = np.concatenate(shards_y)

    X_train = as_unit_images(grouped_x)
    X_test = as_unit_images(raw_test_x)
    y_train = tf.keras.utils.to_categorical(grouped_y, 10)
    y_test = tf.keras.utils.to_categorical(raw_test_y, 10)

    return X_train, y_train, X_test, y_test

In [4]:
def data_poisoning():
    """Build a label-flipped, class-grouped MNIST training set.

    Training labels 1 and 9 are swapped first. The samples are then grouped by
    their (already swapped) labels into digits 0-3, 4-6 and 7-9, half of each
    group is kept (``train_test_split`` with ``test_size=0.5`` and
    ``random_state=42``), and the kept halves are concatenated in group order.
    The test split keeps its true labels.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` with images reshaped to
        ``(-1, 28, 28, 1)`` float32 divided by 255 and labels one-hot encoded
        over 10 classes.
    """

    def flip_labels(y_train, label1, label2):
        # Both masks are taken from the untouched input, so the second
        # assignment cannot re-flip entries written by the first.
        was_label1 = y_train == label1
        was_label2 = y_train == label2
        swapped = np.copy(y_train)
        swapped[was_label1] = label2
        swapped[was_label2] = label1
        return swapped

    def create_non_iid_partitions(x, y, num_partitions=3):
        # Digit classes assigned to each partition.
        class_distribution = [
            [0, 1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]

        image_shards, label_shards = [], []
        for part_no in range(num_partitions):
            in_group = np.isin(y, class_distribution[part_no])
            member_idx = np.where(in_group)[0]
            # Keep the first half of a seeded split; the other half is discarded.
            kept_x, _, kept_y, _ = train_test_split(
                x[member_idx], y[member_idx], test_size=0.5, random_state=42
            )
            image_shards.append(kept_x)
            label_shards.append(kept_y)

        return image_shards, label_shards

    def as_unit_images(images):
        # Add the channel axis and scale pixel values by 1/255.
        return images.reshape(-1, 28, 28, 1).astype('float32') / 255.0

    (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = tf.keras.datasets.mnist.load_data()

    poisoned_y = flip_labels(raw_train_y, 1, 9)
    shards_x, shards_y = create_non_iid_partitions(raw_train_x, poisoned_y)

    x_train_non_iid = as_unit_images(np.concatenate(shards_x))
    x_test = as_unit_images(raw_test_x)

    flipped_y_train_cat = tf.keras.utils.to_categorical(np.concatenate(shards_y), 10)
    y_test_cat = tf.keras.utils.to_categorical(raw_test_y, 10)

    return x_train_non_iid, flipped_y_train_cat, x_test, y_test_cat

In [5]:
def get_second_model():
    """Create an uncompiled CNN for 28x28x1 inputs with 10 output logits.

    Architecture: two Conv(5x5, 'same') -> BatchNorm -> ReLU -> MaxPool(2x2)
    stages with 16 and 32 filters, followed by Flatten and a Dense(10) layer
    that has no activation (callers use a ``from_logits=True`` loss).
    """

    def conv_stage(filters, **conv_kwargs):
        # One convolution stage; extra kwargs reach only the Conv2D layer.
        return [
            layers.Conv2D(filters, kernel_size=(5, 5), padding='same', **conv_kwargs),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPooling2D(pool_size=(2, 2)),
        ]

    stack = conv_stage(16, input_shape=(28, 28, 1))
    stack += conv_stage(32)
    stack += [layers.Flatten(), layers.Dense(10)]

    model = keras.Sequential(stack)
    return model


In [6]:
class Client:
    """A federated-learning participant owning a model, a data shard and its weights.

    Attributes are set in ``__init__``:
      * ``model`` - the Keras model trained locally.
      * ``data`` - indexable pair; item 0 is passed to ``fit`` as inputs and
        item 1 as targets.
      * ``local_weights`` - weight list that the next ``train`` call starts from.
    """

    def __init__(self, model, data):
        self.model, self.data = model, data
        # Snapshot of the model's initial weights.
        self.local_weights = model.get_weights()

    def train(self, epochs=5, batch_size=16):
        """Train on the local data starting from ``self.local_weights``.

        The model is re-compiled with Adam and categorical cross-entropy on
        logits on every call. The resulting weights are stored back in
        ``self.local_weights`` and returned.
        """
        net = self.model
        features, targets = self.data[0], self.data[1]

        net.set_weights(self.local_weights)
        net.compile(
            optimizer='adam',
            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )
        net.fit(features, targets, epochs=epochs, batch_size=batch_size, verbose=0)

        self.local_weights = net.get_weights()
        return self.local_weights

In [7]:
class Server:
    """Federated-learning server: holds the global model, averages client weights, scores it."""

    def __init__(self, model):
        self.model = model

    def aggregate_weights(self, client_weights):
        """Set the global model to the element-wise mean of the clients' weights.

        Args:
            client_weights: iterable with one weight list per client (as
                returned by ``model.get_weights()``); all lists must line up
                tensor by tensor.

        Raises:
            ValueError: if no client weights are supplied. Raising here gives a
                clear message instead of passing an empty list to ``set_weights``.
        """
        client_weights = list(client_weights)
        if not client_weights:
            raise ValueError("client_weights must contain at least one client's weights")

        # zip(*...) groups the i-th tensor of every client; the mean is
        # unweighted, i.e. every client counts equally.
        new_weights = [np.mean(layer_weights, axis=0) for layer_weights in zip(*client_weights)]
        self.model.set_weights(new_weights)

    def _predict_labels(self, test_data):
        """Return ``(y_true, y_pred)`` class indices for ``test_data = (inputs, one_hot_targets)``."""
        # verbose=0 keeps the notebook output free of per-call progress bars.
        y_pred = np.argmax(self.model.predict(test_data[0], verbose=0), axis=1)
        y_true = np.argmax(test_data[1], axis=1)
        return y_true, y_pred

    def evaluate(self, test_data):
        """Return ``(loss, accuracy, macro_recall)`` of the global model on ``test_data``.

        Args:
            test_data: pair ``(inputs, one_hot_targets)``.
        """
        # The model must be compiled before ``evaluate`` can report loss/accuracy.
        self.model.compile(optimizer='adam', loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
        loss, accuracy = self.model.evaluate(test_data[0], test_data[1], verbose=0)
        y_true, y_pred = self._predict_labels(test_data)
        recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
        return loss, accuracy, recall

    def get_metrics_per_class(self, test_data):
        """Return per-class ``(precision, recall, f1)`` arrays for ``test_data``.

        ``zero_division=0`` is applied to all three metrics so that an
        undefined score is reported as 0 consistently instead of only for
        precision.
        """
        y_true, y_pred = self._predict_labels(test_data)
        precision_per_class = precision_score(y_true, y_pred, average=None, zero_division=0)
        recall_per_class = recall_score(y_true, y_pred, average=None, zero_division=0)
        f1_per_class = f1_score(y_true, y_pred, average=None, zero_division=0)
        return precision_per_class, recall_per_class, f1_per_class

In [8]:
def initialize_clients(num_clients, num_poisoned_clients, normal_data=None):
    """Create the federated clients, the first ``num_poisoned_clients`` of them poisoned.

    Args:
        num_clients: total number of clients to create.
        num_poisoned_clients: clients with index below this value receive data
            from ``data_poisoning()`` instead of the clean data.
        normal_data: optional ``(inputs, targets)`` pair of clean training
            data. Defaults to the notebook-level ``(X_train, y_train)``.

    Returns:
        list: ``Client`` objects, each with a freshly built model and its own
        data shard.

    Note:
        Every client draws a fresh permutation before taking its strided
        slice, so shards are random subsets that may overlap between clients.
    """
    if normal_data is None:
        # Backward-compatible default: the arrays defined at notebook scope.
        normal_data = (X_train, y_train)

    # Build the label-flipped dataset only when some client will use it.
    poisoned_data = data_poisoning() if num_poisoned_clients > 0 else None

    clients = []

    for i in range(num_clients):
        is_poisoned = i < num_poisoned_clients
        if is_poisoned:
            print("Poisoned Client")
        source = poisoned_data if is_poisoned else normal_data
        # data_poisoning() returns four arrays; only the training pair is used.
        features, targets = source[0], source[1]

        indices = np.arange(features.shape[0])
        np.random.shuffle(indices)
        # Slice the index vector first and gather once: same samples as
        # shuffling the full arrays and slicing, without the full-size copy.
        shard = indices[i::num_clients]
        client_data = (features[shard], targets[shard])

        clients.append(Client(get_second_model(), client_data))

    return clients

In [9]:
# Load the class-grouped (non-IID) MNIST arrays. initialize_clients reads
# X_train / y_train from notebook scope, so this cell must run before it is called.
X_train, y_train, X_test, y_test = get_second_data_non_iid()

In [10]:
# Seed every RNG this experiment draws from (client sampling, data shuffling,
# weight initialisation) so that a Restart & Run All is repeatable.
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Federated-learning setup: total clients, how many of them train on
# label-flipped data, and how many are sampled per round.
num_clients = 7
num_poisoned_clients = 1
clients_per_round = 4

clients = initialize_clients(num_clients, num_poisoned_clients)

# Global model held by the server.
server = Server(get_second_model())

Poisoned Client


In [None]:
# Federated training: per round, sample clients, train them locally from the
# current global model, average their weights and score the global model.
num_rounds = 5
epochs = 5
batch_size = 16

# One entry per completed round.
loss_history = []
accuracy_history = []
recall_history = []
precision_history = []
recall_history_per_class = []
f1_history = []

for round_num in range(num_rounds):
    selected_clients = random.sample(clients, clients_per_round)

    # Broadcast step: every selected client starts from the server's current
    # weights. Without it each client keeps training its own independently
    # initialised model and the averaged model is never fed back.
    global_weights = server.model.get_weights()
    client_weights = []
    for client in selected_clients:
        client.local_weights = global_weights
        # batch_size is passed explicitly so the setting above takes effect.
        client_weights.append(client.train(epochs=epochs, batch_size=batch_size))

    server.aggregate_weights(client_weights)
    loss, accuracy, recall = server.evaluate((X_test, y_test))
    precision_per_class, recall_per_class, f1_per_class = server.get_metrics_per_class((X_test, y_test))

    accuracy_history.append(accuracy)
    loss_history.append(loss)
    recall_history.append(recall)
    precision_history.append(precision_per_class)
    recall_history_per_class.append(recall_per_class)
    f1_history.append(f1_per_class)

    print(f"Round {round_num+1}")

Round 1
Round 2
Round 3

In [None]:
# Metrics recorded after the last completed round.
final_accuracy, final_recall = accuracy_history[-1], recall_history[-1]
final_precision_per_class = precision_history[-1]
final_recall_per_class = recall_history_per_class[-1]

# Classes 1 and 9 are the two labels swapped by data_poisoning().
# NOTE: the accuracy_class_* names hold per-class *precision* values.
accuracy_class_1, accuracy_class_9 = final_precision_per_class[1], final_precision_per_class[9]
recall_class_1, recall_class_9 = final_recall_per_class[1], final_recall_per_class[9]

In [None]:
# Final-round summary. accuracy_class_1 / accuracy_class_9 are read from the
# per-class precision array, so they are labelled as precision here.
print(f"Final Accuracy: {final_accuracy}")
print(f"Final Recall: {final_recall}")
print(f"Final Precision for Class 1: {accuracy_class_1}")
print(f"Final Precision for Class 9: {accuracy_class_9}")
print(f"Final Recall for Class 1: {recall_class_1}")
print(f"Final Recall for Class 9: {recall_class_9}")

In [None]:
# X-axis for the plots below: one tick per *recorded* round. Using the history
# length (instead of num_rounds) keeps x and y the same size even if the
# training loop was interrupted; after a full run the two are identical.
rounds = range(1, len(accuracy_history) + 1)

In [None]:
# Global-model test accuracy after each round (top-left slot of a 2x2 grid).
plt.figure(figsize=(18, 10))
ax = plt.subplot(2, 2, 1)
ax.plot(rounds, accuracy_history, label='Accuracy')
ax.set_xlabel('Round')
ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Over Rounds')
ax.legend()

In [None]:
# Global-model test loss after each round. The series plotted is loss_history
# so that it matches the 'Loss' label, axis and title.
plt.figure(figsize=(18, 10))
plt.subplot(2, 2, 1)
plt.plot(rounds, loss_history, label='Loss')
plt.xlabel('Round')
plt.ylabel('Loss')
plt.title('Model Loss Over Rounds')
plt.legend()

In [None]:
# Macro-averaged test recall after each round (top-right slot of a 2x2 grid).
plt.figure(figsize=(18, 10))
ax = plt.subplot(2, 2, 2)
ax.plot(rounds, recall_history, label='Recall')
ax.set_xlabel('Round')
ax.set_ylabel('Recall')
ax.set_title('Model Recall Over Rounds')
ax.legend()

In [None]:
# Per-class test precision across rounds: one line per class.
plt.figure(figsize=(18, 10))
ax = plt.subplot(2, 2, 3)
# Stack the per-round arrays into a (rounds, classes) matrix.
precision_history = np.array(precision_history)
num_classes = precision_history.shape[1]
for class_id in range(num_classes):
    ax.plot(rounds, precision_history[:, class_id], label=f'Class {class_id}')
ax.set_xlabel('Round')
ax.set_ylabel('Precision')
ax.set_title('Class Precision Over Rounds')
ax.legend()

In [None]:
# Per-class test recall across rounds: one line per class.
plt.figure(figsize=(18, 10))
ax = plt.subplot(2, 2, 4)
# Stack the per-round arrays into a (rounds, classes) matrix.
recall_history_per_class = np.array(recall_history_per_class)
num_classes = recall_history_per_class.shape[1]
for class_id in range(num_classes):
    ax.plot(rounds, recall_history_per_class[:, class_id], label=f'Class {class_id}')
ax.set_xlabel('Round')
ax.set_ylabel('Recall')
ax.set_title('Class Recall Over Rounds')
ax.legend()

plt.tight_layout()
plt.show()