<a href="https://colab.research.google.com/github/ThisumiWijesinghe/Fraud-Detection-with-Federated-Learning/blob/main/Fed_L_gemini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Libraries

In [15]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


#Load & Preprocess Dataset

In [16]:
import kagglehub

# Download dataset from Kaggle
path = kagglehub.dataset_download("ealaxi/paysim1")

# Define file path
file_path = path + "/PS_20174392719_1491204439457_log.csv"

# Load dataset
df = pd.read_csv(file_path)

print("Dataset shape:", df.shape)


# Encode categorical column
le = LabelEncoder()
df["type"] = le.fit_transform(df["type"])

# Select features
features = ["step", "type", "amount",
            "oldbalanceOrg", "newbalanceOrig",
            "oldbalanceDest", "newbalanceDest"]

X = df[features].values
y = df["isFraud"].values

INPUT_DIM = X.shape[1]


Using Colab cache for faster access to the 'paysim1' dataset.
Dataset shape: (6362620, 11)


#Create Non-IID Split (Dirichlet Distribution)

In [17]:
NUM_CLIENTS = 12
alpha = 0.5

def dirichlet_split(X, y, num_clients, alpha):
    data_per_client = [[] for _ in range(num_clients)]
    labels = np.unique(y)

    for label in labels:
        idx = np.where(y == label)[0]
        np.random.shuffle(idx)
        proportions = np.random.dirichlet(np.repeat(alpha, num_clients))
        proportions = (np.cumsum(proportions) * len(idx)).astype(int)[:-1]
        split_idx = np.split(idx, proportions)

        for i in range(num_clients):
            data_per_client[i].extend(split_idx[i])

    clients_data = {}
    for i in range(num_clients):
        client_idx = data_per_client[i]
        clients_data[i] = {
            "X": X[client_idx],
            "y": y[client_idx]
        }

    return clients_data

clients_data = dirichlet_split(X, y, NUM_CLIENTS, alpha)


#Create Model (Used for FedAvg & FedBN)

In [18]:
def create_model():
    model = tf.keras.Sequential([
        layers.Input(shape=(INPUT_DIM,)),
        layers.Dense(64, activation="relu"),
        layers.BatchNormalization(),
        layers.Dense(32, activation="relu"),
        layers.BatchNormalization(),
        layers.Dense(1, activation="sigmoid")
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model


#FedAvg Aggregation

In [19]:
def fedavg_aggregate(client_weights):
    new_weights = []
    for weights in zip(*client_weights):
        new_weights.append(np.mean(weights, axis=0))
    return new_weights


#Accuracy Evaluation

In [20]:
def evaluate_accuracy(model, clients_data):
    total_acc = 0
    for client_id in clients_data:
        loss, acc = model.evaluate(
            clients_data[client_id]["X"],
            clients_data[client_id]["y"],
            verbose=0
        )
        print(f"Client {client_id+1} Accuracy: {acc:.4f}")
        total_acc += acc

    print(f"Average Accuracy: {total_acc/len(clients_data):.4f}\n")


Federated Training Loop (FedAvg Example)

In [21]:
NUM_ROUNDS = 5
LOCAL_EPOCHS = 1
BATCH_SIZE = 1024

global_model = create_model()

for round_num in range(NUM_ROUNDS):
    print(f"--- Global Round {round_num+1} ---")

    client_weights = []

    for client_id in range(NUM_CLIENTS):
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        local_model.fit(
            clients_data[client_id]["X"],
            clients_data[client_id]["y"],
            epochs=LOCAL_EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=0
        )

        client_weights.append(local_model.get_weights())

    # Server Aggregation
    new_weights = fedavg_aggregate(client_weights)
    global_model.set_weights(new_weights)

    # Evaluate
    evaluate_accuracy(global_model, clients_data)


--- Global Round 1 ---
Client 1 Accuracy: 0.9998
Client 2 Accuracy: 0.9972
Client 3 Accuracy: 0.9998
Client 4 Accuracy: 0.9986
Client 5 Accuracy: 0.9981
Client 6 Accuracy: 0.9992
Client 7 Accuracy: 0.9994
Client 8 Accuracy: 0.9837
Client 9 Accuracy: 0.8505
Client 10 Accuracy: 0.9990
Client 11 Accuracy: 0.9997
Client 12 Accuracy: 0.9996
Average Accuracy: 0.9854

--- Global Round 2 ---
Client 1 Accuracy: 1.0000
Client 2 Accuracy: 0.9972
Client 3 Accuracy: 1.0000
Client 4 Accuracy: 0.9988
Client 5 Accuracy: 0.9982
Client 6 Accuracy: 0.9994
Client 7 Accuracy: 0.9995
Client 8 Accuracy: 0.9830
Client 9 Accuracy: 0.8395
Client 10 Accuracy: 0.9991
Client 11 Accuracy: 0.9999
Client 12 Accuracy: 0.9998
Average Accuracy: 0.9845

--- Global Round 3 ---
Client 1 Accuracy: 1.0000
Client 2 Accuracy: 0.9973
Client 3 Accuracy: 1.0000
Client 4 Accuracy: 0.9988
Client 5 Accuracy: 0.9982
Client 6 Accuracy: 0.9994
Client 7 Accuracy: 0.9995
Client 8 Accuracy: 0.9833
Client 9 Accuracy: 0.8421
Client 10 Accur

#FedBN Aggregation

In [22]:
def fedbn_aggregate(client_weights, global_model):
    new_weights = global_model.get_weights()

    # Identify BatchNorm layers
    bn_layers = []
    for i, layer in enumerate(global_model.layers):
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            bn_layers.append(i)

    # Go through each layer's weights
    weight_index = 0
    for layer_idx, layer in enumerate(global_model.layers):
        layer_weights = layer.get_weights()
        num_weights = len(layer_weights)

        if num_weights == 0:
            continue

        # If not BatchNorm â†’ average normally
        if layer_idx not in bn_layers:
            averaged = []
            for weights in zip(*[client_weights[c][weight_index:weight_index+num_weights]
                                 for c in range(len(client_weights))]):
                averaged.append(np.mean(weights, axis=0))

            new_weights[weight_index:weight_index+num_weights] = averaged

        # Move weight index
        weight_index += num_weights

    return new_weights


#FedBN Training Loop

In [23]:
NUM_ROUNDS = 10
LOCAL_EPOCHS = 3
BATCH_SIZE = 1024

global_fedbn_model = create_model()

print("\nStarting Federated Training using FedBN\n")

for round_num in range(NUM_ROUNDS):
    print(f"--- Global Round {round_num+1} ---")

    client_weights = []

    for client_id in range(NUM_CLIENTS):
        local_model = create_model()
        local_model.set_weights(global_fedbn_model.get_weights())

        local_model.fit(
            clients_data[client_id]["X"],
            clients_data[client_id]["y"],
            epochs=LOCAL_EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=0
        )

        client_weights.append(local_model.get_weights())

    # FedBN aggregation
    new_weights = fedbn_aggregate(client_weights, global_fedbn_model)
    global_fedbn_model.set_weights(new_weights)

    evaluate_accuracy(global_fedbn_model, clients_data)



Starting Federated Training using FedBN

--- Global Round 1 ---
Client 1 Accuracy: 0.4485
Client 2 Accuracy: 0.4511
Client 3 Accuracy: 0.4489
Client 4 Accuracy: 0.4487
Client 5 Accuracy: 0.4492
Client 6 Accuracy: 0.4509
Client 7 Accuracy: 0.4493
Client 8 Accuracy: 0.4620
Client 9 Accuracy: 0.5695
Client 10 Accuracy: 0.4490
Client 11 Accuracy: 0.4499
Client 12 Accuracy: 0.4486
Average Accuracy: 0.4605

--- Global Round 2 ---
Client 1 Accuracy: 0.5024
Client 2 Accuracy: 0.5042
Client 3 Accuracy: 0.5029
Client 4 Accuracy: 0.5022
Client 5 Accuracy: 0.5042
Client 6 Accuracy: 0.5054
Client 7 Accuracy: 0.5032
Client 8 Accuracy: 0.5129
Client 9 Accuracy: 0.6130
Client 10 Accuracy: 0.5031
Client 11 Accuracy: 0.5037
Client 12 Accuracy: 0.5025
Average Accuracy: 0.5133

--- Global Round 3 ---
Client 1 Accuracy: 0.5647
Client 2 Accuracy: 0.5659
Client 3 Accuracy: 0.5654
Client 4 Accuracy: 0.5640
Client 5 Accuracy: 0.5676
Client 6 Accuracy: 0.5679
Client 7 Accuracy: 0.5654
Client 8 Accuracy: 0.5743

#Create Personalized Model

In [24]:
def create_personalized_model():
    base = tf.keras.Sequential([
        layers.Input(shape=(INPUT_DIM,)),
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu")
    ])

    head = layers.Dense(1, activation="sigmoid")

    model = tf.keras.Sequential([
        base,
        head
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )

    return model


#Personalized Aggregation

In [25]:
def fedper_aggregate(client_weights):
    new_weights = []

    # Exclude last 2 weights (kernel + bias of final layer)
    for weights in zip(*client_weights):
        if len(new_weights) < len(client_weights[0]) - 2:
            new_weights.append(np.mean(weights, axis=0))
        else:
            new_weights.append(client_weights[0][len(new_weights)])

    return new_weights


#Personalized Training Loop

In [1]:
NUM_ROUNDS = 10
LOCAL_EPOCHS = 2

global_model = create_personalized_model()

print("\nStarting Personalized Federated Learning\n")

for round_num in range(NUM_ROUNDS):
    print(f"--- Global Round {round_num+1} ---")

    client_weights = []

    for client_id in range(NUM_CLIENTS):
        local_model = create_personalized_model()
        local_model.set_weights(global_model.get_weights())

        local_model.fit(
            clients_data[client_id]["X"],
            clients_data[client_id]["y"],
            epochs=LOCAL_EPOCHS,
            batch_size=1024,
            verbose=0
        )

        client_weights.append(local_model.get_weights())

    # Aggregate shared layers only
    new_weights = fedper_aggregate(client_weights)
    global_model.set_weights(new_weights)

    evaluate_accuracy(global_model, clients_data)


NameError: name 'create_personalized_model' is not defined