# <center>Training a Decentralized Machine Learning Model</center>

In this phase, we will train a decentralized machine learning (ML) model using Horizontal Federated Learning (HFL), where five clients collaboratively train a model on fraud transaction data without sharing their raw data. Each client retains its dataset locally, and only model updates are exchanged, ensuring privacy and compliance with data protection regulations.

Unlike centralized ML, where data is aggregated in a single location, HFL addresses privacy concerns, data silos, and regulatory constraints. After benchmarking the centralized model, we will compare its performance, efficiency, and trade-offs against the federated approach to assess its viability in fraud detection.

--- 

In [1]:
%load_ext autoreload
%load_ext watermark
    
%autoreload 2
%watermark --python -p torch,flwr

Python implementation: CPython
Python version       : 3.12.9
IPython version      : 9.0.2

torch: 2.6.0
flwr : 1.14.0



--------

## Loading Dependencies

In [2]:
import os
import sys
sys.path.append("../")

import torch
import torch.nn as nn

from logging import ERROR

from flwr.common import ndarrays_to_parameters, Context 
from flwr.client import Client, ClientApp, NumPyClient
from flwr.server import ServerApp, ServerConfig, ServerAppComponents
from flwr.server.strategy import FedAvg 
from flwr.simulation import run_simulation 

from src.config import NUM_CLASSES, NUM_FEATURES, SERVER_CONFIG
from src.train_decentralized import train_model
from src.FL_client import BankFLClient
from src.FL_server import weighted_average, evaluate 
from src.local_utility import load_client_data, load_test_data, get_weights, set_weights
from src.local_utility import set_device, set_seed

------

In [3]:
# Keep whatever the project helper returns as the notebook-wide DEVICE.
# NOTE(review): presumably a torch device (CUDA/MPS/CPU) — confirm in src.local_utility.
DEVICE = set_device()
# Called with its defaults; presumably seeds the RNGs for reproducibility — confirm in src.local_utility.
set_seed()

In [4]:
# Load Datasets
# One (train, validation) pair per client index, followed by the test set.
num_clients = SERVER_CONFIG['num_clients']
client_splits = [load_client_data(client_id) for client_id in range(num_clients)]

train_sets = [train_part for train_part, _ in client_splits]
val_sets = [val_part for _, val_part in client_splits]

test_set = load_test_data()

------

-----

## Trying Transformer Model

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

from torch.utils.data import Dataset, DataLoader

In [6]:
# Step 1: Load and preprocess the data
# Read the downsampled base table from the project data directory and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which usually means the CSV was
# written together with its index; consider `index_col=0` if `df` is used beyond inspection.
from src.paths import DATA_DIR
df = pd.read_csv(f'{DATA_DIR}/base_downsampled.csv')
df.head()

Unnamed: 0,fraud_bool,income,name_email_similarity,prev_address_months_count,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,payment_type,zip_count_4w,...,has_other_cards,proposed_credit_limit,foreign_request,source,session_length_in_minutes,device_os,keep_alive_session,device_distinct_emails_8w,device_fraud_count,month
0,0,0.1,0.794401,-1,68,30,0.00852,21.849185,AA,2384,...,1,1000.0,0,INTERNET,3.768328,linux,1,1,0,2
1,0,0.7,0.140562,-1,151,30,0.017462,-1.218606,AC,2650,...,0,200.0,0,INTERNET,3.675692,other,1,1,0,7
2,0,0.1,0.000572,-1,57,30,0.016778,-1.469338,AB,3443,...,0,200.0,0,INTERNET,2.884211,other,0,1,0,2
3,0,0.9,0.776983,84,10,30,0.013509,-0.836791,AD,234,...,0,500.0,0,INTERNET,13.437128,other,1,1,0,7
4,0,0.6,0.094623,-1,100,20,0.001095,-0.488158,AD,1911,...,0,200.0,1,INTERNET,8.945112,other,1,1,0,0


In [7]:
from src.local_utility import upload_dataset

In [8]:
# Unpack the eight values returned by the project helper: three feature splits, three
# label splits, the feature names and a pipeline object.
# NOTE(review): presumably the splits are already numeric/preprocessed (they are passed
# straight to torch.tensor later) and `pipeline` is the fitted preprocessor — confirm.
X_train, X_val, X_test, y_train, y_val, y_test, feature_names, pipeline = upload_dataset()

In [9]:
# Convert to PyTorch tensors
def _as_float_tensor(array):
    """Copy an array-like into a float32 tensor."""
    return torch.tensor(array, dtype=torch.float32)


# Features and labels of every split share the same float32 dtype.
X_train_tensor, y_train_tensor = _as_float_tensor(X_train), _as_float_tensor(y_train)
X_val_tensor, y_val_tensor = _as_float_tensor(X_val), _as_float_tensor(y_val)
X_test_tensor, y_test_tensor = _as_float_tensor(X_test), _as_float_tensor(y_test)

In [10]:
# Create a Dataset class
class FraudDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: Indexable, sized collection of per-sample features.
        labels: Indexable, sized collection of per-sample labels, aligned
            position-by-position with ``features``.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # Fail fast on misaligned inputs: __len__ is driven by features alone, so a
        # shorter labels collection would otherwise only surface as an IndexError
        # mid-epoch, and a longer one would silently drop the extra labels.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

# Wrap each split in a dataset, then batch it; only the training loader shuffles.
BATCH_SIZE = 32

train_dataset, val_dataset, test_dataset = (
    FraudDataset(X_train_tensor, y_train_tensor),
    FraudDataset(X_val_tensor, y_val_tensor),
    FraudDataset(X_test_tensor, y_test_tensor),
)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
class TransformerModel(nn.Module):
    """Transformer-encoder classifier for flat feature vectors.

    Each input row is linearly projected to the encoder width, treated as a
    sequence of length one, passed through the encoder stack and mapped to
    class logits.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Used both as the encoder width (``d_model``) and as the
            hidden size of each layer's feed-forward sub-block.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, input_dim, num_classes, num_heads, num_layers, dim_feedforward, dropout=0.1):
        super().__init__()
        width = dim_feedforward  # one size shared by the projection, d_model and the FFN

        self.embedding = nn.Linear(input_dim, width)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=width,
                nhead=num_heads,
                dim_feedforward=width,
                dropout=dropout,
                batch_first=True,  # tensors are laid out as (batch, seq, feature)
            ),
            num_layers=num_layers,
        )
        self.fc = nn.Linear(width, num_classes)

    def forward(self, x):
        """Return raw logits ``(batch, num_classes)`` for ``x`` of shape ``(batch, input_dim)``.

        No sigmoid/softmax is applied; the output is meant for a logit-based loss.
        """
        tokens = self.embedding(x).unsqueeze(1)  # insert a length-1 sequence axis
        encoded = self.transformer_encoder(tokens).squeeze(1)  # drop the sequence axis again
        return self.fc(encoded)

# Hyperparameters
input_dim = X_train.shape[1]  # one input unit per feature column
num_classes = 2
num_heads = 8
num_layers = 2
dim_feedforward = 64  # also the encoder width, so it must be divisible by num_heads
dropout = 0.1

model = TransformerModel(
    input_dim=input_dim,
    num_classes=num_classes,
    num_heads=num_heads,
    num_layers=num_layers,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
)

In [12]:
input_dim 

51

In [13]:
# Step 3: Training
# Cross-entropy is applied to the model's raw logits with integer class targets
# (the training and evaluation code cast the labels with .long()).
# evaluate_model reads `criterion` as a notebook-level name, so this cell must run first.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with a fixed learning rate of 3e-3.
optimizer = optim.SGD(model.parameters(), lr=3e-3)

----

In [14]:
def evaluate_model(model, dataloader, loss_fn=None):
    """Compute classification accuracy and mean batch loss over ``dataloader``.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: Module mapping a feature batch to per-class logits of shape
            ``(batch, num_classes)``.
        dataloader: Sized iterable yielding ``(features, class_labels)`` batches.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
            When omitted, the notebook-level ``criterion`` is used, so existing
            two-argument calls behave exactly as before.

    Returns:
        tuple: ``(accuracy, avg_loss)`` where ``accuracy`` is a percentage over
        all examples and ``avg_loss`` is the unweighted mean of the per-batch
        losses (a smaller final batch counts as much as a full one).
    """
    if loss_fn is None:
        # Backward-compatible fallback to the loss object defined at notebook level.
        loss_fn = criterion

    correct, total_loss, total_examples = 0, 0, 0

    model.eval()
    with torch.no_grad():
        for features, class_labels in dataloader:
            # The model emits raw logits, not probabilities.
            logits = model(features)

            # Labels may arrive as floats; the loss and the comparison need class indices.
            true_label = class_labels.long()
            total_loss += loss_fn(logits, true_label).item()

            pred = torch.argmax(logits, dim=1)
            correct += torch.sum(true_label == pred).item()
            total_examples += class_labels.numel()

    accuracy = (correct / total_examples) * 100
    avg_loss = total_loss / len(dataloader)

    return accuracy, avg_loss

In [15]:
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=5, verbose=True, seed=42):
    """Fit ``model`` with mini-batch updates and early stopping on validation loss.

    Args:
        model: Module producing per-class logits for a feature batch.
        train_loader: Sized iterable of ``(data, target)`` training batches.
        val_loader: Batches passed to ``evaluate_model`` after every epoch.
        criterion: Loss applied to ``(output, target.long())`` for the training step.
            Validation loss comes from ``evaluate_model(model, val_loader)``, which
            resolves its own loss rather than receiving this argument.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Maximum number of passes over ``train_loader``.
        patience: Stop once this many consecutive epochs fail to beat the best
            validation loss seen so far.
        verbose: Print one metrics line per epoch when true. The early-stopping
            notice is printed regardless.
        seed: Value passed to ``torch.manual_seed`` before training. The default
            of 42 preserves the previous hard-coded behavior; pass ``None`` to
            leave the global RNG state untouched.

    Returns:
        None. The model and optimizer are updated in place; the weights are those
        of the last completed epoch, not of the best-validation epoch.
    """
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    # Reseeding affects the global torch RNG, so it is opt-out rather than forced.
    if seed is not None:
        torch.manual_seed(seed)

    for epoch in range(num_epochs):
        # evaluate_model leaves the model in eval mode, so re-enable training each epoch.
        model.train()
        epoch_train_loss = 0
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            # Targets are cast to integer class indices for the loss.
            loss = criterion(output, target.long())
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item()

        # Calculate average training loss for the epoch (mean of per-batch losses)
        train_loss = epoch_train_loss / len(train_loader)

        # Validation Step
        val_accuracy, val_loss = evaluate_model(model, val_loader)

        # Logging
        if verbose:
            print(f'Epoch {epoch+1:03d}/{num_epochs:03d} | Train Loss: {train_loss:.2f} | Val Accuracy: {val_accuracy:.2f}%'
            f' | Val Loss: {val_loss:.2f}')

        # Model Callback (Early Stopping): only a strict improvement resets the counter
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Early stopping at epoch {epoch+1}')
                break

In [16]:
# Train for at most 50 epochs; stop once 5 consecutive epochs fail to beat the best validation loss.
train(model, train_loader, val_loader, criterion, optimizer, num_epochs=50, patience=5, verbose=True)

Epoch 001/050 | Train Loss: 0.50 | Val Accuracy: 80.38% | Val Loss: 0.42
Epoch 002/050 | Train Loss: 0.45 | Val Accuracy: 80.63% | Val Loss: 0.41
Epoch 003/050 | Train Loss: 0.44 | Val Accuracy: 80.49% | Val Loss: 0.42
Epoch 004/050 | Train Loss: 0.44 | Val Accuracy: 80.89% | Val Loss: 0.41
Epoch 005/050 | Train Loss: 0.43 | Val Accuracy: 80.53% | Val Loss: 0.41
Epoch 006/050 | Train Loss: 0.43 | Val Accuracy: 80.85% | Val Loss: 0.41
Epoch 007/050 | Train Loss: 0.43 | Val Accuracy: 80.71% | Val Loss: 0.41
Epoch 008/050 | Train Loss: 0.43 | Val Accuracy: 80.49% | Val Loss: 0.41
Epoch 009/050 | Train Loss: 0.43 | Val Accuracy: 80.56% | Val Loss: 0.41
Epoch 010/050 | Train Loss: 0.43 | Val Accuracy: 80.53% | Val Loss: 0.41
Epoch 011/050 | Train Loss: 0.43 | Val Accuracy: 80.20% | Val Loss: 0.42
Epoch 012/050 | Train Loss: 0.43 | Val Accuracy: 80.56% | Val Loss: 0.41
Epoch 013/050 | Train Loss: 0.43 | Val Accuracy: 80.60% | Val Loss: 0.41
Epoch 014/050 | Train Loss: 0.43 | Val Accuracy: 80

In [17]:
# Score the model, in whatever state the training cell left it, on the test loader.
test_accuracy, test_loss = evaluate_model(model, test_loader)

# Accuracy is a percentage (0-100); loss is the mean of the per-batch losses.
print(f"Test Accuracy: {test_accuracy:.2f} Test Loss: {test_loss:.4f}")

Test Accuracy: 80.46 Test Loss: 0.4203


In [195]:
# NOTE(review): this cell repeats the preceding test evaluation verbatim, but its execution
# count (195) is out of sequence and its printed metrics differ, so it was run against a
# different kernel/model state. Re-run or remove it so the notebook reproduces from a fresh kernel.
test_accuracy, test_loss = evaluate_model(model, test_loader)

print(f"Test Accuracy: {test_accuracy:.2f} Test Loss: {test_loss:.4f}")

Test Accuracy: 81.01 Test Loss: 0.4185


----