In [None]:
import gdown

In [None]:
# Download a file from Google Drive by its file id (shell command run via IPython `!`).
# NOTE(review): presumably one of the "Domain N.feather" files read later - confirm which id maps to which file.
!gdown --id 1-3usuwhbiZUNWiZqUzRIJgnH1AjLCv0K 

In [None]:
# Download a second file from Google Drive by its file id.
# NOTE(review): presumably another "Domain N.feather" input - confirm.
!gdown --id 1HvmlbHuwrmRUYJSyP8fKzq8dqKKKPMsv

In [None]:
# Download a third file from Google Drive by its file id.
# NOTE(review): presumably another "Domain N.feather" input - confirm.
!gdown --id 1rzQx4mugpWQmJbAQzDCiEgqivSp2u9b-

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from collections import OrderedDict
from typing import List, Dict
from sklearn.model_selection import train_test_split
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
from torch import nn

In [None]:
!pip install -q flwr[simulation]

import flwr as fl
from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    Parameters,
    Scalar,
    Status,
    NDArrays,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)
import os

from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg
import numpy as np
import pandas as pd
from typing import List, Tuple, Dict, Union, Optional
from functools import partial, reduce

In [None]:

def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of entries where ``y_pred`` equals ``y_true``.

    The two tensors are compared element-wise with ``torch.eq``; the number of
    matching entries is divided by ``len(y_pred)`` and scaled to a percentage.
    """
    match_mask = torch.eq(y_true, y_pred)
    n_correct = match_mask.sum().item()
    return (n_correct / len(y_pred)) * 100

def get_parameters(net) -> List[np.ndarray]:
    """Export every ``state_dict`` entry of ``net`` as a NumPy array.

    The arrays are returned in ``state_dict`` order, which is the order
    ``set_parameters`` relies on when loading them back.
    """
    arrays = []
    for tensor in net.state_dict().values():
        # Move to host memory first so the conversion also works for GPU models.
        arrays.append(tensor.cpu().numpy())
    return arrays

def set_parameters(net, parameters: List[np.ndarray]):
    """Load a list of NumPy arrays into ``net`` in ``state_dict`` order.

    Each array is paired positionally with a key of ``net.state_dict()`` and
    converted with ``torch.tensor`` before being handed to ``load_state_dict``.
    """
    new_state = OrderedDict()
    for key, array in zip(net.state_dict().keys(), parameters):
        new_state[key] = torch.tensor(array)
    net.load_state_dict(new_state)

def train(net, trainloader, criterion, optimizer, device, proximal_mu: float = None):
    """Train ``net`` for one pass over ``trainloader`` and report loss/accuracy.

    Args:
        net: model to train; moved to ``device`` and updated in place.
        trainloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to ``net``'s parameters.
        device: device the model and each batch are moved to.
        proximal_mu: if not ``None``, adds the FedProx penalty
            ``(proximal_mu / 2) * ||w - w_start||^2`` to every batch loss,
            where ``w_start`` are the weights ``net`` had on entry.

    Returns:
        ``(mean_loss, accuracy)``: the sample-weighted mean of the batch losses
        (including the proximal penalty when enabled) and the fraction of
        correctly classified samples.

    Raises:
        ZeroDivisionError: if ``trainloader`` yields no samples.
    """
    net.to(device)
    net.train()
    running_loss, running_corrects, tot = 0.0, 0, 0

    # Snapshot the incoming (global) weights as a list of detached tensors.
    # A bare ``.parameters()`` generator would be exhausted after the first
    # batch, silently turning the proximal term into 0 for every later batch.
    global_params = None
    if proximal_mu is not None:
        global_params = [param.detach().clone() for param in net.parameters()]

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)

        loss = criterion(outputs, labels)

        if global_params is not None:
            # Squared L2 distance to the global weights, as in the FedProx objective.
            proximal_term = sum(
                (local - global_).pow(2).sum()
                for local, global_ in zip(net.parameters(), global_params)
            )
            loss = loss + (proximal_mu / 2) * proximal_term

        loss.backward()
        optimizer.step()

        preds = torch.argmax(outputs, dim=1)

        tot += images.size(0)
        running_corrects += torch.sum(preds == labels).item()
        running_loss += loss.item() * images.size(0)

    running_loss /= tot
    accuracy = running_corrects / tot

    return running_loss, accuracy


def test(net, testloader, device):
    """Evaluate ``net`` on ``testloader`` without updating any weights.

    The model is moved to ``device`` and put in eval mode; cross-entropy loss is
    accumulated per sample and predictions are the arg-max over dimension 1.

    Returns:
        ``(loss, accuracy, precision, recall, f1)`` where loss is the
        sample-weighted mean, accuracy is a fraction, and the remaining three
        are macro-averaged scikit-learn scores computed with ``zero_division=0``.
    """
    net.to(device)
    net.eval()
    loss_fn = nn.CrossEntropyLoss()
    n_correct, loss_sum, n_seen = 0, 0.0, 0
    predicted, expected = [], []

    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = net(inputs)
            batch_loss = loss_fn(logits, targets)
            batch_preds = torch.argmax(logits, dim=1)

            # Keep host-side copies for the scikit-learn metrics below.
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(targets.cpu().numpy())

            batch_size = inputs.size(0)
            n_correct += torch.sum(batch_preds == targets).item()
            loss_sum += batch_loss.item() * batch_size
            n_seen += batch_size

    mean_loss = loss_sum / n_seen
    accuracy = n_correct / n_seen
    macro = dict(average='macro', zero_division=0)
    precision = precision_score(expected, predicted, **macro)
    recall = recall_score(expected, predicted, **macro)
    f1 = f1_score(expected, predicted, **macro)

    return mean_loss, accuracy, precision, recall, f1

def data_processing(df, NUM_FEATURES, seq_len: int = 20):
    """Turn a flat per-row table into windowed feature/label arrays.

    Args:
        df: DataFrame with a ``Label`` column, a ``flow_id`` column and the
            feature columns. Rows are grouped into consecutive windows of
            ``seq_len`` rows.
        NUM_FEATURES: number of feature columns per row (all columns except
            ``Label`` and ``flow_id``).
        seq_len: number of consecutive rows per window (default 20, the value
            that was previously hard-coded).

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, seq_len, NUM_FEATURES)``
        with every feature divided by 255, and ``y`` holds the label of the
        last row of each window.
    """
    labels = df['Label'].to_numpy()

    # Scale only the feature columns. Dividing the whole frame would also
    # touch 'Label'/'flow_id' and fails outright if 'flow_id' is non-numeric.
    # NOTE(review): the divisor 255 assumes byte-valued features - confirm.
    features = df.drop(['Label', 'flow_id'], axis=1) / 255

    X_train = features.to_numpy().reshape(-1, seq_len, NUM_FEATURES)
    # One label per window: the label attached to the window's final row.
    y_train = labels.reshape(-1, seq_len)[:, -1]
    return X_train, y_train

In [None]:
from torch import nn

class BN_CNN(nn.Module):
    """Six-convolution 2-D CNN classifier with batch normalisation.

    The network is three stages of (conv -> ReLU -> conv -> ReLU -> batch-norm
    -> 2x2 max-pool), followed by a 256-unit hidden layer with dropout and a
    linear classification head. The hidden layer expects 16 * 2 * 32 flattened
    features, i.e. a 16-channel 2x32 map after the three pooling steps.
    """

    def __init__(self, in_channel, num_classes=3):
        super().__init__()
        # Layers are created in a fixed order (conv1..conv6 first) so that the
        # state_dict layout and the random initialisation sequence stay stable.
        self.conv1 = nn.Conv2d(in_channel, 128, kernel_size=5, padding='same')
        self.conv2 = nn.Conv2d(128, 64, kernel_size=5, padding='same')
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding='same')
        self.conv4 = nn.Conv2d(64, 32, kernel_size=3, padding='same')
        self.conv5 = nn.Conv2d(32, 32, kernel_size=3, padding='same')
        self.conv6 = nn.Conv2d(32, 16, kernel_size=3, padding='same')

        # Parameter-free layers shared by all stages.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)

        # One batch-norm per stage, sized to that stage's output channels.
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.bn2 = nn.BatchNorm2d(num_features=32)
        self.bn3 = nn.BatchNorm2d(num_features=16)

        self.lin1 = nn.Linear(16 * 2 * 32, 256)
        self.classification = nn.Linear(256, out_features=num_classes)

    def forward(self, X):
        """Return class logits of shape ``(batch, num_classes)`` for input ``X``."""
        stages = (
            (self.conv1, self.conv2, self.bn1),
            (self.conv3, self.conv4, self.bn2),
            (self.conv5, self.conv6, self.bn3),
        )
        for first_conv, second_conv, norm in stages:
            X = self.relu(first_conv(X))
            X = self.relu(second_conv(X))
            X = self.pool(norm(X))

        flat = torch.flatten(X, start_dim=1)
        hidden = self.dropout(self.relu(self.lin1(flat)))
        return self.classification(hidden)

In [None]:
from torch import nn

class CNN(nn.Module):
    """Six-convolution 2-D CNN classifier without normalisation layers.

    The network is three stages of (conv -> ReLU -> conv -> ReLU -> 2x2
    max-pool), followed by a 256-unit hidden layer with dropout and a linear
    classification head. The hidden layer expects 16 * 2 * 32 flattened
    features, i.e. a 16-channel 2x32 map after the three pooling steps.
    """

    def __init__(self, in_channel, num_classes=3):
        super().__init__()
        # Layers are created in a fixed order (conv1..conv6 first) so that the
        # state_dict layout and the random initialisation sequence stay stable.
        self.conv1 = nn.Conv2d(in_channel, 128, kernel_size=5, padding='same')
        self.conv2 = nn.Conv2d(128, 64, kernel_size=5, padding='same')
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding='same')
        self.conv4 = nn.Conv2d(64, 32, kernel_size=3, padding='same')
        self.conv5 = nn.Conv2d(32, 32, kernel_size=3, padding='same')
        self.conv6 = nn.Conv2d(32, 16, kernel_size=3, padding='same')

        # Parameter-free layers shared by all stages.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)

        self.lin1 = nn.Linear(16 * 2 * 32, 256)
        self.classification = nn.Linear(256, out_features=num_classes)

    def forward(self, X):
        """Return class logits of shape ``(batch, num_classes)`` for input ``X``."""
        stages = (
            (self.conv1, self.conv2),
            (self.conv3, self.conv4),
            (self.conv5, self.conv6),
        )
        for first_conv, second_conv in stages:
            X = self.relu(first_conv(X))
            X = self.relu(second_conv(X))
            X = self.pool(X)

        flat = torch.flatten(X, start_dim=1)
        hidden = self.dropout(self.relu(self.lin1(flat)))
        return self.classification(hidden)

In [None]:
from algorithm.import_lib import *

class FedAvg(fl.server.strategy.Strategy):
    """Federated averaging strategy that also logs per-round metrics.

    Client updates are combined with Flower's example-weighted ``aggregate``.
    Training and evaluation metrics reported by the clients are averaged
    (weighted by ``num_examples``), stored in ``self.result`` and written to
    ``result/{algo_name}_{exp_name}.csv`` once the final round is evaluated.

    Args:
        exp_name, algo_name: used only to build the CSV file name.
        num_rounds: total number of rounds; the CSV is written when
            ``aggregate_evaluate`` is called for this round.
        num_clients: stored on the instance (not otherwise used by this class).
        device: forwarded to clients in the fit/evaluate config.
        decay_rate: factor applied to ``learning_rate`` after every
            ``configure_fit`` call.
        fraction_fit, fraction_evaluate: fraction of available clients sampled
            for fit / evaluate.
        min_fit_clients, min_evaluate_clients: lower bound on the sample size.
        min_available_clients: minimum number of clients passed to the client
            manager when sampling.
        learning_rate: initial learning rate sent to clients.
        current_parameters: initial global parameters returned by
            ``initialize_parameters``.
    """

    def __init__(
        self, exp_name, algo_name, num_rounds, num_clients, device,
        decay_rate=0.995, fraction_fit=1.0, fraction_evaluate=1.0,
        min_fit_clients=2, min_evaluate_clients=2, min_available_clients=2,
        learning_rate=0.01, current_parameters=None
    ):
        super().__init__()
        self.exp_name = exp_name
        self.algo_name = algo_name
        self.num_rounds = num_rounds
        self.num_clients = num_clients
        self.fraction_fit = fraction_fit
        self.fraction_evaluate = fraction_evaluate
        self.min_fit_clients = min_fit_clients
        self.min_evaluate_clients = min_evaluate_clients
        self.min_available_clients = min_available_clients
        self.learning_rate = learning_rate
        self.current_parameters = current_parameters
        self.device = device
        self.decay_rate = decay_rate
        # Column-oriented metric log; one entry per round in every list.
        self.result = {
            "round": [],
            "train_loss": [],
            "train_accuracy": [],
            "test_loss": [],
            "test_accuracy": [],
            "test_precision": [],
            "test_recall": [],
            "test_f1": []
        }

    def __repr__(self):
        return "FedAvg"

    def initialize_parameters(self, client_manager):
        """Return the initial global parameters supplied at construction."""
        return self.current_parameters

    def configure_fit(self, server_round, parameters, client_manager):
        """Sample clients for training and send them the current learning rate."""
        sample_size, min_num_clients = self.num_fit_clients(client_manager.num_available())
        clients = client_manager.sample(sample_size, min_num_clients)
        config = {"learning_rate": self.learning_rate, "device": self.device}
        # Decay after building the config so this round still uses the old value.
        self.learning_rate *= self.decay_rate
        return [(client, FitIns(parameters, config)) for client in clients]

    def aggregate_fit(self, server_round, results, failures):
        """Average client weights and log example-weighted train loss/accuracy.

        Returns ``(None, {})`` when no client returned a result, leaving the
        stored parameters untouched.
        """
        if not results:
            # Nothing to aggregate (e.g. every client failed this round).
            return None, {}

        self.current_parameters = ndarrays_to_parameters(
            aggregate([(parameters_to_ndarrays(f.parameters), f.num_examples) for _, f in results])
        )
        examples = [f.num_examples for _, f in results]
        total = sum(examples)

        def weighted_avg(metric_name):
            return sum(f.num_examples * f.metrics[metric_name] for _, f in results) / total

        loss = weighted_avg("loss")
        acc = weighted_avg("accuracy")

        self.result["round"].append(server_round)
        self.result["train_loss"].append(loss)
        self.result["train_accuracy"].append(acc)

        print(f"Train R{server_round}: loss={loss:.4f}, acc={acc:.4f}")

        return self.current_parameters, {}

    def configure_evaluate(self, server_round, parameters, client_manager):
        """Sample clients for evaluation and tell them which device to use."""
        sample_size, min_num_clients = self.num_evaluation_clients(client_manager.num_available())
        clients = client_manager.sample(sample_size, min_num_clients)
        config = {"device": self.device}
        return [(client, EvaluateIns(parameters, config)) for client in clients]

    def aggregate_evaluate(self, server_round, results, failures):
        """Log example-weighted test metrics and write the CSV on the last round.

        Returns ``(None, {})`` when no client returned a result.
        """
        if not results:
            # Nothing to aggregate (e.g. every client failed this round).
            return None, {}

        examples = [r.num_examples for _, r in results]
        total = sum(examples)

        def weighted_avg(metric_name):
            return sum(r.num_examples * r.metrics[metric_name] for _, r in results) / total

        loss = sum(r.num_examples * r.loss for _, r in results) / total
        acc = weighted_avg("accuracy")
        prec = weighted_avg("precision")
        rec = weighted_avg("recall")
        f1 = weighted_avg("f1")

        if server_round != 0:
            self.result["test_loss"].append(loss)
            self.result["test_accuracy"].append(acc)
            self.result["test_precision"].append(prec)
            self.result["test_recall"].append(rec)
            self.result["test_f1"].append(f1)

        print(f"Test R{server_round}: loss={loss:.4f}, acc={acc:.4f}, prec={prec:.4f}, recall={rec:.4f}, f1={f1:.4f}")

        if server_round == self.num_rounds:
            # The output directory is not created anywhere else; make sure it
            # exists so the run does not fail right at the end.
            os.makedirs("result", exist_ok=True)
            pd.DataFrame(self.result).to_csv(f"result/{self.algo_name}_{self.exp_name}.csv", index=False)

        return loss, {}

    def evaluate(self, server_round, parameters):
        """No server-side evaluation is performed."""
        return None

    def num_fit_clients(self, num_available):
        """Return ``(sample_size, min_available_clients)`` for a fit round."""
        return max(int(num_available * self.fraction_fit), self.min_fit_clients), self.min_available_clients

    def num_evaluation_clients(self, num_available):
        """Return ``(sample_size, min_available_clients)`` for an evaluate round."""
        return max(int(num_available * self.fraction_evaluate), self.min_evaluate_clients), self.min_available_clients


In [None]:
from algorithm.import_lib import *

class BaseClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates a local copy of the model.

    ``fit`` loads the server weights, runs one call of ``train`` with plain SGD
    at the learning rate given in ``config`` and returns the updated weights;
    ``evaluate`` loads the server weights and reports the metrics from ``test``.
    """

    def __init__(self, cid, net, trainloader, valloader, criterion):
        self.cid, self.net = cid, net
        self.trainloader, self.valloader = trainloader, valloader
        self.criterion = criterion

    def get_parameters(self, config):
        """Return the model's current weights as a list of NumPy arrays."""
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Train on the local training loader starting from ``parameters``."""
        set_parameters(self.net, parameters)
        sgd = torch.optim.SGD(self.net.parameters(), lr=config["learning_rate"])
        train_loss, train_acc = train(
            self.net, self.trainloader, self.criterion, sgd, device=config["device"]
        )
        updated_weights = self.get_parameters(config)
        n_examples = len(self.trainloader.sampler)
        fit_metrics = {
            "loss": train_loss,
            "accuracy": train_acc,
            "id": self.cid
        }
        return updated_weights, n_examples, fit_metrics

    def evaluate(self, parameters, config):
        """Evaluate ``parameters`` on the local validation loader."""
        set_parameters(self.net, parameters)
        val_loss, val_acc, val_prec, val_rec, val_f1 = test(
            self.net, self.valloader, config["device"]
        )
        eval_metrics = {
            "accuracy": val_acc,
            "precision": val_prec,
            "recall": val_rec,
            "f1": val_f1
        }
        return val_loss, len(self.valloader.sampler), eval_metrics

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """In-memory dataset of ``(sample, target)`` pairs.

    Every sample taken along dimension 0 of ``input_data`` gets a leading
    singleton dimension and is cast to float. ``targets`` is stored as given
    and ``classes`` lists its unique values. ``transform`` is accepted but
    not used.
    """

    def __init__(self, input_data, targets, transform=None):
        samples = []
        for row in range(input_data.size(0)):
            # unsqueeze(0) adds the leading (channel-like) dimension.
            samples.append(input_data[row].unsqueeze(0).float())
        self.data = samples
        self.targets = targets
        self.classes = torch.unique(targets).tolist()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        target = self.targets[idx]
        return sample, target


In [None]:
# Load the three per-domain tables from Feather files in the Kaggle working directory.
# NOTE(review): presumably these are the files fetched by the gdown cells above - confirm.
c1_data = pd.read_feather('/kaggle/working/Domain 1.feather')
c2_data = pd.read_feather('/kaggle/working/Domain 2.feather')
c3_data = pd.read_feather('/kaggle/working/Domain 3.feather')

# One DataFrame per domain, kept in domain order.
data_full = [c1_data, c2_data , c3_data]

In [None]:
from torch.utils.data import DataLoader
import numpy as np
import random

def set_seed(seed_value):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, all GPUs).

    Prints a confirmation line once the seeds are set.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)
    print(f"Seeds set to {seed_value}")

def domain_partition(X, y, num_clients):
    """Split one domain's samples across clients with Dirichlet-sized class shares.

    For every class present in ``y`` the sample indices are shuffled and cut
    into ``num_clients`` contiguous chunks whose sizes follow a draw from
    ``Dirichlet(5, ..., 5)``; chunk ``i`` goes to client ``i``.

    Args:
        X: NumPy array of samples, indexed along its first axis.
        y: 1-D NumPy array of class labels aligned with ``X``. Labels do not
            need to be contiguous or start at 0.
        num_clients: number of partitions to produce.

    Returns:
        A list of ``num_clients`` tuples ``(X_client, y_client)`` of torch
        tensors. Every input sample appears in exactly one partition.
    """
    client_indices = [[] for _ in range(num_clients)]

    # Iterate over the labels that actually occur (np.unique returns them
    # sorted) instead of assuming they are exactly 0..K-1; a domain that lacks
    # one class would otherwise index past the end of a per-class list.
    for cls in np.unique(y):
        indices = np.flatnonzero(y == cls)

        # Same RNG call order as before: Dirichlet draw first, then shuffle.
        proportions = np.random.dirichlet(np.ones(num_clients) * 5)
        np.random.shuffle(indices)

        # Cumulative shares -> cut points; the last one is dropped because
        # np.split implicitly ends the final chunk at len(indices).
        cut_points = (np.cumsum(proportions) * len(indices)).astype(int)[:-1]

        for client_id, chunk in enumerate(np.split(indices, cut_points)):
            client_indices[client_id].extend(chunk.tolist())

    client_data = [
        (torch.from_numpy(X[client_idx]), torch.from_numpy(y[client_idx]))
        for client_idx in client_indices
    ]

    return client_data

def get_clients_dataset(full_domain_data, num_domains, num_clients_per_domain,
                        num_features: int = 256, seed: int = 42):
    """Build one ``CustomDataset`` per client from a list of domain tables.

    Args:
        full_domain_data: iterable of per-domain DataFrames accepted by
            ``data_processing``.
        num_domains: kept for interface compatibility; the number of domains
            is taken from ``full_domain_data`` itself.
        num_clients_per_domain: how many clients each domain is split into.
        num_features: feature count per row passed to ``data_processing``
            (default 256, the value that was previously hard-coded).
        seed: value passed to ``set_seed`` before partitioning (default 42,
            the value that was previously hard-coded).

    Returns:
        A flat list of ``CustomDataset`` objects ordered domain by domain,
        ``num_clients_per_domain`` entries per domain.
    """
    set_seed(seed)

    # Preprocess every domain first, then partition them in the same order so
    # the random draws are consumed in a fixed sequence.
    processed = [data_processing(domain, num_features) for domain in full_domain_data]

    client_splits = []
    for data, label in processed:
        client_splits.extend(domain_partition(data, label, num_clients_per_domain))

    return [CustomDataset(features, targets) for features, targets in client_splits]
    

# Experiment configuration.
NUM_DOMAINS = 3
NUM_CLIENTS_PER_DOMAIN = 3
BATCH_SIZE = 32
RANDOM_STATE = 42
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

clients_dataset = get_clients_dataset(data_full, NUM_DOMAINS, NUM_CLIENTS_PER_DOMAIN)

train_set, validation_set = [], []

# 80/20 train/validation split per client.
# The split halves must NOT be bound to the name `train`: that would rebind the
# module-level `train()` function that BaseClient.fit calls during simulation.
for i in range(len(clients_dataset)):
    train_split, val_split = train_test_split(clients_dataset[i], test_size=0.2, random_state=RANDOM_STATE)
    train_set.append(train_split)
    validation_set.append(val_split)

# One loader per client, index-aligned with clients_dataset.
trainloaders = [DataLoader(train_set[i], batch_size=BATCH_SIZE) for i in range(len(train_set))]
valloaders = [DataLoader(validation_set[i], batch_size=BATCH_SIZE) for i in range(len(validation_set))]

In [None]:
def get_label_counts(dataset):
    """Return ``{label: number_of_samples}`` for the labels in ``dataset.targets``."""
    values, frequencies = torch.unique(dataset.targets, return_counts=True)
    return {label: count for label, count in zip(values.tolist(), frequencies.tolist())}

# Per-client label histograms, index-aligned with clients_dataset.
all_label_counts = [get_label_counts(dataset) for dataset in clients_dataset]

# Flatten the per-client dicts into three parallel lists (long format),
# one entry per (client, label) pair, as input for the bar plot.
client_labels = []
client_counts = []
client_ids = []
for i, label_counts in enumerate(all_label_counts):
    for label, count in label_counts.items():
        client_ids.append(f"Client {i}")
        client_labels.append(label)
        client_counts.append(count)

plot_df = pd.DataFrame({'Client': client_ids, 'Label': client_labels, 'Count': client_counts})

# Grouped bar chart: one group per client, one bar per label.
plt.figure(figsize=(12, 6))
sns.barplot(x='Client', y='Count', hue='Label', data=plot_df)
plt.title('Label Distribution per Client')
plt.xlabel('Client ID')
plt.ylabel('Number of Samples')
plt.show()

In [None]:
NUM_ROUNDS = 3
LEARNING_RATE = 0.01
net = BN_CNN(in_channel=1, num_classes=3)
criterion = nn.CrossEntropyLoss()


def base_client_fn(cid: str):
    """Build the Flower client for client id ``cid`` (its index into the loaders)."""
    idx = int(cid)
    return BaseClient(cid, net, trainloaders[idx], valloaders[idx], criterion).to_client()


# Initial global weights come from the freshly constructed model.
current_parameters = ndarrays_to_parameters(get_parameters(net))
# Give each simulated client a share of the GPU only when one is in use.
client_resources = {"num_cpus": 1, "num_gpus": 0.2} if DEVICE == "cuda" else {"num_cpus": 1, "num_gpus": 0.0}

# FedAvg.__init__ has no `net` parameter; passing one raises TypeError before
# the simulation starts, so the strategy only receives the initial parameters.
fl.simulation.start_simulation(
    client_fn=base_client_fn,
    num_clients=NUM_DOMAINS * NUM_CLIENTS_PER_DOMAIN,
    config=fl.server.ServerConfig(num_rounds=NUM_ROUNDS),
    strategy=FedAvg(
        learning_rate=LEARNING_RATE,
        exp_name='FL',
        algo_name='FedAvg',
        device=DEVICE,
        num_rounds=NUM_ROUNDS,
        num_clients=NUM_DOMAINS * NUM_CLIENTS_PER_DOMAIN,
        current_parameters=current_parameters,
    ),
    client_resources=client_resources
)