#**Secure Federated Weighted Average strategy for imbalanced datasets**

#Setup


*   Pytorch and flower installation

In [None]:
!pip install -q flwr[simulation] torch torchvision opacus gmpy2 pympler

- Download and install Paillier wrapper library

In [None]:
!gdown 1sU2Z1S1jbpA-GS2Rc9PJU5i--nzj2aJs
!pip install -q simplephe-0.0.1-py3-none-any.whl

##All General Imports

In [None]:
import os
import glob
import math
import json
import timeit
import platform

from functools import reduce
from collections import OrderedDict
from hashlib import md5
from pympler import asizeof
from copy import deepcopy
from typing import Callable, Dict, List, Optional, Tuple, Union, NewType

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from opacus import PrivacyEngine
from opacus.accountants.rdp import RDPAccountant

from scipy.stats import entropy

In [None]:
# Seaborn plot settings
sns.set_style("white")
#palette = sns.color_palette("Set2")
palette = ['red','blue','green','grey','brown','violet','cyan']
sns.set_context("paper", font_scale=1.2)  # Increase font size

##All Machine Learning Imports

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split, TensorDataset
from torchvision.datasets import CIFAR10
from torch import Tensor

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from collections import Counter, OrderedDict

##All Federated Learning Imports

In [None]:
import flwr as fl
import random

In [None]:
from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitRes,
    Parameters,
    Scalar,
    NDArrays,
    parameters_to_ndarrays,
    ndarrays_to_parameters,
    MetricsAggregationFn
)
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy import fedavg

---
**Tested with flower version 1.5.0 and torch version 2.0.1+cu118**

---



In [None]:
fl.__version__

In [None]:
torch.__version__

### Homomorphic Encryption

In [None]:
import simplephe as sp

##Reproducibility Params

In [None]:
# For dataloader workers
def _init_fn(worker_id):
    """Seed NumPy inside a DataLoader worker process.

    Every worker is seeded with the same module-level ``random_seed``;
    ``worker_id`` is accepted for signature compatibility but is not used.
    """
    seed = int(random_seed)
    np.random.seed(seed)


def set_random_seeds(random_seed):
    """Seed every random number generator used by the notebook.

    Sets ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU and CUDA), and switches PyTorch to deterministic algorithms with
    cuDNN disabled.

    Parameters
    ----------
    random_seed : int
        Seed value shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(random_seed)

    # Python / NumPy / PyTorch generators.
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    # Deterministic kernels only; cuDNN is switched off entirely.
    torch.use_deterministic_algorithms(True)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False

# Notebook-wide seed; read as a global by the split, training and test helpers.
random_seed = 123
# Seed all generators once, right after the seed is defined.
set_random_seeds(random_seed)


##All Globals

In [None]:
experiment_params = {}
strategy_colors = ['red', 'green', 'blue', 'cyan']
#strategy_list = ['fedavg', 'fedadagrad', 'weightedfedavg', 'fedadam', 'fedyogi']


In [None]:
# @title Globals { display-mode: "form" }
# @markdown Number of federated clients:
n_clients = 8 # @param {type:"slider", min:6, max:10, step:1}
loop_on_strategies = True # @param {type:"boolean"}
USE_DP = False # @param {type:"boolean"}
target_epsilon = 0.3 # @param {type:"number"}
# @markdown ---
experiment_params["n_clients"] = n_clients
experiment_params["loop_on_strategies"] = loop_on_strategies
experiment_params["USE_DP"] = USE_DP
if USE_DP:
  experiment_params["target_epsilon"] = target_epsilon

In [None]:
# @title Default strategy { display-mode: "form" }
strategy_type = 'secwfedavg' # @param ['fedavg', 'fedadagrad', 'weightedfedavg', 'secwfedavg','fedadam', 'fedyogi']
# @markdown ---

In [None]:
# @title #### Strategies
strategy_list = []
if loop_on_strategies:
  strategy_list = ['fedavg', 'weightedfedavg', 'secwfedavg']
  experiment_params["strategy_list"] = strategy_list
  print(strategy_list)
else:
  experiment_params["strategy_type"] = strategy_type
  print(strategy_type)

##Hyperparameters

In [None]:
# @title ### Hypers { display-mode: "form" }
shallow_model = True # @param {type:"boolean"}
n_epochs = 2 # @param {type:"slider", min:1, max:25}
n_rounds = 15 # @param {type:"slider", min:2, max:25}
batch_size = 32 # @param {type:"slider", min:32, max:128, step:32}
validation_split = 0.2
learning_rate = 0.003190727031874879 # @param {type:"number"}
# for shallow model 0.003190727031874879 else 0.0018673528886359607
# @markdown ---
# Record the hyperparameters; experiment_params is later serialised to JSON
# and hashed to build the experiment folder name.
# Fix: "shallow_model" used to store n_epochs instead of the model flag.
experiment_params["shallow_model"] = shallow_model
experiment_params["n_epochs"] = n_epochs
experiment_params["n_rounds"] = n_rounds
experiment_params["batch_size"] = batch_size
experiment_params["learning_rate"] = learning_rate

In [None]:
# @title ### Split method { display-mode: "form" }
# @markdown Select method:
# @markdown - majority = one majority class per client. Different sample size per client.
# @markdown - majority_even = one majority class per client. Same sample size per client.
# @markdown - pick_two = two majority class per client
# @markdown - random = random splits
method_selected = 'majority' # @param ['stratified', 'random', 'majority_even', 'majority', 'pick_two']
# @markdown Ratio for majority classes
# *Not all values are possible*
# @markdown **Bug:** *Not all values are possible*, it gives an error if there are not enough samples to distribute to all clients.
ratio_majority_class = 0.5 # @param {type:"slider", min:0.1, max:1, step:0.1}
test_split_size = 0.25 # @param {type:"slider", min:0.1, max:0.5, step:0.05}
# @markdown ---
experiment_params["method_selected"] = method_selected
experiment_params["ratio_majority_class"] = ratio_majority_class
experiment_params["test_split_size"] = test_split_size

In [None]:
# @title ### Weighting options { display-mode: "form" }
# @markdown Options:
# @markdown - Weighted -> using sample sizes and class frequencies
# @markdown - Standard = FedAvg
# @markdown - Arithmetic = Naive average (not weighted)
avg_strategy = 'weighted' # @param ["standard", "arithmetic", "weighted"]
# @markdown ---
experiment_params["avg_strategy"] = avg_strategy

##Initializations

In [None]:
# @title Encrypted layers selection
# @markdown Specify layer indices to encrypt:
# @markdown - all weights [0, 2, 4, 5]
# @markdown - all weights and non linear layers [i for i in range(6)]
# @markdown - some layers [2,4,5]
# @markdown - for testing use layer [5]

# @markdown For the shallow model:
# @markdown - all weights [0, 2, 3]
# @markdown - all weights and non linear layers [i for i in range(4)]
# @markdown - some layers [2, 3]
# @markdown - for testing use layer [3]
encrypted_layers_ids = [0, 2, 3]
experiment_params["encrypted_layers_ids"] = encrypted_layers_ids

In [None]:
experiment_json = json.dumps(experiment_params)

In [None]:
# @title Save path
save_path = md5(experiment_json.encode()).hexdigest()[:8]
print(save_path)

In [None]:
# @title ### Prefix for experiment folder
prefix = "shallow-layers-0-2-3-rounds-15-epochs-2" # @param {type:"string"}

In [None]:
save_path = f"{prefix}_{save_path}"
with open(f'{save_path}.json', 'w') as f:
    f.write(experiment_json)

In [None]:
# Wall-clock reference for timing the whole experiment.
start_global_time = timeit.default_timer()

# Create the experiment output folder on first use.
if not os.path.exists(save_path):
    os.makedirs(save_path)

# Keep a copy of the experiment parameters inside the output folder.
with open(f'{save_path}/experiment_parameters.json', 'w') as f:
    f.write(experiment_json)

DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU
print(
    f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}"
)

OS = platform.system()           # Operating system

#Data preparation

##Data Download

In [None]:
def data_download(file_to_download, gdrive_code, OS, uncompress = True):
  """Download a file from Google Drive with ``gdown`` and optionally unzip it.

  Nothing is done when ``file_to_download`` already exists.

  Parameters
  ----------
  file_to_download : str
      Destination path of the downloaded file.
  gdrive_code : str
      Google Drive file id passed to ``gdown --id``.
  OS : str
      Operating system name; the archive is only unpacked when it is "Linux".
  uncompress : bool
      Whether to unzip the archive next to the downloaded file.

  Returns
  -------
  True when a download was attempted, None when the file was already there.
  """
  import subprocess

  if os.path.exists(file_to_download):
    return None

  # Commands are passed as argument lists (no shell), so paths containing
  # spaces or shell metacharacters are handled safely.
  commands = [["gdown", "--id", gdrive_code, "--output", file_to_download]]
  if OS == "Linux" and uncompress:
    # dirname is "" for a bare file name; unzip needs an actual directory.
    target_dir = os.path.dirname(file_to_download) or "."
    # NOTE(review): -o (overwrite) and -n (never overwrite) are both passed,
    # as in the original command; confirm which behaviour is intended.
    commands.append(["unzip", "-o", "-n", file_to_download, "-d", target_dir])

  for command in commands:
    try:
      status = subprocess.run(command, check=False).returncode
    except FileNotFoundError:
      # Executable not on PATH: report it instead of failing silently.
      status = 127
    if status != 0:
      print(f"Warning: '{command[0]}' exited with status {status}")
  return True



In [None]:
out = data_download("./har_datasets_fl.zip", "1LUjU4yvBRh6FPBlIHRCD2uf5zMH6l9tC", OS)
#urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip", filename="har-data.zip")


##Data Splitting

Proposed methods:
- Majority even: a majority class will be distributed to each client $i=\{1\to \text{num classes}\}$ with a custom ratio ensuring that the splits for all clients have the same number of samples.
- Majority: as above but splits do not have the same size.
- Pick two: as the first method but distributing two "majority" classes.
- Random: distributes samples randomly to each client.

In [None]:
n_splits = n_clients

In [None]:
trainloaders = []

# Awful hack, when True this flips test and train datasets for a stratified
# split ensuring that independent balanced samples are distributed in each split
# Normal behavior flip=False
flip = False
def stratified_split(data, targets, n_splits, split_size=None):
    """Yield stratified shuffle splits of ``data`` / ``targets``.

    Parameters
    ----------
    data, targets
        Indexable arrays of features and labels.
    n_splits : int
        Number of splits to generate.
    split_size : int or float, optional
        Passed to ``StratifiedShuffleSplit`` as ``test_size``. When omitted,
        it defaults to ``len(data) // n_splits`` samples.

    Yields
    ------
    (X_a, y_a, X_b, y_b)
        With the global ``flip`` False: the train part followed by the
        held-out part. With ``flip`` True the two parts are swapped, so the
        first pair is the held-out split of size ``split_size``.
    """
    # NOTE: We pick one stratified split => n_splits=1 because we want a
    # balanced test set, the training part will be postprocessed
    if not split_size:
      split_size = int(len(data) / n_splits)
      # Fix: report the size actually used (the unrelated global `test_size`
      # was printed before).
      print("split_size", split_size)
    sss = StratifiedShuffleSplit(n_splits=n_splits, test_size=split_size, random_state=random_seed)
    for train_index, val_index in sss.split(data, targets):
        # `flip` is read on every iteration, i.e. when the generator is consumed.
        if flip:
          yield data[val_index], targets[val_index], data[train_index], targets[train_index]
        else:
          yield data[train_index], targets[train_index], data[val_index], targets[val_index]

def random_split(data, targets, n_splits, split_size=None):
    """Yield ``n_splits`` independent random train/validation splits.

    Each split comes from a separate ``train_test_split`` call on the full
    data, with ``split_size`` (default ``1 / n_splits``) as the validation
    fraction. Items are yielded as ``(X_train, y_train, X_val, y_val)``.

    NOTE(review): this definition rebinds the name ``random_split`` imported
    from ``torch.utils.data`` earlier in the notebook. The yielded train parts
    are drawn independently, so they presumably overlap across splits;
    confirm that this is intended.
    """
    fraction = split_size if split_size else 1 / n_splits
    for _ in range(n_splits):
        parts = train_test_split(data, targets, test_size=fraction)
        # train_test_split returns [X_train, X_val, y_train, y_val]
        yield parts[0], parts[2], parts[1], parts[3]

def majority_even(data, targets, n_splits, split_size=None,
                        ratio_majority_class=ratio_majority_class):
    """Yield equally sized splits, each dominated by one majority class.

    One dataset is built per class, iterating classes in ``value_counts``
    order: ``int(count * ratio_majority_class)`` rows of that class are drawn
    and the dataset is filled up to ``len(data) // n_splits`` rows with rows
    of the other classes. Used rows are removed from the pool. If fewer
    datasets than ``n_splits`` were produced, the rest are drawn at random
    from the remaining pool.

    The ``split_size`` argument is ignored: it is overwritten below.

    Yields ``(X_train, y_train, None, None)`` for each dataset.

    NOTE(review): ``count`` comes from the class counts taken before any rows
    are removed, while sampling happens on the shrinking pool; a request
    larger than what is left makes ``DataFrame.sample`` fail. Only the initial
    shuffle passes a ``random_state``; the other ``sample`` calls do not.
    """
    df = pd.DataFrame(data)
    df['Y'] = targets
    # Initial shuffle with a fixed seed.
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)
    datasets = []
    data_length = len(df)
    # Overrides the split_size argument: every split gets the same size.
    split_size = int(data_length / n_splits)
    class_counts = df['Y'].value_counts().to_dict()
    for cls, count in class_counts.items():
        majority_sample = df[df['Y'] == cls].sample(int(count * ratio_majority_class))
        #add second majority class for 3rd first datasets
        other_classes_sample = df[~(df['Y'] == cls)].sample(split_size - len(majority_sample))
        dataset = pd.concat([majority_sample, other_classes_sample]).sample(frac=1).reset_index(drop=True)
        datasets.append(dataset)
        # Remove the rows just handed out so they are not reused.
        used_indices = pd.Index(majority_sample.index).union(other_classes_sample.index)
        df.drop(used_indices, inplace=True)
        df.reset_index(drop=True, inplace=True)

    # Remaining splits (if any) are plain random draws from what is left.
    for _ in range(n_splits -len(datasets)):
        dataset = df.sample(split_size)
        datasets.append(dataset)
        df.drop(dataset.index, inplace=True)
        df.reset_index(drop=True, inplace=True)

    for dataset in datasets:
        X_train = dataset.drop(columns=['Y']).to_numpy()
        y_train = dataset['Y'].to_numpy()
        yield X_train, y_train, None, None

def pick_two(data, targets, n_splits, split_size=None,
                        ratio_majority_class=ratio_majority_class):
    """Yield equally sized splits, each dominated by a pair of classes.

    Classes are paired in ``value_counts`` order (1st with 2nd, 3rd with 4th,
    ...). For each pair, ``int(count1 * ratio_majority_class)`` rows are drawn
    from the two classes pooled together, and the dataset is filled up to
    ``len(data) // n_splits`` rows with rows of the other classes. Used rows
    are removed from the pool. Remaining splits, if any, are random draws
    from what is left.

    The ``split_size`` argument is ignored: it is overwritten below.

    Yields ``(X_train, y_train, None, None)`` for each dataset.

    NOTE(review): the majority sample size uses only the first class's count
    (``count1``); ``count2`` is only printed. Confirm that this is intended.
    """
    df = pd.DataFrame(data)
    df['Y'] = targets
    # Initial shuffle with a fixed seed.
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)
    datasets = []
    data_length = len(df)
    # Overrides the split_size argument: every split gets the same size.
    split_size = int(data_length / n_splits)
    class_counts = df['Y'].value_counts().to_dict()
    # Even-positioned classes are zipped with the odd-positioned ones.
    for (cls1, count1), (cls2, count2)  in zip(list(class_counts.items())[::2],
                                               list(class_counts.items())[1::2]):
        print((cls1, count1), (cls2, count2))
        majority_sample = df[(df['Y'] == cls1) | (df['Y']==cls2)].sample(int(count1 * ratio_majority_class))
        other_classes_sample = df[~((df['Y'] == cls1) | (df['Y']==cls2))].sample(split_size - len(majority_sample))
        dataset = pd.concat([majority_sample, other_classes_sample]).sample(frac=1).reset_index(drop=True)
        datasets.append(dataset)
        # Remove the rows just handed out so they are not reused.
        used_indices = pd.Index(majority_sample.index).union(other_classes_sample.index)
        df.drop(used_indices, inplace=True)
        df.reset_index(drop=True, inplace=True)

    # Remaining splits (if any) are plain random draws from what is left.
    for _ in range(n_splits -len(datasets)):
        dataset = df.sample(split_size)
        datasets.append(dataset)
        df.drop(dataset.index, inplace=True)
        df.reset_index(drop=True, inplace=True)

    for dataset in datasets:
        X_train = dataset.drop(columns=['Y']).to_numpy()
        y_train = dataset['Y'].to_numpy()
        yield X_train, y_train, None, None

def pick_majority(data, targets, n_splits, split_size=None,
                          percent_majority_class=ratio_majority_class):
    """Yield splits of different sizes, each boosted by one majority class.

    For every class (in ``value_counts`` order) a sample of
    ``int(count * percent_majority_class)`` rows is set aside. The remaining
    rows are cut into ``n_splits`` contiguous chunks; chunk ``i`` is merged
    with the i-th set-aside class sample while such a sample exists, and the
    other chunks are left as they are.

    Parameters
    ----------
    data, targets
        Features and labels.
    n_splits : int
        Number of datasets to produce.
    split_size
        Unused; accepted so all split methods share one signature.
    percent_majority_class : float
        Fraction of each class set aside as a majority sample.

    Yields ``(X_train, y_train, None, None)`` for each dataset.
    """
    df = pd.DataFrame(data)
    df['Y'] = targets
    # Initial shuffle with a fixed seed.
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)
    maj_datasets = []
    class_counts = df['Y'].value_counts().to_dict()
    for cls, count in class_counts.items():
        # Fix: honour the percent_majority_class argument (the global
        # ratio_majority_class was read here before, ignoring the parameter).
        majority_sample = df[df['Y'] == cls].sample(int(count * percent_majority_class))
        maj_datasets.append(majority_sample)
        df = df.drop(majority_sample.index).reset_index(drop=True)

    # Split row positions rather than the DataFrame itself: np.array_split on
    # a DataFrame is deprecated in recent pandas; the chunk sizes are the same.
    position_chunks = np.array_split(np.arange(len(df)), n_splits)
    datasets = []
    for i, positions in enumerate(position_chunks):
        split = df.iloc[positions]
        if i < len(maj_datasets):
          dataset = pd.concat([split, maj_datasets[i]]).sample(frac=1).reset_index(drop=True)
        else:
          dataset = split
        datasets.append(dataset)

    for dataset in datasets:
        X_train = dataset.drop(columns=['Y']).to_numpy()
        y_train = dataset['Y'].to_numpy()
        yield X_train, y_train, None, None


# Dispatch table: split-method name -> generator function.
# generate_dataloaders looks the function up by name and calls it as
# split_function(data, targets, n_splits).
SPLIT_METHODS = {
    'stratified': stratified_split,
    'random': random_split,
    'majority_even': majority_even,
    'majority': pick_majority,
    'pick_two': pick_two
}

def gini_index(y):
  """Return the Gini impurity ``1 - sum_k p_k**2`` of the labels in ``y``.

  ``y`` may be any array-like of labels (NumPy array, list, ...); ``p_k`` is
  the relative frequency of label ``k``.
  """
  # Only the label counts matter, so no shift of the label values is needed.
  _, counts = np.unique(np.asarray(y), return_counts=True)
  probs = counts / np.sum(counts)
  return 1.0 - np.sum(probs ** 2)

def get_data_from_path(path):
    """Load the train and test CSV files of one fold folder.

    The fold number is the part of the folder name before the first '-'
    (stripped of whitespace). Files are read from
    ``<path>/train/<fold>_ALL_train.csv`` and
    ``<path>/test/<fold>_ALL_test.csv`` using ';' as delimiter.

    Returns the tuple ``(trainset, testset)`` of DataFrames.
    """
    folder_name = os.path.basename(path)
    fold_number = folder_name.partition('-')[0].strip()
    train_file = f"{path}/train/{fold_number}_ALL_train.csv"
    test_file = f"{path}/test/{fold_number}_ALL_test.csv"
    return (
        pd.read_csv(train_file, delimiter=';'),
        pd.read_csv(test_file, delimiter=';'),
    )

def create_datasets_from_dataframe(df):
    """Split a HAR dataframe into a feature matrix and zero-based labels.

    Features are the columns named '0' ... '560'; labels come from column
    'Y' and are shifted down by one so they start at 0.

    Returns the tuple ``(X, y)`` of NumPy arrays.
    """
    feature_columns = [str(idx) for idx in range(561)]
    X = df[feature_columns].values
    y = (df['Y'] - 1).values
    return X, y


def generate_dataloaders(data, targets, split_method, n_splits):
    """Build one shuffled training DataLoader per split.

    The split generator is looked up in ``SPLIT_METHODS`` and all random
    generators are re-seeded with the global ``random_seed`` before it is
    consumed. For every split the class distribution is printed and its Gini
    index recorded. Loaders use the global ``batch_size``.

    Returns
    -------
    (dataloaders, gini_indices)
        List of DataLoaders and list of dicts with keys 'Dataset' and
        'Train Gini Index'.
    """
    split_function = SPLIT_METHODS[split_method]
    set_random_seeds(random_seed)

    dataloaders, gini_indices = [], []
    for i, fold in enumerate(split_function(data, targets, n_splits)):
        # Only the training part of each yielded tuple is used.
        X_train, y_train = fold[0], fold[1]
        train_gini = gini_index(y_train)

        # Print distribution of classes for this fold
        class_distribution = Counter(y_train)
        print(f"Fold {i+1} size {len(y_train)} class distribution: {class_distribution}")

        gini_indices.append({
            'Dataset': i + 1,
            'Train Gini Index': train_gini,
        })

        features = torch.tensor(X_train).float()
        labels = torch.tensor(y_train).long()
        dataloaders.append(
            DataLoader(TensorDataset(features, labels), batch_size=batch_size, shuffle=True)
        )

    return dataloaders, gini_indices


In [None]:
# Let's combine the old data splits into a single dataframe
# Fold folders are visited in sorted order: os.scandir yields entries in
# arbitrary order, which would make the row order (and therefore the seeded
# splits below) differ between machines or runs.
all_data = []
fold_paths = sorted(f.path for f in os.scandir('./har_datasets_fl') if f.is_dir())
for path in fold_paths:
    train_df, test_df = get_data_from_path(path)
    all_data.append(train_df)
    all_data.append(test_df)

combined_df = pd.concat(all_data, axis=0)
print(f"Total data points {len(combined_df)}")

X_all, y_all = create_datasets_from_dataframe(combined_df)

# 1st stratified to get all train data and test data for (server) evaluation
# NOTE(review): test_size is not passed to the split below, which uses
# test_split_size; it is kept because other code in the notebook may read it.
test_size = 0.2
X_train_combined, y_train_combined, X_test, y_test =\
  next(stratified_split(X_all, y_all, n_splits=1, split_size=test_split_size))

print(f"Total train data points {len(X_train_combined)}")
# Fix: this line reports the test set (it was labelled "train" before).
print(f"Total test data points {len(X_test)}")

#### Baseline
The datasets for the baseline contain independent, stratified, balanced folds, one per client.

In [None]:
# With flip=True, stratified_split yields its held-out part first, so each
# client receives the small stratified fold as training data.
flip = True
trainloaders_bl, gini_data_bl = \
  generate_dataloaders(X_train_combined, y_train_combined,
                       "stratified", n_splits)

# Per-client Gini index of the baseline folds.
gini_df = pd.DataFrame(gini_data_bl)
print(gini_df)

print("Number of Training Subsets: ", len(trainloaders_bl))

In [None]:
# number of classes
n_classes = len(np.unique(y_train_combined))
# Assuming class_names is a dictionary mapping class numbers to class names
class_names = {0: "Walking", 1: "Walking\nupstairs", 2: "Walking\ndownstairs", 3: "Sitting", 4: "Standing", 5: "Laying"}

In [None]:
def plot_classes(trainloaders):
  """Plot the per-client class counts and return them as a dataframe.

  Every loader is iterated once to collect its labels. Two bar charts are
  drawn: counts grouped by class (left) and counts grouped by client (right).

  Parameters
  ----------
  trainloaders : list of DataLoader
      One loader per client, yielding ``(features, labels)`` batches.

  Returns
  -------
  pandas.DataFrame
      Columns ``class`` (label replaced by its name from ``class_names``),
      ``0`` (sample count) and ``client`` (client index).
  """
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

  ax1.set_title("Classes distributions")

  frames = []
  for i, loader in enumerate(trainloaders):
    labels = np.concatenate(tuple([y.numpy() for x, y in loader]))
    counts = pd.DataFrame(labels, columns=["class"]).value_counts()
    # Name the count column 0 explicitly: recent pandas names the
    # value_counts result "count", which would break the y=0 lookups below.
    frames.append(counts.rename(0).to_frame().assign(client=i))

  df = (
      pd.concat(frames)
      .reset_index()
      .sort_values(by=["client", "class"])
      .replace({'class': class_names})
  )

  ax2.set_title("Client distributions")
  sns.barplot(x="class", y=0, hue="client", data=df, ax=ax1)
  ax1.set_ylabel("Counts")
  ax1.set_xlabel("Classes")
  sns.barplot(x="client", y=0, hue="class", data=df, ax=ax2)
  ax2.set_xlabel("Client #")
  ax2.set_ylabel("Counts")
  plt.show()
  return df

In [None]:
df_bl = plot_classes(trainloaders_bl)

#### Dataloaders for split datasets

In [None]:
# get dataloaders
# flip is reset to False so stratified_split (if selected) yields the train
# part first again.
flip = False
trainloaders, gini_data = \
  generate_dataloaders(X_train_combined, y_train_combined,
                       method_selected, n_splits)

# Held-out test set from the first stratified split; order is kept (no shuffle).
test_dataset = TensorDataset(torch.tensor(X_test).float(), torch.tensor(y_test).long())
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Per-client Gini index of the selected split method.
gini_df = pd.DataFrame(gini_data)
print(gini_df)

print("Number of Training Subsets: ", len(trainloaders))


In [None]:
xdf = plot_classes(trainloaders)

##Compute weights

- Index $i$ runs over clients, i.e., $i = 1, \dots, 8$ in the default configuration.
- Index $j$ runs over classes, i.e., $j = 1, \dots, 6$.
- Quantities indexed by $i$ are per-client properties: weights $w_i$, Gini coefficient $G_i$, entropy $H_i$.
- Quantities indexed by $j$ are per-class properties, e.g. the probability $p^i_j$ of picking class $j$ on client $i$.

A vector of probabilities $p_i$ is local for a client $i$: $\vec{p}_i=p^i_j\rightarrow \{p^i_1, \dots, p^i_6\}$ with $p^i_j = \frac{\text{count class}\, j\, \text{client}\, i}{N_i}$ with $N_i$ the total number of samples for client $i$. $p^i_j$ is normalized.


The vector $P_j$ is global (common to all clients) $\vec{P}=P_j\rightarrow \{P_1, \dots, P_6\}$ with $P_j = \frac{\text{count class j for all clients}}{N}$ with $N =\sum_i N_i$ the total number of samples for all clients. $P_j$ is normalized.

A set of weights can be constructed by combining a vector of a global property with the local vectors of each client. For instance, let $\tilde{P}_i$ be the normalized sum over classes of the probability $P_j$ of picking class $j$ in the joint dataset times the squared probability $(p^i_j)^2$ for client $i$ to have this class $j$,
$$\tilde{P}_i = \frac{\sum_j P_j \cdot (p^i_j)^2}{\sum_{i'}\sum_{j} P_j \cdot (p^{i'}_j)^2}$$

Using squared probabilities gives more importance to the majority classes than to the minority classes.
The normalized weights can then be computed as the inverse of this value:
$$w_i = \frac{1/\tilde{P}_i}{\sum_{i'} 1/\tilde{P}_{i'}}$$


We can weight by sample size $N_i$ per client $i$ as follows,
$$\alpha_i = \frac{N_i}{N}$$

Finally, the weights are
$$W_i = \frac{\alpha_i w_i}{\sum_{i'} \alpha_{i'} w_{i'}}$$

In [None]:
# group the dataset by client and class and add the samples
nxdf = xdf.groupby(["client", "class"]).sum()

In [None]:
# build the matrix of number of samples per client per class
# Rows are clients, columns are classes. groupby sorts the class *names*
# alphabetically, so the columns are reordered to follow class_names (class
# id order), which is the order used for the heatmap tick labels.
# Missing (client, class) pairs are filled with 0 instead of breaking the shape.
_counts = nxdf.iloc[:, 0].unstack("class", fill_value=0)
_ordered = [name for name in class_names.values() if name in _counts.columns]
_ordered += [col for col in _counts.columns if col not in _ordered]
S = _counts[_ordered].to_numpy()
assert S.shape == (n_clients, n_classes), S.shape

In [None]:
def compute_weights(s):
  """Compute aggregation weights from per-client, per-class sample counts.

  Parameters
  ----------
  s : array-like, shape (clients, classes)
      ``s[i, j]`` is the number of samples of class ``j`` held by client ``i``.
      The numbers of clients and classes are taken from the shape of ``s``.

  Returns
  -------
  (W, w, p)
      ``W``: final weights, i.e. ``w`` scaled by each client's share of the
      samples and renormalized; ``w``: normalized inverse of
      ``sum_j P_j * p_ij**2``; ``p``: per-client class probabilities.

  Intermediate quantities (entropy, balance, Gini index, ...) are printed.
  """
  print("Count of samples per class per client\n", s)

  counts = np.asarray(s, dtype=float)
  num_classes = counts.shape[1]
  total = np.sum(counts)

  # compute alpha ratio of sample sizes for all clients
  alpha = np.sum(counts, axis=1) / total

  # compute pij probabilities for client i to have j class
  p = counts / np.sum(counts, axis=1, keepdims=True)
  print("\nProbabilities per client per class\n", p)

  # Compute the probabilities of having class j if we merge all datasets
  S = np.sum(counts, axis=0)
  P = S / total

  H = [entropy(row, base=2) for row in counts]
  print("\nTotal samples per class", S)
  print("Total probabilities per class", P)
  print("Entropy", H)
  print("Balance", [h/np.log2(num_classes) for h in H])

  # compute squared probabilities pij*pij
  p2 = p*p
  # per-client sum_j P_j * p_ij^2, then its normalized inverse
  mixed = p2.dot(P)
  iP = np.sum(mixed)/mixed
  G = [1-np.sum(pp) for pp in p2]
  print("Gini index ", G)
  print("Mixed probabilities inverse", iP)

  # Weights will be the inverse of the result above, normalized
  w = iP / np.sum(iP)
  print("\nPlain weights", w)

  print("\nSample size ratios", alpha)

  # Final weights taking into account sample size
  W = alpha * w / np.sum(alpha * w)
  print("\nFinal weights", W)

  # return weights and probabilities pij
  return W, w, p

In [None]:
W, Uw, p = compute_weights(S)

In [None]:
def plot_weights(W, p):
  """Show the class-probability heatmap next to a bar chart of the weights.

  Parameters
  ----------
  W : array-like
      One weight per client (right panel).
  p : ndarray
      Per-client class probabilities; its transpose is drawn, so heatmap
      rows are classes and columns are clients (left panel).
  """
  fig, axes = plt.subplots(1, 2, figsize=(16, 6))
  ax1, ax2 = axes
  ax2.set_title('Weights')
  ax1.set_title("Probabilities per client per class $p_{ij}$")

  # Right panel: weights, with the value printed above each bar.
  weights_frame = pd.DataFrame(W)
  weights_frame.plot.bar(ax=ax2, legend=False)

  # Left panel: probabilities heatmap.
  sns.heatmap(p.T, ax=ax1, annot=True, fmt=".2f", linewidths=.5)
  ax1.set(xlabel='clients', ylabel='classes')
  ax1.set_yticklabels(class_names.values())
  ax1.set_xlabel("Client #")

  ax2.set(xlabel='clients', ylabel='weights')
  bars = ax2.containers[0]
  ax2.bar_label(bars, fmt='%.3f')
  ax2.set_xlabel("Client #")
  plt.show()

In [None]:
plot_weights(W, p)

#Model

In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron mapping 561 input features to 6 class logits.

    The global ``shallow_model`` flag selects between one hidden layer
    (432 units) and two hidden layers (437 and 312 units).
    """

    def __init__(self) -> None:
        super().__init__()
        if shallow_model:
            layers = [
                nn.Linear(561, 432),
                nn.ReLU(),
                nn.Linear(432, 6),
            ]
        else:
            layers = [
                nn.Linear(561, 437),
                nn.ReLU(),
                nn.Linear(437, 312),
                nn.ReLU(),
                nn.Linear(312, 6),
            ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return the raw (un-normalized) class logits for ``x``."""
        return self.linear_relu_stack(x)

# Alias used to refer to the model class.
Net = MLP

##Training

###Parameter updates

In [None]:
def get_parameters(net) -> List[np.ndarray]:
    """Return every ``state_dict`` entry of ``net`` as a NumPy array, in order."""
    arrays = []
    for tensor in net.state_dict().values():
        arrays.append(tensor.cpu().numpy())
    return arrays

def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net``.

    Arrays are matched to the ``state_dict`` keys by position, converted with
    ``torch.Tensor`` and loaded with ``strict=True``.
    """
    state_dict = OrderedDict()
    for key, array in zip(net.state_dict().keys(), parameters):
        state_dict[key] = torch.Tensor(array)
    net.load_state_dict(state_dict, strict=True)

###Training function

In [None]:
def train(net, trainloader, epochs: int):
    """Train ``net`` in place on ``trainloader`` for ``epochs`` epochs.

    Uses cross-entropy loss and Adam with the notebook's ``learning_rate``.
    When the global ``USE_DP`` is set, model, optimizer and loader are wrapped
    by Opacus' ``PrivacyEngine`` so that ``target_epsilon`` is met at
    ``delta = 1e-5``, and the spent epsilon is printed at the end.

    Parameters
    ----------
    net : torch.nn.Module
        Model to train (updated in place).
    trainloader : DataLoader
        Yields ``(features, labels)`` batches.
    epochs : int
        Number of passes over ``trainloader``.
    """
    print("Round of training started")
    # Re-seed on every call so each training round is reproducible.
    torch.manual_seed(random_seed)
    torch.use_deterministic_algorithms(True)

    target_delta = 1e-5
    max_grad_norm = 1.0

    criterion = torch.nn.CrossEntropyLoss()
    # Fix: apply the configured learning rate (Adam's default was used before,
    # so the `learning_rate` hyperparameter had no effect).
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    model = net
    dataloader = trainloader

    # Stays None without DP, so no epsilon is reported at the end.
    privacy_engine = None
    if USE_DP:
        # Dedicated, seeded generator for the DP noise.
        noise_generator = torch.Generator()
        noise_generator.manual_seed(random_seed)

        privacy_engine = PrivacyEngine(accountant = 'rdp')
        model, optimizer, dataloader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=dataloader,
            target_epsilon=target_epsilon,
            target_delta=target_delta,
            epochs=epochs,
            max_grad_norm=max_grad_norm,
            noise_generator=noise_generator
        )

    model = model.to(DEVICE)

    model.train()
    for _ in range(epochs):
        for features, labels in dataloader:
            features, labels = features.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # After training, report the privacy budget actually spent.
    if privacy_engine is not None:
        final_epsilon = privacy_engine.get_epsilon(delta=target_delta)
        print(f"The target epsilon was: {target_epsilon}")
        print(f"The final epsilon is: {final_epsilon}")


##Model Testing

In [None]:
def test(net, testloader):
    """Evaluate ``net`` on every batch of ``testloader``.

    Returns
    -------
    (accuracy, loss, f1_micro, f1_macro, f1_per_class)
        ``loss`` is the sum of the per-batch cross-entropy values divided by
        the number of samples in the loader's dataset; ``f1_per_class`` is an
        array with one F1 score per class.
    """
    torch.manual_seed(random_seed)
    torch.use_deterministic_algorithms(True)
    criterion = torch.nn.CrossEntropyLoss()
    net.eval()

    summed_loss = 0.0
    n_correct = 0
    n_seen = 0
    label_chunks = []
    prediction_chunks = []

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = net(inputs)
            summed_loss += criterion(outputs, labels).item()
            predicted = torch.max(outputs.data, 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            label_chunks.append(labels.cpu())
            prediction_chunks.append(predicted.cpu())

    # Flatten the per-batch tensors into NumPy arrays for scikit-learn.
    y_true = torch.cat(label_chunks).numpy()
    y_pred = torch.cat(prediction_chunks).numpy()

    loss = summed_loss / len(testloader.dataset)
    accuracy = n_correct / n_seen

    f1_micro = f1_score(y_true, y_pred, average='micro')
    f1_macro = f1_score(y_true, y_pred, average='macro')
    f1_per_class = f1_score(y_true, y_pred, average=None)

    return accuracy, loss, f1_micro, f1_macro, f1_per_class


#Client implementation

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Plain (unencrypted) Flower client around a PyTorch model.

    Parameters
    ----------
    cid
        Client identifier, used in log messages.
    net
        Torch model trained and evaluated by this client.
    trainloader
        DataLoader with the client's training data.
    valloader
        DataLoader with the client's evaluation data.
    """

    def __init__(self, cid, net, trainloader, valloader):
        self.cid = cid
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader

    def get_parameters(self, config):
        """Return the current model parameters as NumPy arrays."""
        print(f"[Client {self.cid}] get_parameters")
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Load ``parameters``, train for ``n_epochs`` and return the update.

        Returns the new parameters, the number of training examples and an
        empty metrics dict.
        """
        print(f"[Client {self.cid}] fit, config: {config}")
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs=n_epochs)
        # Fix: report the number of examples (dataset size); len(loader) is
        # the number of batches.
        return get_parameters(self.net), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and evaluate them on the validation loader.

        Returns the loss, the number of evaluation examples and a metrics
        dict with accuracy and F1 scores.
        """
        print(f"[Client {self.cid}] evaluate, config: {config}")
        set_parameters(self.net, parameters)
        accuracy, loss, f1_score_value_micro, f1_score_value_macro, f1_score_value_perclass = test(self.net, self.valloader)
        print(f"[Client {self.cid}] loss: {loss}, accuracy: {accuracy}, f1_score_micro: {f1_score_value_micro}, f1_score_macro: {f1_score_value_macro}, f1_score_perclass: {f1_score_value_perclass}")

        metrics = {
            "accuracy": float(accuracy),
            "f1_score_micro": float(f1_score_value_micro),
            "f1_score_macro": float(f1_score_value_macro),
            "f1_score_perclass": [float(score) for score in f1_score_value_perclass],
        }
        # Fix: report the number of examples, not the number of batches.
        return float(loss), len(self.valloader.dataset), metrics



In [None]:
class HARClient(fl.client.NumPyClient):
    """Flower client for HAR data (PyTorch) that exchanges partially encrypted weights.

    Layers whose position in the model ``state_dict`` is listed in the
    module-level ``encrypted_layers_ids`` are encrypted with the client's
    public key before being returned to the server. Layers received as
    serialized ciphertexts (string-typed arrays) are decrypted with the
    client's private key before being loaded into the model.
    """

    def __init__(
        self,
        cid: int,
        model: object,  # har.NeuralNetwork,
        trainloader: torch.utils.data.DataLoader,
        valloader: torch.utils.data.DataLoader,
        keygen,
        debug: bool = False,
    ) -> None:
        """Store the model, the data loaders and the encryption keys.

        Parameters:
        ----------
        cid
            Client ID
        model
            Torch model
        trainloader
            DataLoader used for local training
        valloader
            DataLoader used for local evaluation
        keygen
            Key holder exposing ``public_key`` and ``private_key``
        debug : bool
            Flag stored on the instance for debug messages
        """
        self.cid = cid
        self.model = model
        self.trainloader = trainloader
        self.valloader = valloader
        # HACK this specifies if False is the first communication between
        # server and client in a round of training
        self.train_state = False
        self.first_evaluation_call = True
        self.debug = debug
        self.keygen = keygen

    def get_parameters(self, config=None) -> List[np.ndarray]:
        """Return the model weights, encrypting the layers in ``encrypted_layers_ids``.

        Returns
        -------
        list
            One array per ``state_dict`` entry: a serialized ciphertext array
            for encrypted layers, the plain NumPy array otherwise.
        """
        self.model.train()
        print(f"Calling get_parameters from {self.cid}")
        encrypted_parameters = []
        for n, (name, val) in enumerate(self.model.state_dict().items()):
            # Convert once; the original converted the tensor for every use
            plain = val.cpu().numpy()
            if n in encrypted_layers_ids:
                print(f"Encrypting {name} {plain.shape}")
                enc_ndarray = (
                    sp.EncArray(plain)
                    .encrypt_singlethread(self.keygen.public_key)
                    .serialize_ndarray()
                )
                print(f"Encrypted ndarray shape {enc_ndarray.shape}")
                encrypted_parameters.append(enc_ndarray)
            else:
                print(f"Not encrypted {name} {plain.shape}")
                encrypted_parameters.append(plain)
        print(f"End calling get_parameters from {self.cid}")
        return encrypted_parameters

    def set_parameters(self, parameters: List[np.ndarray]) -> None:
        """Decrypt any ciphertext layers and load all layers into the model.

        ``parameters`` must follow the order of ``self.model.state_dict()``;
        the state dict is loaded with ``strict=True``.
        """
        print(f"Setting {len(parameters)} parameters from {self.cid}")
        parameters_clear = []
        for e in parameters:
            # Serialized ciphertexts travel as string-typed arrays
            if e.dtype.type is np.str_:
                print(
                    f"Deserialiazing and decrypting {e.shape} elements "
                )
                # HACK exponent has changed from 32 to 47
                enc_array = sp.EncArray.deserialize_ndarray(
                    e, self.keygen.public_key, -47
                )
                print(f"with shape {enc_array.shape}")
                parameters_clear.append(enc_array.decrypt_singlethread(self.keygen.private_key))
            else:
                # this one is on the clear
                parameters_clear.append(e)
        # Set model parameters from a list of NumPy ndarrays
        self.model.train()
        print("Updating model dict")
        params_dict = zip(self.model.state_dict().keys(), parameters_clear)
        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
        self.model.load_state_dict(state_dict, strict=True)

    def fit(
        self, parameters: List[np.ndarray], config: Dict[str, str]
    ) -> Tuple[List[np.ndarray], int, Dict]:
        """Load the received weights, train locally, return the updated weights.

        Parameters:
        -----------
        parameters
            Model parameters as a list of NumPy ndarrays (possibly containing
            serialized ciphertext layers)
        config
            Configuration sent by the server; only printed here

        Returns:
        --------
        tuple
            Updated (partially encrypted) parameters, ``len(self.trainloader)``
            and an empty metrics dict
        """
        print(f"Calling fit {config}")
        self.set_parameters(parameters)
        train(self.model, self.trainloader, epochs=n_epochs)

        return self.get_parameters(), len(self.trainloader), {}

    def evaluate(
        self, parameters: List[np.ndarray], config: Dict[str, str]
    ) -> Tuple[float, int, Dict]:
        """Load the received weights and evaluate them on the validation loader.

        Parameters:
        -----------
        parameters
            Model parameters as a list of NumPy ndarrays (possibly containing
            serialized ciphertext layers)
        config
            Configuration sent by the server; only printed here

        Returns:
        --------
        tuple
            Loss, ``len(self.valloader)`` and a metrics dict with accuracy and
            micro / macro / per-class F1 scores
        """
        print("Calling evaluate")

        # Same ciphertext test as set_parameters: checking the dtype works for
        # arrays of any rank, whereas indexing e[0] missed multi-dimensional
        # layers and raised on 0-d arrays.
        if any(e.dtype.type is np.str_ for e in parameters):
            print("Found encrypted layers")
        self.set_parameters(parameters)

        print(f"[Client {self.cid}] evaluate, config: {config}")
        accuracy, loss, f1_score_value_micro, f1_score_value_macro, f1_score_value_perclass = test(self.model, self.valloader)
        print(f"[Client {self.cid}] loss: {loss}, accuracy: {accuracy}, f1_score_micro: {f1_score_value_micro}, f1_score_macro: {f1_score_value_macro}, f1_score_perclass: {f1_score_value_perclass}")  # Add this line

        return float(loss), len(self.valloader), {
                                                    "accuracy": float(accuracy),
                                                    "f1_score_micro": float(f1_score_value_micro),
                                                    "f1_score_macro": float(f1_score_value_macro),
                                                    "f1_score_perclass": [float(score) for score in f1_score_value_perclass]}

### Create keys

In [None]:
# Key holder whose public_key / private_key are used by the encryption helpers,
# the encrypted clients and the encrypted strategy below.
keygen = sp.KeyGenerator()

### Encrypted parameters' (weights') functions

In [None]:
def get_eparameters(model, keygen) -> List[np.ndarray]:
    """Export the model weights, encrypting the layers listed in ``encrypted_layers_ids``.

    The model is switched to eval mode first. Each ``state_dict`` entry whose
    position is in the module-level ``encrypted_layers_ids`` is encrypted with
    ``keygen.public_key`` and serialized; every other entry is returned as a
    plain NumPy array.
    """
    model.eval()
    print("Calling get_eparameters")
    exported = []
    for position, (layer_name, tensor) in enumerate(model.state_dict().items()):
        plain = tensor.cpu().numpy()
        if position not in encrypted_layers_ids:
            print(f"Not encrypted {layer_name} {plain.shape}")
            exported.append(plain)
            continue
        print(f"Encrypting {layer_name} {plain.shape}")
        ciphertext = sp.EncArray(plain).encrypt_singlethread(keygen.public_key)
        serialized = ciphertext.serialize_ndarray()
        print(f"Encrypted ndarray shape {serialized.shape}")
        exported.append(serialized)
    return exported

def set_eparameters(parameters: List[np.ndarray]) -> List[np.ndarray]:
    """Decrypt a mixed list of ciphertext / plaintext layers.

    Despite its name, this function does not load anything into a model: it
    returns a new list in which every serialized ciphertext layer (a
    string-typed array) has been decrypted with the module-level ``keygen``,
    and every other layer is passed through unchanged.

    Parameters
    ----------
    parameters
        Layers as NumPy arrays, some of which may be serialized ciphertexts.

    Returns
    -------
    list
        The layers in the same order, all in the clear.
    """
    print(f"Setting {len(parameters)} parameters from ")
    parameters_clear = []
    print(keygen.public_key)
    for e in parameters:
        # Serialized ciphertexts travel as string-typed arrays
        if e.dtype.type is np.str_:
            print(
                f"Deserialiazing and decrypting {e.shape} elements "
            )
            # HACK exponent has changed from 32 to 47
            enc_array = sp.EncArray.deserialize_ndarray(
                e, keygen.public_key, -47
            )
            print(f"with shape {enc_array.shape}")
            parameters_clear.append(enc_array.decrypt_singlethread(keygen.private_key))
        else:
            # this one is on the clear
            parameters_clear.append(e)
    return parameters_clear

### Encrypting and decrypting time

In [None]:
#%%time
# Time the encryption of a fresh MLP's weights (5 loops, 1 repeat; -o keeps the timing result).
enc_time = %timeit -n5 -r1 -o eparams = get_eparameters(MLP(), keygen)

In [None]:
# Build eparams as a regular variable: it is used by the cells below.
eparams = get_eparameters(MLP(), keygen)

In [None]:
# Time the decryption of the encrypted parameters.
dec_time = %timeit -n5 -r1 -o oparams = set_eparameters(eparams)

In [None]:
# Show the encryption and decryption timings side by side.
enc_time, dec_time

In [None]:
# Plaintext baseline: time reading the weights of a fresh MLP.
ps_time = %timeit -n5 -r1 -o params = get_parameters(MLP())

In [None]:
# Build params as a regular variable for the timing cell below.
params = get_parameters(MLP())

In [None]:
# Plaintext baseline: time loading the weights into a fresh MLP.
pd_time = %timeit -n5 -r1 -o oparams = set_parameters(MLP(), params)

### Size of encrypted parameters

In [None]:
# Size of the encrypted parameter list as reported by pympler's asizeof.
esize = asizeof.asizeof(eparams)
print(esize)

In [None]:
# plaintext size
psize = asizeof.asizeof(get_parameters(MLP()))
print(psize)

### Client instantiation

#### Client for encrypted weights

In [None]:
def eclient_fn(cid) -> HARClient:
    """Build the encrypted-weights client for partition ``cid``.

    The client gets its own deep copy of the module-level ``keygen`` and
    evaluates on the shared ``test_dataloader``.
    """
    # Seed torch before the model is constructed
    torch.manual_seed(1)
    model = Net().to(DEVICE)
    client_trainloader = trainloaders[int(cid)]
    print(keygen.public_key)
    client_keys = deepcopy(keygen)
    return HARClient(
        cid,
        model,
        client_trainloader,
        test_dataloader,
        keygen=client_keys,
    )

#### Client for plaintext weights

In [None]:
def oclient_fn(cid) -> FlowerClient:
    """Build the plaintext-weights client for partition ``cid``.

    The client trains on ``trainloaders[int(cid)]`` and evaluates on the
    shared ``test_dataloader``.
    """
    # Seed torch before the model is constructed
    torch.manual_seed(1)
    model = Net().to(DEVICE)
    return FlowerClient(cid, model, trainloaders[int(cid)], test_dataloader)

# Aggregation strategies

## Custom aggregation

In [None]:
class WeightedFedAvg(fl.server.strategy.FedAvg):
    """FedAvg variant whose per-client aggregation coefficients come from a pluggable function."""

    def __init__(
        self,
        *,
        fraction_fit: float = 1.0,
        fraction_evaluate: float = 1.0,
        min_fit_clients: int = 2,
        min_evaluate_clients: int = 2,
        min_available_clients: int = 2,
        evaluate_fn: Optional[
            Callable[
                [int, NDArrays, Dict[str, Scalar]],
                Optional[Tuple[float, Dict[str, Scalar]]],
            ]
        ] = None,
        on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        accept_failures: bool = True,
        fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        keygen=None,
        initial_parameters: Optional[Parameters] = None,
        weighting_fn = None
    ) -> None:
        """Implement a plaintext weighted-averaging strategy.

        Implementation based on flower FedAvg

        Parameters
        ----------
        fraction_fit
            Fraction of clients used during training. Defaults to 1.0.
        fraction_evaluate
            Fraction of clients used during validation. Defaults to 1.0.
        min_fit_clients
            Minimum number of clients used during training. Defaults to 2.
        min_evaluate_clients
            Minimum number of clients used during validation. Defaults to 2.
        min_available_clients
            Minimum number of total clients in the system. Defaults to 2.
        evaluate_fn
            Optional function used for validation. Defaults to None.
        on_fit_config_fn
            Function used to configure training. Defaults to None.
        on_evaluate_config_fn
            Function used to configure validation. Defaults to None.
        accept_failures
            Whether or not accept rounds containing failures. Defaults to True.
        fit_metrics_aggregation_fn
            Optional function used to aggregate the clients' fit metrics.
        evaluate_metrics_aggregation_fn
            Optional function used to aggregate the clients' evaluation metrics.
        keygen
            Accepted but not used by this plaintext strategy.
        initial_parameters
            Initial global model parameters.
        weighting_fn
            Callable ``(weights_results, cids) -> coefficients`` returning one
            aggregation coefficient per client result. When None, each client
            is weighted by its share of the reported examples.
        """
        super().__init__(
            fraction_fit=fraction_fit,
            fraction_evaluate=fraction_evaluate,
            min_fit_clients=min_fit_clients,
            min_evaluate_clients=min_evaluate_clients,
            min_available_clients=min_available_clients,
            evaluate_fn=evaluate_fn,
            on_fit_config_fn=on_fit_config_fn,
            on_evaluate_config_fn=on_evaluate_config_fn,
            accept_failures=accept_failures,
            initial_parameters=initial_parameters,
            fit_metrics_aggregation_fn=fit_metrics_aggregation_fn,
            evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn,
        )
        self.weighting_fn = weighting_fn

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[ClientProxy, FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate fit results as a coefficient-weighted sum of the client weights.

        Returns ``(None, {})`` when there are no results, or when there are
        failures and failures are not accepted.
        """
        if not results:
            return None, {}
        # Do not aggregate if there are failures and failures are not accepted
        if not self.accept_failures and failures:
            return None, {}

        # Convert results into (layers, num_examples) pairs
        weights_results = [
            (parameters_to_ndarrays(fit_res.parameters), fit_res.num_examples)
            for _, fit_res in results
        ]
        # Extract client ids
        cids = [int(cp.cid) for cp, _ in results]
        print("client proxies", cids)

        # One coefficient per client, in the same order as the results
        if self.weighting_fn is None:
            # No custom weighting supplied: weight by share of examples
            num_examples_total = sum(num_examples for _, num_examples in weights_results)
            client_coeffs = [
                num_examples / num_examples_total for _, num_examples in weights_results
            ]
        else:
            client_coeffs = self.weighting_fn(weights_results, cids)

        # Scale every layer of every client by that client's coefficient
        weighted_weights = [
            [layer * coeff for layer in layers]
            for (layers, _), coeff in zip(weights_results, client_coeffs)
        ]

        # Sum the scaled layers across clients, layer by layer
        weights_prime: NDArrays = [
            reduce(np.add, layer_updates)
            for layer_updates in zip(*weighted_weights)
        ]

        # Aggregate the clients' fit metrics when an aggregation function was given
        metrics_aggregated = {}
        if self.fit_metrics_aggregation_fn:
            fit_metrics = [(res.num_examples, res.metrics) for _, res in results]
            metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics)

        return ndarrays_to_parameters(weights_prime), metrics_aggregated

def custom_weighting(results, cids):
    """Return the per-client aggregation coefficients for the selected ``avg_strategy``.

    Parameters
    ----------
    results
        Sequence of ``(weights, num_samples)`` pairs, one per client.
    cids
        Integer client ids, in the same order as ``results``.

    Returns
    -------
    Sequence of coefficients:
    - ``"standard"``: each client's share of the total sample count
      (plain FedAvg weighting);
    - ``"arithmetic"``: ``1 / len(cids)`` for every client (vector of length
      ``n_clients``);
    - ``"weighted"``: the entries of the module-level ``W`` selected by ``cids``.

    Raises
    ------
    ValueError
        If the module-level ``avg_strategy`` is none of the values above.
    """
    if avg_strategy == "standard":
        total_samples = sum(num_samples for _, num_samples in results)
        # Plain FedAvg as flower (no different as flower implementation)
        return [num_samples / total_samples for _, num_samples in results]
    if avg_strategy == "arithmetic":
        # Simple arithmetic average
        return np.ones(n_clients) / len(cids)
    if avg_strategy == "weighted":
        # Use the custom weights, reordered to match the participating clients
        return W[cids]
    # Previously an unknown strategy surfaced as an UnboundLocalError on return
    raise ValueError(
        f"Unknown avg_strategy {avg_strategy!r}; "
        "expected 'standard', 'arithmetic' or 'weighted'"
    )

## Encrypted custom aggregation

In [None]:
%%time
enc_weights = None
if avg_strategy == "weighted":
  enc_weights = (sp.EncArray(W).encrypt_singlethread(keygen.public_key))
  print(f"Encrypted ndarray shape {enc_weights.shape}")
print(enc_weights)

In [None]:
class EncryptedWeightedFedAvg(fedavg.FedAvg):
    """Weighted averaging strategy that aggregates a mix of encrypted and plaintext layers.

    Layers that clients send as serialized ciphertexts are deserialized with
    the public key, combined without being decrypted, and serialized again
    before being sent back; plaintext layers are combined as NumPy arrays.
    """

    def __init__(
        self,
        *,
        fraction_fit: float = 1.0,
        fraction_evaluate: float = 1.0,
        min_fit_clients: int = 2,
        min_evaluate_clients: int = 2,
        min_available_clients: int = 2,
        evaluate_fn: Optional[
            Callable[
                [int, NDArrays, Dict[str, Scalar]],
                Optional[Tuple[float, Dict[str, Scalar]]],
            ]
        ] = None,
        on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        accept_failures: bool = True,
        fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        keygen=None,
        initial_parameters: Optional[Parameters] = None,
        weighting_fn = None
    ) -> None:
        """Implement Encrypted Weighted Averaging strategy.

        Implementation based on flower FedAvg

        Parameters
        ----------
        fraction_fit
            Fraction of clients used during training. Defaults to 1.0.
        fraction_evaluate
            Fraction of clients used during validation. Defaults to 1.0.
        min_fit_clients
            Minimum number of clients used during training. Defaults to 2.
        min_evaluate_clients
            Minimum number of clients used during validation. Defaults to 2.
        min_available_clients
            Minimum number of total clients in the system. Defaults to 2.
        evaluate_fn
            Optional function used for validation. Defaults to None.
        on_fit_config_fn
            Function used to configure training. Defaults to None.
        on_evaluate_config_fn
            Function used to configure validation. Defaults to None.
        accept_failures
            Whether or not accept rounds containing failures. Defaults to True.
        fit_metrics_aggregation_fn
            Optional function used to aggregate the clients' fit metrics.
        evaluate_metrics_aggregation_fn
            Optional function used to aggregate the clients' evaluation metrics.
        keygen
            Encryption keys. Required: its ``public_key`` is used to
            deserialize the clients' ciphertexts.
        initial_parameters
            Initial global model parameters.
        weighting_fn
            Callable ``(results, cids) -> coefficients`` returning one
            aggregation coefficient per client result. When None, each client
            is weighted by its share of the reported examples.

        Raises
        ------
        ValueError
            If ``keygen`` is None.
        """
        # Fail early with a clear message instead of an AttributeError below
        if keygen is None:
            raise ValueError("EncryptedWeightedFedAvg requires a keygen with a public_key")
        super().__init__(
            fraction_fit=fraction_fit,
            fraction_evaluate=fraction_evaluate,
            min_fit_clients=min_fit_clients,
            min_evaluate_clients=min_evaluate_clients,
            min_available_clients=min_available_clients,
            evaluate_fn=evaluate_fn,
            on_fit_config_fn=on_fit_config_fn,
            on_evaluate_config_fn=on_evaluate_config_fn,
            accept_failures=accept_failures,
            initial_parameters=initial_parameters,
            fit_metrics_aggregation_fn=fit_metrics_aggregation_fn,
            evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn,
        )
        self.other = 1
        print("config")
        self.keygen = keygen
        print(self.keygen.public_key)
        self.weighting_fn = weighting_fn

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[ClientProxy, FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate fit results using encrypted weighted average.

        Returns ``(None, {})`` when there are no results, or when there are
        failures and failures are not accepted.
        """
        if not results:
            return None, {}
        # Do not aggregate if there are failures and failures are not accepted
        if not self.accept_failures and failures:
            return None, {}

        print("Public key", self.keygen.public_key)
        # Convert each client's payload into (layers, num_examples)
        res = []
        for _, fit_res in results:
            # bytes to ndarray
            res_array = parameters_to_ndarrays(fit_res.parameters)
            weights_results = []
            for e in res_array:
                # Serialized ciphertexts travel as string-typed arrays
                if e.dtype.type is np.str_:
                    print(f"Deserializing {e.size} elements.")
                    enc_array = sp.EncArray.deserialize_ndarray(e, self.keygen.public_key)
                    print(f"with shape {enc_array.shape}")
                    weights_results.append(enc_array)
                else:
                    # parameters on the clear, just append them
                    weights_results.append(e)
            res.append((weights_results, fit_res.num_examples))

        # Extract client ids
        cids = [int(cp.cid) for cp, _ in results]
        print("client proxies", cids)

        # One coefficient per client, in the same order as the results
        if self.weighting_fn is None:
            # No custom weighting supplied: weight by share of examples
            num_examples_total = sum(num_examples for _, num_examples in res)
            client_coeffs = np.array(
                [num_examples / num_examples_total for _, num_examples in res]
            )
        else:
            client_coeffs = self.weighting_fn(res, cids)

        # Scale every layer of every client by that client's coefficient
        weighted_weights = [
            [layer * coeff for layer in layers]
            for (layers, _), coeff in zip(res, client_coeffs)
        ]

        # Sum the scaled layers across clients, layer by layer
        weights_prime: NDArrays = [
            reduce(np.add, layer_updates)
            for layer_updates in zip(*weighted_weights)
        ]

        # serialization for transmission, we have a mix of clear & ciphertexts
        weights_pp = [
            e.serialize_ndarray() if isinstance(e, sp.EncArray) else e
            for e in weights_prime
        ]

        # Aggregate the clients' fit metrics when an aggregation function was given
        metrics_aggregated = {}
        if self.fit_metrics_aggregation_fn:
            fit_metrics = [(fit_res.num_examples, fit_res.metrics) for _, fit_res in results]
            metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics)

        # ndarray to bytes for transmission
        return ndarrays_to_parameters(weights_pp), metrics_aggregated

    def evaluate(
        self, server_round: int, parameters: Parameters
        ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
        """Skip server-side evaluation: only prints a notice and returns None."""
        print(
            "Calling evaluate... Not implemented. Server cant see the data!"
        )
        return None

def custom_enc_weighting(results, cids):
    """Return the per-client aggregation coefficients used by the encrypted strategy.

    Parameters
    ----------
    results
        Sequence of ``(weights, num_samples)`` pairs, one per client.
    cids
        Integer client ids, in the same order as ``results``.

    Returns
    -------
    NumPy array of coefficients:
    - ``"standard"``: each client's share of the total sample count
      (plain FedAvg weighting);
    - ``"arithmetic"``: ``1 / len(cids)`` for every client (vector of length
      ``n_clients``);
    - ``"weighted"``: the entries of the module-level plaintext ``W`` selected
      by ``cids``.

    Raises
    ------
    ValueError
        If the module-level ``avg_strategy`` is none of the values above.
    """
    if avg_strategy == "standard":
        total_samples = sum(num_samples for _, num_samples in results)
        # Plain FedAvg as flower (no different as flower implementation)
        return np.array([num_samples / total_samples for _, num_samples in results])
    if avg_strategy == "arithmetic":
        # Simple arithmetic average
        return np.ones(n_clients) / len(cids)
    if avg_strategy == "weighted":
        # Use the custom weights, reordered to match the participating clients
        return W[cids]
    # Previously an unknown strategy surfaced as an UnboundLocalError on return
    raise ValueError(
        f"Unknown avg_strategy {avg_strategy!r}; "
        "expected 'standard', 'arithmetic' or 'weighted'"
    )

w1 * Enc(m1) + w2 * Enc(m2)
Enc(w1) * Enc(m1) + ...

In [None]:
# how to aggregate custom evaluation results https://flower.dev/docs/save-progress.html

# NOTE: this rebinds the name `Scalar` that was imported from flwr.common at the
# top of the notebook; from here on `Scalar` is a NewType over float.
Scalar = NewType("Scalar", float)
# strategy name -> per-round metrics, filled in by the experiment loop
model_metrics = {}
set_random_seeds(random_seed)

def custom_aggregate_evaluate(
        rnd: int,
        results: "List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.EvaluateRes]]",
        failures: "List[BaseException]",
    ) -> "Tuple[Optional[float], Dict[str, Scalar]]":
    """Aggregate the clients' evaluation results, weighting by ``num_examples``.

    Parameters
    ----------
    rnd
        Current server round (not used in the computation).
    results
        ``(client, evaluate_result)`` pairs; each result exposes
        ``num_examples``, ``loss`` and ``metrics``.
    failures
        Failures reported for the round (not used in the computation).

    Returns
    -------
    tuple
        The example-weighted mean loss and a dict with the example-weighted
        mean of every metric reported by the first client. The
        ``"f1_score_perclass"`` metric is averaged element-wise and returned
        as a list. ``(None, {})`` is returned when there are no results or
        when the clients report zero examples in total.
    """
    # Annotations are strings so that defining the function does not depend on
    # what `fl` and `Scalar` are bound to at definition time.
    if not results:
        return None, {}

    eval_results = [r for _, r in results]
    weights = [r.num_examples for r in eval_results]
    total_weight = sum(weights)
    # Nothing to average over: avoid a ZeroDivisionError
    if total_weight == 0:
        return None, {}

    loss = sum(w * r.loss for w, r in zip(weights, eval_results)) / total_weight

    first_metrics = eval_results[0].metrics
    num_classes = len(first_metrics.get('f1_score_perclass', []))

    aggregated_metrics = {}
    for metric_name in first_metrics:
        if metric_name != "f1_score_perclass":
            metric_sum = sum(
                r.metrics.get(metric_name, 0) * r.num_examples for r in eval_results
            )
            aggregated_metrics[metric_name] = metric_sum / total_weight
        else:
            # Weighted F1 score for each class; clients missing the metric count as zeros
            per_class_f1_scores = [
                r.metrics.get(metric_name, [0] * num_classes) for r in eval_results
            ]
            aggregated_metrics[metric_name] = [
                sum(w * class_f1_score for w, class_f1_score in zip(weights, class_f1_scores))
                / total_weight
                for class_f1_scores in zip(*per_class_f1_scores)
            ]

    # Return aggregated loss and metrics (i.e., aggregated accuracy and F1 score)
    return loss, aggregated_metrics




## Run experiment

In [None]:
%%time
strategies_acc = []
time_acc = {}
model_metricsF1={}

loop_list = strategy_list

if loop_on_strategies is False:
  loop_list = [strategy_type]

for strategy_type in loop_list:
  print("\nAnalyzing Strategy... : ", strategy_type.capitalize())
  init_time_st = timeit.default_timer()

  params = get_parameters(MLP())

  if strategy_type == 'fedavg':
    # Creazione della Strategia FedAvg
    # Passa i parametri alla strategia per l'inizializzazione dei parametri lato Server
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=1.0,
        fraction_evaluate=1./n_clients,
        min_fit_clients=n_clients,
        min_evaluate_clients=1,
        min_available_clients=n_clients,
        initial_parameters=fl.common.ndarrays_to_parameters(params),
    )
    client_fn = oclient_fn

  if strategy_type == 'weightedfedavg':
    # Creazione della Strategia personalizzata WeightedFedAvg

    strategy = WeightedFedAvg(
        fraction_fit=1.0,
        fraction_evaluate=1./n_clients,
        min_fit_clients=n_clients,
        min_evaluate_clients=1,
        min_available_clients=n_clients,
        initial_parameters=fl.common.ndarrays_to_parameters(params),
        weighting_fn=custom_weighting
        )
    client_fn = oclient_fn

  if strategy_type == "secwfedavg":
    strategy = EncryptedWeightedFedAvg(
          fraction_fit=1.0,
          fraction_evaluate=1.0/n_clients,
          min_fit_clients=n_clients,
          min_evaluate_clients=1,
          min_available_clients=n_clients,
          keygen=keygen,
          initial_parameters=fl.common.ndarrays_to_parameters(eparams),
          weighting_fn=custom_enc_weighting
      )
    # set the proper client for encrypted weights
    client_fn = eclient_fn

  if strategy_type == 'fedadagrad':
    # Creazione della Strategia FedAdagrad
    strategy = fl.server.strategy.FedAdagrad(
        fraction_fit=1.0,
        fraction_evaluate=1./n_clients,
        min_fit_clients=n_clients,
        min_evaluate_clients=1,
        min_available_clients=n_clients,
        initial_parameters=fl.common.ndarrays_to_parameters(params),
    )
    client_fn = oclient_fn

  if strategy_type == 'fedadam':
    # Creazione della Strategia FedAdam
    strategy = fl.server.strategy.FedAdam(
        fraction_fit=1.0,
        fraction_evaluate=1./n_clients,
        min_fit_clients=n_clients,
        min_evaluate_clients=1,
        min_available_clients=n_clients,
        initial_parameters=fl.common.ndarrays_to_parameters(params))
    client_fn = oclient_fn

  if strategy_type == 'fedyogi':
    # Creazione della Strategia FedYogi
    # Passa i parametri alla strategia per l'inizializzazione dei parametri lato Server
    strategy = fl.server.strategy.FedYogi(
        fraction_fit=1.0,
        fraction_evaluate=1./n_clients,
        min_fit_clients=n_clients,
        min_evaluate_clients=1,
        min_available_clients=n_clients,
        initial_parameters=fl.common.ndarrays_to_parameters(params),
    )
    client_fn = oclient_fn

  strategy.aggregate_evaluate = custom_aggregate_evaluate

  # Specifica le risorse del client se si ha bisogno della GPU (default a 1 per CPU e 0 per GPU)
  client_resources = None
  if DEVICE.type == "cuda":
      client_resources = {"num_gpus": 1}

  # Avvio della Simulazione
  history = fl.simulation.start_simulation(
      client_fn=client_fn,
      num_clients=n_clients,
      config=fl.server.ServerConfig(num_rounds=n_rounds),
      strategy=strategy,
      client_resources=client_resources,
  )

  time_st = timeit.default_timer() - init_time_st
  time_acc[strategy_type] = time_st
  print(f"\nrun time for strategy {strategy_type.capitalize()} : {time_st}")

  aggregated_metrics = history.metrics_distributed

  loss_dist = history.losses_distributed
  acc = [m[1] for m in history.metrics_distributed['accuracy']]
  f1_scores_micro = [m[1] for m in history.metrics_distributed['f1_score_micro']]
  f1_scores_macro = [m[1] for m in history.metrics_distributed['f1_score_macro']]
  f1_scores_perclass = [m[1] for m in history.metrics_distributed['f1_score_perclass']]
  rounds = [m[0] for m in history.metrics_distributed['accuracy']]

  model_metrics[strategy_type] = {'accuracy': acc, 'f1_score_micro': f1_scores_micro, 'f1_score_macro': f1_scores_macro, 'f1_score_perclass': f1_scores_perclass}
  model_metricsF1[strategy_type] = {'f1_score': f1_scores_macro}
  # Plot the accuracy and F1 scores
  plt.figure()
  plt.plot(rounds, acc, label=strategy_type.capitalize()+' Accuracy', color = strategy_colors[loop_list.index(strategy_type)])
  plt.plot(rounds, f1_scores_micro, label=strategy_type.capitalize()+' F1 Score Micro', color = strategy_colors[(loop_list.index(strategy_type) + 1) % len(strategy_colors)])
  plt.plot(rounds, f1_scores_macro, label=strategy_type.capitalize()+' F1 Score Macro', color = strategy_colors[(loop_list.index(strategy_type) + 2) % len(strategy_colors)])

  plt.legend()
  plt.title(strategy_type.capitalize() + " Accuracy and F1 Scores")
  plt.xlabel("Round")  # Added x-label
  plt.ylabel("Score")  # Added y-label
  plt.savefig(f"{save_path}/"+strategy_type+"_acc_f1.png", dpi = 300)
  plt.show()
  plt.close()

  # Plot each class' F1 score in a new figure
  plt.figure()

  # Plot each class' F1 score in a new figure
  plt.figure()
  for i, class_f1_scores in enumerate(zip(*f1_scores_perclass)):
      plt.plot(rounds, class_f1_scores, label=f' {class_names[i]}', color = strategy_colors[(loop_list.index(strategy_type) + 3 + i) % len(strategy_colors)])

  plt.legend()
  plt.title(strategy_type.capitalize() + " Per Class F1 Scores")
  plt.xlabel("Round")  # Added x-label
  plt.ylabel("Score")  # Added y-label
  plt.savefig(f"{save_path}/"+strategy_type+"_perclass_f1.png", dpi = 300)
  plt.show()
  plt.close()

  strategies_acc.append((strategy_type, max(acc), acc[-1], max(f1_scores_micro), f1_scores_micro[-1], max(f1_scores_macro), f1_scores_macro[-1]))



In [None]:

  # Re-plot the results of the most recent simulation: uses `history`,
  # `strategy_type` and `loop_list` as left by the experiment loop.
  aggregated_metrics = history.metrics_distributed

  loss_dist = history.losses_distributed
  acc = [m[1] for m in history.metrics_distributed['accuracy']]
  f1_scores_micro = [m[1] for m in history.metrics_distributed['f1_score_micro']]
  f1_scores_macro = [m[1] for m in history.metrics_distributed['f1_score_macro']]
  f1_scores_perclass = [m[1] for m in history.metrics_distributed['f1_score_perclass']]
  rounds = [m[0] for m in history.metrics_distributed['accuracy']]

  model_metrics[strategy_type] = {'accuracy': acc, 'f1_score_micro': f1_scores_micro, 'f1_score_macro': f1_scores_macro, 'f1_score_perclass': f1_scores_perclass}

  # Every colour lookup wraps around the palette so that a long strategy list
  # cannot index past its end
  color_idx = loop_list.index(strategy_type)
  n_colors = len(strategy_colors)

  # Plot the accuracy and F1 scores
  plt.figure()
  plt.plot(rounds, acc, label=strategy_type.capitalize()+' Accuracy', color = strategy_colors[color_idx % n_colors])
  plt.plot(rounds, f1_scores_micro, label=strategy_type.capitalize()+' F1 Score Micro', color = strategy_colors[(color_idx + 1) % n_colors])
  plt.plot(rounds, f1_scores_macro, label=strategy_type.capitalize()+' F1 Score Macro', color = strategy_colors[(color_idx + 2) % n_colors])

  plt.legend()
  plt.title(strategy_type.capitalize() + " Accuracy and F1 Scores")
  plt.xlabel("Round")
  plt.ylabel("Score")
  plt.savefig(f"{save_path}/"+strategy_type+"_acc_f1.png", dpi = 300)
  plt.show()
  plt.close()

  # Plot each class' F1 score in a single new figure (a second plt.figure()
  # call here used to leave an empty figure open)
  plt.figure()
  for i, class_f1_scores in enumerate(zip(*f1_scores_perclass)):
      plt.plot(rounds, class_f1_scores, label=f' {class_names[i]}', color = strategy_colors[(color_idx + 3 + i) % n_colors])

  plt.legend()
  plt.title(strategy_type.capitalize() + " Per Class F1 Scores")
  plt.xlabel("Round")
  plt.ylabel("Score")
  plt.savefig(f"{save_path}/"+strategy_type+"_perclass_f1.png", dpi = 300)
  plt.show()
  plt.close()

  # The experiment loop already records a summary row per strategy; only add
  # one here if this strategy has none, so re-running the cell stays idempotent.
  if not any(row[0] == strategy_type for row in strategies_acc):
    strategies_acc.append((strategy_type, max(acc), acc[-1], max(f1_scores_micro), f1_scores_micro[-1], max(f1_scores_macro), f1_scores_macro[-1]))



In [None]:
# Summary rows: (strategy, max accuracy, last accuracy, max F1 micro, last F1 micro, max F1 macro, last F1 macro)
strategies_acc

In [None]:
import matplotlib as mpl

In [None]:
# 'Set3' colour sequence from matplotlib's registry; one colour per strategy curve.
cmap = mpl.color_sequences['Set3']

# Overlay the F1-score curve of every strategy in loop_list on a single figure.
# NOTE(review): `rounds` is whatever an earlier cell left in the namespace —
# confirm its length matches each 'f1_score' series.
plt.figure()
for i, strategy_type in enumerate(loop_list):
  plt.plot(
      rounds,
      model_metricsF1[strategy_type]['f1_score'],
      label=strategy_type.capitalize(),
      marker='o', markersize=4, linestyle='-', linewidth=1,
      # Wrap around the colour list instead of raising IndexError when
      # there are more strategies than colours.
      color=cmap[i % len(cmap)],
  )
plt.legend()
plt.title(method_selected.capitalize() + " - F1 Scores")
plt.xlabel("Round")
plt.ylabel("Score")

plt.savefig(f"{save_path}/"+method_selected+"_f1.png", dpi = 300)
plt.show()
plt.close()

In [None]:
# Persist model_metrics as JSON. The file name embeds target_epsilon with the
# decimal point swapped for an underscore so the name contains no extra '.'.
epsilon_str = str(target_epsilon).replace('.', '_')
metrics_json_path = f'{save_path}/model_metrics_epsilon_{epsilon_str}.json'
with open(metrics_json_path, 'w') as metrics_file:
    json.dump(model_metrics, metrics_file)

In [None]:
# Metrics to draw; each one gets its own figure with one curve per strategy.
metrics_to_plot = ['accuracy', 'f1_score_micro', 'f1_score_macro']

for metric in metrics_to_plot:
    fig, ax = plt.subplots(figsize=(8, 6))
    for idx, (strategy, metrics) in enumerate(model_metrics.items()):
        metric_scores = metrics[metric]
        # Rounds are numbered from 1 on the x-axis.
        rounds = list(range(1, len(metric_scores) + 1))
        ax.plot(
            rounds,
            metric_scores,
            marker='o', markersize=4, linestyle='-', linewidth=1,
            # Cycle through the palette so more strategies than colours cannot raise IndexError.
            color=palette[idx % len(palette)],
            label='Testing ' + metric + ' - ' + strategy,
        )
    ax.set_ylim(bottom=0, top=1)  # scores live in [0, 1]
    ax.set_title('Testing ' + metric.capitalize(), fontsize=14)
    ax.set_xlabel('Rounds', fontsize=12)
    ax.set_ylabel(metric.capitalize(), fontsize=12)
    ax.legend()
    plt.tight_layout()
    plt.savefig(f"{save_path}/fl_" + metric + "_over_strategies.png", dpi=300)
    plt.show()
    # Release the figure explicitly, as the other plotting cells do, so figures
    # created in the loop do not pile up in memory.
    plt.close(fig)


In [None]:
# One figure per strategy, showing how the F1 score of every class evolves over the rounds.
for strategy, metrics in model_metrics.items():

    # f1_scores_perclass is indexed as [round][class] below.
    f1_scores_perclass = metrics['f1_score_perclass']

    # The first round determines how many classes are plotted.
    num_classes = len(f1_scores_perclass[0])
    num_rounds = len(f1_scores_perclass)

    # Rounds are numbered from 1 on the x-axis.
    rounds = list(range(1, num_rounds + 1))

    fig, ax = plt.subplots(figsize=(8, 6))

    for class_idx in range(num_classes):
        # Pull this class' score out of every round to get its curve.
        class_f1_scores = [round_f1_scores[class_idx] for round_f1_scores in f1_scores_perclass]
        ax.plot(
            rounds,
            class_f1_scores,
            marker='o', markersize=4, linestyle='-', linewidth=1,
            # Cycle through the palette so more classes than colours cannot raise IndexError.
            color=palette[class_idx % len(palette)],
            label=class_names[class_idx],
        )

    ax.set_ylim(bottom=0, top=1)
    ax.set_title(f'Testing F1 Score for All Classes, Strategy: {strategy}', fontsize=14)
    ax.set_xlabel('Rounds', fontsize=12)
    ax.set_ylabel('F1 Score', fontsize=12)
    ax.legend()

    plt.tight_layout()
    plt.savefig(f"{save_path}/fl_f1score_perclass_{strategy}.png", dpi=300)
    plt.show()
    # Release the figure explicitly so figures created in the loop do not accumulate.
    plt.close(fig)


In [None]:
# Per-strategy figure of every class' F1 score across the rounds.
# NOTE(review): this cell saves to the same fl_f1score_perclass_<strategy>.png
# files as the preceding per-class cell, so running both rewrites those images —
# confirm whether both cells are meant to stay.
for strategy, metrics in model_metrics.items():

    # Indexed as [round][class] below.
    f1_scores_perclass = metrics['f1_score_perclass']

    # Class count is taken from the first round's entry.
    num_classes = len(f1_scores_perclass[0])
    num_rounds = len(f1_scores_perclass)

    # 1-based round numbers for the x-axis.
    rounds = list(range(1, num_rounds + 1))

    fig, ax = plt.subplots(figsize=(8, 6))

    for class_idx in range(num_classes):
        # Curve of this class: its score taken from each round in order.
        class_f1_scores = [round_f1_scores[class_idx] for round_f1_scores in f1_scores_perclass]
        ax.plot(
            rounds,
            class_f1_scores,
            marker='o', markersize=4, linestyle='-', linewidth=1,
            # Wrap around the palette rather than failing when classes outnumber colours.
            color=palette[class_idx % len(palette)],
            label=class_names[class_idx],
        )

    ax.set_ylim(bottom=0, top=1)
    ax.set_title(f'Testing F1 Score for All Classes, Strategy: {strategy}', fontsize=14)
    ax.set_xlabel('Rounds', fontsize=12)
    ax.set_ylabel('F1 Score', fontsize=12)
    ax.legend()

    plt.tight_layout()
    plt.savefig(f"{save_path}/fl_f1score_perclass_{strategy}.png", dpi=300)
    plt.show()
    # Close the figure so repeated iterations do not keep figures alive.
    plt.close(fig)


In [None]:
# Summary table: one row per entry of strategies_acc, with the best and the
# final value of each metric.
strategies_acc_df = pd.DataFrame(strategies_acc, columns=[
    'Strategy',
    'Max Accuracy',
    'Last Accuracy',
    'Max F1 Score Micro',
    'Last F1 Score Micro',
    'Max F1 Score Macro',
    'Last F1 Score Macro'
])

# Long-format frames (Strategy, Metric, value): the layout used for seaborn's `hue` grouping.
accuracy_df = strategies_acc_df[['Strategy', 'Max Accuracy', 'Last Accuracy']].melt(id_vars='Strategy', var_name='Metric', value_name='Accuracy')
f1_micro_df = strategies_acc_df[['Strategy', 'Max F1 Score Micro', 'Last F1 Score Micro']].melt(id_vars='Strategy', var_name='Metric', value_name='F1 Score')
f1_macro_df = strategies_acc_df[['Strategy', 'Max F1 Score Macro', 'Last F1 Score Macro']].melt(id_vars='Strategy', var_name='Metric', value_name='F1 Score')

# (long-format frame, value column, figure title, output file name) for each grouped bar chart.
# NOTE(review): the later matplotlib bar-chart cell also writes
# fl_accuracy_bar_over_strategies.png, replacing the image saved here.
bar_chart_specs = [
    (accuracy_df, 'Accuracy',
     'Comparison of Max and Last Accuracy Across Strategies',
     'fl_accuracy_bar_over_strategies.png'),
    (f1_micro_df, 'F1 Score',
     'Comparison of Max and Last Micro F1 Score Across Strategies',
     'fl_f1score_micro_bar_over_strategies.png'),
    (f1_macro_df, 'F1 Score',
     'Comparison of Max and Last Macro F1 Score Across Strategies',
     'fl_f1score_macro_bar_over_strategies.png'),
]

for long_df, value_col, chart_title, file_name in bar_chart_specs:
    plt.figure(figsize=(10, 5))
    # capsize/errwidth only style seaborn's error bars; presumably none are visible
    # while each Strategy/Metric pair has a single value — TODO confirm.
    sns.barplot(x='Strategy', y=value_col, hue='Metric', data=long_df, palette=palette, capsize=.1, errwidth=1)
    plt.ylim(0, 1)  # scores live in [0, 1]
    plt.title(chart_title)
    plt.tight_layout()
    plt.savefig(f"{save_path}/{file_name}", dpi=300)
    plt.show()
    # Close each figure once saved/shown so the three charts do not stay open.
    plt.close()




In [None]:
bar_width = 0.2  # narrow enough to fit four bars per strategy group
r1 = np.arange(len(strategies_acc_df))  # x position of the first bar of each strategy group

# (columns drawn side by side, y-axis label, figure title, output file name) per chart.
grouped_bar_specs = [
    (['Max Accuracy', 'Last Accuracy'],
     'Accuracy',
     'Max Accuracy and Last Accuracy for Different Strategies',
     'fl_accuracy_bar_over_strategies.png'),
    (['Max F1 Score Micro', 'Last F1 Score Micro', 'Max F1 Score Macro', 'Last F1 Score Macro'],
     'F1 Score',
     'Max and Last F1 Score (Micro and Macro) for Different Strategies',
     'fl_f1score_bar_over_strategies.png'),
]

for columns, y_label, chart_title, file_name in grouped_bar_specs:
    plt.figure(figsize=(10, 5))
    for offset, column in enumerate(columns):
        # Shift every series by one bar width so the bars of a group sit side by side.
        plt.bar(r1 + offset * bar_width, strategies_acc_df[column], width=bar_width, label=column)
    plt.xlabel('Strategy')
    plt.ylabel(y_label)
    plt.title(chart_title)
    # Put the tick under the middle of each group: half a bar for two bars,
    # one and a half bars for four.
    plt.xticks(r1 + (len(columns) - 1) / 2 * bar_width, strategies_acc_df['Strategy'])
    plt.legend()
    plt.ylim(0, 1)  # scores live in [0, 1]
    plt.tight_layout()
    plt.savefig(f"{save_path}/{file_name}", dpi=300)
    plt.show()
    # Close each figure once saved/shown so it does not stay open.
    plt.close()


In [None]:
# Time elapsed since start_global_time was recorded, reported in seconds and in minutes.
tot_time = timeit.default_timer() - start_global_time
tot_minutes = tot_time / 60
print(f"Elapsed time {tot_time:.2f} s {tot_minutes:.2f} min")

In [None]:
# @title
from IPython.display import Markdown as md

In [None]:
# @title ## HE Results
# Ratios of the encrypted timings/sizes to their plaintext counterparts.
enc_ratio = enc_time.average / ps_time.average
dec_ratio = dec_time.average / pd_time.average
sec_runtime = time_acc["secwfedavg"]
runtime_vs_fedavg = sec_runtime / time_acc["fedavg"]
runtime_vs_weighted = sec_runtime / time_acc["weightedfedavg"]
size_ratio = esize / psize

# Render the summary as Markdown; the md(...) call stays the last expression so
# the notebook displays it.
md(
    f"# Results {save_path}\n"
    f"### **Encryption-serialization time** {enc_time} takes {enc_ratio:.2f}x compared to plaintext\n"
    f"### **Deserialization-decryption time** {dec_time} takes {dec_ratio:.2f}x compared to plaintext\n"
    f"### **Run time** {sec_runtime:.2f} s takes {runtime_vs_fedavg:.2f}x compared to plain fedavg, {runtime_vs_weighted:.2f}x compared to weightedfedavg\n"
    f"### **Ciphertext model expansion** {size_ratio:.2f}x"
)

In [None]:
# Gather the encryption benchmark figures into a single dict for export.
# Fixes two copy-paste slips of the original cell:
#   * 'dec_time' stored enc_time.average instead of dec_time.average;
#   * 'ps_time' was assigned twice (the second time with pd_time.average), so the
#     plaintext serialization time was lost and 'pd_time' was never recorded.
he_results = {
    # Mean timings (the `.average` attribute of each timing result).
    'enc_time': enc_time.average,
    'dec_time': dec_time.average,
    'ps_time': ps_time.average,
    'pd_time': pd_time.average,
    # Encrypted-vs-plaintext timing ratios.
    'enc_time_overhead': enc_time.average/ps_time.average,
    'dec_time_overhead': dec_time.average/pd_time.average,
    # Model sizes and their ratio.
    'enc_model_size': esize,
    'plaintext_model_size': psize,
    'model_expansion': esize/psize,
    # Per-strategy run times and the secure strategy's overhead against both baselines.
    'run_time': time_acc,
    'time_overhead': time_acc["secwfedavg"]/time_acc["fedavg"],
    'time_overhead_weighted': time_acc["secwfedavg"]/time_acc["weightedfedavg"],
}

In [None]:
# Write the he_results dictionary to he_results.json inside save_path.
he_results_path = f'{save_path}/he_results.json'
with open(he_results_path, 'w') as he_results_file:
    json.dump(he_results, he_results_file)

# HE 2 inner layers results

## Results layers-5-rounds-15-epochs-2_6ad4f728
- Encryption-serialization time 15.8 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 8.41x compared to plaintext
- Deserialization-decryption time 2.54 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 1.16x compared to plaintext
- Run time 24.79 s takes 1.07x compared to plain fedavg, 1.03x compared to weightedfedavg
- Ciphertext model expansion 9.67x

## Results layers-4-5-rounds-15-epochs-2_a56bf3e4
- Encryption-serialization time 4.15 s ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 2467.06x compared to plaintext
- Deserialization-decryption time 628 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 295.85x compared to plaintext
- Run time 163.78 s takes 7.73x compared to plain fedavg, 6.28x compared to weightedfedavg
- Ciphertext model expansion 2720.99x

## Results layers-2-4-5-rounds-15-epochs-2_13dd53d9
- Encryption-serialization time 11min 28s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each) takes 166326.34x compared to plaintext
- Deserialization-decryption time 1min 38s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each) takes 22691.87x compared to plaintext
- Run time takes too much time to run, colab breaks or disconnects
- Ciphertext model expansion 179631.19x

---
---

# HE shallow model 1 inner layer

## Results shallow-layers-3-rounds-15-epochs-2_c0dddb7a
- Encryption-serialization time 15.1 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 15.50x compared to plaintext
- Deserialization-decryption time 2.3 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 1.86x compared to plaintext
- Run time 19.79 s takes 1.16x compared to plain fedavg, 0.99x compared to weightedfedavg
- Ciphertext model expansion 13.93x

## Results shallow-layers-2-3-rounds-15-epochs-2_8085f830
- Encryption-serialization time 5.75 s ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 5654.66x compared to plaintext
- Deserialization-decryption time 900 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 824.07x compared to plaintext
- Run time 226.72 s takes 13.43x compared to plain fedavg, 11.91x compared to weightedfedavg
- Ciphertext model expansion 5605.73x

## Results shallow-layers-0-2-3-rounds-15-epochs-2_1d9f9186
- Encryption-serialization time 9min 8s ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 547464.16x compared to plaintext
- Deserialization-decryption time 1min 20s ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each) takes 36276.55x compared to plaintext
- Ciphertext model expansion 528441.99x

In [None]:
# Recursively pack the save_path directory into <save_path>.zip.
!zip -r {save_path}.zip {save_path}

## Two inner layers
Model:
1. input (561, 437)
2. ReLU
3. inner linear (437, 312)
4. ReLU
5. inner linear (312, 6)
6. output (6)

|Encrypted layers|Encryption/Serialization|Deserialization/Decryption|Ciphertext expansion| Runtime overhead |
|----|----|----|----|----|
|6|9x|1.5x|10x|1x|
|5, 6|2,500x|300x|3,000x|7x|
|3, 5, 6|200,000x|25,000x|180,000x|-|

## One inner layer
Model:
1. input (561, 432)
2. ReLU
3. inner linear (432, 6)
4. output (6)

|Encrypted layers|Encryption/Serialization|Deserialization/Decryption|Ciphertext expansion| Runtime overhead |
|----|----|----|----|----|
|4|16x|1.9x|14x|1x|
|3, 4|5,700x|900x|5,700x|12x|
|1, 3, 4|600,000x|40,000x|550,000x|-|