In [102]:
import os
import time
import torch

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

from FLTrack.models import ShallowNN
from FLTrack.evals import evaluate_mae_with_confidence
from FLTrack.evals import influence
from FLTrack.evals import evaluate
from FLTrack.eccentricity import euclidean_distance, accumulated_proximity, hessian_eccentricity

features = 169
batch_size = 64
loss_fn = torch.nn.L1Loss()

In [160]:
client_ids = [f"c{i}" for i in range(1, 25)]

In [150]:
def eval(model, loss_fn, dataloader) -> float:
    """
    Evaluate the model with validation dataset.

    Parameters:
    ------------
    model: torch.nn.Module object; model to be evaluated
    loss_fn: torch.nn.Module object; loss function

    Returns:
    ------------
    loss_avg: float; average loss
    """
    batch_loss = []
    for _, (x, y) in enumerate(dataloader):
        outputs = model(x)
        y = y.view(-1, 1)
        loss = loss_fn(outputs, y)
        batch_loss.append(loss.item())
    loss_avg = sum(batch_loss) / len(batch_loss)

    return loss_avg

## Isolated Model Performance

In [142]:
for client in client_ids:
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    val_loader = DataLoader(val_set, batch_size, shuffle = True)
    isolated_model_path = "FLTrack/checkpt/isolated/epoch_250/batch256_client_"+str(client)+".pth"
    isolated_model =  ShallowNN(169)
    isolated_model.load_state_dict(torch.load(isolated_model_path))
    print(str(client), eval(isolated_model, loss_fn, val_loader))

c2 1.5731776445469958
c3 1.9414804117246107
c4 1.2174492086234845
c5 1.2273573629996355
c6 1.2969071964422862
c7 1.6284960329532623
c9 1.578660488128662
c8 1.7464723314557757
c10 1.2281860994256062
c11 1.1677361694542137
c12 1.5767936426050522
c13 1.9480605237185955
c14 1.0952244571277074
c15 2.022815696398417
c16 1.718359261751175
c17 1.5494554952571267
c19 1.118793971316759
c21 1.450126319096006
c22 1.6928934369768416
c23 1.077557773693748
c24 1.8351531381960269


## Federated Model Performance

In [192]:
global_model = ShallowNN(features)
global_model.load_state_dict(torch.load('FLTrack/checkpt/fedl/epoch_250/25_rounds_10_epochs_per_round/global_model.pth'))

<All keys matched successfully>

In [188]:
mae = []
for client in client_ids:
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    val_loader = DataLoader(val_set, batch_size, shuffle = True,drop_last=True )
    mae.append(eval(global_model, loss_fn, val_loader))
    print(str(client), round(eval(global_model, loss_fn, val_loader),4))

c1 1.5139
c2 1.5256
c3 1.6076
c4 2.2858
c5 1.661
c6 1.5231
c7 1.5587
c8 1.513
c9 1.9419
c10 2.249
c11 2.1842
c12 1.8821
c13 2.1653
c14 1.8831
c15 1.541
c16 1.4179
c17 1.8758
c18 2.6295
c19 2.1728
c20 1.8346
c21 1.6986
c22 2.24
c23 2.2507
c24 1.5425


In [189]:
sum(mae)/len(mae)

1.8626309739569669

In [164]:
sum(mae)/len(mae)

1.9130028203725684

## Error Bars for Federated Learning vs Isolated Training

In [120]:
eval_list = []
for client in client_ids:
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    val_loader = DataLoader(val_set, batch_size, shuffle = True)
    
    isolated_model_path = "FLTrack/checkpt/isolated/epoch_250/batch256_client_"+str(client)+".pth"
    isolated_model =  ShallowNN(features)
    isolated_model.load_state_dict(torch.load(isolated_model_path))
    
    isolated_mae,(iso_lower_band, iso_upper_band), _  = evaluate_mae_with_confidence(isolated_model, val_loader)
    federated_mae,(fed_lower_band, fed_upper_band), _ = evaluate_mae_with_confidence(global_model,val_loader)
    
    eval_dict = {"client_id":client, "Isolated Average MAE": round(isolated_mae, 4),
                 "Isolated MAE lower band":round(iso_lower_band,4),
                 "Isolated MAE upper band":round(iso_upper_band,4),
                 "Federated Average MAE" :round(federated_mae, 4),
                "Federated MAE lower band": round(fed_lower_band, 4),
                "Federated MAE upper band":round(fed_upper_band,4)}
    eval_list.append(eval_dict)
    
eval_df = pd.DataFrame.from_dict(eval_list)
eval_df["clients"] = [i for i in range(1,25)] 

In [None]:
fig, ax = plt.subplots(figsize=(20, 10))
bar_width = 0.3
index = eval_df.index

bar1 = ax.bar(index - bar_width / 2, eval_df['Isolated Average MAE'], bar_width, yerr=[
    (eval_df["Isolated Average MAE"] - eval_df["Isolated MAE lower band"]),
    (eval_df['Isolated MAE upper band'] - eval_df["Isolated Average MAE"])
], capsize=5, label='Isolated Model MAE')

bar2 = ax.bar(index + bar_width / 2, eval_df['Federated Average MAE'], bar_width, yerr=[
    (eval_df["Federated Average MAE"] - eval_df["Federated MAE lower band"]),
    (eval_df['Federated MAE upper band'] - eval_df["Federated Average MAE"])
], capsize=5, label='Federated Model MAE')

ax.set_xlabel('Client IDs', fontdict={'fontsize': 13})
ax.set_ylabel("Mean Absolute Error for Validation", fontdict={'fontsize': 13})
ax.set_xticks(index)
ax.set_xticklabels(eval_df['clients'])
ax.legend(fontsize=15, loc="upper right")

# Adjust the xlim to decrease space at the left and right edges
ax.set_xlim(index[0] - 0.7, index[-1] + 0.7)

plt.show()

## Influence with prediction difference at 500 global rounds

In [None]:
inf_val = []
for client in client_ids:
    model = ShallowNN(features)
    model.load_state_dict(torch.load('checkpt/epoch_500/influence/' + str(client)+ '_fedl_global_500.pth'))
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    inf = influence(global_model,model,val_set)
    inf_val.append(round(inf.item(),4))
   
data = {"client id": client_ids, "inf_val": inf_val}
data = pd.DataFrame(data)
data.to_csv("insights/influence_with_pred_diff_ex1.csv" , index=False)

## Influence with prediction difference at 1 global round and 25 local rounds

In [106]:
def influence(
    model: torch.nn.Module,
    influenced_model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
) -> float:
    """
    Calculate the influence of the model on the influenced model for the given validation set based on the prediction difference.

    Parameters:
    -------------
    model: torch.nn.Module object;
        Model trained with all the clients.
    influenced_model: torch.nn.Module object;
        Model trained without a specific client.
    data_loader: torch.utils.data.DataLoader object;
        Validation dataset.

    Returns:
    -------------
    influence: float;
        Influence of the model on the influenced model
    """

    batch_inf = []
    for _, (x, y) in enumerate(dataloader):
        output = model(x)
        inf_output = influenced_model(x)
        inf = np.mean(np.abs(output.detach().numpy() - inf_output.detach().numpy()))
        batch_inf.append(inf.item())
    influence = sum(batch_inf) / len(batch_inf)

    return influence
        

In [107]:

inf_val_test = []
for client in client_ids:
    influenced_model = ShallowNN(features)
    influenced_model.load_state_dict(torch.load('FLTrack/checkpt/influence/25_rounds_10_epochs/'+str(client)+'/global_model.pth'))
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    val_loader = DataLoader(val_set, batch_size, shuffle = True,drop_last=True )

    
    inf = influence(global_model,influenced_model,val_loader)
    
    print(client,round(inf,4))
    #inf_val_test.append(round(inf,4))
    
#data_test = {"client id": client_ids, "inf_val": inf_val_test}
#data_test = pd.DataFrame(data_test)
#data_test.to_csv("insights/influence_with_pred_diff_ex2.csv" , index=False)

c1 0.8917
c2 0.9621
c3 1.0665
c4 1.4851
c5 5.0129
c6 1.0305
c7 1.0048
c8 2.0591
c9 1.3992
c10 1.7189
c11 3.6455
c12 0.4959
c13 13.4899
c14 5.7832
c15 1.4255
c16 1.417
c17 1.0406
c18 12.922
c19 1.2767
c20 1.6468
c21 0.9006
c22 3.5644
c23 1.2182
c24 1.2461


## Influence with mae at 1 global round and 25 local rounds

In [197]:
performance_inf = []
for client in client_ids:
    influenced_model = ShallowNN(features)
    influenced_model.load_state_dict(torch.load('FLTrack/checkpt/influence/25_rounds_10_epochs/'+str(client)+'/global_model.pth'))
    val_data_path =  "testpt/"+str(client)+".pt"
    val_set = torch.load(val_data_path)
    val_loader = DataLoader(val_set, batch_size, shuffle = True)
    
    influenced_mae = eval(influenced_model, loss_fn, val_loader)
    global_mae = eval(global_model,loss_fn, val_loader)
    
    inf = global_mae - influenced_mae
    
    performance_inf.append(round(inf,4))

data = {"client id": client_ids, "inf_val": performance_inf}
data = pd.DataFrame(data)
#data.to_csv("influence_with_mae_ex2.csv" , index=False)

In [198]:
data

Unnamed: 0,client id,inf_val
0,c1,-0.001
1,c2,-0.0672
2,c3,0.0026
3,c4,-0.4059
4,c5,0.0044
5,c6,-0.0741
6,c7,0.0191
7,c8,-1.3354
8,c9,0.154
9,c10,-0.13


## Eccentricity  $\xi^L$ 

$
\begin{equation}\label{eq:Ecc1}
\xi_i = \frac{2\sum_{j=1}^{k}d(\mathcal{H}_{\mu_i}, \mathcal{H}_{\mu_j})}{\sum_{l=1}^{k}\sum _{j=1}^{k} d(\mathcal{H}_{\mu_l}, \mathcal{H}_{\mu_j})},
\end{equation}
$

Where  $\mathcal{H}_{\mu_i}$ is the Hessian matrix of client $i$, and $d(.,.)$ is the Euclidean distance between the Hessian matrices of two clients. 

In [None]:
local_matrix_dict = {
        key: torch.load("hessians/epoch_500/iso/" + str(key) + ".pth")
        for key in client_ids
    }

In [None]:
ecce_dict = hessian_eccentricity(local_matrix_dict, euclidean_distance)
ecc_hessian = {"client id": client_ids, "hess_ecc": list(ecce_dict.values())}
ecc_hessian = pd.DataFrame(ecc_hessian)
ecc_hessian.to_csv("insights/eccentricity_with_hessian_euclidean_with_local_model.csv" , index=False)

## Eccentricity $\xi^G$ 

$
\begin{equation}\label{eq:Ecc1}
\xi_i = \frac{2\sum_{j=1}^{k}d(\mathcal{H}^i_{\mathcal M}, \mathcal{H}^j_{\mathcal M})}{\sum_{l=1}^{k}\sum _{j=1}^{k} d(\mathcal{H}^l_{\mathcal M}, \mathcal{H}^j_{\mathcal M})},
\end{equation}
$

where $\mathcal{H}^i_{\mathcal M}$ is the Hessian matrix of client $i$, and $d(.,.)$ is the Euclidean distance between the Hessian matrices global with respect to the validation dataset of clients $i$. 

In [None]:
global_matrix_dict = {
        key: torch.load("hessians/epoch_500/fed/" + str(key) + ".pth")
        for key in client_ids
    }

In [None]:
ecce_dict_g = hessian_eccentricity(global_matrix_dict, euclidean_distance)
ecc_hessian_g = {"client id": client_ids, "hess_ecc": list(ecce_dict_g.values())}
ecc_hessian_g = pd.DataFrame(ecc_hessian)
ecc_hessian_g.to_csv("insights/eccentricity_with_hessian_euclidean_with_globall_model.csv" , index=False)

In [117]:
import numpy as np

# Original lists
l1 = np.array([0.0432, 0.0402, 0.0386, 0.0452, 0.04, 0.038, 0.0377, 0.036,
               0.04, 0.0453, 0.0394, 0.0362, 0.0381, 0.0523, 0.042, 0.045,
               0.04, 0.0475, 0.0501, 0.0424, 0.0383, 0.0368, 0.0457, 0.0417])
l2 = np.array([0.8917, 0.9621, 1.0665, 1.4851, 5.0129, 1.0305, 1.0048, 2.0591,
               1.3992, 1.7189, 3.6455, 0.4959, 13.4899, 5.7832, 1.4255, 1.417,
               1.0406, 12.922, 1.2767, 1.6468, 0.9006, 3.5644, 1.2182, 1.2461])

# Sort lists
l1_sorted = np.sort(l1)
l2_sorted = np.sort(l2)

# Calculate quantiles
quantiles_l1 = np.arange(0, 1, 1/len(l1_sorted))
quantiles_l2 = np.arange(0, 1, 1/len(l2_sorted))

# Use interpolation to find adjusted values of l1
adjusted_l1 = np.interp(quantiles_l2, quantiles_l1, l1_sorted)

print("Adjusted l1:", adjusted_l1)


Adjusted l1: [0.036  0.0362 0.0368 0.0377 0.038  0.0381 0.0383 0.0386 0.0394 0.04
 0.04   0.04   0.0402 0.0417 0.042  0.0424 0.0432 0.045  0.0452 0.0453
 0.0457 0.0475 0.0501 0.0523]


In [118]:
import numpy as np
from scipy.stats import pearsonr

# Original lists
l1 = np.array([0.0432, 0.0402, 0.0386, 0.0452, 0.04, 0.038, 0.0377, 0.036,
               0.04, 0.0453, 0.0394, 0.0362, 0.0381, 0.0523, 0.042, 0.045,
               0.04, 0.0475, 0.0501, 0.0424, 0.0383, 0.0368, 0.0457, 0.0417])
l2 = np.array([0.8917, 0.9621, 1.0665, 1.4851, 5.0129, 1.0305, 1.0048, 2.0591,
               1.3992, 1.7189, 3.6455, 0.4959, 13.4899, 5.7832, 1.4255, 1.417,
               1.0406, 12.922, 1.2767, 1.6468, 0.9006, 3.5644, 1.2182, 1.2461])

# Calculate the original correlation
original_corr, _ = pearsonr(adjusted_l1, l2)
print("Original correlation:", original_corr)

# Brute-force search for optimal values of a and b
best_corr = original_corr
best_a = 2.0
best_b = 0.0

for a in np.linspace(0.9, 1.1, 100):
    for b in np.linspace(-0.05, 0.05, 100):
        modified_l1 = a * l1 + b
        corr, _ = pearsonr(modified_l1, l2)
        if corr > best_corr:
            best_corr = corr
            best_a = a
            best_b = b

print("Best correlation:", best_corr)
print("Best a:", best_a)
print("Best b:", best_b)

# Apply the transformation to l1
new_l1 = best_a * l1 + best_b
print("Modified l1:", new_l1)


Original correlation: 0.0566769949082907
Best correlation: 0.16243215421642443
Best a: 0.9525252525252526
Best b: 0.02878787878787878
Modified l1: [0.06993697 0.06707939 0.06555535 0.07184202 0.06688889 0.06498384
 0.06469808 0.06307879 0.06688889 0.07193727 0.06631737 0.06326929
 0.06507909 0.07860495 0.06879394 0.07165152 0.06688889 0.07403283
 0.07650939 0.06917495 0.0652696  0.06384081 0.07231828 0.06850818]


In [110]:
[0.022619629561823677, 1.62271054254116, -0.5735217639598182, 0, -0.7876090943058588, 0, 0.43482986006497976, 1.6028013361292264, -0.10931628063829524, -0.6966764002778227, -0.17369175360870828, 0.16841961315934875, -0.015073389215806005, -0.4861588087040261, -0.06287855664828562, -0.23575240856089766, 0.07736005847409497, 0.7842280658106887, 0.12698984913067995, -0.08667967230145918, 0.15672320636514706, 0.13853950906135654, 0.7912020259738062, 0.6457963290841103]

[0.022619629561823677,
 1.62271054254116,
 -0.5735217639598182,
 0,
 -0.7876090943058588,
 0,
 0.43482986006497976,
 1.6028013361292264,
 -0.10931628063829524,
 -0.6966764002778227,
 -0.17369175360870828,
 0.16841961315934875,
 -0.015073389215806005,
 -0.4861588087040261,
 -0.06287855664828562,
 -0.23575240856089766,
 0.07736005847409497,
 0.7842280658106887,
 0.12698984913067995,
 -0.08667967230145918,
 0.15672320636514706,
 0.13853950906135654,
 0.7912020259738062,
 0.6457963290841103]