In [37]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models.loda_dumb import LODA
from fed_loda import FedLODA
from sklearn.metrics import average_precision_score, roc_auc_score, recall_score
from scipy.stats import spearmanr
from scipy.io import loadmat

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
datasets = {}

shuttle = loadmat('shuttle/shuttle.mat')
shuttle_df_orig = pd.DataFrame(shuttle['X'], columns=[f"x{i}" for i in range(1, 10)])
shuttle_df_orig['y'] = shuttle['y']
shuttle_df = shuttle_df_orig.sample(frac=1).reset_index(drop=True)
datasets["shuttle"] = shuttle_df


satimage = loadmat('satimage/satimage-2.mat')
satimage_df = pd.DataFrame(satimage['X'], columns=[f"x{i}" for i in range(1, 37)])
satimage_df['y'] = satimage['y']
datasets["satimage"] = satimage_df


musk = loadmat('musk/musk.mat')
musk_df = pd.DataFrame(musk['X'], columns=[f"x{i}" for i in range(1, 167)])
musk_df['y'] = musk['y']
datasets["musk"] = musk_df

In [87]:
dataname = "musk"
num_agents = 3
data_tu_use = datasets[dataname]

In [88]:
from sklearn.model_selection import train_test_split

def feder_strat_split(df, k):
    Xy_0 = np.array_split(df[df.iloc[:,-1]==0], k)
    Xy_1 = np.array_split(df[df.iloc[:,-1]==1], k)
    Xy = []
    for i, j in zip(Xy_0, Xy_1):
        Xy.append(pd.concat([i,j]))
    
    X_list=[]
    y_list=[]
    for sub in Xy:
        X_list.append(sub.iloc[:,:-1])
        y_list.append(sub.iloc[:,-1])
    
    X_train_l, X_test_l, y_train_l, y_test_l = [], [], [], []
    for X, y in zip(X_list, y_list):
        X_train, X_test, y_train, y_test =\
            train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
        X_train_l.append(X_train)
        X_test_l.append(X_test)
        y_train_l.append(y_train)
        y_test_l.append(y_test)

    assert len(X_train_l)==len(X_test_l)==len(y_train_l)==len(y_test_l)==k

    for i, j in zip(X_train_l, y_train_l):
        assert i.shape[0] == j.shape[0] 
    for i, j in zip(X_test_l, y_test_l):
        assert i.shape[0] == j.shape[0] 

    return X_train_l, X_test_l, y_train_l, y_test_l








In [89]:
X_train, X_test, y_train, y_test = feder_strat_split(data_tu_use, num_agents)

y_test_df = pd.concat(y_test)
X_test_df = pd.concat(X_test)


for i, j, k, l in zip(X_train, X_test, y_train, y_test):
    print(i.shape, k.shape, k.sum())
    print(j.shape, l.shape, l.sum())

(919, 166) (919,) 30.0
(103, 166) (103,) 3.0
(918, 166) (918,) 29.0
(102, 166) (102,) 3.0
(918, 166) (918,) 29.0
(102, 166) (102,) 3.0


In [90]:
print(X_test_df.shape, y_test_df.shape)

(307, 166) (307,)


In [91]:
    
floda = FedLODA(
n_random_cuts = 500, standardize=False
)
floda.fit(X_train, y_train)


<fed_loda.FedLODA at 0x7f6a016fa290>

In [92]:
for i, (x_tr, y_tr) in enumerate(zip(X_train, y_train)):
    print(f"\nAgent_{i}:")

    metrics_tr = floda.models[i].valid_metrics(x_tr, y_tr)
    fed_metrics_tr = floda.valid_metrics(x_tr, y_tr)
    
    print(f"""
    \n\t {metrics_tr}\n
    \n\t {fed_metrics_tr}\n
    """)

metrics_te = floda.models[i].valid_metrics(X_test_df, y_test_df)
fed_metrics_te = floda.valid_metrics(X_test_df, y_test_df)
print(f"""
\n\t {metrics_te}\n
\n\t {fed_metrics_te}\n
""")



Agent_0:

    
	 {'accuracy': 0.29053318824809576, 'av_prec': 0.995766129032258, 'auc': 0.9998500187476566}

    
	 {'accuracy': 0.9477693144722524, 'av_prec': 0.19082862864522626, 'auc': 0.760217472815898}

    

Agent_1:

    
	 {'accuracy': 0.9771241830065359, 'av_prec': 0.562098385515666, 'auc': 0.9893720181529033}

    
	 {'accuracy': 0.9901960784313726, 'av_prec': 0.8899748140017899, 'auc': 0.9961987510181917}

    

Agent_2:

    
	 {'accuracy': 1.0, 'av_prec': 0.9999999999999998, 'auc': 1.0}

    
	 {'accuracy': 0.9880174291938998, 'av_prec': 0.9203013440310857, 'auc': 0.9967417865870215}

    


	 {'accuracy': 0.5830618892508144, 'av_prec': 0.04782864111019545, 'auc': 0.6733780760626399}


	 {'accuracy': 0.9576547231270358, 'av_prec': 0.3109789611885623, 'auc': 0.9205816554809844}




In [93]:
metr = {
    "accuracy": 'Accuracy',
    'av_prec':"Average precision",
    'auc': "AUC" 
}
TEST_df = pd.DataFrame(columns=['Accuracy', "Average precision", "AUC"])
for i in range(num_agents):
    metrics_te = floda.models[i].valid_metrics(X_test_df, y_test_df)
    TEST_df = TEST_df.append({metr[k]:v for k, v in metrics_te.items()}, ignore_index=True)


fed_metrics_te = floda.valid_metrics(X_test_df, y_test_df)
TEST_df = TEST_df.append({metr[k]:v for k, v in fed_metrics_te.items()}, ignore_index=True)
TEST_df.index = [*[f"LODA{i}" for i in range(num_agents)], "FedLODA"]

print(TEST_df)
TEST_df.to_latex(f"/mnt/c/Users/mateb/Dropbox/Apps/Overleaf/FedLearnHW/lodaontest_{dataname}.tex", float_format="%.4f")

         Accuracy  Average precision       AUC
LODA0    0.198697           0.934083  0.997390
LODA1    0.635179           0.063147  0.767711
LODA2    0.583062           0.047829  0.673378
FedLODA  0.957655           0.310979  0.920582


In [94]:
num_a = len(X_train)

metr = {
    "accuracy": 'Accuracy',
    'av_prec':"Average precision",
    'auc': "AUC" 
}
TRAIN_df = pd.DataFrame(columns=['Accuracy', "Average precision", "AUC"])
for i, (x_tr, y_tr) in enumerate(zip(X_train, y_train)):
    metrics_te = floda.models[i].valid_metrics(x_tr, y_tr)

    fed_metrics_te = floda.valid_metrics(x_tr, y_tr)
    TRAIN_df = TRAIN_df.append({metr[k]:f"{v1:.4}/{v:.4}" for v1, (k, v) in zip(metrics_te.values(), fed_metrics_te.items())}, ignore_index=True)

TRAIN_df.index = [f"LODA{i}/Fed." for i in range(num_a)]

print(TRAIN_df)

TRAIN_df.to_latex(f"/mnt/c/Users/mateb/Dropbox/Apps/Overleaf/FedLearnHW/lodaontrain_{dataname}.tex", float_format="%.4f")

                 Accuracy Average precision            AUC
LODA0/Fed.  0.2905/0.9478     0.9958/0.1908  0.9999/0.7602
LODA1/Fed.  0.9771/0.9902       0.5621/0.89  0.9894/0.9962
LODA2/Fed.      1.0/0.988        1.0/0.9203     1.0/0.9967
