# Example
This notebook shows how to run each algorithm and obtain its corresponding metrics. Together with the two experiments provided in the `experiments` folder, this example should be enough to reproduce the remaining experiments in a similar manner.

In [1]:
from models.PolatoAdaBoost import polato_AdaBoost_eval
from models.FRF import FRF_eval
from models.FL_AdaBoost_Dist import FLEnsembleDist
from sklearn.model_selection import train_test_split
from flextrees.datasets.tabular_datasets import adult
import numpy as np

Load the dataset

In [2]:
# Single seed reused by every split and model in this notebook.
seed = 0

# Load both parts of the Adult dataset and convert each to (features, labels) arrays.
train_data, test_data = adult(ret_feature_names=False, categorical=False)
X_data, y_data = train_data.to_numpy()
X_test, y_test = test_data.to_numpy()

# Pool the two parts into one dataset (stacked along the sample axis);
# the notebook re-splits this pool itself in the next cell.
X_data, y_data = (
    np.concatenate([X_data, X_test], axis=0),
    np.concatenate([y_data, y_test], axis=0),
)

In [3]:
# Hold out 25% of the pooled samples as the "public" set that is later passed
# to FLEnsembleDist.
# NOTE: despite its name, `predicted_public_data` receives the labels of the
# public split as returned by train_test_split, not model predictions.
data, public_data, targets, predicted_public_data = train_test_split(
    X_data,
    y_data,
    test_size=0.25,
    train_size=0.75,
    random_state=seed,
)

# Split the remaining samples 90/10 into the training pool and the global test set.
# This overwrites the X_test/y_test variables created while loading the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    targets,
    test_size=0.1,
    train_size=0.9,
    random_state=seed,
)

In [4]:
# Creating the model also partitions the training data among the clients
# according to the chosen data distribution.
o_s_Wmv_cw = FLEnsembleDist(
    X_train,
    y_train,
    public_data,
    Nclients=10,
    max_depth=8,
    T=10,
    data_distribution='niid_quantity_skew',
    distribution_param=0.5,
    public_data_prediction='weighted_majority_voting',
    server_alpha_weight_adj='common_weighted',
    prediction_weights='only_server',
    random_state=seed,
)

# Keep shallow copies of the per-client partitions so the baseline models
# evaluated later can reuse exactly the same data distribution.
train_dict = o_s_Wmv_cw.train_clients_data.copy()
test_dict = o_s_Wmv_cw.test_clients_data.copy()

# Strip the last column (the AdaBoost weights) from every client's train and
# test matrix. Only existing keys are reassigned, so iterating the dict is safe.
for client_id in train_dict:
    client_X_train, client_y_train = train_dict[client_id]
    train_dict[client_id] = (client_X_train[:, :-1], client_y_train)
    client_X_test, client_y_test = test_dict[client_id]
    test_dict[client_id] = (client_X_test[:, :-1], client_y_test)

In [5]:
# Train the federated model on the client partitions created at construction time.
o_s_Wmv_cw.fitmodel()

In [6]:
# Evaluate the federated model on the global test set and, on average, on the
# clients' local test sets.
# Values are unpacked as: global accuracy, global F1, local accuracy, local F1.
os_cw_acc_global,os_cw_f1_global,os_cw_acc_local,os_cw_f1_local = o_s_Wmv_cw.sumar_overall_score(X_test,y_test)
print(os_cw_acc_global,os_cw_f1_global,os_cw_acc_local,os_cw_f1_local)

84.93038493038492 85.96960066752173 85.4925945642368 86.210968548284


In [7]:
# To compare against purely local models, first train one local model per
# client on the same data.
o_s_Wmv_cw.fit_local_clients_models()

In [8]:
# Build a DataFrame with the accuracy results of every client (federated vs.
# local model, on own data and on the global test set).
o_s_Wmv_cw.overall_acc_score(X_test,y_test)

Unnamed: 0,data_distrib,FL_acc_own_data,FL_acc_global_data,local_acc_own_data,local_acc_global_data,local_difference,global_difference
0,2178.0,85.53719,84.930385,83.884298,83.865684,1.652893,1.064701
1,115.0,92.307692,84.930385,76.923077,77.600328,15.384615,7.330057
2,309.0,77.142857,84.930385,77.142857,81.081081,0.0,3.849304
3,2043.0,85.462555,84.930385,83.259912,83.046683,2.202643,1.883702
4,1094.0,83.606557,84.930385,81.147541,82.964783,2.459016,1.965602
5,1929.0,84.651163,84.930385,85.116279,83.579034,-0.465116,1.351351
6,522.0,89.830508,84.930385,89.830508,81.040131,0.0,3.890254
7,4644.0,84.883721,84.930385,84.689922,85.257985,0.193798,-0.3276
8,5015.0,84.587814,84.930385,84.050179,84.479934,0.537634,0.45045
9,1922.0,86.915888,84.930385,85.046729,83.374283,1.869159,1.556102


In [9]:
# Per-client results table analogous to overall_acc_score, but for the F1 metric.
# NOTE: the displayed columns are still labelled *_acc_*; the FL_acc_global_data
# value matches the global F1 printed earlier, so the entries appear to be F1 scores.
o_s_Wmv_cw.overall_F1_score(X_test,y_test)

Unnamed: 0,data_distrib,FL_acc_own_data,FL_acc_global_data,local_acc_own_data,local_acc_global_data,local_difference,global_difference
0,2178.0,86.82208,85.969601,85.486928,84.949945,1.335152,1.019655
1,115.0,93.42252,85.969601,75.164835,78.116199,18.257684,7.853402
2,309.0,77.142857,85.969601,75.918367,80.994124,1.22449,4.975477
3,2043.0,86.091737,85.969601,82.932628,83.037834,3.159109,2.931767
4,1094.0,84.594986,85.969601,81.664495,83.880698,2.930491,2.088903
5,1929.0,85.44103,85.969601,85.820071,84.339278,-0.37904,1.630322
6,522.0,89.830508,85.969601,89.431415,82.113472,0.399093,3.856129
7,4644.0,85.518146,85.969601,85.427714,86.08651,0.090432,-0.116909
8,5015.0,85.520002,85.969601,84.820591,85.47582,0.699411,0.493781
9,1922.0,87.725819,85.969601,85.176881,84.074853,2.548938,1.894748


In [10]:
# The stored client partitions and the global test set can now be reused to
# compare against other state-of-the-art algorithms.
# FRF with 100 estimators (per the original note; presumably this is what
# hyperparameters='theirs' selects - confirm in models/FRF).
FRF_acc_global,FRF_f1_global,FRF_acc_local,FRF_f1_local = FRF_eval(train_dict,test_dict,X_test,y_test,hyperparameters='theirs')


In [11]:
# DistBoost.F baseline, evaluated on the same client partitions and global test set.
# n_estimators=10 and max_depth=8 carry the same values as T and max_depth
# passed to FLEnsembleDist.
DB_acc_global, DB_f1_global, DB_acc_local, DB_f1_local = polato_AdaBoost_eval(
    train_dict,
    test_dict,
    X_test,
    y_test,
    seed,
    model='distsamme',
    n_estimators=10,
    max_depth=8,
)


In [12]:
# PreWeak.F baseline, evaluated on the same client partitions and global test set.
PW_acc_global, PW_f1_global, PW_acc_local, PW_f1_local = polato_AdaBoost_eval(
    train_dict,
    test_dict,
    X_test,
    y_test,
    seed,
    model='preweaksamme',
    n_estimators=10,
    max_depth=8,
)


In [13]:
# AdaBoost.F baseline, evaluated on the same client partitions and global test set.
AB_acc_global, AB_f1_global, AB_acc_local, AB_f1_local = polato_AdaBoost_eval(
    train_dict,
    test_dict,
    X_test,
    y_test,
    seed,
    model='adaboost.f1',
    n_estimators=10,
    max_depth=8,
)
            

In [14]:
# One line per baseline: global accuracy, global F1, local accuracy, local F1.
baseline_scores = [
    ('FRF: ', (FRF_acc_global, FRF_f1_global, FRF_acc_local, FRF_f1_local)),
    ('DistBoost.F: ', (DB_acc_global, DB_f1_global, DB_acc_local, DB_f1_local)),
    ('PreWeak.F: ', (PW_acc_global, PW_f1_global, PW_acc_local, PW_f1_local)),
    ('AdaBoost.F: ', (AB_acc_global, AB_f1_global, AB_acc_local, AB_f1_local)),
]
for label, scores in baseline_scores:
    print(label, *scores)

FRF:  83.25143325143326 81.4110372232859 84.24321463324983 82.0797092690395
DistBoost.F:  85.21703521703522 84.42816186977163 86.02703100852365 85.57949349068673
PreWeak.F:  84.07043407043408 83.23314085242748 85.68058268484965 85.13450484677129
AdaBoost.F:  85.13513513513513 84.62202773461213 85.33730710495595 84.87955334114086
