In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.base import clone
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV,train_test_split

In [3]:
# saving (dump) and loading (load) fitted models
from joblib import dump, load

### Dataset with newly added samples

In [4]:
# Read the Cleveland dataset (with the newly added samples) from the working directory.
# The CSV carries its saved row index as a first column, which pandas labels "Unnamed: 0".
df = pd.read_csv(filepath_or_buffer="Cleveland_Full.csv")
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,resting_blood_pressure,cholesterol,fasting_blood_sugar,max_heart_rate_achieved,exercise_induced_angina,st_depression,target,sex_male,chest_pain_type_atypical angina,chest_pain_type_non-anginal pain,chest_pain_type_typical angina,rest_ecg_left ventricular hypertrophy,rest_ecg_normal,st_slope_flat,st_slope_upsloping
0,40,140,289,0,172,0,0.0,0,1,1,0,0,0,1,0,1
1,49,160,180,0,156,0,1.0,1,0,0,1,0,0,1,1,0
2,37,130,283,0,98,0,0.0,0,1,1,0,0,0,0,0,1
3,48,138,214,0,108,1,1.5,1,0,0,0,0,0,1,1,0
4,54,150,195,0,122,0,0.0,0,1,0,1,0,0,1,0,1


In [5]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
df.shape

(288, 16)

In [6]:
# Feature matrix: every column except the label.
# "Unnamed: 0" is the row index that was written into the CSV, not a measured
# attribute. Leaving it in would let the forest split on row order, so it is
# removed when present (errors="ignore" keeps this working for a CSV saved
# without the index column). A missing "target" column still raises KeyError.
X = df.drop(columns="target").drop(columns="Unnamed: 0", errors="ignore")

In [7]:
# Label vector: the `target` column of the loaded frame.
y = df.loc[:, "target"]

In [8]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=5
)

# Report the size of the training portion (features first, then labels).
print('-----------remodeled-Training Set------------------')
print(X_train2.shape, y_train2.shape, sep='\n')

-----------remodeled-Training Set------------------
(230, 15)
(230,)


### Using the federated models' hyperparameters to evaluate the dataset with new samples

# 1. Refit with the hyperparameters of `fed_model_3`

In [9]:
# Load the federated model from its saved file.
# NOTE: joblib.load unpickles arbitrary Python objects, so only open model
# files that come from a trusted source.
fed_model_3 = load("fed_model_3.joblib")
print(fed_model_3)

# Only the federated hyperparameters are reused here. clone() returns a new,
# unfitted estimator with the same parameters, so fitting on the local training
# split does not overwrite the loaded fed_model_3 (a plain assignment would
# alias it and refit the loaded object in place).
model4 = clone(fed_model_3)
model4.fit(X_train2,y_train2)
pred4 = model4.predict(X_test2)

# Score the refit model on the held-out split; every metric is printed as a percentage.
acc4 = accuracy_score(y_test2,pred4)
print('Test Accuracy_4:', acc4*100)

precision_4 = precision_score(y_test2, pred4)
print('Precision_4:', precision_4*100)

recall_4 = recall_score(y_test2, pred4)
print('Recall_4:', recall_4*100)

f1_score_4 = f1_score(y_test2, pred4)
print('f1_score_4:', f1_score_4*100)

RandomForestClassifier(criterion='entropy', max_depth=6, max_features='log2',
                       n_estimators=50, random_state=1)
Test Accuracy_4: 96.55172413793103
Precision_4: 95.0
Recall_4: 95.0
f1_score_4: 95.0


# 2. Local grid search (saved as the client model)

In [16]:
# Hyperparameter search space for the random forest:
# 12 tree counts (60..115 in steps of 5) x 4 depths (5..8) x 2 feature rules x 2 criteria.
params_rf_5 = {
    'n_estimators': [*range(60, 120, 5)],
    'max_depth': [*range(5, 9)],
    'max_features': ['log2', 'sqrt'],
    'criterion': ['gini', 'entropy'],
}

# Base estimator; the fixed seed keeps every candidate forest reproducible.
model1 = RandomForestClassifier(random_state=1)

# Exhaustive search scored by accuracy with 5-fold cross-validation, using all cores.
grid_rf_5 = GridSearchCV(
    estimator=model1,
    param_grid=params_rf_5,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_rf_5.fit(X_train2, y_train2)

# Winning configuration and the estimator refit with it on the full training split.
best_hyperparams_5 = grid_rf_5.best_params_
best_model5 = grid_rf_5.best_estimator_
print('Best hyerparameters', best_hyperparams_5)

# Score the selected model on the held-out split.
pred5 = best_model5.predict(X_test2)

test_acc5 = accuracy_score(y_test2, pred5)
precision_5 = precision_score(y_test2, pred5)
recall_5 = recall_score(y_test2, pred5)
f1_score_5 = f1_score(y_test2, pred5)

# Every metric is reported as a percentage.
print('Test Accuracy_5:', test_acc5*100)
print('Precision_5:', precision_5*100)
print('Recall_5:', recall_5*100)
print('f1_score_5:', f1_score_5*100)

Best hyerparameters {'criterion': 'gini', 'max_depth': 7, 'max_features': 'log2', 'n_estimators': 115}
Test Accuracy_5: 94.82758620689656
Precision_5: 94.73684210526315
Recall_5: 90.0
f1_score_5: 92.3076923076923


In [15]:
# Persist the grid-search winner so it can be shared as this client's model.
# NOTE(review): this cell's execution count (15) is lower than the grid-search
# cell's (16), so the file on disk may predate the model shown above. Re-run
# the notebook top to bottom before relying on the saved file.
dump(value=best_model5, filename="client_cleveland_3.joblib")

['client_cleveland_3.joblib']

# 3. Refit with the hyperparameters of `fed_model_4`

In [22]:
# Load the next federated model from its saved file.
# NOTE: joblib.load unpickles arbitrary Python objects, so only open model
# files that come from a trusted source.
fed_model_4 = load("fed_model_4.joblib")
print(fed_model_4)

# Only the federated hyperparameters are reused here. clone() returns a new,
# unfitted estimator with the same parameters, so fitting on the local training
# split does not overwrite the loaded fed_model_4 (a plain assignment would
# alias it and refit the loaded object in place).
model6 = clone(fed_model_4)
model6.fit(X_train2,y_train2)

# NOTE(review): the output saved with this cell reports 95.83% accuracy, which
# is not a whole number of rows out of the 58-row test split (288 - 230), so it
# looks like a leftover from an earlier kernel state. Re-run after
# Restart & Run All to confirm.
pred6 = model6.predict(X_test2)

# Score the refit model on the held-out split; every metric is printed as a percentage.
test_acc6 = accuracy_score(y_test2, pred6)
print('Test Accuracy_6:', test_acc6*100)

precision_6 = precision_score(y_test2, pred6)
print('Precision_6:', precision_6*100)

recall_6 = recall_score(y_test2, pred6)
print('Recall_6:', recall_6*100)

f1_score_6 = f1_score(y_test2, pred6)
print('f1_score_6:', f1_score_6*100)

RandomForestClassifier(criterion='entropy', max_depth=9, max_features='log2',
                       n_estimators=65, random_state=1)
Test Accuracy_6: 95.83333333333334
Precision_6: 95.83333333333334
Recall_6: 92.0
f1_score_6: 93.87755102040816
