## <center> <font color='purple'> Dynamic Ensemble Machine Learning Models (Static Ensemble ML Pool)</font></center> 
#### <center>Firuz Juraev (Sungkyunkwan Unniversity)</center>

### <font color='green'> Libraries 

#### <font color='blue'> Basic Libraries 

In [1]:
import pandas as pd
import numpy as np 
from numpy import mean
from numpy import std
import random
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

#### <font color='blue'> Single ML Models Libraries 

In [2]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

#### <font color='blue'> Static ML Models Libraries 

In [3]:
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier

#### <font color='blue'> DES Libraries 

In [4]:
from deslib.des import DESP
from deslib.des import KNORAE
from deslib.des import KNORAU
from deslib.des import METADES
from deslib.des import DESKNN # new 
from deslib.des import KNOP # new 

#### <font color='blue'> DCS Libraries 

In [5]:
from deslib.dcs import MCB

#### <font color='blue'> Processing Libraries 

In [6]:
from sklearn import metrics
from sklearn.metrics import plot_confusion_matrix, confusion_matrix

from sklearn.metrics import (accuracy_score,
                             precision_score,
                             recall_score, 
                             f1_score,
                             roc_auc_score, 
                             auc)
from sklearn.metrics import roc_curve, roc_auc_score 
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

### <font color='green'> Load Dataset 

In [7]:
train_dataset_upsampled = pd.read_csv("Data/Resampled_neonates_train_data_4.csv")
test_dataset_upsampled = pd.read_csv("Data/Resampled_neonates_test_data_4.csv")

X_train = train_dataset_upsampled.drop(["DEAD"], axis=1) 
y_train = train_dataset_upsampled["DEAD"]

X_test = test_dataset_upsampled.drop(["DEAD"], axis=1) 
y_test = test_dataset_upsampled["DEAD"]

In [8]:
columns = X_train.columns.to_list()

### <font color='green'> Data Normalization

In [9]:
Min_max_scaler = MinMaxScaler().fit(X_train)

## Scaling 
X_train_mm_scaled = Min_max_scaler.transform(X_train)
X_test_mm_scaled = Min_max_scaler.transform(X_test)

## Numpy Array to DataFrame 
df_train_mm_scaled = pd.DataFrame(X_train_mm_scaled, columns = columns)
df_test_mm_scaled = pd.DataFrame(X_test_mm_scaled, columns = columns)

### <font color='green'> Feature Selection

In [10]:
tain_mm_scaled_df = df_train_mm_scaled[:]
tain_mm_scaled_df["DEAD"] = y_train 

test_mm_scaled_df = df_test_mm_scaled[:] 
test_mm_scaled_df["DEAD"] = y_test


importances = mutual_info_classif(df_train_mm_scaled, y_train)
feat_importance = pd.Series(importances, tain_mm_scaled_df.columns[0:len(tain_mm_scaled_df.columns)-1])
    
feat_importance = feat_importance.sort_values(ascending=False)
    
selected_features = feat_importance[:30]
selected_features_list_mm_scaled = selected_features.index.to_list()


tain_mm_scaled_df[selected_features_list_mm_scaled].head(2)

Unnamed: 0,temperature_mean,respRate_std,respRate_var,skinTemperature_var,skinTemperature_std,heartRate_std,heartRate_var,sao2_var,sao2_std,bpCuffMean_var,...,BIRTH_WEIGHT,bpCuffDiastolic_mean,bpCuffSystolic_mean,temperature_var,glucometer_mean,sao2_mean,temperature_std,bpCuffMean_mean,PLATELET,D10W_MEAN
0,0.775318,0.422245,0.178291,0.017977,0.134078,0.22911,0.057937,0.009568,0.097817,0.053294,...,0.264267,0.457143,0.602649,0.015695,0.312593,0.881455,0.125281,0.310696,0.28133,0.151138
1,0.709668,0.583964,0.341014,0.007512,0.086672,0.28258,0.086102,0.006551,0.080938,0.020317,...,0.437819,0.47933,0.644907,0.00377,0.272551,0.926056,0.061399,0.351928,0.392157,0.196784


In [11]:
X_train_mm = df_train_mm_scaled[selected_features_list_mm_scaled][:]
X_test_mm = df_test_mm_scaled[selected_features_list_mm_scaled][:]

### <font color='purple'> Hold-out Test (With Single ML) - (+FS, +HO)

In [12]:
def hold_out_single_ML(): 
    rng = np.random.RandomState(42) 
    X_train, X_dsel, y_train_en, y_dsel = train_test_split(X_train_mm, y_train, test_size=0.40, random_state=rng)
    
    model_dt1 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    model_dt2 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    model_dt3 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    model_dt4 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    model_dt5 = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    voting_classifiers = [("dt1", model_dt1),
                          ("dt2", model_dt2),
                          ("dt3", model_dt3),
                          ("dt4", model_dt4), 
                          ("dt5", model_dt5)]
    
    

    model_ada = AdaBoostClassifier(n_estimators=250, learning_rate=0.01)
    model_gb = GradientBoostingClassifier(learning_rate = 0.005, n_estimators=120, max_depth=4)
    model_rf  = RandomForestClassifier(criterion='gini', n_estimators=100, max_depth=2)
    model_cat = MLPClassifier(solver='adam', max_iter=11, verbose=10,learning_rate_init=.003)
    model_lgb = LGBMClassifier(max_depth=1, n_estimators=150, objective="binary")
    model_vote = VotingClassifier(estimators = voting_classifiers, voting='soft') 
    
    model_ada.fit(X_train, y_train_en)
    model_gb.fit(X_train, y_train_en)
    model_rf.fit(X_train, y_train_en)
    model_cat.fit(X_train, y_train_en)
    model_lgb.fit(X_train, y_train_en) 
    model_vote.fit(X_train, y_train_en) 
    
    classifiers_names = ["AdaBoost","GradientBoosting", "RandomForest", "CatBoost", "LGBM", "MajorityVoting"]
    
    pool_classifiers = [model_ada, 
                        model_gb,
                        model_rf, 
                        model_cat, 
                        model_lgb, 
                        model_vote] 
    
    c_acc_list = [] 
    
    for cls in pool_classifiers:
        y_preds_c = cls.predict(X_test_mm) 
        c_acc_list.append(accuracy_score(y_test, y_preds_c))           
        
        
    classifiers_results =  {'name': classifiers_names, 
                            'accuracy': c_acc_list}
    
    clsDF = pd.DataFrame.from_dict(classifiers_results)
        
    # DES STARTS
    
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)  
    metades = METADES(pool_classifiers)
    desknn = DESKNN(pool_classifiers)
    mcb = MCB(pool_classifiers)
    desp = DESP(pool_classifiers)
    knop = KNOP(pool_classifiers)

    fire_knorau = KNORAU(pool_classifiers, DFP=True, k=9) 
    fire_kne = KNORAE(pool_classifiers, DFP=True, k=9) 
    fire_metades = METADES(pool_classifiers, DFP=True, k=5) 
    fire_desknn = DESKNN(pool_classifiers, DFP=True, k=9) 
    fire_mcb = MCB(pool_classifiers, DFP=True, k=9)
    fire_desp = DESP(pool_classifiers, DFP=True, k=9)
    fire_knop = KNOP(pool_classifiers, DFP=True, k=15)
    
    ensemble_classifiers = [fire_knorau, knorau, fire_kne, kne, fire_metades, metades, fire_desknn, desknn, 
                            fire_mcb, mcb, fire_desp, desp, fire_knop, knop]
    
    ensemble_names = ["FIRE-KNORA-U", "KNORA-U", "FIRE-KNORA-E", "KNORA-E", "FIRE-METADES", "METADES",
                      "FIRE-DESKNN", "DESKNN", "FIRE-MCB", "MCB", "FIRE-DESP", "DESP", "FIRE-KNOP", "KNOP"]
    
    acc_list = [] 
    precision_list = [] 
    recall_list = []
    f1_lists = [] 
    auc_list = [] 
    fpr_list = [] 
    tpr_list = []
    
    for e_cls in ensemble_classifiers:
        e_cls.fit(X_dsel, y_dsel)
        y_preds = e_cls.predict(X_test_mm) 
        yproba = e_cls.predict_proba(X_test_mm)[::,1] 

        acc_list.append(accuracy_score(y_test, y_preds))
        precision_list.append(precision_score(y_test, y_preds))
        recall_list.append(recall_score(y_test, y_preds))
        fone = f1_score(y_test, y_preds)
        f1_lists.append(fone) 
        
        fpr, tpr, _ = roc_curve(y_test,  yproba)
        fpr_list.append(fpr)
        tpr_list.append(tpr)
        auc = roc_auc_score(y_test, yproba)
        auc_list.append(auc) 
    
    results =  {'name': ensemble_names, 
                'accuracy': acc_list, 
                'precision': precision_list, 
                'recall': recall_list, 'f1': f1_lists, 
                'auc': auc_list, 
                'tpr': tpr_list, 'fpr': fpr_list}
    
    df = pd.DataFrame.from_dict(results)
    
    return df, clsDF

In [13]:
results_data = []
classifier_results_data = []
for i in range(0, 10):
    result, cls_results = hold_out_single_ML()
    results_data.append(result)
    classifier_results_data.append(cls_results)
    

fireResultsDF = pd.concat(results_data)
classifiersResultsDF = pd.concat(classifier_results_data)

Iteration 1, loss = 0.62293652
Iteration 2, loss = 0.50709734
Iteration 3, loss = 0.42240566
Iteration 4, loss = 0.37263477
Iteration 5, loss = 0.34248651
Iteration 6, loss = 0.32382802
Iteration 7, loss = 0.31242574
Iteration 8, loss = 0.29796014
Iteration 9, loss = 0.29090702
Iteration 10, loss = 0.28124459
Iteration 11, loss = 0.27249038
Iteration 1, loss = 0.63135566
Iteration 2, loss = 0.51320125
Iteration 3, loss = 0.43286008
Iteration 4, loss = 0.38601213
Iteration 5, loss = 0.36353497
Iteration 6, loss = 0.34011263
Iteration 7, loss = 0.32521631
Iteration 8, loss = 0.31736553
Iteration 9, loss = 0.30835975
Iteration 10, loss = 0.29329173
Iteration 11, loss = 0.28727384
Iteration 1, loss = 0.63288587
Iteration 2, loss = 0.51321885
Iteration 3, loss = 0.42630732
Iteration 4, loss = 0.37717238
Iteration 5, loss = 0.34824758
Iteration 6, loss = 0.32702647
Iteration 7, loss = 0.31079426
Iteration 8, loss = 0.29734589
Iteration 9, loss = 0.28829791
Iteration 10, loss = 0.27774981
Ite

In [14]:
ensemble_names = ["FIRE-KNORA-U", "KNORA-U", "FIRE-KNORA-E", "KNORA-E", "FIRE-METADES", "METADES",
                  "FIRE-DESKNN", "DESKNN", "FIRE-MCB", "MCB", "FIRE-DESP", "DESP", "FIRE-KNOP", "KNOP"]

accuracy = [] 
accuracy_std =  [] 
precision = [] 
precision_std = [] 
recall = [] 
recall_std = [] 
f1_score = [] 
f1_std = []
auc_score = []
auc_std = []


for n in ensemble_names:
    accuracy.append(round(fireResultsDF[fireResultsDF.name == n].accuracy.mean(), 3))
    accuracy_std.append(round(fireResultsDF[fireResultsDF.name == n].accuracy.std(), 3))
    precision.append(round(fireResultsDF[fireResultsDF.name == n].precision.mean(), 3))
    precision_std.append(round(fireResultsDF[fireResultsDF.name == n].precision.std(), 3))
    recall.append(round(fireResultsDF[fireResultsDF.name == n].recall.mean(), 3))
    recall_std.append(round(fireResultsDF[fireResultsDF.name == n].recall.std(), 3))
    f1_score.append(round(fireResultsDF[fireResultsDF.name == n].f1.mean(), 3))
    f1_std.append(round(fireResultsDF[fireResultsDF.name == n].f1.std(), 3))
    auc_score.append(round(fireResultsDF[fireResultsDF.name == n].auc.mean(), 3))
    auc_std.append(round(fireResultsDF[fireResultsDF.name == n].auc.std(), 3))
    
final_results = {"method": ensemble_names, 
                     "accuracy": accuracy, 
                     "accuracy_std": accuracy_std,
                     "precision": precision, 
                     "precision_std": precision_std,
                     "recall": recall, 
                     "recall_std": recall_std,
                     "f1_score": f1_score, 
                     "f1_std": f1_std, 
                     "auc": auc_score, 
                     "auc_std": auc_std}

finalResultsDF = pd.DataFrame.from_dict(final_results)

In [15]:
finalResultsDF

Unnamed: 0,method,accuracy,accuracy_std,precision,precision_std,recall,recall_std,f1_score,f1_std,auc,auc_std
0,FIRE-KNORA-U,0.97,0.001,0.943,0.003,1.0,0.0,0.971,0.001,0.991,0.0
1,KNORA-U,0.969,0.002,0.942,0.003,1.0,0.0,0.97,0.001,0.99,0.0
2,FIRE-KNORA-E,0.979,0.001,0.96,0.001,1.0,0.0,0.979,0.001,0.989,0.0
3,KNORA-E,0.98,0.001,0.961,0.001,1.0,0.0,0.98,0.001,0.989,0.0
4,FIRE-METADES,0.936,0.023,0.966,0.002,0.903,0.046,0.933,0.025,0.973,0.02
5,METADES,0.935,0.023,0.965,0.002,0.903,0.046,0.933,0.025,0.974,0.02
6,FIRE-DESKNN,0.937,0.014,0.972,0.001,0.901,0.028,0.935,0.016,0.987,0.001
7,DESKNN,0.937,0.014,0.971,0.002,0.901,0.028,0.934,0.016,0.987,0.001
8,FIRE-MCB,0.953,0.005,0.955,0.005,0.952,0.008,0.953,0.005,0.969,0.003
9,MCB,0.953,0.006,0.955,0.005,0.95,0.009,0.952,0.006,0.968,0.004


In [16]:
classifiers_names = ["AdaBoost", "GradientBoosting", "RandomForest", "CatBoost", "LGBM", "MajorityVoting"]

accuracy_list = []
accuracy_std_list = []
for n in classifiers_names:
    accuracy_list.append(classifiersResultsDF[classifiersResultsDF.name == n].accuracy.mean())
    accuracy_std_list.append(classifiersResultsDF[classifiersResultsDF.name == n].accuracy.std())

final_cls_results = {"classifier": classifiers_names, 
                     "accuracy": accuracy_list, 
                     "accuracy_std": accuracy_std_list}

finalClassifierResultsDF = pd.DataFrame.from_dict(final_cls_results)

In [17]:
finalClassifierResultsDF

Unnamed: 0,classifier,accuracy,accuracy_std
0,AdaBoost,0.950378,0.0
1,GradientBoosting,0.912082,1.170278e-16
2,RandomForest,0.957875,0.009395544
3,CatBoost,0.948652,0.008941981
4,LGBM,0.932039,2.340556e-16
5,MajorityVoting,0.93069,0.0002842752


In [20]:
finalResultsDF.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
accuracy,14.0,0.953786,0.019823,0.928,0.93625,0.953,0.97225,0.98
accuracy_std,14.0,0.009,0.008476,0.001,0.00125,0.0055,0.0155,0.023
precision,14.0,0.958286,0.009507,0.942,0.95125,0.9605,0.965,0.972
precision_std,14.0,0.002429,0.001342,0.001,0.00125,0.002,0.003,0.005
recall,14.0,0.949429,0.048989,0.888,0.9015,0.951,1.0,1.0
recall_std,14.0,0.016357,0.018079,0.0,0.0,0.0085,0.03025,0.046
f1_score,14.0,0.952714,0.021402,0.924,0.93325,0.9525,0.97325,0.98
f1_std,14.0,0.009786,0.009513,0.001,0.001,0.0055,0.0175,0.025
auc,14.0,0.981786,0.009799,0.964,0.97325,0.987,0.989,0.991
auc_std,14.0,0.003929,0.007,0.0,0.0,0.001,0.00375,0.02


In [21]:
finalResultsDF.to_csv("Results/static_pool_results/des_static_pool_6_models_results.csv", index=False)