In [1]:
from sklearn.ensemble import AdaBoostClassifier
from deslib.des.des_knn import DESKNN
from deslib.des.knora_e import KNORAE
from deslib.des.knora_u import KNORAU
from sklearn.tree import DecisionTreeClassifier
from implementedKNORAE import implementedKNORAE

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.base import clone


In [2]:
# Dynamic ensemble selection methods under comparison, keyed by a short label.
# These instances are only templates: the evaluation loop clones one per fold
# and attaches the fitted classifier pool to the clone before fitting it.
clfs = dict(
    dkNN=DESKNN(),
    knu=KNORAU(),
    ikne=implementedKNORAE(),
)

In [3]:
# Earlier dataset selections, kept for reference only (not used below):
#   ['australian', 'balance', 'breastcan', 'heart']
#   ['bupa', 'liver', 'wdbc', 'monk-2', 'cryotherapy', 'sonar', 'australian',
#    'wisconsin', 'monkthree', 'heart']
#   ['australian', 'wisconsin', 'breastcan', 'cryotherapy', 'diabetes',
#    'heart', 'ionosphere', 'liver', 'monkthree', 'sonar']

# Datasets evaluated in this run; each name maps to "datasets/<name>.csv".
datasets = [
    'australian',
    'bupa',
    'breastcan',
    'cryotherapy',
    'diabetes',
    'heart',
    'ionosphere',
    'wdbc',
    'breastcancoimbra',
    'sonar',
]

In [4]:
# Cross-validation protocol: stratified 5-fold, repeated twice, with a fixed seed.
n_splits, n_repeats = 5, 2
n_datasets = len(datasets)
rskf = RepeatedStratifiedKFold(
    n_repeats=n_repeats, n_splits=n_splits, random_state=42)

# Result arrays are indexed as [model, dataset, fold]. The model axis has one
# extra slot (index 0) for the classifier pool itself, followed by one slot per
# entry of `clfs`.
result_shape = (len(clfs) + 1, n_datasets, n_splits * n_repeats)

scores = np.zeros(result_shape)
precision_scores, recall_scores, f1_scores = (
    np.zeros(result_shape) for _ in range(3)
)

In [5]:
# Pair every result array with the metric that fills it, so the pool baseline
# and each DES method are scored by exactly the same code path.
metric_tables = (
    (scores, accuracy_score),
    (precision_scores, precision_score),
    (recall_scores, recall_score),
    (f1_scores, f1_score),
)

for data_id, dataset_name in enumerate(datasets):
    # Each CSV holds the features in all but the last column and the class
    # label in the last column.
    dataset = np.genfromtxt("datasets/%s.csv" % (dataset_name), delimiter=",")
    X = dataset[:, :-1]
    y = dataset[:, -1].astype(int)

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        X_test, y_test = X[test], y[test]

        # Split the training fold in half: one half trains the classifier
        # pool, the other half (DSEL) is what the DES methods are fitted on.
        X_train, X_dsel, y_train, y_dsel = train_test_split(
            X[train], y[train], test_size=0.5, random_state=42)

        model = DecisionTreeClassifier(
            max_depth=3, max_leaf_nodes=4, random_state=42)
        # AdaBoost re-seeds its base estimators from its own random_state, so
        # the seed on the tree alone does not make the pool reproducible; it
        # has to be fixed on the ensemble as well.
        # NOTE(review): `base_estimator` was renamed to `estimator` in newer
        # scikit-learn releases; adjust the keyword if the environment is upgraded.
        pool_classifiers = AdaBoostClassifier(
            base_estimator=model, n_estimators=50, random_state=42)
        pool_classifiers.fit(X_train, y_train)

        # Row 0 of every result array: the pool used directly as a classifier.
        y_pred = pool_classifiers.predict(X_test)
        for table, metric in metric_tables:
            table[0, data_id, fold_id] = metric(y_test, y_pred)

        # Rows 1..len(clfs): each DES method on top of the same fitted pool.
        for clf_id, clf_name in enumerate(clfs):
            # Clone so that no fitted state leaks between folds or datasets.
            clf = clone(clfs[clf_name])
            clf.pool_classifiers = pool_classifiers
            clf.fit(X_dsel, y_dsel)
            y_pred = clf.predict(X_test)
            for table, metric in metric_tables:
                table[clf_id + 1, data_id, fold_id] = metric(y_test, y_pred)

In [6]:
# Average each metric over the fold axis (axis 2), then transpose so that every
# row is a dataset and every column is a model.
mean_scores = scores.mean(axis=2).T
mean_precision_scores = precision_scores.mean(axis=2).T
mean_recall_scores = recall_scores.mean(axis=2).T
mean_f1_scores = f1_scores.mean(axis=2).T

# Print the four summary tables, each under its own heading.
for heading, table in (
    ("\nMean scores:\n", mean_scores),
    ("\nMean precision scores:\n", mean_precision_scores),
    ("\nMean recall scores:\n", mean_recall_scores),
    ("\nMean F1 scores:\n", mean_f1_scores),
):
    print(heading, table)


Mean scores:
 [[0.82536232 0.81014493 0.83478261 0.76956522]
 [0.64057971 0.65072464 0.68115942 0.62318841]
 [0.96998712 0.96414234 0.9677866  0.95756226]
 [0.86111111 0.87222222 0.88333333 0.9       ]
 [0.70180375 0.69401579 0.70763942 0.66275783]
 [0.73518519 0.73888889 0.75740741 0.73148148]
 [0.91871227 0.88034205 0.87173038 0.85042254]
 [0.95695544 0.95343114 0.96396522 0.9428893 ]
 [0.67210145 0.62101449 0.65960145 0.59528986]
 [0.78391405 0.78089431 0.80046458 0.73519164]]

Mean precision scores:
 [[0.81640018 0.80591113 0.83190297 0.74649257]
 [0.58027115 0.59734117 0.64559743 0.55672091]
 [0.94794546 0.95202034 0.95856724 0.9471182 ]
 [0.88545455 0.89391414 0.90604895 0.92646465]
 [0.5769811  0.59672563 0.60404726 0.52180712]
 [0.74077937 0.72709034 0.75083045 0.70720842]
 [0.94509132 0.95652262 0.97126462 0.93334801]
 [0.96224712 0.96099853 0.96742972 0.95301444]
 [0.67608202 0.6756352  0.69819119 0.65090909]
 [0.77195941 0.78416856 0.78487843 0.77684046]]

Mean recall score

In [7]:
# Persist the raw per-fold results (not the means) so later analysis can
# recompute any statistic; np.save appends the ".npy" extension itself.
for filename, values in (
    ('results', scores),
    ('results_prec', precision_scores),
    ('results_rec', recall_scores),
    ('results_f1', f1_scores),
):
    np.save(filename, values)