In [1]:
from sklearn.ensemble import AdaBoostClassifier
from deslib.des.des_knn import DESKNN
from deslib.des.knora_e import KNORAE
from sklearn.tree import DecisionTreeClassifier
from implementedKNORAE import implementedKNORAE

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.base import clone


In [2]:
# Dynamic ensemble selection methods under comparison, keyed by the short
# label used for each one: two DESlib implementations and the local
# re-implementation of KNORA-E.
clfs = dict(
    dkNN=DESKNN(),
    kne=KNORAE(),
    ikne=implementedKNORAE(),
)

In [3]:
# Base names of the CSV files to evaluate.
# For a quick run, shrink this list, e.g. ['australian', 'heart'] or
# ['australian', 'balance', 'breastcan', 'heart'].
datasets = [
    'australian',
    'wisconsin',
    'breastcan',
    'cryotherapy',
    'diabetes',
    'heart',
    'ionosphere',
    'liver',
    'monkthree',
    'sonar',
]

In [4]:
# Evaluation protocol: stratified 5-fold cross-validation repeated twice,
# with a fixed seed so the fold assignment is the same on every run.
n_splits = 5
n_repeats = 2
n_datasets = len(datasets)
rskf = RepeatedStratifiedKFold(
    n_repeats=n_repeats,
    n_splits=n_splits,
    random_state=42,
)

# Accuracy table indexed as [method, dataset, fold].  It has one row more
# than there are entries in `clfs`; the last axis has one slot per fold
# produced by `rskf`.
scores = np.zeros(shape=(1 + len(clfs), n_datasets, n_repeats * n_splits))

In [5]:
# Fill `scores` with the test accuracy of the AdaBoost pool (row 0) and of
# every selection method in `clfs` (rows 1..) for each dataset and CV fold.
for data_id, dataset_name in enumerate(datasets):
    # The loop variable is kept separate from the loaded array so the dataset
    # name is not clobbered. All columns but the last are features; the last
    # column is the integer class label.
    dataset = np.genfromtxt("datasets/%s.csv" % (dataset_name), delimiter=",")
    X = dataset[:, :-1]
    y = dataset[:, -1].astype(int)

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        # Split the training fold in half: one half fits the classifier pool,
        # the other half (X_dsel, y_dsel) is what the selection methods are
        # fitted on.
        X_train, X_dsel, y_train, y_dsel = train_test_split(
            X[train], y[train], test_size=0.5, random_state=42)

        model = DecisionTreeClassifier(
            max_depth=3, max_leaf_nodes=4, random_state=42)
        # AdaBoost re-seeds every boosted tree from its own random_state, so
        # seeding only the base tree is not enough: without a seed here the
        # pool (and every score below) can differ between runs.
        # NOTE: `base_estimator` is the pre-1.2 scikit-learn spelling of the
        # `estimator` parameter; kept to match the version this notebook
        # was written against.
        pool_classifiers = AdaBoostClassifier(
            base_estimator=model, n_estimators=50, random_state=42)
        pool_classifiers.fit(X_train, y_train)

        # Row 0: accuracy of the plain boosted ensemble on the test fold.
        y_pred = pool_classifiers.predict(X[test])
        scores[0, data_id, fold_id] = accuracy_score(y[test], y_pred)

        for clf_id, clf_name in enumerate(clfs):
            # Start from a fresh copy of the template, then attach the
            # already-fitted pool before fitting on the held-out half.
            clf = clone(clfs[clf_name])
            clf.pool_classifiers = pool_classifiers
            clf.fit(X_dsel, y_dsel)
            y_pred = clf.predict(X[test])
            scores[clf_id + 1, data_id, fold_id] = accuracy_score(y[test], y_pred)

In [6]:
# Average over the fold axis, then transpose so that each printed row is a
# dataset and each column is a method.
per_method_means = scores.mean(axis=2)
mean_scores = per_method_means.T
print("\nMean scores:\n", mean_scores)


Mean scores:
 [[0.82681159 0.81086957 0.76956522 0.76956522]
 [0.95135149 0.9527852  0.93205036 0.93349435]
 [0.96633748 0.96706204 0.95902748 0.96194719]
 [0.86111111 0.88888889 0.89444444 0.88888889]
 [0.70180375 0.69401579 0.66275783 0.65690094]
 [0.73518519 0.73703704 0.72962963 0.70740741]
 [0.91299799 0.87891348 0.84185111 0.86754527]
 [0.61594203 0.64202899 0.59275362 0.59710145]
 [0.97472563 0.96210483 0.96662572 0.96841114]
 [0.77897793 0.80487805 0.73060395 0.720964  ]]


In [7]:
# Persist the full per-fold score table; np.save appends the ".npy" suffix.
np.save(file='results', arr=scores)