In [1]:
# Models
from END import EnsembleND
import NestedDichotomies.nd as nd
from sklearn.ensemble import RandomForestClassifier
from PairwiseCoupling import PairwiseCoupling
from sklearn.neural_network import MLPClassifier
# Baselearner
from sklearn import tree
from sklearn import neighbors
from sklearn.linear_model import LogisticRegression
# Methods
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import LabelEncoder
# Basics
import os
from threading import Thread
import openml
import numpy as np
from sklearn.metrics import make_scorer
import warnings;
warnings.filterwarnings('ignore');

In [2]:
def brier_score(y_predict, y_test, nclass):
    """Return the mean multi-class Brier score of probabilistic predictions.

    The true labels are one-hot encoded into an ``(n_samples, nclass)`` matrix;
    the squared differences to ``y_predict`` are summed over all classes and
    samples and divided by the number of samples.

    Parameters
    ----------
    y_predict : array-like of shape (n_samples, nclass)
        Predicted class probabilities, one row per sample.
    y_test : array-like of shape (n_samples,)
        True labels. They are used directly as column indices of the one-hot
        matrix, so they must be integer class indices.
    nclass : int
        Number of classes (columns of ``y_predict``).

    Returns
    -------
    float
        Sum of squared errors divided by ``n_samples``.

    Raises
    ------
    ValueError
        If ``y_test`` is empty or ``y_predict`` is not of shape
        ``(n_samples, nclass)``.
    """
    # np.asarray makes the lookup positional: indexing a pandas Series with
    # y_test[i] is label-based and breaks (or silently reorders the labels)
    # once the index has been shuffled, e.g. by train_test_split.
    labels = np.asarray(y_test).ravel()
    probs = np.asarray(y_predict, dtype=float)
    obj_num = labels.size
    if obj_num == 0:
        raise ValueError('brier_score needs at least one sample')
    if probs.shape != (obj_num, nclass):
        raise ValueError(
            'y_predict has shape %s, expected %s' % (probs.shape, (obj_num, nclass)))
    bs_ytrue = np.zeros((obj_num, nclass))
    bs_ytrue[np.arange(obj_num), labels] = 1
    return np.sum((probs - bs_ytrue) ** 2) / obj_num

In [3]:
from modelcombos.END_DT import EnsembleND_DT as END_DT
from modelcombos.END_LR import EnsembleND_LR as END_LR
from modelcombos.END_NB import EnsembleND_NB as END_NB
from modelcombos.PC_DT import PairwiseCoupling_DT as PC_DT
from modelcombos.PC_LR import PairwiseCoupling_LR as PC_LR
from modelcombos.PC_NB import PairwiseCoupling_NB as PC_NB
def generateModelName(model):
    """Return the short label used for ``model`` in the result files.

    The model's class is compared for equality against each known class in
    turn (subclasses do not match). If several entries match, the last one
    wins; if none matches, the string 'error' is returned.
    """
    model_class = model.__class__
    known_labels = (
        (RandomForestClassifier, 'RF'),
        (PC_DT, 'PC_DT'),
        (PC_LR, 'PC_LR'),
        (PC_NB, 'PC_NB'),
        (MLPClassifier, 'MLP'),
        (END_DT, 'END_DT'),
        (END_LR, 'END_LR'),
        (END_NB, 'END_NB'),
    )
    name = 'error'
    for candidate, label in known_labels:
        if model_class == candidate:
            name = label
    return name

In [4]:
def brier_score_singular_factory(nclass):
    """Build a Brier-score function with the ``(y_true, y_pred)`` argument order.

    The returned callable takes the true labels first and the predicted
    probabilities second, which is the order ``make_scorer`` passes them in.

    Parameters
    ----------
    nclass : int
        Number of classes, i.e. the expected number of probability columns.

    Returns
    -------
    callable
        ``brier_score_singular(y_test, y_predict) -> float``.
    """
    def brier_score_singular(y_test, y_predict):
        """Return the summed squared error to the one-hot labels per sample.

        Raises ``ValueError`` if ``y_test`` is empty or ``y_predict`` is not
        of shape ``(n_samples, nclass)``.
        """
        # Read labels by position: y_test[i] on a pandas Series is label-based
        # and fails (or silently reorders) on the shuffled index of a CV fold.
        labels = np.asarray(y_test).ravel()
        probs = np.asarray(y_predict, dtype=float)
        obj_num = labels.size
        if obj_num == 0:
            raise ValueError('brier_score_singular needs at least one sample')
        if probs.shape != (obj_num, nclass):
            raise ValueError(
                'y_predict has shape %s, expected %s' % (probs.shape, (obj_num, nclass)))
        bs_ytrue = np.zeros((obj_num, nclass))
        bs_ytrue[np.arange(obj_num), labels] = 1
        return np.sum((probs - bs_ytrue) ** 2) / obj_num

    return brier_score_singular

In [5]:
def TMTB_and_ECE(y_predict, y_test, nclass, nbins=10, ccstrat='uniform'):
    """Return a calibration error computed over all (sample, class) pairs.

    The probability matrix and the one-hot encoded labels are both flattened
    to vectors of length ``nclass * y_test.size`` and handed to
    ``calibration_curve`` as a single binary problem. The result is the
    plain (unweighted) mean of the absolute gaps between the two arrays that
    ``calibration_curve`` returns.

    Parameters
    ----------
    y_predict : array-like, reshapeable to ``nclass * y_test.size`` values
        Predicted class probabilities.
    y_test : numpy.ndarray
        True labels, used as integer column indices of the one-hot matrix.
    nclass : int
        Number of classes.
    nbins : int, default 10
        Forwarded to ``calibration_curve`` as ``n_bins``.
    ccstrat : str, default 'uniform'
        Forwarded to ``calibration_curve`` as ``strategy``.
    """
    ninst = y_test.size
    flat_len = nclass * ninst
    flat_pred = np.reshape(y_predict, flat_len)

    # One-hot encode the labels, then flatten in the same row-major order.
    indicator = np.zeros((ninst, nclass))
    indicator[np.arange(ninst), y_test] = 1
    flat_true = np.reshape(indicator, flat_len)

    frac_pos, mean_pred = calibration_curve(
        flat_true, flat_pred, n_bins=nbins, strategy=ccstrat)
    gaps = np.absolute(frac_pos - mean_pred)
    return np.sum(gaps) / frac_pos.size

har


In [12]:
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import RandomizedSearchCV
# Compare all models
def CompareModels(data_X, data_y, n_runs=1):
    """Tune, calibrate and score every candidate model over ``n_runs`` runs.

    Per run the data is split 80/20 into train/test (stratified) and the
    train part 70/30 into model/calibration (stratified). Every pair from
    ``generate_ModelHyperparam_pairs`` is tuned with ``RandomizedSearchCV``
    (10 iterations, 3-fold CV, 3 jobs, Brier scorer) on the model part. The
    best estimator is then wrapped in a sigmoid and an isotonic
    ``CalibratedClassifierCV`` (``cv='prefit'``) fitted on the calibration
    part. Uncalibrated, sigmoid and isotonic predictions on the test part are
    scored with ``brier_score`` and ``TMTB_and_ECE``.

    The seed starts at 2000 and is incremented once per tuned model, so the
    splits of later runs use the seed left over from the previous run.

    NOTE(review): ``base_estimator=``, ``cv='prefit'`` and ``needs_proba=``
    are spelled as in older scikit-learn releases; confirm against the
    installed version before upgrading.

    Parameters
    ----------
    data_X : array-like
        Feature matrix.
    data_y : array-like
        Class labels; the number of distinct values is the class count.
    n_runs : int, default 1
        Number of repetitions.

    Returns
    -------
    tuple of nine lists
        ``(bs_base, ece_base, bs_sig, ece_sig, bs_iso, ece_iso, hyperparams,
        mean_fit_time, mean_score_time)``; each holds one inner list per run
        with one entry per model.
    """
    seed = 2000
    num_classes = np.unique(data_y).size

    # Outer result lists: one inner list per run, one entry per model in it.
    bs_byModel_base_run, ece_byModel_base_run = [], []
    bs_byModel_sig_run, ece_byModel_sig_run = [], []
    bs_byModel_iso_run, ece_byModel_iso_run = [], []
    hyperparam_byModel_run, mft_byModel_run, mst_byModel_run = [], [], []

    def evaluate(estimator, X_eval, y_eval):
        # Brier score and calibration error of one fitted estimator.
        probabilities = estimator.predict_proba(X_eval)
        brier = brier_score(y_predict=probabilities, y_test=y_eval, nclass=num_classes)
        ece = TMTB_and_ECE(y_predict=probabilities, y_test=y_eval, nclass=num_classes, nbins=10, ccstrat='uniform')
        return brier, ece

    for run_index in range(n_runs):
        print('run',run_index+1)

        # Per-run containers, registered up front in the outer lists.
        run_bs_base, run_ece_base = [], []
        run_bs_sig, run_ece_sig = [], []
        run_bs_iso, run_ece_iso = [], []
        run_hyperparams, run_fit_times, run_score_times = [], [], []
        bs_byModel_base_run.append(run_bs_base)
        ece_byModel_base_run.append(run_ece_base)
        bs_byModel_sig_run.append(run_bs_sig)
        ece_byModel_sig_run.append(run_ece_sig)
        bs_byModel_iso_run.append(run_bs_iso)
        ece_byModel_iso_run.append(run_ece_iso)
        hyperparam_byModel_run.append(run_hyperparams)
        mft_byModel_run.append(run_fit_times)
        mst_byModel_run.append(run_score_times)

        # Train/test split of the data (80/20).
        X_train, X_test, y_train, y_test = tts(
            data_X, data_y, test_size=0.2, stratify=data_y, random_state=seed)
        # Model/calibration split of the training part (70/30).
        X_model, X_calibration, y_model, y_calibration = tts(
            X_train, y_train, test_size=0.3, stratify=y_train, random_state=seed+1)

        scoring = {'bs': make_scorer(brier_score_singular_factory(num_classes), greater_is_better=False, needs_proba=True)}

        model_dists = generate_ModelHyperparam_pairs(num_classes, seed)

        for model_number, candidate in enumerate(model_dists, start=1):
            print('model',model_number)
            search = RandomizedSearchCV(
                candidate[0],
                param_distributions=candidate[1],
                scoring=scoring,
                refit='bs',
                n_iter=10,
                n_jobs=3,
                cv=3,
                random_state=seed,
            )
            search.fit(X_model, y_model)
            mean_fit_time = search.cv_results_['mean_fit_time'].mean()
            mean_score_time = search.cv_results_['mean_score_time'].mean()
            print('mean fit time:',mean_fit_time)
            print('mean score time:',mean_score_time)
            seed += 1
            print(search.best_params_)
            run_hyperparams.append(search.best_params_)

            # Best estimator as refit by the search.
            tuned = search.best_estimator_

            # Fit both calibrators (sigmoid first, then isotonic) before
            # any prediction is made.
            calibrated = {}
            for method in ('sigmoid', 'isotonic'):
                wrapper = CalibratedClassifierCV(base_estimator=tuned, method=method, cv='prefit')
                wrapper.fit(X_calibration, y_calibration)
                calibrated[method] = wrapper

            # Score uncalibrated, sigmoid and isotonic predictions in turn.
            bs_base, ece_base = evaluate(tuned, X_test, y_test)
            bs_sig, ece_sig = evaluate(calibrated['sigmoid'], X_test, y_test)
            bs_iso, ece_iso = evaluate(calibrated['isotonic'], X_test, y_test)

            run_bs_base.append(bs_base)
            run_ece_base.append(ece_base)
            run_bs_sig.append(bs_sig)
            run_ece_sig.append(ece_sig)
            run_bs_iso.append(bs_iso)
            run_ece_iso.append(ece_iso)
            run_fit_times.append(mean_fit_time)
            run_score_times.append(mean_score_time)
            print(bs_base,ece_base,bs_sig, ece_sig, bs_iso, ece_iso)

    return (bs_byModel_base_run, ece_byModel_base_run,
            bs_byModel_sig_run, ece_byModel_sig_run,
            bs_byModel_iso_run, ece_byModel_iso_run,
            hyperparam_byModel_run, mft_byModel_run, mst_byModel_run)

In [13]:
from modelcombos.END_DT import EnsembleND_DT as END_DT
from modelcombos.END_LR import EnsembleND_LR as END_LR
from modelcombos.END_NB import EnsembleND_NB as END_NB
from modelcombos.PC_DT import PairwiseCoupling_DT as PC_DT
from modelcombos.PC_LR import PairwiseCoupling_LR as PC_LR
from modelcombos.PC_NB import PairwiseCoupling_NB as PC_NB
from scipy.stats import randint
from scipy.stats import uniform
def generate_ModelHyperparam_pairs(nclasses, seed):
    """Build the list of ``(estimator, param_distributions)`` pairs to compare.

    The order is fixed: RF, END_DT, END_LR, END_NB, PC_DT, PC_LR, PC_NB, MLP.
    Every estimator receives ``seed`` so that a run can be reproduced.

    Parameters
    ----------
    nclasses : int
        Number of classes, forwarded to the END_* and PC_* estimators.
    seed : int
        Random seed forwarded to every estimator.

    Returns
    -------
    list of tuple
        ``(estimator, dict)`` pairs; the dict maps hyperparameter names to
        scipy distributions or lists of candidate values.
    """
    # scipy's uniform(loc, scale) samples from [loc, loc + scale] and
    # randint(low, high) from the integers low .. high - 1.
    models = []
    RFC_paramdist = {
        'min_impurity_decrease': uniform(0.00001, 0.1),
        'min_samples_leaf': randint(1, 51),
    }
    models.append((RandomForestClassifier(n_estimators=45, random_state=seed), RFC_paramdist))

    # The base-learner distributions are shared by the END and PC variants.
    DT_paramdist = {
        'max_depth': randint(1, 4),
    }
    LR_paramdist = {
        'penalty': ['l1', 'l2'],
        'C': uniform(0.01, 20),
    }
    NB_paramdist = {
        'var_smoothing': uniform(0.0000000001, 1.0),
    }
    models.append((END_DT(number_of_nds=5, number_of_classes=nclasses, max_depth=1, generator_String='random_pair', random_state=seed), DT_paramdist))
    models.append((END_LR(number_of_nds=5, number_of_classes=nclasses, penalty='l2', C=1.0, generator_String='random_pair', random_state=seed), LR_paramdist))
    models.append((END_NB(number_of_nds=5, number_of_classes=nclasses, var_smoothing=0.00001, generator_String='random_pair', random_state=seed), NB_paramdist))
    models.append((PC_DT(classes=nclasses, seed=seed, max_depth=1), DT_paramdist))
    models.append((PC_LR(classes=nclasses, seed=seed, penalty='l2', C=1.0), LR_paramdist))
    models.append((PC_NB(classes=nclasses, seed=seed, var_smoothing=0.0001), NB_paramdist))

    MLP_paramdist = {
        'alpha': uniform(0.00000001, 10),
        'batch_size': [100, 200, 300, 400, 500],
        'power_t': uniform(0.001, 5),
    }
    # The MLP used to be the only unseeded model, which made its weight
    # initialisation and batch shuffling differ between otherwise identical runs.
    models.append((MLPClassifier(hidden_layer_sizes=(35,), activation='logistic', learning_rate='invscaling', random_state=seed), MLP_paramdist))
    return models

In [14]:
from time import gmtime, strftime
    
def write_single_value_per_run(file, models, value_name, values):
    """Write a titled, tab-separated table with one column per model.

    Layout: a line with ``value_name``, a header line with the model labels
    from ``generateModelName``, then one line per entry of ``values`` holding
    the ``str()`` of each item.

    Parameters
    ----------
    file : writable text file object
    models : sequence of (estimator, ...) tuples
        Only the first element of each tuple is used, for the header.
    value_name : str
        Title written on the first line.
    values : sequence of sequences
        One inner sequence per run, one item per model.
    """
    file.write(value_name+'\n')

    # Header: first label bare, every further label preceded by a tab.
    file.write(generateModelName(models[0][0]))
    for entry in models[1:]:
        file.write('\t'+generateModelName(entry[0]))
    file.write('\n')

    # Body: same layout, one line per run.
    for row in values:
        file.write(str(row[0]))
        for cell in row[1:]:
            file.write('\t'+str(cell))
        file.write('\n')
    
def _write_score_table(file, models, title, base_runs, sig_runs, iso_runs):
    """Write one titled table with Base/Sigmoid/Isotonic columns per model."""
    file.write(title + '\n')
    # Each model label heads three columns, hence three tabs between labels.
    file.write('\t\t\t'.join(generateModelName(entry[0]) for entry in models))
    file.write('\n' + '\t'.join(['Base\tSigmoid\tIsotonic'] * len(models)) + '\n')
    for run in range(len(base_runs)):
        cells = []
        for k in range(len(base_runs[run])):
            cells.append(str(base_runs[run][k]))
            cells.append(str(sig_runs[run][k]))
            cells.append(str(iso_runs[run][k]))
        file.write('\t'.join(cells) + '\n')


def save_result(dataset_name, nclasses, bs_byModel_base_run, ece_byModel_base_run, bs_byModel_sig_run, ece_byModel_sig_run, bs_byModel_iso_run, ece_byModel_iso_run, hyperparam_byModel_run, mft_byModel_run, mst_byModel_run):
    """Write all results of one experiment to a timestamped text file.

    The file is created as
    ``<cwd>/experiments/<dataset_name>/result_<UTC timestamp>.txt`` and holds,
    in this order: the Brier-score table, the ECE table, the selected
    hyperparameters, the mean fit times and the mean score times.

    Parameters
    ----------
    dataset_name : str
        Name of the sub-directory under ``experiments``.
    nclasses : int
        Number of classes; only needed to rebuild the model list for the
        column headers.
    bs_byModel_base_run, bs_byModel_sig_run, bs_byModel_iso_run : list of list
        Brier scores per run and model (uncalibrated / sigmoid / isotonic).
    ece_byModel_base_run, ece_byModel_sig_run, ece_byModel_iso_run : list of list
        ECE values per run and model (uncalibrated / sigmoid / isotonic).
    hyperparam_byModel_run, mft_byModel_run, mst_byModel_run : list of list
        Best hyperparameters, mean fit time and mean score time per run and
        model.
    """
    directory = os.path.join(os.getcwd(), 'experiments', dataset_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)

    # The models are only rebuilt to obtain their labels; the seed is irrelevant.
    models = generate_ModelHyperparam_pairs(nclasses=nclasses, seed=42)
    filename = 'result_' + strftime("%Y-%m-%d %H_%M_%S", gmtime()) + '.txt'
    with open(os.path.join(directory, filename), 'w') as file:
        _write_score_table(file, models, 'Brier-Score',
                           bs_byModel_base_run, bs_byModel_sig_run, bs_byModel_iso_run)
        _write_score_table(file, models, 'ECE-Score',
                           ece_byModel_base_run, ece_byModel_sig_run, ece_byModel_iso_run)
        write_single_value_per_run(file, models, 'Hyperparameter', hyperparam_byModel_run)
        write_single_value_per_run(file, models, 'Mean Fit Time', mft_byModel_run)
        write_single_value_per_run(file, models, 'Mean Score Time', mst_byModel_run)

In [None]:
# Driver cell: download one OpenML dataset, run the full model comparison on
# it and write the results to disk.
#
# OpenML dataset ids collected for this experiment (id - name); change the id
# passed to get_dataset() below to switch datasets.
#1515 - micro-mass
#1459 - artificial-characters
#1233 - eating
#1569 - poker-hand
#1503 - spoken-arabic-digit
#4541 - Diabetes130US
#4538 - GesturePhaseSegmentationProcessed
#41991 - Kuzushiji-49 - error
#40670 - dna
#1478 - har
#40984 - segment
#40498 - wine-quality-white
#40499 - texture
#40686 - solar-flare
#41972 - Indian_pines
#1475 - first-order-theorem-proving
# Currently selected: 1478 ('har' in the list above).
dataset = openml.datasets.get_dataset(1478)
dataset_name = dataset.name
print(dataset_name)
# Features and target as arrays; the target is the dataset's default target attribute.
X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format='array', target=dataset.default_target_attribute)
num_classes = np.unique(y).size
# 20 runs. Returned lists: Brier score (bs) and ECE (ece) for the (b)ase,
# (s)igmoid and (i)sotonic variants, followed by the selected hyperparameters,
# mean fit times and mean score times.
bs_b, ece_b, bs_s, ece_s, bs_i, ece_i, hyppar, mft, mst = CompareModels(X,y, 20)
# Results are written below ./experiments/<dataset_name>/ in the current working directory.
save_result(dataset_name=dataset_name, nclasses=num_classes, bs_byModel_base_run=bs_b, ece_byModel_base_run=ece_b, bs_byModel_sig_run=bs_s, ece_byModel_sig_run=ece_s, bs_byModel_iso_run=bs_i, ece_byModel_iso_run=ece_i, hyperparam_byModel_run=hyppar, mft_byModel_run=mft, mst_byModel_run=mst)

run 1
model 1
mean fit time: 0.8581302404403687
mean score time: 0.015036876996358233
{'min_impurity_decrease': 0.006820628846711264, 'min_samples_leaf': 28}
0.17420548520877763 0.14113890764992032 0.11594731960489257 0.04482393646309744 0.11524860498042434 0.03260915212399192
model 2
mean fit time: 8.905376688639324
mean score time: 0.042052054405212404
{'max_depth': 3}
0.12995248474795015 0.03489681186210245 0.13411680910395435 0.10832293729548485 0.11817541320433107 0.017184094110143478
model 3
mean fit time: 19.85133022467295
mean score time: 0.23704158465067543
{'C': 3.7387252136774527, 'penalty': 'l1'}
0.028745175480731622 0.05378560189437178 0.031455236960387774 0.20228047815505718 0.029000226888874617 0.042276721086407044
model 4
mean fit time: 4.557569440205893
mean score time: 1.2596526225407918
{'var_smoothing': 0.973649118052793}
0.3548296208094898 0.08169437610800975 0.277814059138731 0.05375908167733816 0.2544807526779767 0.03503325006940079
model 5
mean fit time: 1.65676

mean fit time: 14.959249146779376
mean score time: 0.19911834398905437
{'C': 15.686806624312103, 'penalty': 'l2'}
0.02277785458669842 0.05258757077947375 0.02590221922068659 0.2134472356253978 0.024435770795962944 0.09145717717627408
model 4
mean fit time: 3.9984981377919517
mean score time: 1.2172794580459596
{'var_smoothing': 0.9564737354866564}
0.38588161999058446 0.17552757711899375 0.3036599752848741 0.11105956027593444 0.2445560924551559 0.03783686034659021
model 5
mean fit time: 1.2926393429438272
mean score time: 2.5322684685389207
{'max_depth': 3}
0.14721819123914226 0.054537485631465335 0.15194279768114466 0.10562934898730725 0.1430318525411174 0.05730297447811152
model 6
mean fit time: 2.330815037091573
mean score time: 2.0255462408065794
{'C': 18.906979209000706, 'penalty': 'l2'}
0.021927766175783684 0.05364027169197102 0.02521598363366798 0.295356506165465 0.02424681935308448 0.1071755922046623
model 7
mean fit time: 0.25335693359375
mean score time: 4.986165865262349
{'va

mean fit time: 1.1670327186584473
mean score time: 2.2776728312174486
{'max_depth': 3}
0.13229760495962917 0.053412584078142755 0.13737085335552612 0.0540371458650641 0.12830356732965198 0.05509256182472553
model 6
mean fit time: 1.9917521397272746
mean score time: 1.7068148056666057
{'C': 11.859229469495581, 'penalty': 'l2'}
0.021297089944842293 0.055806114130191306 0.02159773823834896 0.15134352047789684 0.021140748272173042 0.07062718803138802
model 7
mean fit time: 0.21789709726969403
mean score time: 4.429769277572633
{'var_smoothing': 0.9693154448324077}
0.2815735135793718 0.07532283623177477 0.27180470262909684 0.08532280205270232 0.23497340703087846 0.033007846797124686
model 8
mean fit time: 8.010352611541748
mean score time: 0.01359572410583496
{'alpha': 0.275446412189653, 'batch_size': 300, 'power_t': 3.5076951033319106}
0.04033200303583816 0.08575561330768014 0.03331568179602135 0.07472506044998813 0.03128102203771812 0.07847073045684116
run 10
model 1
mean fit time: 0.7514

mean fit time: 0.21489923795064286
mean score time: 4.390936946868896
{'var_smoothing': 0.9394977219648949}
0.29893685336795284 0.09768785336504196 0.25888041128232914 0.07619558443954219 0.21867618773290715 0.05031584707644745
model 8
mean fit time: 6.753205585479736
mean score time: 0.013195943832397462
{'alpha': 0.1884352436666284, 'batch_size': 300, 'power_t': 1.0250537414043126}
0.038088278251792625 0.06202986808394979 0.03661478089700316 0.08322206641086216 0.033318358749896686 0.049001628572856774
run 14
model 1
mean fit time: 0.7931134064992269
mean score time: 0.014628855387369794
{'min_impurity_decrease': 0.001026008354243092, 'min_samples_leaf': 46}
0.13158234923601686 0.13068789064146075 0.0884351652516288 0.03921106149318559 0.08695539800066203 0.040577731306373585
model 2
mean fit time: 6.946978012720743
mean score time: 0.034055622418721516
{'max_depth': 3}
0.12802939515158912 0.057972333618967864 0.12823758859080786 0.08557506265732015 0.12230331071353234 0.033657005451

mean fit time: 0.846201475461324
mean score time: 0.015528599421183264
{'min_impurity_decrease': 0.003273836979912349, 'min_samples_leaf': 13}
0.1389449795224041 0.10840892642364997 0.10856825310662876 0.06851031609066106 0.10469113231572831 0.04246065354100402
model 2
mean fit time: 7.859785850842795
mean score time: 0.03629129727681478
{'max_depth': 3}
0.15752823826588852 0.04737790264649233 0.166107367885232 0.10367537101783646 0.1554309213051762 0.04232754178959406
model 3
mean fit time: 15.91301961739858
mean score time: 0.19722010294596354
{'C': 7.255562116014941, 'penalty': 'l2'}
0.02856814874220612 0.08296273314495714 0.028603479684330628 0.0963232406146006 0.029407961312750364 0.05854502910714191
model 4
mean fit time: 4.0572517315546675
mean score time: 1.1355843146642048
{'var_smoothing': 0.08845230349852072}
0.41801080189214296 0.14789081888727634 0.2950103534372374 0.05550057298086245 0.25624745229160145 0.032986495986473456
model 5
mean fit time: 1.1749239921569825
mean s