In [2]:
# Models
from END import EnsembleND
import NestedDichotomies.nd as nd
from sklearn.ensemble import RandomForestClassifier
from PairwiseCoupling import PairwiseCoupling
from sklearn.neural_network import MLPClassifier
# Baselearner
from sklearn import tree
from sklearn import neighbors
from sklearn.linear_model import LogisticRegression
# Methods
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import LabelEncoder
# Basics
import pandas as pd
import os
from threading import Thread
import openml
import numpy as np
from sklearn.metrics import make_scorer
import warnings;
warnings.filterwarnings('ignore');

In [3]:
def brier_score(y_predict, y_test, nclass):
    """Multi-class Brier score, averaged over instances.

    For every instance the squared differences between the predicted
    probability vector and the one-hot encoding of the true label are summed
    over all classes; the result is the mean of these per-instance sums.

    Parameters
    ----------
    y_predict : array-like
        Predicted class probabilities, one row per instance and one column
        per class (it is subtracted from an ``(n_instances, nclass)`` array).
    y_test : array-like
        True class labels. They are used directly as column indices, so they
        must be integers in ``[0, nclass)``.
    nclass : int
        Number of classes (columns of the one-hot matrix).

    Returns
    -------
    float
        The Brier score (0 for perfect one-hot predictions).
    """
    # Convert to a plain array first so labels are always read by position;
    # indexing a pandas Series with ``y_test[i]`` would look up index *labels*.
    labels = np.asarray(y_test).ravel()
    obj_num = labels.size
    # One-hot encode the true labels in a single vectorised assignment.
    bs_ytrue = np.zeros((obj_num, nclass))
    bs_ytrue[np.arange(obj_num), labels] = 1
    return np.sum((np.asarray(y_predict) - bs_ytrue) ** 2) / obj_num

In [4]:
from modelcombos.END_DT import EnsembleND_DT as END_DT
from modelcombos.END_LR import EnsembleND_LR as END_LR
from modelcombos.END_NB import EnsembleND_NB as END_NB
from modelcombos.PC_DT import PairwiseCoupling_DT as PC_DT
from modelcombos.PC_LR import PairwiseCoupling_LR as PC_LR
from modelcombos.PC_NB import PairwiseCoupling_NB as PC_NB
def generateModelName(model):
    """Return the short label used for ``model``'s class in result files.

    The class of ``model`` is compared by equality against each known model
    class; ``'error'`` is returned when none of them matches.
    """
    known_labels = (
        (RandomForestClassifier, 'RF'),
        (PC_DT, 'PC_DT'),
        (PC_LR, 'PC_LR'),
        (PC_NB, 'PC_NB'),
        (MLPClassifier, 'MLP'),
        (END_DT, 'END_DT'),
        (END_LR, 'END_LR'),
        (END_NB, 'END_NB'),
    )
    label = 'error'
    # Every entry is checked (no early exit), so a later match overrides an
    # earlier one.
    for candidate, candidate_label in known_labels:
        if model.__class__ == candidate:
            label = candidate_label
    return label

In [5]:
def brier_score_singular_factory(nclass):
    """Create a Brier-score function bound to a fixed number of classes.

    The returned callable takes ``(y_test, y_predict)`` - true labels first,
    predicted probabilities second - so that it can be wrapped by
    ``make_scorer``.

    Parameters
    ----------
    nclass : int
        Number of classes (columns of the one-hot matrix).

    Returns
    -------
    callable
        ``brier_score_singular(y_test, y_predict) -> float``.
    """
    def brier_score_singular(y_test, y_predict):
        """Mean over instances of the summed squared probability errors."""
        # Read labels by position; ``y_test[i]`` on a pandas Series would
        # look up index labels instead.
        labels = np.asarray(y_test).ravel()
        obj_num = labels.size
        # Labels are used as column indices: integers in [0, nclass).
        bs_ytrue = np.zeros((obj_num, nclass))
        bs_ytrue[np.arange(obj_num), labels] = 1
        return np.sum((np.asarray(y_predict) - bs_ytrue) ** 2) / obj_num

    return brier_score_singular

In [6]:
def TMTB_and_ECE(y_predict, y_test, nclass, nbins=10, ccstrat='uniform'):
    """Calibration error over all (instance, class) probability entries.

    The predicted probabilities and the one-hot encoded labels are both
    flattened to one dimension and passed to ``calibration_curve``. The
    result is the mean absolute difference between the two arrays it returns;
    every returned bin counts equally (no weighting by bin population).

    Parameters
    ----------
    y_predict : array-like
        Predicted probabilities holding ``nclass * y_test.size`` values.
    y_test : numpy.ndarray
        Integer class labels, used as column indices of the one-hot matrix.
    nclass : int
        Number of classes.
    nbins : int, optional
        Forwarded to ``calibration_curve`` as ``n_bins``.
    ccstrat : str, optional
        Forwarded to ``calibration_curve`` as ``strategy``.

    Returns
    -------
    float
        The unweighted mean absolute calibration gap.
    """
    n_instances = y_test.size
    flat_length = nclass * n_instances

    # Flatten the probability matrix; this raises if the sizes do not match.
    flat_probs = np.reshape(y_predict, flat_length)

    # One-hot encode the labels and flatten them the same way.
    one_hot = np.zeros((n_instances, nclass))
    one_hot[np.arange(n_instances), y_test] = 1
    flat_truth = np.reshape(one_hot, flat_length)

    prob_true, prob_pred = calibration_curve(
        flat_truth, flat_probs, n_bins=nbins, strategy=ccstrat
    )
    gaps = np.absolute(prob_true - prob_pred)
    return np.sum(gaps) / prob_true.size

In [7]:
def saveCSVofData(path, file_name, data, columns):
    """Write ``data`` as a tab-separated, UTF-8 encoded ``<file_name>.csv``.

    ``path`` is prepended to the file name as-is, so it has to end with a
    path separator. ``columns`` become the header row; no index column is
    written.
    """
    table = pd.DataFrame(np.array(data), columns=columns)
    target = ''.join((path, file_name, '.csv'))
    table.to_csv(target, sep='\t', encoding='utf-8', index=False)
    
def saveResults(dataset_id, bs_byModel_base_run, ece_byModel_base_run, bs_byModel_sig_run, ece_byModel_sig_run, bs_byModel_iso_run, ece_byModel_iso_run, hyperparam_byModel_run, mft_byModel_run, mst_byModel_run):
    """Store all per-run results of one dataset as CSV files.

    A fresh directory ``<cwd>/experiments/<dataset_id>/result_<UTC timestamp>/``
    is created and one file per result kind is written into it via
    ``saveCSVofData``. Each ``*_run`` argument holds one row per run with one
    value per model; the column headers are the model labels returned by
    ``generateModelName``.

    Parameters
    ----------
    dataset_id : any
        Identifier of the dataset; converted with ``str`` for the path.
    bs_byModel_base_run, ece_byModel_base_run : list of list
        Brier score / ECE of the uncalibrated models.
    bs_byModel_sig_run, ece_byModel_sig_run : list of list
        Brier score / ECE of the sigmoid-calibrated models.
    bs_byModel_iso_run, ece_byModel_iso_run : list of list
        Brier score / ECE of the isotonic-calibrated models.
    hyperparam_byModel_run : list of list
        Selected hyperparameters per model.
    mft_byModel_run, mst_byModel_run : list of list
        Mean fit time / mean score time per model.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having been executed to bring these names into scope.
    from time import gmtime, strftime

    timestamp = strftime("%Y-%m-%d %H_%M_%S", gmtime())
    directory = os.getcwd()+'/experiments/'+str(dataset_id)+'/result_'+timestamp+'/'
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs(directory, exist_ok=True)

    # The model list is built with default arguments only to obtain the
    # column labels, in the same order the results were produced.
    models = generate_ModelHyperparam_pairs()
    m_names = [generateModelName(entry[0]) for entry in models]

    outputs = (
        ('brier_base', bs_byModel_base_run),
        ('ece_base', ece_byModel_base_run),
        ('brier_sigmoid', bs_byModel_sig_run),
        ('ece_sigmoid', ece_byModel_sig_run),
        ('brier_isotonic', bs_byModel_iso_run),
        ('ece_isotonic', ece_byModel_iso_run),
        ('hyperparameter', hyperparam_byModel_run),
        ('mean_fit_time', mft_byModel_run),
        ('mean_score_time', mst_byModel_run),
    )
    for file_name, data in outputs:
        saveCSVofData(directory, file_name, data, m_names)

In [8]:
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import RandomizedSearchCV
# Compare all models
def _calibrate_prefit(model, method, X_calibration, y_calibration):
    """Fit a ``CalibratedClassifierCV`` (``cv='prefit'``) around an already fitted ``model``."""
    # NOTE(review): newer scikit-learn releases renamed `base_estimator` to
    # `estimator` - confirm against the installed version before upgrading.
    calibrated = CalibratedClassifierCV(base_estimator=model, method=method, cv='prefit')
    calibrated.fit(X_calibration, y_calibration)
    return calibrated


def _score_probabilities(estimator, X_test, y_test, num_classes):
    """Return ``(brier_score, ece)`` of ``estimator.predict_proba(X_test)`` against ``y_test``."""
    y_pred = estimator.predict_proba(X_test)
    bs = brier_score(y_predict=y_pred, y_test=y_test, nclass=num_classes)
    ece = TMTB_and_ECE(y_predict=y_pred, y_test=y_test, nclass=num_classes, nbins=10, ccstrat='uniform')
    return bs, ece


def CompareModels(data_X, data_y, n_runs=1, n_jobs=1):
    """Tune, calibrate and evaluate every model on repeated data splits.

    For each run the data is split 80/20 into train/test and the train part
    70/30 into model/calibration data (both stratified). Every
    ``(estimator, param_distributions)`` pair from
    ``generate_ModelHyperparam_pairs`` is tuned with ``RandomizedSearchCV``
    (10 candidates, 3-fold CV, refit on the negated Brier score) on the model
    data. The best estimator is then wrapped in a sigmoid and an isotonic
    prefit calibrator fitted on the calibration data, and the uncalibrated and
    both calibrated variants are scored on the test data.

    The seed starts at 42 and is advanced by one after every fitted search,
    so later models and later runs use different random states.

    Parameters
    ----------
    data_X : array-like
        Feature matrix.
    data_y : array-like
        Class labels; also used for stratification.
    n_runs : int, optional
        Number of repetitions.
    n_jobs : int, optional
        Forwarded to ``RandomizedSearchCV``.

    Returns
    -------
    tuple of nine lists
        Each list has one entry per run, which is itself a list with one
        value per model. Order: Brier base, ECE base, Brier sigmoid, ECE
        sigmoid, Brier isotonic, ECE isotonic, best hyperparameters, mean fit
        time, mean score time.
    """
    seed = 42
    num_classes = np.unique(data_y).size
    # The scorer only depends on the number of classes, so build it once.
    # NOTE(review): `needs_proba` is deprecated in newer scikit-learn
    # releases - confirm against the installed version before upgrading.
    scoring = {'bs': make_scorer(brier_score_singular_factory(num_classes), greater_is_better=False, needs_proba=True)}

    # Keys in the order in which the result lists are returned.
    result_keys = ('bs_base', 'ece_base', 'bs_sig', 'ece_sig', 'bs_iso', 'ece_iso', 'hyperparam', 'mft', 'mst')
    per_run = {key: [] for key in result_keys}

    # RUNS
    for i in range(n_runs):
        print('run',i+1)
        # One fresh per-model list per result kind for this run.
        row = {key: [] for key in result_keys}
        for key in result_keys:
            per_run[key].append(row[key])

        # train/test split of the data (80/20)
        X_train, X_test, y_train, y_test = tts(data_X, data_y, test_size=0.2, stratify=data_y, random_state=seed)
        # model/calibration split of the training data (70/30)
        X_model, X_calibration, y_model, y_calibration = tts(X_train, y_train, test_size=0.3, stratify=y_train, random_state=seed+1)

        model_dists = generate_ModelHyperparam_pairs(num_classes, seed)

        for k, (estimator, param_dist) in enumerate(model_dists):
            print('model',k+1)
            model_rs = RandomizedSearchCV(estimator, param_distributions=param_dist, scoring=scoring, refit='bs', n_iter=10, n_jobs=n_jobs, cv=3, random_state=seed)
            model_rs.fit(X_model, y_model)
            # Averages over all sampled hyperparameter candidates.
            mean_fit_time = model_rs.cv_results_['mean_fit_time'].mean()
            mean_score_time = model_rs.cv_results_['mean_score_time'].mean()
            print('mean fit time:',mean_fit_time)
            print('mean score time:',mean_score_time)
            seed += 1
            print(model_rs.best_params_)
            row['hyperparam'].append(model_rs.best_params_)

            # best estimator, refitted on the whole model data by the search
            model = model_rs.best_estimator_
            # fit both calibrators before any evaluation
            c_sig_model = _calibrate_prefit(model, 'sigmoid', X_calibration, y_calibration)
            c_iso_model = _calibrate_prefit(model, 'isotonic', X_calibration, y_calibration)

            # evaluate the base, sigmoid-calibrated and isotonic-calibrated model
            bs_base, ece_base = _score_probabilities(model, X_test, y_test, num_classes)
            bs_sig, ece_sig = _score_probabilities(c_sig_model, X_test, y_test, num_classes)
            bs_iso, ece_iso = _score_probabilities(c_iso_model, X_test, y_test, num_classes)

            # append results
            row['bs_base'].append(bs_base)
            row['ece_base'].append(ece_base)
            row['bs_sig'].append(bs_sig)
            row['ece_sig'].append(ece_sig)
            row['bs_iso'].append(bs_iso)
            row['ece_iso'].append(ece_iso)
            row['mft'].append(mean_fit_time)
            row['mst'].append(mean_score_time)
            print(bs_base,ece_base,bs_sig, ece_sig, bs_iso, ece_iso)
    return tuple(per_run[key] for key in result_keys)

In [9]:
from modelcombos.END_DT import EnsembleND_DT as END_DT
from modelcombos.END_LR import EnsembleND_LR as END_LR
from modelcombos.END_NB import EnsembleND_NB as END_NB
from modelcombos.PC_DT import PairwiseCoupling_DT as PC_DT
from modelcombos.PC_LR import PairwiseCoupling_LR as PC_LR
from modelcombos.PC_NB import PairwiseCoupling_NB as PC_NB
from scipy.stats import randint
from scipy.stats import uniform
def generate_ModelHyperparam_pairs(nclasses=3, seed=42):
    """Build the ``(estimator, param_distributions)`` pairs to compare.

    The list order is fixed: random forest, the three nested-dichotomy
    ensembles (decision tree, logistic regression, naive Bayes base learner),
    the three pairwise-coupling models (same base learners) and finally the
    MLP. Every estimator receives ``seed`` as its random state.

    Parameters
    ----------
    nclasses : int, optional
        Number of classes, forwarded to the END_* and PC_* models.
    seed : int, optional
        Random state handed to every estimator.

    Returns
    -------
    list of tuple
        ``(unfitted estimator, dict of hyperparameter distributions)`` pairs.
    """
    # scipy.stats.uniform(loc, scale) covers [loc, loc + scale];
    # scipy.stats.randint(low, high) excludes ``high``.
    RFC_paramdist = {
        'min_impurity_decrease': uniform(0.00001, 0.1),
        'min_samples_leaf': randint(1,51)
    }
    # The base-learner distributions are shared by the END_* and PC_* variants.
    DT_paramdist = {
        'max_depth': randint(1,4)
    }
    LR_paramdist = {
        'penalty': ['l1', 'l2'],
        'C': uniform(0.01,20)
    }
    NB_paramdist = {
        'var_smoothing' : uniform(0.0000000001,1.0)
    }
    # NOTE(review): scikit-learn documents `learning_rate` and `power_t` as
    # only used with solver='sgd'; no solver is set below, so confirm that
    # tuning `power_t` actually has an effect.
    MLP_paramdist = {
        'alpha': uniform(0.00000001, 5),
        'batch_size': [100,200,300,400,500],
        'power_t': uniform(0.001,3)
    }

    models = []
    models.append((RandomForestClassifier(n_estimators=45, random_state=seed), RFC_paramdist))
    models.append((END_DT(number_of_nds=5, number_of_classes=nclasses, max_depth = 1, generator_String='random_pair', random_state=seed), DT_paramdist))
    models.append((END_LR(number_of_nds=5, number_of_classes=nclasses,penalty='l2', C=1.0, generator_String='random_pair', random_state=seed), LR_paramdist))
    models.append((END_NB(number_of_nds=5, number_of_classes=nclasses, var_smoothing=0.00001, generator_String='random_pair', random_state=seed),NB_paramdist))
    models.append((PC_DT(classes=nclasses, seed=seed, max_depth=1),DT_paramdist))
    models.append((PC_LR(classes=nclasses, seed=seed, penalty='l2', C=1.0),LR_paramdist))
    models.append((PC_NB(classes=nclasses, seed=seed, var_smoothing=0.0001),NB_paramdist))
    # Seeded like every other estimator so that repeated executions give the
    # same MLP initialisation and batch shuffling.
    models.append((MLPClassifier(hidden_layer_sizes = (50,50,), activation='relu', learning_rate='invscaling', max_iter=1000, random_state=seed), MLP_paramdist))
    return models

In [10]:
from time import gmtime, strftime
    
def write_single_value_per_run(file, models, value_name, values):
    """Write one titled, tab-separated table with a single value per model.

    Layout: a line with ``value_name``, a header line with the model labels
    (``generateModelName`` of the first element of every ``models`` entry),
    then one line per entry of ``values`` with its items converted by ``str``.
    An empty model list or an empty row produces an empty line instead of
    raising.

    Parameters
    ----------
    file : writable text stream
        Target; only its ``write`` method is used.
    models : sequence
        Entries whose first element is the model instance.
    value_name : str
        Title of the table.
    values : sequence of sequences
        One row per run.
    """
    file.write(value_name+'\n')
    file.write('\t'.join(generateModelName(entry[0]) for entry in models))
    file.write('\n')
    for row in values:
        file.write('\t'.join(str(value) for value in row))
        file.write('\n')
    
def _write_calibration_table(file, title, model_names, base_rows, sig_rows, iso_rows):
    """Write one table with Base/Sigmoid/Isotonic columns per model to ``file``."""
    file.write(title+'\n')
    # Each model label spans three columns, hence three tabs between labels.
    file.write('\t\t\t'.join(model_names))
    file.write('\n'+'\t'.join(['Base\tSigmoid\tIsotonic']*len(model_names)))
    file.write('\n')
    for base_row, sig_row, iso_row in zip(base_rows, sig_rows, iso_rows):
        cells = []
        for triple in zip(base_row, sig_row, iso_row):
            cells.extend(str(value) for value in triple)
        file.write('\t'.join(cells))
        file.write('\n')


def save_result(dataset_id, nclasses, bs_byModel_base_run, ece_byModel_base_run, bs_byModel_sig_run, ece_byModel_sig_run, bs_byModel_iso_run, ece_byModel_iso_run, hyperparam_byModel_run, mft_byModel_run, mst_byModel_run):
    """Write all results of one dataset into a single tab-separated text file.

    The file ``<cwd>/experiments/<dataset_id>/result_<UTC timestamp>.txt``
    contains, in this order: the Brier-score table, the ECE table (both with
    Base/Sigmoid/Isotonic columns per model), and the hyperparameter, mean fit
    time and mean score time tables (one column per model).

    Parameters
    ----------
    dataset_id : any
        Identifier of the dataset; converted with ``str`` for the path.
    nclasses : int
        Number of classes, used to rebuild the model list for the headers.
    bs_byModel_base_run, bs_byModel_sig_run, bs_byModel_iso_run : list of list
        Brier scores (uncalibrated, sigmoid, isotonic), one row per run.
    ece_byModel_base_run, ece_byModel_sig_run, ece_byModel_iso_run : list of list
        ECE values (uncalibrated, sigmoid, isotonic), one row per run.
    hyperparam_byModel_run, mft_byModel_run, mst_byModel_run : list of list
        Selected hyperparameters, mean fit times and mean score times.
    """
    # str() so that integer ids are accepted as well as names.
    directory = os.getcwd()+'/experiments/'+str(dataset_id)+'/'
    os.makedirs(directory, exist_ok=True)

    # The models are only rebuilt to derive the column labels.
    models = generate_ModelHyperparam_pairs(nclasses=nclasses, seed=42)
    model_names = [generateModelName(entry[0]) for entry in models]

    target = directory+'result_'+strftime("%Y-%m-%d %H_%M_%S", gmtime())+'.txt'
    with open(target, 'w') as file:
        # SAVING BRIER SCORE
        _write_calibration_table(file, 'Brier-Score', model_names, bs_byModel_base_run, bs_byModel_sig_run, bs_byModel_iso_run)
        # SAVING ECE SCORE
        _write_calibration_table(file, 'ECE-Score', model_names, ece_byModel_base_run, ece_byModel_sig_run, ece_byModel_iso_run)
        # SAVING HYPERPARAMETER
        write_single_value_per_run(file, models, 'Hyperparameter', hyperparam_byModel_run)
        # SAVING MEAN FIT TIME
        write_single_value_per_run(file, models, 'Mean Fit Time', mft_byModel_run)
        # SAVING MEAN SCORE TIME
        write_single_value_per_run(file, models, 'Mean Score Time', mst_byModel_run)

In [11]:
#1515 - micro-mass
#1459 - artificial-characters
#1233 - eating
#1569 - poker-hand
#1503 - spoken-arabic-digit
#4541 - Diabetes130US
#4538 - GesturePhaseSegmentationProcessed
#41991 - Kuzushiji-49 - error
#40670 - dna
#1478 - har
#40984 - segment
#40498 - wine-quality-white
#40499 - texture
#40686 - solar-flare
#41972 - Indian_pines
#1475 - first-order-theorem-proving
#id_list = [1515,1459,1233,1569,1503,4541,4538,41991,40670,1478,40984,40498,40499,40686,41972,1475]
#for i in range(len(id_list)):
#    ID_n = id_list[i]
#    RUNS_n = 20
#    JOBS_n = 10
#    run_dataset(ID_n, RUNS_n, JOBS_n)

In [12]:
# start experiment
def run_dataset(ID, RUNS, JOBS):
    """Run the full model comparison on one OpenML dataset and store the results.

    The dataset is fetched through ``openml``, its name, id and distinct
    target values are printed, ``CompareModels`` is executed and the outcome
    is written with ``saveResults``.

    Parameters
    ----------
    ID : int
        OpenML dataset id.
    RUNS : int
        Number of repetitions, forwarded as ``n_runs``.
    JOBS : int
        Degree of parallelism, forwarded as ``n_jobs``.
    """
    dataset = openml.datasets.get_dataset(ID)
    dataset_name = dataset.name
    dataset_id = dataset.dataset_id
    print(dataset_name, 'id: ', dataset_id)
    # NOTE(review): `dataset_format='array'` is reported as deprecated by
    # newer openml-python releases - confirm against the installed version.
    # The categorical indicator and attribute names are not needed here.
    X, y, _, _ = dataset.get_data(dataset_format='array', target=dataset.default_target_attribute)
    print(np.unique(y))
    # bs & ece for (b)ase, (s)igmoid and (i)sotonic
    bs_b, ece_b, bs_s, ece_s, bs_i, ece_i, hyppar, mft, mst = CompareModels(data_X=X, data_y=y, n_runs=RUNS, n_jobs=JOBS)
    saveResults(dataset_id=dataset_id, bs_byModel_base_run=bs_b, ece_byModel_base_run=ece_b, bs_byModel_sig_run=bs_s, ece_byModel_sig_run=ece_s, bs_byModel_iso_run=bs_i, ece_byModel_iso_run=ece_i, hyperparam_byModel_run=hyppar, mft_byModel_run=mft, mst_byModel_run=mst)

In [None]:
# OpenML dataset ids to run the comparison on.
id_list = [
    1515, 1459, 1233, 1569, 1503, 4541, 4538, 40670, 1478, 40984,
    40498, 40499, 40686, 41972, 1475, 40474, 1565, 1559, 1568, 1560,
    40668, 11, 62, 2, 1557, 4153, 40975, 61, 300, 1509,
]
# Sanity output: both counts are equal when the list holds no duplicate id.
print(len(id_list))
print(len(np.unique(id_list)))
for i, ID_n in enumerate(id_list):
    # Only the entries from position 24 onward are processed by this cell
    # (presumably the earlier ones were handled in a previous execution).
    if i < 24:
        continue
    RUNS_n = 20
    JOBS_n = 10
    run_dataset(ID_n, RUNS_n, JOBS_n)

30
30
abalone id:  1557
[0 1 2]
run 1
model 1
mean fit time: 0.08721036911010742
mean score time: 0.008754483858744304
{'min_impurity_decrease': 8.787658410143284e-05, 'min_samples_leaf': 21}
0.4702741000350361 0.035355082130345425 0.4756820537552306 0.04757741564650846 0.472405129979134 0.022905308145334098
model 2
mean fit time: 0.04412058194478353
mean score time: 0.0034865538279215498
{'max_depth': 3}
0.511555680484794 0.04292708369624995 0.5148050750673303 0.04155747852106613 0.5108825715267518 0.0557276558510227
model 3
mean fit time: 0.773821767171224
mean score time: 0.0036421616872151694
{'C': 14.21295986654147, 'penalty': 'l1'}
0.48052999614427006 0.028019917865677223 0.4868581217469265 0.041475875418297226 0.48392105584844897 0.02086800855408885
model 4
mean fit time: 0.05591962337493896
mean score time: 0.016459377606709798
{'var_smoothing': 0.16332444830826676}
0.5310067709969344 0.044416023689073755 0.5306527075226061 0.05280223226963788 0.5289280602618014 0.0312398960457

mean fit time: 1.591879455248515
mean score time: 0.003910001118977864
{'C': 13.136117178655478, 'penalty': 'l1'}
0.4493129472095677 0.021405101755760415 0.4508156484978523 0.02657971631488558 0.4583699687319592 0.03892836349431908
model 4
mean fit time: 0.0330948273340861
mean score time: 0.0072480599085489915
{'var_smoothing': 0.24023517596800603}
0.5225582178126821 0.039741819128894865 0.5224048336096015 0.04979265692402896 0.5207447559086149 0.05374036069910304
model 5
mean fit time: 0.0070400079091389985
mean score time: 0.2631131331125895
{'max_depth': 3}
0.4737336774137129 0.030488727697462876 0.4758801654903811 0.025897464373502217 0.46854375125851905 0.025298330183604036
model 6
mean fit time: 0.14680760701497395
mean score time: 0.26682790915171306
{'C': 9.241285229441898, 'penalty': 'l1'}
0.4495493124893721 0.028416435569736255 0.45041764305668114 0.029394884694522908 0.45710247767202367 0.04987888109419709
model 7
mean fit time: 0.005939316749572755
mean score time: 0.61828

mean fit time: 0.007816894849141439
mean score time: 0.28944608370463054
{'max_depth': 3}
0.47870361282124396 0.06826864023494929 0.481488649392107 0.06385239384043481 0.47079253235717594 0.03466590503313858
model 6
mean fit time: 0.15385483105977377
mean score time: 0.2738449017206828
{'C': 15.395249450462456, 'penalty': 'l1'}
0.45017681421595945 0.0281036523314089 0.4536020943429155 0.030163671870177558 0.45043238598767543 0.03133714904526951
model 7
mean fit time: 0.006356565157572427
mean score time: 0.623556383450826
{'var_smoothing': 0.37505712897328514}
0.5220784401861683 0.07067551863362426 0.5174981511285748 0.04157489681800167 0.5113732045134419 0.047847986800414063
model 8
mean fit time: 2.8692528883616126
mean score time: 0.0031083265940348307
{'alpha': 0.5423244362959421, 'batch_size': 400, 'power_t': 0.23412423749074507}
0.43335837619378803 0.02003075718462379 0.4374429328729804 0.032573560142801085 0.4367262736995162 0.015620846591391022
run 10
model 1
mean fit time: 0.0

mean fit time: 0.006005827585856121
mean score time: 0.6345127344131469
{'var_smoothing': 0.29517174282345104}
0.5259546190939022 0.0331353908376384 0.5233704575368716 0.02947566058183764 0.5210659568694201 0.027318758553279344
model 8
mean fit time: 2.526530210177104
mean score time: 0.0032848676045735673
{'alpha': 0.3857538810996816, 'batch_size': 100, 'power_t': 1.4910456749469057}
0.4385701325831539 0.030603282279137296 0.4422057494330423 0.031001149892698563 0.44457420837365536 0.023827393927100166
run 14
model 1
mean fit time: 0.07957646052042643
mean score time: 0.008088143666585286
{'min_impurity_decrease': 0.026080283762706193, 'min_samples_leaf': 2}
0.5446842072130403 0.06526952147549633 0.5455443546638296 0.0926368035843089 0.5055793490597791 0.019681227999498757
model 2
mean fit time: 0.06261937618255616
mean score time: 0.005942130088806152
{'max_depth': 3}
0.4811355109120897 0.032706532749588235 0.48548820781028007 0.06100298536363179 0.4787298055964076 0.0258544644542015

mean fit time: 0.07895297209421792
mean score time: 0.008005324999491373
{'min_impurity_decrease': 0.007511710495915126, 'min_samples_leaf': 13}
0.497364992285026 0.0435697324304025 0.49421989675934536 0.04035229359075428 0.48832241833447554 0.026516218014099945
model 2
mean fit time: 0.04466548760732015
mean score time: 0.0034697771072387694
{'max_depth': 3}
0.48496759664256767 0.04352697842880128 0.48410531954401614 0.03176525771504952 0.4746908167732435 0.02460382316424623
model 3
mean fit time: 0.9686783154805502
mean score time: 0.004213404655456542
{'C': 5.408818256936466, 'penalty': 'l1'}
0.4526169999500146 0.024292644850459632 0.45580734743461393 0.036404477039690866 0.453724107194308 0.024437869127659762
model 4
mean fit time: 0.03360342184702555
mean score time: 0.007435727119445801
{'var_smoothing': 0.1821341143743488}
0.5157804054010375 0.03456311603505903 0.5154314955638873 0.04287176810120327 0.5101995449134719 0.026360671940764257
model 5
mean fit time: 0.009020892779032

mean fit time: 0.07544235388437906
mean score time: 0.004564539591471354
{'C': 12.378435386992205, 'penalty': 'l1'}
0.14351569317282897 0.10239135114166951 0.1692175157606106 0.2416523109778919 0.22529382675252835 0.30638179761251766
model 4
mean fit time: 0.06237728595733642
mean score time: 0.012546920776367187
{'var_smoothing': 0.5858513842348373}
0.4131571369173537 0.1961180103704455 0.3453104211247361 0.08132164883892247 0.47716101955001355 0.2663478635467994
model 5
mean fit time: 0.008060677846272787
mean score time: 0.04917353789011637
{'max_depth': 2}
0.3333226676501458 0.09998636428608826 0.29960376603967653 0.0009429392638451682 0.3304155877330715 0.08163236435143435
model 6
mean fit time: 0.013582539558410645
mean score time: 0.0579204241434733
{'C': 15.353200908573939, 'penalty': 'l1'}
0.13874343820056334 0.19730439603772812 0.16159513031675724 0.28291537647805776 0.18939756692228704 0.2784995555235827
model 7
mean fit time: 0.01130935351053874
mean score time: 0.120019634

mean fit time: 0.007857060432434082
mean score time: 0.05045527617136637
{'max_depth': 2}
0.16666119527763923 0.04998630595588163 0.16282547638449996 0.03715382693175946 0.16666666666666666 0.05000000000000002
model 6
mean fit time: 0.012713774045308431
mean score time: 0.04996160666147868
{'C': 12.484029193616207, 'penalty': 'l1'}
0.11984272554955647 0.24058193598531302 0.14322169447831545 0.20881526439675507 0.14073436144402526 0.1434049671011671
model 7
mean fit time: 0.012400531768798826
mean score time: 0.12103614807128907
{'var_smoothing': 0.17515452342402305}
0.20959355111358388 0.32693192149537187 0.22204725490481997 0.11590934501159596 0.2041382638050502 0.12639970140637774
model 8
mean fit time: 1.0712679704030355
mean score time: 0.000625777244567871
{'alpha': 0.07995037730277127, 'batch_size': 300, 'power_t': 2.7743186814244325}
0.07824650149412989 0.31328915404839697 0.11496637454647701 0.27843987138787807 0.1064641743922099 0.34224028133907747
run 7
model 1
mean fit time:

mean fit time: 0.013061467806498212
mean score time: 0.1389415979385376
{'var_smoothing': 0.4602598141662353}
0.32681182665796454 0.17051422643460348 0.31000726114773447 0.18694306937197525 0.26207976292391105 0.1030426718150318
model 8
mean fit time: 1.1901235421498617
mean score time: 0.0009975353876749673
{'alpha': 0.5566541500262221, 'batch_size': 100, 'power_t': 2.974628213248308}
0.07433269325811376 0.21255266736790052 0.1326129503872935 0.2888714919561415 0.1252356042005315 0.19023560127682482
run 11
model 1
mean fit time: 0.05981462796529134
mean score time: 0.005055983861287435
{'min_impurity_decrease': 0.01570918378916666, 'min_samples_leaf': 11}
0.31302129456558947 0.1919716415603099 0.16118826361012306 0.20927165316285015 0.19934738345269096 0.19121523896964943
model 2
mean fit time: 0.03973565896352132
mean score time: 0.003862738609313965
{'max_depth': 1}
0.13293482435495232 0.14809803827576162 0.1448647351986249 0.26293860999551727 0.13259726927553392 0.21902757029838052

mean fit time: 0.06100152333577473
mean score time: 0.005139772097269694
{'min_impurity_decrease': 0.021283511959237746, 'min_samples_leaf': 18}
0.6336701434593508 0.11045243499779205 0.5899646593862669 0.17085505427926817 0.3444668588310969 0.188866047741745
model 2
mean fit time: 0.04377857049306234
mean score time: 0.004139161109924317
{'max_depth': 3}
0.17333333333333334 0.11302681992337164 0.17922354594167622 0.189715070498725 0.1617436304105206 0.21956501255226102
model 3
mean fit time: 0.07737291653951009
mean score time: 0.004437541961669921
{'C': 16.461644699250716, 'penalty': 'l1'}
0.0733254546455066 0.2716148307080557 0.10611776787387446 0.14112070697811047 0.1447255587519208 0.3039618250613925
model 4
mean fit time: 0.055313515663146975
mean score time: 0.01024033228556315
{'var_smoothing': 0.16939892431484915}
0.3566116554108658 0.26569251637050784 0.268139207363106 0.1742109215534264 0.29648191319135 0.17244815900807745
model 5
mean fit time: 0.008320188522338866
mean sco

mean fit time: 0.08811086813608805
mean score time: 0.004423157374064127
{'C': 18.883822218918542, 'penalty': 'l1'}
0.04400494506506023 0.10806110497970507 0.06913518242939524 0.11122714994746256 0.08024691358024692 0.34187283852088324
model 4
mean fit time: 0.059070237477620434
mean score time: 0.011624320348103841
{'var_smoothing': 0.20461945534637682}
0.3515705291008604 0.17662639732129387 0.3368811742912008 0.3035917067296069 0.36676102991636905 0.29520923939390786
model 5
mean fit time: 0.008260798454284669
mean score time: 0.050265232721964516
{'max_depth': 3}
0.33575872228583237 0.08189014064994174 0.2930244579265271 0.2006150837557881 0.2678612921494047 0.08399280350334362
model 6
mean fit time: 0.016529107093811037
mean score time: 0.05371404488881428
{'C': 16.46208011404777, 'penalty': 'l1'}
0.05768869373229996 0.23599167856718095 0.07343937025175486 0.20615088899478703 0.07672331231137403 0.3171734394870418
model 7
mean fit time: 0.013011042277018232
mean score time: 0.13944

mean fit time: 0.00460206667582194
mean score time: 0.20162951151529945
{'max_depth': 3}
0.19119198735194146 0.09583493061728586 0.19103104550682792 0.08597135317035678 0.17987396258904065 0.0708185549739405
model 6
mean fit time: 0.0266800324122111
mean score time: 0.1978992462158203
{'C': 10.523451221857055, 'penalty': 'l1'}
0.22850923735443243 0.04796361835615252 0.236343348721488 0.07372080201125294 0.22731800208510425 0.06299201550566158
model 7
mean fit time: 0.006957626342773438
mean score time: 0.47005868752797453
{'var_smoothing': 0.29485946110158046}
0.26890339448276873 0.07064389462846328 0.23486816568527963 0.12838319967245157 0.2097611982203119 0.06790819518193877
model 8
mean fit time: 2.6927062908808392
mean score time: 0.001605645815531413
{'alpha': 0.5010403675001693, 'batch_size': 100, 'power_t': 2.1397153962424342}
0.05568507281058819 0.14071702182444917 0.04303930033664578 0.1873840359846089 0.04119586394361997 0.09812898361911525
run 4
model 1
mean fit time: 0.0652

mean fit time: 0.005907098452250162
mean score time: 0.4506399393081665
{'var_smoothing': 0.03895696467464382}
0.22543298942621037 0.061555353168118485 0.23062590273705474 0.11466096235034384 0.2180095382002341 0.08152771137510995
model 8
mean fit time: 3.0929328838984174
mean score time: 0.0015558322270711263
{'alpha': 0.6219997463008934, 'batch_size': 400, 'power_t': 2.3146497640292623}
0.01810418030469371 0.17001908408846944 0.009146415135718204 0.23072558521307063 0.014136437595316593 0.2378892710875098
run 8
model 1
mean fit time: 0.06388891537984212
mean score time: 0.006433534622192383
{'min_impurity_decrease': 0.008285187305032849, 'min_samples_leaf': 3}
0.24447018918701974 0.09206479123094831 0.1209745088736022 0.06535524500247991 0.09427755210237394 0.06831446648466438
model 2
mean fit time: 0.02693074544270833
mean score time: 0.0032832860946655274
{'max_depth': 3}
0.24562259634136804 0.03372016420648225 0.2575422747516325 0.15092910225561768 0.23193568288767472 0.0562422478

mean fit time: 0.06309738159179687
mean score time: 0.006194154421488443
{'min_impurity_decrease': 0.005159682395476507, 'min_samples_leaf': 3}
0.20129280814078868 0.14617040363660483 0.09693127305491737 0.08326751208450914 0.09950893414683872 0.10439612508267371
model 2
mean fit time: 0.027752534548441572
mean score time: 0.0032516638437906907
{'max_depth': 3}
0.21871564687066652 0.06451030388739648 0.19687478697882385 0.08922998331011724 0.17839648863583804 0.027461350343107278
model 3
mean fit time: 0.2154472589492798
mean score time: 0.003322227795918783
{'C': 19.640947017208006, 'penalty': 'l1'}
0.25089605558289724 0.060999487455990944 0.25818930329079715 0.06154507841815645 0.24593701823037611 0.049545553143945
model 4
mean fit time: 0.036634627978007
mean score time: 0.006833163897196452
{'var_smoothing': 0.1095561232138934}
0.22562774339936545 0.055952844947434036 0.20552799957449278 0.08588509992785276 0.19628998720240942 0.06949836773326931
model 5
mean fit time: 0.0041650533

mean fit time: 0.12050892512003582
mean score time: 0.003595733642578125
{'C': 6.726693149772251, 'penalty': 'l1'}
0.22906873024049407 0.033390550052934634 0.23475895056617 0.057776834207294805 0.23530452339767796 0.042903990036479926
model 4
mean fit time: 0.03805391788482666
mean score time: 0.007232880592346192
{'var_smoothing': 0.058821523635050325}
0.21751772624629592 0.0427338761416652 0.2143453600738254 0.06826116046634251 0.21188409508151468 0.08285214524874641
model 5
mean fit time: 0.00450131893157959
mean score time: 0.1989896774291992
{'max_depth': 3}
0.24589082242146248 0.08618535356299385 0.258373460538992 0.09023629745165837 0.24296133322509259 0.08384637600983265
model 6
mean fit time: 0.027197106679280603
mean score time: 0.20634411176045736
{'C': 15.307533397670085, 'penalty': 'l1'}
0.21780720244316135 0.044820201503362245 0.2270314123817752 0.06888365714298175 0.22891868707026042 0.05677452010102554
model 7
mean fit time: 0.0063142458597819015
mean score time: 0.4695

mean fit time: 0.0042946974436442065
mean score time: 0.18595734437306724
{'max_depth': 3}
0.22313630646854782 0.028016121354062173 0.22869557740733662 0.10658572499132775 0.21886467111183208 0.06476710511225724
model 6
mean fit time: 0.035608919461568196
mean score time: 0.19118197758992514
{'C': 16.89305203669584, 'penalty': 'l1'}
0.22305756656141035 0.05705039402905797 0.2294427557340636 0.050894346641802944 0.22979219729869843 0.09561734869966063
model 7
mean fit time: 0.005907471974690754
mean score time: 0.43304453690846767
{'var_smoothing': 0.00286059178094277}
0.22926076403631773 0.08475442745588078 0.22360661852098415 0.062443162886608915 0.2132651819771567 0.08914845299321268
model 8
mean fit time: 2.551056671142578
mean score time: 0.0015632073084513344
{'alpha': 0.6180833918616649, 'batch_size': 400, 'power_t': 0.2909159356547125}
0.040030322456871575 0.10409385580181615 0.033917748834042145 0.18696571487987707 0.03226113896574313 0.1090735234309481
iris id:  61
[0 1 2]
run

mean fit time: 0.002399587631225586
mean score time: 0.019948283831278484
{'var_smoothing': 0.06674229357027071}
0.07031085909026315 0.3615656385891643 0.07411482940113809 0.14235737302642038 0.06666666666666667 0.024999999999999994
model 8
mean fit time: 0.7798012097676594
mean score time: 0.000552678108215332
{'alpha': 0.9666797968903665, 'batch_size': 200, 'power_t': 1.07294027252957}
0.01226462364760507 0.19624066194185427 0.01991950091254687 0.1811726516445981 0.003256850320723126 0.10120979528976035
run 5
model 1
mean fit time: 0.05923407077789307
mean score time: 0.005033445358276367
{'min_impurity_decrease': 0.0006857384821418412, 'min_samples_leaf': 2}
0.14437421124828537 0.09162037037037034 0.13366360132211727 0.14117342212618336 0.12247187206523513 0.0641274063770265
model 2
mean fit time: 0.0116162379582723
mean score time: 0.0018583774566650391
{'max_depth': 1}
0.19611573523583037 0.06724137931034475 0.18278669287657506 0.044894523860855906 0.18641404797213 0.0344957983193

mean fit time: 0.057998275756835936
mean score time: 0.004900972048441568
{'min_impurity_decrease': 0.0821729991671519, 'min_samples_leaf': 3}
0.13182269659045626 0.14488014719655867 0.13564586610001605 0.13805303194939825 0.1366526827732823 0.059357133226044245
model 2
mean fit time: 0.011410419146219892
mean score time: 0.0018292824427286786
{'max_depth': 1}
0.24355555555555553 0.08000000000000002 0.23369081540883319 0.0573842350706382 0.24063116370808676 0.07692307692307696
model 3
mean fit time: 0.031743582089742026
mean score time: 0.0021370569864908855
{'C': 18.109536689378775, 'penalty': 'l1'}
0.05283624740473666 0.06417377940193522 0.06347195308377265 0.3472652342371931 0.06899112556300077 0.41881865401015006
model 4
mean fit time: 0.01687830289204915
mean score time: 0.0034103075663248694
{'var_smoothing': 0.09296837672230705}
0.15174013973837605 0.26490958985640606 0.14304717366873898 0.19674456225412779 0.16047719288451884 0.22045292273999692
model 5
mean fit time: 0.0014904

mean fit time: 0.030721203486124678
mean score time: 0.0019109725952148437
{'C': 18.320088116025577, 'penalty': 'l2'}
0.04941017918939636 0.21126470433142705 0.04169887515734135 0.2574325547594203 0.04585431773349938 0.12424322154224435
model 4
mean fit time: 0.016737786928812663
mean score time: 0.0033286094665527345
{'var_smoothing': 0.0504140811136815}
0.11607992489746725 0.1694453418110649 0.1322305775023304 0.2981163437340667 0.19748407748749267 0.4151154980000132
model 5
mean fit time: 0.001754323641459147
mean score time: 0.008712697029113769
{'max_depth': 2}
0.083331667046233 0.017137318003220377 0.10569833205272113 0.2728292022953585 0.13414071510957323 0.07044117647058826
model 6
mean fit time: 0.004093225797017415
mean score time: 0.009223898251851399
{'C': 18.32951401652248, 'penalty': 'l1'}
0.02721368762073957 0.24290912787070168 0.0261066315235086 0.18503908975583683 0.038511320238478294 0.3801154752530165
model 7
mean fit time: 0.002347850799560547
mean score time: 0.019

mean fit time: 0.0014888445536295574
mean score time: 0.008382646242777507
{'max_depth': 2}
0.33332500054997877 0.12498875026249366 0.2836318493493147 0.08302374314071863 0.2678387308818302 0.08220630613247948
model 6
mean fit time: 0.004495660463968913
mean score time: 0.00929262638092041
{'C': 18.747592006849057, 'penalty': 'l2'}
0.10999013706554325 0.23885727086015923 0.11231407178031777 0.19002745605166296 0.21167075132527696 0.28457759190297066
model 7
mean fit time: 0.0023669401804606123
mean score time: 0.019545650482177733
{'var_smoothing': 0.19700060060005264}
0.18571447540654412 0.1093674021198137 0.17815802159811497 0.11889602468211694 0.1598952993220051 0.06366408359895522
model 8
mean fit time: 0.7145881096522013
mean score time: 0.0005303939183553061
{'alpha': 0.5098173956839811, 'batch_size': 200, 'power_t': 0.6616239323218458}
0.09465448750153939 0.2238605657403448 0.10123089300706267 0.1640864003966015 0.18617459062096436 0.25602423897710075
run 18
model 1
mean fit tim

0.05658979436160002 0.06072741035736642 0.06533419245967334 0.14467851915976723 0.06603283888541694 0.08643880026670776
model 7
mean fit time: 0.8720233599344891
mean score time: 138.37749341328941
{'var_smoothing': 0.01749027103115263}
0.195005582735428 0.15131880083223984 0.18662586075323848 0.32609238812963637 0.1639080351677136 0.08434673454633591
model 8
mean fit time: 23.309041070938108
mean score time: 0.030615735054016113
{'alpha': 1.4491894425039589, 'batch_size': 500, 'power_t': 2.7526094230541496}
0.07510607564630456 0.049181249523318295 0.08087506806414752 0.14095212399967344 0.07743721870501293 0.07228067247535011
run 2
model 1
mean fit time: 0.9372357368469236
mean score time: 0.04640489419301351
{'min_impurity_decrease': 0.007447660735453599, 'min_samples_leaf': 31}
0.43237688146459996 0.28268652455809334 0.19708371817578305 0.08127958631429232 0.1994228842697257 0.057817960087752876
model 2
mean fit time: 16.603390407562255
mean score time: 0.13023846944173179
{'max_dep

0.06572263480733354 0.035168779612230285 0.07502212112244187 0.19275153044270163 0.06720522134765365 0.04074745877968157
run 6
model 1
mean fit time: 0.4549193779627482
mean score time: 0.0407593011856079
{'min_impurity_decrease': 0.013415000147746449, 'min_samples_leaf': 26}
0.5551393116147829 0.30239822347023915 0.28760545947216687 0.10781767919898219 0.2840394930434896 0.06779699718074886
model 2
mean fit time: 13.821928596496582
mean score time: 0.13144487539927166
{'max_depth': 3}
0.2576395541803342 0.12941445817514152 0.20670769039353493 0.03812153012608007 0.20505696016473704 0.04065105526037795
model 3
mean fit time: 22.72450122833252
mean score time: 0.4134367386500041
{'C': 3.481783266932984, 'penalty': 'l2'}
0.06986708929749004 0.06179393614584126 0.06956097007075887 0.13690416728207372 0.0750889032187387 0.04718493309730867
model 4
mean fit time: 10.821766789754232
mean score time: 4.978095602989198
{'var_smoothing': 0.2735065344522666}
0.402479332598302 0.08068618450631962

0.25774135629116457 0.12499951930434605 0.21243768625850637 0.05173621467363525 0.2124864335418006 0.02715731058844744
model 3
mean fit time: 23.709039711952208
mean score time: 0.5852769692738852
{'C': 15.400529916494804, 'penalty': 'l2'}
0.07465363243734303 0.05873140986582749 0.07612495690136757 0.10297150204746774 0.07717861694050304 0.0930192942238735
model 4
mean fit time: 12.34105672836304
mean score time: 5.050218764940898
{'var_smoothing': 0.04540221017700707}
0.32571575220836296 0.07595694048725923 0.28058006284888454 0.03197220505220566 0.2596933732103203 0.030754890516677864
model 5
mean fit time: 4.567808039983113
mean score time: 45.97618815104166
{'max_depth': 3}
0.2806461989961265 0.1389167143766138 0.27892173016585553 0.16435771294038343 0.2343637725979791 0.06918133734733949
model 6
mean fit time: 4.083355196317037
mean score time: 49.071604530016586
{'C': 19.30564111526844, 'penalty': 'l2'}
0.0652597071527371 0.05686724906978633 0.07509008607250843 0.1845764715614906

mean fit time: 4.594383160273234
mean score time: 45.1819935242335
{'max_depth': 3}
0.28830651363049564 0.1740889516631046 0.27847065711414615 0.10824636405953242 0.2460046305843282 0.0886113867722013
model 6
mean fit time: 3.8529765764872237
mean score time: 47.598100344340004
{'C': 16.01627754148761, 'penalty': 'l2'}
0.06585703028157103 0.044567838883161924 0.0736187313272124 0.222655277066853 0.06956905904750205 0.05333484649149356
model 7
mean fit time: 0.8742335557937622
mean score time: 135.44033877054852
{'var_smoothing': 0.03832907311201927}
0.21873070850290655 0.2465736375596383 0.20901706267684206 0.30209850658888926 0.17517786048672607 0.04412540484633098
model 8
mean fit time: 16.677731823921203
mean score time: 0.020548081398010253
{'alpha': 2.501912554754114, 'batch_size': 500, 'power_t': 0.22438881708295533}
0.08213980721444501 0.07731300425081537 0.07685433691648628 0.06278054872088416 0.07542609903135032 0.014632791723353394
run 15
model 1
mean fit time: 0.692895936965

0.2125487588220655 0.21931733558126093 0.2044987377618544 0.2829365493007029 0.17296486915755976 0.03582698776209372
model 8
mean fit time: 21.457380827267965
mean score time: 0.026355703671773277
{'alpha': 0.12121788431644603, 'batch_size': 200, 'power_t': 2.2213922645774136}
0.07132422196889858 0.06216488311027483 0.0833061793836915 0.18549954142291136 0.07077268556404961 0.05770488661739143
run 19
model 1
mean fit time: 0.7338489135106404
mean score time: 0.038644266128540036
{'min_impurity_decrease': 0.006940047351797297, 'min_samples_leaf': 40}
0.4169412676560657 0.2756470926889156 0.19286581300296185 0.062219518317876574 0.20195302821544345 0.04905905946272955
model 2
mean fit time: 18.009340890248616
mean score time: 0.1354140758514404
{'max_depth': 3}
0.22105884765646905 0.12379178511646008 0.1829625542866598 0.06142595060968038 0.18427988325736747 0.04625849727577366
model 3
mean fit time: 32.3311225493749
mean score time: 0.6666870911916096
{'C': 10.249552349461576, 'penalty'

0.8019482024000176 0.142738367790115 0.7727448129434902 0.03404736327508321 0.751739663755415 0.03929914880893705
model 2
mean fit time: 94.99694197177887
mean score time: 0.24898783365885416
{'max_depth': 3}
0.7238702860753398 0.07874400873231334 0.7251367262256665 0.061356791147773544 0.692753899994876 0.041098997070483166
model 3
mean fit time: 96.40038339296976
mean score time: 0.24474229017893473
{'C': 2.4778674136020804, 'penalty': 'l2'}
0.8259064543948745 0.18854472849034312 0.8354868562342946 0.1548532237454266 0.7993663067686703 0.13936600404786387
model 4
mean fit time: 120.23574508825936
mean score time: 0.6451785643895468
{'var_smoothing': 0.0035455469097308485}
0.8488246199739471 0.03483789176029 0.8436917251945418 0.05997968389477145 0.824604988136245 0.08912614398415761
model 5
mean fit time: 1.7236159960428874
mean score time: 573.5876481215158
{'max_depth': 3}
0.6884514589146951 0.04267851794230205 0.6968868797957903 0.04836408611223234 0.6693766279669701 0.02809277246

In [None]:
# NOTE: this whole cell is disabled -- every statement below is commented out,
# so running the cell does nothing.
#
# If re-enabled, the code would act as a sanity check over a hard-coded list
# of OpenML dataset IDs:
#   1. print the length of the ID list and the number of unique IDs in it
#      (presumably to spot duplicated IDs -- TODO confirm the intent);
#   2. for each ID, fetch the dataset via `openml.datasets.get_dataset` and
#      print its name and dataset ID;
#   3. load the data with `dataset_format='array'`, using the dataset's default
#      target attribute, and print the distinct values found in the target `y`.
#
# It relies on `openml` and `np` being imported in an earlier cell.
#id_list = [1515,1459,1233,1569,1503,4541,4538,40670,1478,40984,40498,40499,40686,41972,1475,40474,1565,1559,1568,1560,40668,11,62,2,1557,4153,40975,61,300,1509]
#print(len(id_list))
#print(len(np.unique(id_list)))
#for i in range(0,len(id_list)):
#    dataset = openml.datasets.get_dataset(id_list[i])
#    dataset_name = dataset.name
#    dataset_id = dataset.dataset_id
#    print(dataset_name, dataset_id)
#    X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format='array', target=dataset.default_target_attribute)
#    print(np.unique(y))