In [1]:
import pandas as pd
import numpy as np
import researchpy as rp
import seaborn as sns
import os
import ast

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp


# Analysis <font color=blue>after </font> combining RF, DT and SVM into 1C

In [2]:
def get_class (model):
    if isinstance(model, str):
        model = '{' +'}'.join('{'.join(model.split('{')[1:]).split('}')[0:1]) + '}'
        m = ast.literal_eval(model)
        if 'classifier:__choice__' in m:
            classifier = m['classifier:__choice__']
        else:
            classifier = ''
        return classifier
    else:
        return ''


def parse_tpot(directory):
    result = pd.DataFrame(columns=['dataset', 'accuracy', 'model', 'precision', 'recall', 'f1score', 'time_budget', 'methods'])
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                method = file.split('.')[0].split('_')[2]
                time_budget = file.split('.')[0].split('_')[3]
                run = []
                sub_result = pd.read_csv(os.path.join(subdir, file))
                sub_result.rename(columns={'Unnamed: 0': 'dataset'}, inplace = True)
                run.append(sub_result[['dataset', 'accuracy_1', 'model_1', 'precision_1', 'recall_1', 'f1score_1']])
                run.append(sub_result[['dataset', 'accuracy_2', 'model_2', 'precision_2', 'recall_2', 'f1score_2']])
                run.append(sub_result[['dataset', 'accuracy_3', 'model_3', 'precision_3', 'recall_3', 'f1score_3']])
                for i in range(3):
                    run[i].rename(columns={'accuracy_'+str(i+1): 'accuracy',
                                           'model_'+str(i+1): 'model',
                                           'precision_'+str(i+1): 'precision',
                                           'recall_'+str(i+1): 'recall',
                                           'f1score_'+str(i+1): 'f1score'}, inplace=True)
                    run[i]['methods'] = str(method)
                    run[i]['time_budget'] = int(time_budget)
                    result = pd.concat([result, run[i]], axis=0, sort=True, ignore_index=True)
    result.model = result.model.apply(get_class)
    result = result[~np.isnan(result.f1score)]
    return result
#parse_tpot(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/")

In [3]:
df = parse_tpot(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/")
df = df[['dataset', 'time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("default", "fc")
df.methods = df.methods.replace("SVC", "1c")
df.methods = df.methods.replace("DT", "1c")
df.methods = df.methods.replace("RF", "1c")
fsIs_b = ['vowel', 'openml_phpJNxH0q', 'dataset_31_credit-g', 'dataset_40_sonar']
fsIs_m = ['solar-flare_1', 'wine-quality-red', 'dataset_39_ecoli', 'synthetic_control']
fsIl_b = ['AirlinesCodrnaAdult', 'MagicTelescope', 'electricity-normalized', 'phpmPOD5A']
fsIl_m = ['pokerhand-normalized', 'eye_movements', 'avila-tr']
flIs_b = ['audiology', 'arrhythmia', 'AP_Breast_Lung', 'AP_Omentum_Ovary']
flIs_m = ['Amazon', 'umistfacescropped', 'phpGUrE90']
flIl_b = ['gina_agnostic', 'hiva_agnostic', 'phpZrCzJR', 'phprAeXmK']
flIl_m = ['KDDCup99', 'connect-4', 'dataset_60_waveform-5000', 'dataset_186_satimage']
df_binary = df[df.dataset.isin(fsIs_b) | df.dataset.isin(fsIl_b) | df.dataset.isin(flIs_b) | df.dataset.isin(flIl_b)]
df_multi = df[df.dataset.isin(fsIs_m) | df.dataset.isin(fsIl_m) | df.dataset.isin(flIs_m) | df.dataset.isin(flIl_m)]
#df.drop(df[(df.methods=='fc') & ((df.time_budget==30) | (df.time_budget==10))].index, inplace=True)
#df.drop(df[(df.methods=='3c') & ((df.time_budget==60) | (df.time_budget==10))].index, inplace=True)
df=df_multi
df.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(181, 4)

In [4]:
rp.summary_cont(df['f1score'])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,f1score,181.0,0.682373,0.217561,0.016171,0.650464,0.714283


In [5]:
rp.summary_cont(df.groupby(['time_budget']))['f1score']





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
time_budget,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,52,0.696551,0.218529,0.030305,0.637154,0.755948
30,61,0.666992,0.215246,0.027559,0.612975,0.721008
60,68,0.68533,0.221225,0.026828,0.632748,0.737912


In [6]:
rp.summary_cont(df.groupby(['methods']))['f1score']





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1c,98,0.627896,0.247474,0.024999,0.578899,0.676893
3C,34,0.7317,0.185629,0.031835,0.669303,0.794096
fc,49,0.757102,0.12869,0.018384,0.721069,0.793135


In [7]:
rp.summary_cont(df.groupby(['time_budget', 'methods']))['f1score']





Unnamed: 0_level_0,Unnamed: 1_level_0,N,Mean,SD,SE,95% Conf.,Interval
time_budget,methods,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10,1c,32,0.659619,0.251392,0.04444,0.572516,0.746722
10,3C,10,0.770093,0.174673,0.055236,0.661829,0.878356
10,fc,10,0.741192,0.09668,0.030573,0.681269,0.801115
30,1c,35,0.614408,0.242539,0.040997,0.534055,0.694761
30,3C,10,0.746485,0.192579,0.060899,0.627123,0.865847
30,fc,16,0.732335,0.12048,0.03012,0.673299,0.79137
60,1c,31,0.610378,0.25386,0.045595,0.521012,0.699743
60,3C,14,0.693715,0.194465,0.051973,0.591848,0.795582
60,fc,23,0.781249,0.145745,0.03039,0.721685,0.840813


In [8]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model = ols('f1score ~ C(time_budget)*C(methods)', df).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .4f}")

Overall model F( 8, 172) =  2.100, p =  0.0382


In [9]:
model.summary()

0,1,2,3
Dep. Variable:,f1score,R-squared:,0.089
Model:,OLS,Adj. R-squared:,0.047
Method:,Least Squares,F-statistic:,2.1
Date:,"Fri, 01 Nov 2019",Prob (F-statistic):,0.0382
Time:,22:13:02,Log-Likelihood:,28.183
No. Observations:,181,AIC:,-38.37
Df Residuals:,172,BIC:,-9.58
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6596,0.038,17.565,0.000,0.585,0.734
C(time_budget)[T.30],-0.0452,0.052,-0.870,0.385,-0.148,0.057
C(time_budget)[T.60],-0.0492,0.054,-0.920,0.359,-0.155,0.056
C(methods)[T.3C],0.1105,0.077,1.435,0.153,-0.041,0.262
C(methods)[T.fc],0.0816,0.077,1.060,0.291,-0.070,0.233
C(time_budget)[T.30]:C(methods)[T.3C],0.0216,0.108,0.200,0.842,-0.192,0.235
C(time_budget)[T.60]:C(methods)[T.3C],-0.0271,0.103,-0.264,0.792,-0.230,0.176
C(time_budget)[T.30]:C(methods)[T.fc],0.0364,0.100,0.363,0.717,-0.161,0.234
C(time_budget)[T.60]:C(methods)[T.fc],0.0893,0.097,0.924,0.357,-0.101,0.280

0,1,2,3
Omnibus:,0.987,Durbin-Watson:,2.078
Prob(Omnibus):,0.61,Jarque-Bera (JB):,1.003
Skew:,0.051,Prob(JB):,0.606
Kurtosis:,2.65,Cond. No.,12.9


In [10]:
# Creates the ANOVA table
res = sm.stats.anova_lm(model, typ= 2)
res

Unnamed: 0,sum_sq,df,F,PR(>F)
C(time_budget),0.042517,2.0,0.471085,0.625127
C(methods),0.66424,2.0,7.359814,0.000857
C(time_budget):C(methods),0.068451,4.0,0.379218,0.823275
Residual,7.761695,172.0,,


In [11]:
# Fits the model
model2 = ols('f1score ~ C(time_budget)+ C(methods)', df).fit()

print(f"Overall model F({model2.df_model: .0f},{model2.df_resid: .0f}) = {model2.fvalue: .3f}, p = {model2.f_pvalue: .4f}")

Overall model F( 4, 176) =  3.876, p =  0.0048


In [12]:
model2.summary()

0,1,2,3
Dep. Variable:,f1score,R-squared:,0.081
Model:,OLS,Adj. R-squared:,0.06
Method:,Least Squares,F-statistic:,3.876
Date:,"Fri, 01 Nov 2019",Prob (F-statistic):,0.00482
Time:,22:13:02,Log-Likelihood:,27.389
No. Observations:,181,AIC:,-44.78
Df Residuals:,176,BIC:,-28.79
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6509,0.032,20.508,0.000,0.588,0.713
C(time_budget)[T.30],-0.0359,0.040,-0.900,0.369,-0.115,0.043
C(time_budget)[T.60],-0.0321,0.039,-0.816,0.415,-0.110,0.045
C(methods)[T.3C],0.1046,0.042,2.485,0.014,0.022,0.188
C(methods)[T.fc],0.1330,0.037,3.566,0.000,0.059,0.207

0,1,2,3
Omnibus:,1.294,Durbin-Watson:,2.072
Prob(Omnibus):,0.524,Jarque-Bera (JB):,1.211
Skew:,0.06,Prob(JB):,0.546
Kurtosis:,2.617,Cond. No.,4.21


In [13]:
# Creates the ANOVA table
res2 = sm.stats.anova_lm(model2, typ= 2)
res2

Unnamed: 0,sum_sq,df,F,PR(>F)
C(time_budget),0.042517,2.0,0.477827,0.620932
C(methods),0.66424,2.0,7.465138,0.000773
Residual,7.830146,176.0,,


In [14]:
# Calculating effect size
def anova_table(aov):
    aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df']
    
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1])
    
    cols = ['sum_sq', 'mean_sq', 'df', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    aov = aov[cols]
    return aov

anova_table(res2)

Unnamed: 0,sum_sq,mean_sq,df,F,PR(>F),eta_sq,omega_sq
C(time_budget),0.042517,0.021258,2.0,0.477827,0.620932,0.00498,-0.005414
C(methods),0.66424,0.33212,2.0,7.465138,0.000773,0.077808,0.067036
Residual,7.830146,0.044489,176.0,,,,


In [15]:
mc = statsmodels.stats.multicomp.MultiComparison(df['f1score'], df['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
    1c     3C   0.1038 0.0373  0.0049 0.2027   True
    1c     fc   0.1292 0.0016  0.0422 0.2162   True
    3C     fc   0.0254 0.8349 -0.0855 0.1364  False
---------------------------------------------------


In [16]:
mc = statsmodels.stats.multicomp.MultiComparison(df['f1score'], df['time_budget'])
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
    10     30  -0.0296 0.7344  -0.127 0.0679  False
    10     60  -0.0112    0.9 -0.1063 0.0839  False
    30     60   0.0183 0.8722 -0.0727 0.1094  False
---------------------------------------------------


# Analysis <font color=blue>without </font> combining RF, DT and SVM into 1C

In [17]:
df = pandas.read_excel(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/skout.xlsx")
df = df[['time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("['adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd']", "fc")
df.methods = df.methods.replace("['decision_tree', 'libsvm_svc', 'random_forest']", "3c")
df.methods = df.methods.replace("['libsvm_svc']", "svm")
df.methods = df.methods.replace("['decision_tree']", "DT")
df.methods = df.methods.replace("['random_forest']", "RF")
#df.drop(df[(df.methods=='fc') & ((df.time_budget==30) | (df.time_budget==10))].index, inplace=True)
#df.drop(df[(df.methods=='3c') & ((df.time_budget==60) | (df.time_budget==10))].index, inplace=True)
df.shape

NameError: name 'pandas' is not defined

In [None]:
rp.summary_cont(df['f1score'])

In [None]:
rp.summary_cont(df.groupby(['time_budget']))['f1score']

In [None]:
rp.summary_cont(df.groupby(['methods']))['f1score']

In [None]:
rp.summary_cont(df.groupby(['time_budget', 'methods']))['f1score']

In [None]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model = ols('f1score ~ C(time_budget)*C(methods)', df).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .4f}")

In [None]:
model.summary()

In [None]:
# Creates the ANOVA table
res = sm.stats.anova_lm(model, typ= 2)
res

In [None]:
# Fits the model
model2 = ols('f1score ~ C(time_budget)+ C(methods)', df).fit()

print(f"Overall model F({model2.df_model: .0f},{model2.df_resid: .0f}) = {model2.fvalue: .3f}, p = {model2.f_pvalue: .4f}")

In [None]:
model2.summary()

In [None]:
# Creates the ANOVA table
res2 = sm.stats.anova_lm(model2, typ= 2)
res2

In [None]:
# Calculating effect size
def anova_table(aov):
    aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df']
    
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1])
    
    cols = ['sum_sq', 'mean_sq', 'df', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    aov = aov[cols]
    return aov

anova_table(res2)

In [None]:
mc = statsmodels.stats.multicomp.MultiComparison(df['f1score'], df['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

# Analysis <font color=blue>after </font> combining RF, DT and SVM into 1C 
# Comparing FC (60 min) with 3C(30)

In [None]:
df = pandas.read_excel(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/skout.xlsx")
df = df[['time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("['adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd']", "fc")
df.methods = df.methods.replace("['decision_tree', 'libsvm_svc', 'random_forest']", "3c")
df.methods = df.methods.replace("['libsvm_svc']", "1c")
df.methods = df.methods.replace("['decision_tree']", "1c")
df.methods = df.methods.replace("['random_forest']", "1c")
df.drop(df[(df.methods=='fc') & ((df.time_budget==30) | (df.time_budget==10))].index, inplace=True)
df.drop(df[(df.methods=='3c') & ((df.time_budget==60) | (df.time_budget==10))].index, inplace=True)
df.drop(df[(df.methods=='1c') & ((df.time_budget==60) | (df.time_budget==10))].index, inplace=True)
df.shape

In [None]:
rp.summary_cont(df['f1score'])

In [None]:
rp.summary_cont(df.groupby(['time_budget']))['f1score']

In [None]:
rp.summary_cont(df.groupby(['methods']))['f1score']

In [None]:
rp.summary_cont(df.groupby(['time_budget', 'methods']))['f1score']

In [None]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model = ols('f1score ~ C(time_budget)*C(methods)', df).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .4f}")

In [None]:
model.summary()

In [None]:
# Creates the ANOVA table
res = sm.stats.anova_lm(model, typ= 2)
res

In [None]:
# Fits the model
model2 = ols('f1score ~ C(time_budget)+ C(methods)', df).fit()

print(f"Overall model F({model2.df_model: .0f},{model2.df_resid: .0f}) = {model2.fvalue: .3f}, p = {model2.f_pvalue: .4f}")

In [None]:
model2.summary()

In [None]:
# Creates the ANOVA table
res2 = sm.stats.anova_lm(model2, typ= 2)
res2

In [None]:
# Calculating effect size
def anova_table(aov):
    aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df']
    
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1])
    
    cols = ['sum_sq', 'mean_sq', 'df', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    aov = aov[cols]
    return aov

anova_table(res2)

In [None]:
mc = statsmodels.stats.multicomp.MultiComparison(df['f1score'], df['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

# Wilcoxon signed-rank test

In [None]:
df = pandas.read_excel(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/skout.xlsx")
df = df[['time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("['adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd']", "fc")
df.methods = df.methods.replace("['decision_tree', 'libsvm_svc', 'random_forest']", "3c")
df.methods = df.methods.replace("['libsvm_svc']", "svm")
df.methods = df.methods.replace("['decision_tree']", "DT")
df.methods = df.methods.replace("['random_forest']", "RF")
df.shape

In [None]:
# Wilcoxon signed-rank test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import wilcoxon
# seed the random number generator
seed(1)
# generate two independent samples
data1 = df.f1score[df.methods=='3c']
data2 = df.f1score[df.methods=='fc']
# compare samples
stat, p = wilcoxon(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# interpret
alpha = 0.05
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')

In [None]:
df = pandas.read_excel(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/skout.xlsx")
df = df[['time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("['adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd']", "fc")
df.methods = df.methods.replace("['decision_tree', 'libsvm_svc', 'random_forest']", "3c")
df.methods = df.methods.replace("['libsvm_svc']", "svm")
df.methods = df.methods.replace("['decision_tree']", "DT")
df.methods = df.methods.replace("['random_forest']", "RF")
df.shape

In [None]:
# Wilcoxon signed-rank test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import wilcoxon
# seed the random number generator
seed(1)
# generate two independent samples
data1 = df.f1score[df.methods=='3c']
data2 = df.f1score[df.methods=='fc']
# compare samples
stat, p = wilcoxon(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# interpret
alpha = 0.05
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')