In [1]:
import pandas as pd
import scipy.stats as stats
import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
import os
import ast
import numpy as np
import matplotlib.pyplot as plt


# Analysis <font color=blue>after </font> combining RF, DT and SVM into 1C 10 min

In [2]:
def get_class (model):
    if isinstance(model, str):
        model = '{' +'}'.join('{'.join(model.split('{')[1:]).split('}')[0:1]) + '}'
        m = ast.literal_eval(model)
        if 'classifier:__choice__' in m:
            classifier = m['classifier:__choice__']
        else:
            classifier = ''
        return classifier
    else:
        return ''


def parse_tpot(directory):
    result = pd.DataFrame(columns=['dataset', 'accuracy', 'model', 'precision', 'recall', 'f1score', 'time_budget', 'methods'])
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                method = file.split('.')[0].split('_')[2]
                time_budget = file.split('.')[0].split('_')[3]
                run = []
                sub_result = pd.read_csv(os.path.join(subdir, file))
                sub_result.rename(columns={'Unnamed: 0': 'dataset'}, inplace = True)
                run.append(sub_result[['dataset', 'accuracy_1', 'model_1', 'precision_1', 'recall_1', 'f1score_1']])
                run.append(sub_result[['dataset', 'accuracy_2', 'model_2', 'precision_2', 'recall_2', 'f1score_2']])
                run.append(sub_result[['dataset', 'accuracy_3', 'model_3', 'precision_3', 'recall_3', 'f1score_3']])
                for i in range(3):
                    run[i].rename(columns={'accuracy_'+str(i+1): 'accuracy',
                                           'model_'+str(i+1): 'model',
                                           'precision_'+str(i+1): 'precision',
                                           'recall_'+str(i+1): 'recall',
                                           'f1score_'+str(i+1): 'f1score'}, inplace=True)
                    run[i]['methods'] = str(method)
                    run[i]['time_budget'] = int(time_budget)
                    result = pd.concat([result, run[i]], axis=0, sort=True, ignore_index=True)
    result.model = result.model.apply(get_class)
    result = result[~np.isnan(result.f1score)]
    return result
#parse_tpot(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/")

In [22]:
df = parse_tpot(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/")
df = df[['dataset', 'time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("default", "fc")
df.methods = df.methods.replace("3C", "3c")
df.methods = df.methods.replace("SVC", "1c")
df.methods = df.methods.replace("DT", "1c")
df.methods = df.methods.replace("RF", "1c")
df10 = df[df.time_budget==10]
df30 = df[df.time_budget==30]
df60 = df[df.time_budget==60]
fsIs_b = ['vowel', 'openml_phpJNxH0q', 'dataset_31_credit-g', 'dataset_40_sonar']
fsIs_m = ['solar-flare_1', 'wine-quality-red', 'dataset_39_ecoli', 'synthetic_control']
fsIl_b = ['AirlinesCodrnaAdult', 'MagicTelescope', 'electricity-normalized', 'phpmPOD5A']
fsIl_m = ['pokerhand-normalized', 'eye_movements', 'avila-tr']
flIs_b = ['audiology', 'arrhythmia', 'AP_Breast_Lung', 'AP_Omentum_Ovary']
flIs_m = ['Amazon', 'umistfacescropped', 'phpGUrE90']
flIl_b = ['gina_agnostic', 'hiva_agnostic', 'phpZrCzJR', 'phprAeXmK']
flIl_m = ['KDDCup99', 'connect-4', 'dataset_60_waveform-5000', 'dataset_186_satimage']
df_binary = df[df.dataset.isin(fsIs_b) | df.dataset.isin(fsIl_b) | df.dataset.isin(flIs_b) | df.dataset.isin(flIl_b)]
df_multi = df[df.dataset.isin(fsIs_m) | df.dataset.isin(fsIl_m) | df.dataset.isin(flIs_m) | df.dataset.isin(flIl_m)]
#df.drop(df[(df.methods=='fc') & ((df.time_budget==30) | (df.time_budget==10))].index, inplace=True)
#df.drop(df[(df.methods=='3c') & ((df.time_budget==60) | (df.time_budget==10))].index, inplace=True)

df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(524, 4)

In [23]:
rp.summary_cont(df10['f1score'])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,f1score,157.0,0.716536,0.268588,0.021436,0.674194,0.758877


In [24]:
rp.summary_cont(df30['f1score'])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,f1score,173.0,0.701214,0.273636,0.020804,0.660149,0.742278


In [25]:
rp.summary_cont(df60['f1score'])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,f1score,194.0,0.707828,0.270216,0.0194,0.669564,0.746092


In [26]:
stats.f_oneway(df10['f1score'][df10['methods'] == '1c'], 
             df10['f1score'][df10['methods'] == '3c'],
             df10['f1score'][df10['methods'] == 'fc'])

F_onewayResult(statistic=6.352906547787905, pvalue=0.002232997352526722)

In [27]:
stats.f_oneway(df30['f1score'][df30['methods'] == '1c'], 
             df30['f1score'][df30['methods'] == '3c'],
             df30['f1score'][df30['methods'] == 'fc'])

F_onewayResult(statistic=9.472582128717065, pvalue=0.00012578153876374195)

In [28]:
stats.f_oneway(df60['f1score'][df60['methods'] == '1c'], 
             df60['f1score'][df60['methods'] == '3c'],
             df60['f1score'][df60['methods'] == 'fc'])

F_onewayResult(statistic=11.833643763871235, pvalue=1.4290456768010514e-05)

In [29]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model10 = ols('f1score ~ C(methods)', df10).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model10.df_model: .0f},{model10.df_resid: .0f}) = {model10.fvalue: .3f}, p = {model10.f_pvalue: .4f}")
    model10.summary()

Overall model F( 2, 154) =  6.353, p =  0.0022


0,1,2,3
Dep. Variable:,f1score,R-squared:,0.076
Model:,OLS,Adj. R-squared:,0.064
Method:,Least Squares,F-statistic:,6.353
Date:,"Fri, 01 Nov 2019",Prob (F-statistic):,0.00223
Time:,22:24:04,Log-Likelihood:,-9.6599
No. Observations:,157,AIC:,25.32
Df Residuals:,154,BIC:,34.49
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6523,0.028,23.686,0.000,0.598,0.707
C(methods)[T.3c],0.1613,0.053,3.045,0.003,0.057,0.266
C(methods)[T.fc],0.1359,0.052,2.622,0.010,0.033,0.238

0,1,2,3
Omnibus:,19.892,Durbin-Watson:,2.225
Prob(Omnibus):,0.0,Jarque-Bera (JB):,23.936
Skew:,-0.951,Prob(JB):,6.34e-06
Kurtosis:,3.194,Cond. No.,3.16


In [30]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model30 = ols('f1score ~ C(methods)', df30).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model30.df_model: .0f},{model30.df_resid: .0f}) = {model30.fvalue: .3f}, p = {model30.f_pvalue: .4f}")
    model30.summary()

Overall model F( 2, 170) =  9.473, p =  0.0001


0,1,2,3
Dep. Variable:,f1score,R-squared:,0.1
Model:,OLS,Adj. R-squared:,0.09
Method:,Least Squares,F-statistic:,9.473
Date:,"Fri, 01 Nov 2019",Prob (F-statistic):,0.000126
Time:,22:24:05,Log-Likelihood:,-11.635
No. Observations:,173,AIC:,29.27
Df Residuals:,170,BIC:,38.73
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6247,0.027,23.568,0.000,0.572,0.677
C(methods)[T.3c],0.1753,0.054,3.255,0.001,0.069,0.282
C(methods)[T.fc],0.1732,0.047,3.678,0.000,0.080,0.266

0,1,2,3
Omnibus:,17.93,Durbin-Watson:,2.152
Prob(Omnibus):,0.0,Jarque-Bera (JB):,21.065
Skew:,-0.854,Prob(JB):,2.67e-05
Kurtosis:,3.045,Cond. No.,3.22


In [31]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model60 = ols('f1score ~ C(methods)', df60).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model60.df_model: .0f},{model60.df_resid: .0f}) = {model60.fvalue: .3f}, p = {model60.f_pvalue: .4f}")
    model10.summary()

Overall model F( 2, 191) =  11.834, p =  0.0000


0,1,2,3
Dep. Variable:,f1score,R-squared:,0.076
Model:,OLS,Adj. R-squared:,0.064
Method:,Least Squares,F-statistic:,6.353
Date:,"Fri, 01 Nov 2019",Prob (F-statistic):,0.00223
Time:,22:24:06,Log-Likelihood:,-9.6599
No. Observations:,157,AIC:,25.32
Df Residuals:,154,BIC:,34.49
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6523,0.028,23.686,0.000,0.598,0.707
C(methods)[T.3c],0.1613,0.053,3.045,0.003,0.057,0.266
C(methods)[T.fc],0.1359,0.052,2.622,0.010,0.033,0.238

0,1,2,3
Omnibus:,19.892,Durbin-Watson:,2.225
Prob(Omnibus):,0.0,Jarque-Bera (JB):,23.936
Skew:,-0.951,Prob(JB):,6.34e-06
Kurtosis:,3.194,Cond. No.,3.16


In [32]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison

mc = MultiComparison(df10['f1score'], df10['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
    1c     3c   0.1613 0.0077  0.0359 0.2866   True
    1c     fc   0.1359 0.0259  0.0132 0.2586   True
    3c     fc  -0.0254    0.9 -0.1746 0.1238  False
---------------------------------------------------


In [34]:
mc = MultiComparison(df30['f1score'], df30['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
    1c     3c   0.1753 0.0039   0.048 0.3027   True
    1c     fc   0.1732  0.001  0.0619 0.2845   True
    3c     fc  -0.0021    0.9 -0.1462 0.1419  False
---------------------------------------------------


In [35]:
mc = MultiComparison(df60['f1score'], df60['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
    1c     3c   0.1619 0.0027  0.0479 0.2758   True
    1c     fc   0.1902  0.001  0.0877 0.2927   True
    3c     fc   0.0283 0.8449 -0.0994  0.156  False
---------------------------------------------------


# Analysis <font color=blue>without </font> combining RF, DT and SVM into 1C 10 min

In [None]:
df = pd.read_excel(r"C:\Users\HassanEldeeb\Documents\GitHub\AutoMLBenchmarking\logs_search_space/skout.xlsx")
df = df[['time_budget', 'methods', 'f1score']]
df.methods = df.methods.replace("['adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees', 'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc', 'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd']", "fc")
df.methods = df.methods.replace("['decision_tree', 'libsvm_svc', 'random_forest']", "3c")
df.methods = df.methods.replace("['libsvm_svc']", "svc")
df.methods = df.methods.replace("['decision_tree']", "dt")
df.methods = df.methods.replace("['random_forest']", "rf")
df10 = df[df.time_budget==10]
df30 = df[df.time_budget==30]
df60 = df[df.time_budget==60]
df.shape

In [None]:
rp.summary_cont(df10['f1score'])

In [None]:
rp.summary_cont(df30['f1score'])

In [None]:
rp.summary_cont(df60['f1score'])

In [None]:
stats.f_oneway(df10['f1score'][df10['methods'] == '1c'], 
             df10['f1score'][df10['methods'] == '3c'],
             df10['f1score'][df10['methods'] == 'fc'])

In [None]:
stats.f_oneway(df30['f1score'][df30['methods'] == '1c'], 
             df30['f1score'][df30['methods'] == '3c'],
             df30['f1score'][df30['methods'] == 'fc'])

In [None]:
stats.f_oneway(df60['f1score'][df60['methods'] == '1c'], 
             df60['f1score'][df60['methods'] == '3c'],
             df60['f1score'][df60['methods'] == 'fc'])

In [None]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model10 = ols('f1score ~ C(methods)', df10).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model10.df_model: .0f},{model10.df_resid: .0f}) = {model10.fvalue: .3f}, p = {model10.f_pvalue: .4f}")
    model10.summary()

In [None]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model30 = ols('f1score ~ C(methods)', df30).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model30.df_model: .0f},{model30.df_resid: .0f}) = {model30.fvalue: .3f}, p = {model30.f_pvalue: .4f}")
    model30.summary()

In [None]:
    # Fits the model with the interaction term
    # This will also automatically include the main effects for each factor
    model60 = ols('f1score ~ C(methods)', df60).fit()

    # Seeing if the overall model is significant
    print(f"Overall model F({model60.df_model: .0f},{model60.df_resid: .0f}) = {model60.fvalue: .3f}, p = {model60.f_pvalue: .4f}")
    model10.summary()

In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison

mc = MultiComparison(df10['f1score'], df10['methods'])
mc_results = mc.tukeyhsd()
print(mc_results)

In [None]:
mc = MultiComparison(df30['f1score'], df30['methods'])
mc_results = mc.tukeyhsd(model30.f_pvalue)
print(mc_results)

In [None]:
mc = MultiComparison(df60['f1score'], df60['methods'])
mc_results = mc.tukeyhsd(model60.f_pvalue)
print(mc_results)