# Wilcoxon Test for Accuracy

In [1]:
from numpy.random import seed
from numpy.random import randn
from scipy.stats import wilcoxon
import pandas as pd
import numpy as np
import itertools

In [2]:
# Wilcoxon signed-rank test

class MyWilcoxon:
    def __init__(self, sheet_path=r'./../Complete_Sheet.xlsx',
                sheet_names=['10 Min', '30 Min', '60 Min', '240 Min', 'meta-features']):
        
        
        self.sheet = pd.read_excel(sheet_path,
                              na_values=['', 'NA', 'NAN', 'NaN', 'Nan', 'NA\n','0', '0.0', 'None'],
                              sheet_name=sheet_names)
        
        
        
    def calc_wilcoxon(self):
        for t in ['10 Min', '30 Min', '60 Min', '240 Min']:
            self.sheet[t].rename(columns={"sklearn_e_accuracy_mean": "sklearn-e_accuracy_mean",
                                            "sklearn_m_accuracy_mean": "sklearn-m_accuracy_mean",
                                            "sklearn_v_accuracy_mean": "sklearn-v_accuracy_mean",
                                             "smartml_e_valid_acc": "smartml-e_valid_acc"}, inplace=True)
        result = pd.DataFrame(columns = ['Factor_1', 'Factor_2', 'Time_Budget_1', 'Time_Budget_2',
                                         'p_value', 'stat', 'Statistically_Better'])
        cols = ['sklearn_accuracy_mean', 'sklearn-e_accuracy_mean', 'sklearn-m_accuracy_mean', 'sklearn-v_accuracy_mean',
                'autoweka_accuracy_mean', 'recipe_valid_acc', 'smartml_valid_acc', 'smartml-e_valid_acc', 'tpot_accuracy_mean', 'atm_acc']
        for t in itertools.product(['10 Min', '30 Min', '60 Min', '240 Min'], ['10 Min', '30 Min', '60 Min', '240 Min']):
            for col in itertools.product(cols, cols):
                if (t[0]== t[1] and col[0] != col[1]) or ((t[0]!= t[1] and col[0] == col[1])):
                    data1 = self.sheet[t[0]][col[0]]
                    data2 = self.sheet[t[1]][col[1]]
                    condition = (data1.notna()) & (data2.notna())
                    data1 = data1[condition]
                    data2 = data2[condition]
                    
                    stat, p = wilcoxon(data1, data2)
                    if p <= 0.05:
                        if data1.mean() > data2.mean():
                            better = '1'
                        elif data2.mean() > data1.mean():
                            better = '2'
                        else:
                            better = 'None'
                    else:
                        better = 'None'
                    new_row = {'Factor_1': col[0].split('_')[0], 'Factor_2': col[1].split('_')[0], 'Time_Budget_1':t[0], 'Time_Budget_2':t[1],
                                'p_value': p, 'stat': stat, 'Statistically_Better': better}
                    result = result.append(new_row, ignore_index=True)
        return result
    
    def calc_wilcoxon_time_budgets(self):
        for t in ['10 Min', '30 Min', '60 Min', '240 Min']:
            self.sheet[t].rename(columns={"sklearn_e_accuracy_mean": "sklearn-e_accuracy_mean",
                                            "sklearn_m_accuracy_mean": "sklearn-m_accuracy_mean",
                                            "sklearn_v_accuracy_mean": "sklearn-v_accuracy_mean",
                                             "smartml_e_valid_acc": "smartml-e_valid_acc"}, inplace=True)
        result = pd.DataFrame(columns = ['Factor_1', 'Factor_2', 'Time_Budget_1', 'Time_Budget_2',
                                         'p_value', 'stat', 'avg_diff', 'Statistically_Better'])
        cols = ['sklearn_accuracy_mean', 'sklearn-e_accuracy_mean', 'sklearn-m_accuracy_mean', 'sklearn-v_accuracy_mean',
                'autoweka_accuracy_mean', 'recipe_valid_acc', 'smartml_valid_acc', 'smartml-e_valid_acc', 'tpot_accuracy_mean', 'atm_acc']
        t = ['10 Min', '30 Min', '60 Min', '240 Min']
        for t1 in range(4):
            for t2 in range(t1):
                for col in cols:
                    #print('{} - {}'.format(t[t1], t[t2]))
                    data1 = self.sheet[t[t1]][col]
                    data2 = self.sheet[t[t2]][col]
                    condition = (data1.notna()) & (data2.notna())
                    data1 = data1[condition]
                    data2 = data2[condition]

                    stat, p = wilcoxon(data1, data2)
                    avg_diff = data1.mean() - data2.mean()
                    if p <= 0.05:
                        if avg_diff > 0:
                            better = '1'
                        elif avg_diff < 0:
                            better = '2'
                        else:
                            better = 'None'
                    else:
                        better = 'None'
                    new_row = {'Factor_1': col.split('_')[0], 'Factor_2': col.split('_')[0], 'Time_Budget_1':t[t1], 'Time_Budget_2':t[t2],
                                'p_value': p, 'stat': stat, 'avg_diff': avg_diff, 'Statistically_Better': better}
                    result = result.append(new_row, ignore_index=True)
        return result
            
        
    def calc_wilcoxon_tools_comparison(self):
        for t in ['10 Min', '30 Min', '60 Min', '240 Min']:
            self.sheet[t].rename(columns={"sklearn_e_accuracy_mean": "sklearn-e_accuracy_mean",
                                            "sklearn_m_accuracy_mean": "sklearn-m_accuracy_mean",
                                            "sklearn_v_accuracy_mean": "sklearn-v_accuracy_mean",
                                             "smartml_e_valid_acc": "smartml-e_valid_acc"}, inplace=True)
        result = pd.DataFrame(columns = ['Factor_1', 'Factor_2', 'Time_Budget_1', 'Time_Budget_2',
                                         'p_value', 'stat', 'avg_diff', 'Statistically_Better'])
        cols = ['atm_acc', 'autoweka_accuracy_mean', 'recipe_valid_acc', 'sklearn-e_accuracy_mean', 'sklearn-m_accuracy_mean',
                'sklearn-v_accuracy_mean', 'sklearn_accuracy_mean', 'smartml_valid_acc', 'smartml-e_valid_acc', 'tpot_accuracy_mean']
        t = ['10 Min', '30 Min', '60 Min', '240 Min']
        for t1 in range(4):
            for col1 in cols:
                for col2 in cols:
                    if col1 == col2:
                        continue
                    #print('{} - {}'.format(t[t1], t[t2]))
                    data1 = self.sheet[t[t1]][col1]
                    data2 = self.sheet[t[t1]][col2]
                    condition = (data1.notna()) & (data2.notna())
                    data1 = data1[condition]
                    data2 = data2[condition]
                    
                    stat, p = wilcoxon(data1, data2)
                    avg_diff = data1.mean() - data2.mean()
                    if p <= 0.05:
                        if avg_diff > 0:
                            better = '1'
                        elif avg_diff < 0:
                            better = '2'
                        else:
                            better = 'None'
                    else:
                        better = 'None'
                    new_row = {'Factor_1': col1.split('_')[0], 'Factor_2': col2.split('_')[0], 'Time_Budget_1':t[t1], 'Time_Budget_2':t[t1],
                                'p_value': p, 'stat': stat, 'avg_diff': avg_diff, 'Statistically_Better': better}
                    result = result.append(new_row, ignore_index=True)
        return result


In [3]:
wil = MyWilcoxon()

In [4]:
result_tools = wil.calc_wilcoxon_tools_comparison()



In [18]:
result_tools[(result_tools.Factor_1=='smartml-e') & (result_tools.Factor_2=='smartml')]

Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
79,smartml-e,smartml,10 Min,10 Min,0.01369,1200.0,0.026702,1
169,smartml-e,smartml,30 Min,30 Min,0.014637,1142.0,0.027287,1
259,smartml-e,smartml,60 Min,60 Min,0.046572,1271.0,0.014798,1
349,smartml-e,smartml,240 Min,240 Min,0.030532,1169.0,0.011443,1


In [6]:
# Function to convert   
def listToString(s):  
    
    # initialize an empty string 
    str1 = ""  
    
    # traverse in the string   
    for ele in s:  
        str1 += str(ele)
        str1 += '&'
    
    # return string   
    return str1  

In [7]:
tools = ['atm','autoweka', 'recipe', 'sklearn-e', 'sklearn-m', 'sklearn-v', 'sklearn', 'smartml', 'smartml-e', 'tpot']
tb = '240 Min'
#tool = 1
for tool in range(len(tools)):
    print(tools[tool] + ' & ')
    l1 = list(result_tools[(result_tools.Time_Budget_1 == tb) & (result_tools.Factor_1 == tools[tool])].round(3).p_value)
    l1.insert(tool, '\cellcolor{black!25}{}')
    l2 = list(result_tools[(result_tools.Time_Budget_1 == tb) & (result_tools.Factor_1 == tools[tool])].round(3).Statistically_Better)
    l2.insert(tool, '\cellcolor{black!25}{}')
    for i in range(len(l1)):
        if l2[i] == '1':
            l1[i] = '\\cellcolor{blue!25}\\textbf{' + str(l1[i]) + '}'
    print(listToString(l1)[:-1] + r'\\')
    print('\\hline')
    #print(listToString(l2))

atm & 
\cellcolor{black!25}{}&0.065&0.101&0.535&0.877&0.788&0.561&\cellcolor{blue!25}\textbf{0.001}&\cellcolor{blue!25}\textbf{0.017}&0.092\\
\hline
autoweka & 
0.065&\cellcolor{black!25}{}&0.893&0.0&0.0&0.002&0.0&0.243&0.892&0.0\\
\hline
recipe & 
0.101&0.893&\cellcolor{black!25}{}&0.0&0.002&0.0&0.0&0.052&0.383&0.0\\
\hline
sklearn-e & 
0.535&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{black!25}{}&\cellcolor{blue!25}\textbf{0.006}&\cellcolor{blue!25}\textbf{0.016}&0.477&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{blue!25}\textbf{0.001}&0.139\\
\hline
sklearn-m & 
0.877&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{blue!25}\textbf{0.002}&0.006&\cellcolor{black!25}{}&0.957&0.0&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{blue!25}\textbf{0.002}&0.001\\
\hline
sklearn-v & 
0.788&\cellcolor{blue!25}\textbf{0.002}&\cellcolor{blue!25}\textbf{0.0}&0.016&0.957&\cellcolor{black!25}{}&0.0&\cellcolor{blue!25}\textbf{0.0}&\cellcolor{blue!25}\textbf{0.003}&0.0\\
\hline
skle

In [8]:
tpot10 = wil.sheet['10 Min'].loc[:,['dataset', 'tpot_accuracy_mean']]
tpot30 = wil.sheet['30 Min'].loc[:,['dataset', 'tpot_accuracy_mean']]
tpot60 = wil.sheet['60 Min'].loc[:,['dataset', 'tpot_accuracy_mean']]
tpot240 = wil.sheet['240 Min'].loc[:,['dataset', 'tpot_accuracy_mean']]
condition = (tpot10.tpot_accuracy_mean.notna()) & (tpot30.tpot_accuracy_mean.notna())
tpot10 = tpot10[condition]
tpot30 = tpot30[condition]
tpot60 = tpot60[condition]
tpot240 = tpot240[condition]
print(tpot10.mean(), tpot30.mean(), tpot60.mean(), tpot240.mean())

tpot_accuracy_mean    0.891778
dtype: float64 tpot_accuracy_mean    0.901203
dtype: float64 tpot_accuracy_mean    0.899061
dtype: float64 tpot_accuracy_mean    0.904069
dtype: float64


In [9]:
s30 = wil.sheet['30 Min'].loc[:,['dataset', 'sklearn_v_accuracy_mean']]
s240 = wil.sheet['240 Min'].loc[:,['dataset', 'sklearn_v_accuracy_mean']]
display(s30)
condition = (s240.sklearn_v_accuracy_mean.notna()) & (s30.sklearn_v_accuracy_mean.notna())
s30 = s30[condition]
s240 = s240[condition]
#print(s30.mean(), s240.mean())
result = pd.concat([s30, s240], axis=1, sort=False)
result

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['sklearn_v_accuracy_mean'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [10]:
result = wil.calc_wilcoxon()
result_tb = wil.calc_wilcoxon_time_budgets()



In [11]:
result[((result.Statistically_Better == '1') ^ (result.Statistically_Better == 'None')) & (((result.Factor_1=='sklearn-v') & (result.Factor_2=='sklearn')) ^ ((result.Factor_2=='sklearn-v') & (result.Factor_1=='sklearn')))].round(3)
#result[(result.Statistically_Better == '1') & ((result.Factor_1=='sklearn-v') ^ (result.Factor_2=='sklearn-v'))].round(3)

Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,Statistically_Better
2,sklearn,sklearn-v,10 Min,10 Min,0.0,853.0,1
132,sklearn,sklearn-v,30 Min,30 Min,0.002,1018.0,1
262,sklearn,sklearn-v,60 Min,60 Min,0.0,944.0,1
392,sklearn,sklearn-v,240 Min,240 Min,0.0,760.0,1


In [12]:
#display(result_tb[result_tb.Factor_1 == result_tb.Factor_1.unique()[0]].round(3))
for tool in result_tb.Factor_1.unique():
    display(result_tb[result_tb.Factor_1 == tool].round(3))

Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
0,sklearn,sklearn,30 Min,10 Min,0.353,1283.5,0.003,
10,sklearn,sklearn,60 Min,10 Min,0.0,891.5,0.009,1.0
20,sklearn,sklearn,60 Min,30 Min,0.019,1195.0,0.005,1.0
30,sklearn,sklearn,240 Min,10 Min,0.001,1103.0,0.014,1.0
40,sklearn,sklearn,240 Min,30 Min,0.002,1018.0,0.011,1.0
50,sklearn,sklearn,240 Min,60 Min,0.119,1435.0,0.005,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
1,sklearn-e,sklearn-e,30 Min,10 Min,0.0,770.0,0.007,1.0
11,sklearn-e,sklearn-e,60 Min,10 Min,0.0,960.0,0.011,1.0
21,sklearn-e,sklearn-e,60 Min,30 Min,0.677,1815.5,0.004,
31,sklearn-e,sklearn-e,240 Min,10 Min,0.0,1053.0,0.013,1.0
41,sklearn-e,sklearn-e,240 Min,30 Min,0.039,1497.5,0.006,1.0
51,sklearn-e,sklearn-e,240 Min,60 Min,0.272,1694.0,0.002,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
2,sklearn-m,sklearn-m,30 Min,10 Min,0.211,669.0,0.004,
12,sklearn-m,sklearn-m,60 Min,10 Min,0.198,992.0,0.004,
22,sklearn-m,sklearn-m,60 Min,30 Min,0.956,1164.0,0.0,
32,sklearn-m,sklearn-m,240 Min,10 Min,0.1,1380.5,0.008,
42,sklearn-m,sklearn-m,240 Min,30 Min,0.616,1593.0,0.004,
52,sklearn-m,sklearn-m,240 Min,60 Min,0.398,1557.0,0.004,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
3,sklearn-v,sklearn-v,30 Min,10 Min,0.242,1012.0,0.005,
13,sklearn-v,sklearn-v,60 Min,10 Min,0.004,984.5,0.007,1.0
23,sklearn-v,sklearn-v,60 Min,30 Min,0.141,1279.0,0.002,
33,sklearn-v,sklearn-v,240 Min,10 Min,0.0,911.0,0.007,1.0
43,sklearn-v,sklearn-v,240 Min,30 Min,0.027,1391.0,0.002,1.0
53,sklearn-v,sklearn-v,240 Min,60 Min,0.112,1465.0,0.0,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
4,autoweka,autoweka,30 Min,10 Min,0.039,271.5,0.007,1.0
14,autoweka,autoweka,60 Min,10 Min,0.253,541.0,0.003,
24,autoweka,autoweka,60 Min,30 Min,0.474,454.0,0.001,
34,autoweka,autoweka,240 Min,10 Min,0.002,448.0,0.009,1.0
44,autoweka,autoweka,240 Min,30 Min,0.223,673.0,0.005,
54,autoweka,autoweka,240 Min,60 Min,0.047,293.0,0.004,1.0


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
5,recipe,recipe,30 Min,10 Min,0.272,35.0,-0.031,
15,recipe,recipe,60 Min,10 Min,0.067,56.0,0.014,
25,recipe,recipe,60 Min,30 Min,0.829,167.0,0.007,
35,recipe,recipe,240 Min,10 Min,0.01,60.0,0.026,1.0
45,recipe,recipe,240 Min,30 Min,0.65,196.5,0.005,
55,recipe,recipe,240 Min,60 Min,0.939,135.5,-0.001,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
6,smartml,smartml,30 Min,10 Min,0.636,662.0,0.007,
16,smartml,smartml,60 Min,10 Min,0.832,772.0,0.009,
26,smartml,smartml,60 Min,30 Min,0.597,514.0,0.009,
36,smartml,smartml,240 Min,10 Min,0.121,835.5,0.026,
46,smartml,smartml,240 Min,30 Min,0.05,625.0,0.025,1.0
56,smartml,smartml,240 Min,60 Min,0.071,577.0,0.015,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
7,smartml-e,smartml-e,30 Min,10 Min,0.521,571.0,0.008,
17,smartml-e,smartml-e,60 Min,10 Min,0.589,491.0,0.003,
27,smartml-e,smartml-e,60 Min,30 Min,0.627,849.0,-0.004,
37,smartml-e,smartml-e,240 Min,10 Min,0.092,504.0,0.011,
47,smartml-e,smartml-e,240 Min,30 Min,0.182,896.5,0.004,
57,smartml-e,smartml-e,240 Min,60 Min,0.305,749.0,0.008,


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
8,tpot,tpot,30 Min,10 Min,0.17,154.0,0.009,
18,tpot,tpot,60 Min,10 Min,0.339,186.0,0.008,
28,tpot,tpot,60 Min,30 Min,0.4,443.0,0.001,
38,tpot,tpot,240 Min,10 Min,0.012,151.0,0.013,1.0
48,tpot,tpot,240 Min,30 Min,0.027,289.5,0.006,1.0
58,tpot,tpot,240 Min,60 Min,0.01,482.0,0.004,1.0


Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
9,atm,atm,30 Min,10 Min,0.341,632.0,0.003,
19,atm,atm,60 Min,10 Min,0.488,766.0,-0.004,
29,atm,atm,60 Min,30 Min,0.538,671.0,-0.008,
39,atm,atm,240 Min,10 Min,0.594,900.5,0.003,
49,atm,atm,240 Min,30 Min,0.964,879.0,-0.003,
59,atm,atm,240 Min,60 Min,0.496,879.5,0.005,


In [13]:
result_tb

Unnamed: 0,Factor_1,Factor_2,Time_Budget_1,Time_Budget_2,p_value,stat,avg_diff,Statistically_Better
0,sklearn,sklearn,30 Min,10 Min,0.352714,1283.5,0.003217,
1,sklearn-e,sklearn-e,30 Min,10 Min,1.7e-05,770.0,0.006854,1.0
2,sklearn-m,sklearn-m,30 Min,10 Min,0.210799,669.0,0.003549,
3,sklearn-v,sklearn-v,30 Min,10 Min,0.242449,1012.0,0.004924,
4,autoweka,autoweka,30 Min,10 Min,0.03936,271.5,0.007255,1.0
5,recipe,recipe,30 Min,10 Min,0.271948,35.0,-0.031325,
6,smartml,smartml,30 Min,10 Min,0.635767,662.0,0.007347,
7,smartml-e,smartml-e,30 Min,10 Min,0.520908,571.0,0.007556,
8,tpot,tpot,30 Min,10 Min,0.16973,154.0,0.009425,
9,atm,atm,30 Min,10 Min,0.341317,632.0,0.003224,


In [54]:
cols = ['sklearn_accuracy_mean', 'sklearn-e_accuracy_mean', 'sklearn-m_accuracy_mean', 'sklearn-v_accuracy_mean',
                'autoweka_accuracy_mean', 'recipe_valid_acc', 'smartml_valid_acc', 'smartml-e_valid_acc', 'tpot_accuracy_mean', 'atm_acc']
summary = pd.DataFrame(columns=['time_budget', 'tool', 'accuracy'])
for t in ['10 Min', '30 Min', '60 Min', '240 Min']:
    for c in cols:
        df1= pd.DataFrame({'time_budget': [t]*100, 'tool': [c]*100, 'accuracy': wil.sheet[t][c]})
        summary = pd.concat([summary, df1])
summary

Unnamed: 0,time_budget,tool,accuracy
0,10 Min,sklearn_accuracy_mean,1.000000
1,10 Min,sklearn_accuracy_mean,0.787060
2,10 Min,sklearn_accuracy_mean,0.781333
3,10 Min,sklearn_accuracy_mean,1.000000
4,10 Min,sklearn_accuracy_mean,0.968927
...,...,...,...
95,240 Min,atm_acc,1.000000
96,240 Min,atm_acc,1.000000
97,240 Min,atm_acc,1.000000
98,240 Min,atm_acc,1.000000


In [55]:
import researchpy as rp
s = summary[summary.accuracy!=0 & ((summary.tool == 'sklearn_v_accuracy_mean') ^ (summary.tool == 'sklearn_m_accuracy_mean'))]
rp.summary_cont(s.groupby(['time_budget', 'tool']))['accuracy'].round(3)#.to_latex(index=True)





Unnamed: 0_level_0,Unnamed: 1_level_0,N,Mean,SD,SE,95% Conf.,Interval
time_budget,tool,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10 Min,atm_acc,75,0.888,0.124,0.014,0.86,0.917
10 Min,autoweka_accuracy_mean,86,0.848,0.161,0.017,0.814,0.883
10 Min,recipe_valid_acc,33,0.84,0.176,0.031,0.777,0.902
10 Min,sklearn-e_accuracy_mean,99,0.873,0.139,0.014,0.846,0.901
10 Min,sklearn-m_accuracy_mean,99,0.87,0.144,0.014,0.841,0.898
10 Min,sklearn-v_accuracy_mean,99,0.868,0.145,0.015,0.839,0.897
10 Min,sklearn_accuracy_mean,99,0.873,0.143,0.014,0.845,0.902
10 Min,smartml-e_valid_acc,97,0.831,0.176,0.018,0.795,0.866
10 Min,smartml_valid_acc,89,0.799,0.212,0.022,0.755,0.844
10 Min,tpot_accuracy_mean,43,0.894,0.117,0.018,0.858,0.93


In [19]:
import researchpy as rp
s = summary[summary.accuracy!=0 & ((summary.tool == 'sklearn_v_accuracy_mean') ^ (summary.tool == 'sklearn_m_accuracy_mean'))]
rp.summary_cont(s.groupby(['time_budget', 'tool']))['accuracy'].round(3)[['N', 'Mean', 'SD']].to_latex(index=True)





'\\begin{tabular}{llrrr}\n\\toprule\n       &                    &   N &   Mean &     SD \\\\\ntime\\_budget & tool &     &        &        \\\\\n\\midrule\n10 Min & atm\\_acc &  75 &  0.888 &  0.123 \\\\\n       & autoweka\\_accuracy\\_mean &  86 &  0.848 &  0.161 \\\\\n       & recipe\\_valid\\_acc &  33 &  0.840 &  0.176 \\\\\n       & sklearn-e\\_accuracy\\_mean &  99 &  0.873 &  0.139 \\\\\n       & sklearn-m\\_accuracy\\_mean &  99 &  0.870 &  0.144 \\\\\n       & sklearn-v\\_accuracy\\_mean &  99 &  0.868 &  0.145 \\\\\n       & sklearn\\_accuracy\\_mean &  99 &  0.873 &  0.143 \\\\\n       & smartml\\_valid\\_acc &  89 &  0.799 &  0.212 \\\\\n       & tpot\\_accuracy\\_mean &  43 &  0.894 &  0.117 \\\\\n30 Min & atm\\_acc &  74 &  0.903 &  0.116 \\\\\n       & autoweka\\_accuracy\\_mean &  90 &  0.845 &  0.161 \\\\\n       & recipe\\_valid\\_acc &  69 &  0.855 &  0.144 \\\\\n       & sklearn-e\\_accuracy\\_mean &  99 &  0.880 &  0.136 \\\\\n       & sklearn-m\\_accuracy\\_mean 