In [None]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm, tnrange, tqdm_notebook
import seaborn as sns
import os

# Locations: pickled scores are read from `results_folder`, figures go to `save_folder`.
results_folder = 'results/'
save_folder = 'figures/'

# Make sure the output directory for figures is present.
if not os.path.exists(save_folder):
    os.makedirs(save_folder)

# Task ids that are flagged as binary tasks further down in the notebook.
binary_task_ids = [
    189865, 167200, 126026, 189860, 75127, 189862,
    75105, 168798, 126029, 168796, 167190, 189866,
    167104, 167083, 167184, 126025, 75097, 167181,
    168797, 189861, 167161, 167149,
]

# Task ids that get the multiclass colour in the palette below.
multiclass_task_ids = [
    168794, 189873, 167152, 189909, 189872,
    168795,  # This one has fewer than 10 instances of a class
    167185, 189905, 189874, 75193, 167168, 168793,
    189906, 167201, 189908, 168792, 189871,
]

# One colour per task id: blue for the binary list, red for the multiclass list
# (a task id present in both lists would end up red).
taskid_palette = {
    **dict.fromkeys(binary_task_ids, 'blue'),
    **dict.fromkeys(multiclass_task_ids, 'red'),
}

# Map the scorer-specific experiment names onto two shared display labels.
rename_exp_keys = {
    'tpot_untimed_30_gen_neg_log_loss': 'TPOT1',
    'tpot2_untimed_30_gen_neg_log_loss': 'TPOT2',
    'tpot_untimed_30_gen_roc_auc': 'TPOT1',
    'tpot2_untimed_30_gen_roc_auc': 'TPOT2',
}

In [None]:
def load_scores(save_folder):
    """Collect every ``scores.pkl`` found under ``save_folder`` into one DataFrame.

    Parameters
    ----------
    save_folder : str
        Root directory that is searched recursively for files named
        ``scores.pkl``.

    Returns
    -------
    pandas.DataFrame
        Built with ``DataFrame.from_records`` from the successfully unpickled
        objects, one record per file.

    Notes
    -----
    * Files that cannot be unpickled are reported on stdout and skipped, so a
      single corrupt result does not abort the whole load.
    * Side effects: pandas' ``display.max_rows`` option is set to ``None``
      globally (so the per-experiment counts print in full), and the number
      of records per ``exp_name`` is printed.
    * SECURITY: ``pickle.load`` can execute arbitrary code. Only point this
      function at result folders you trust.
    """
    save_files = glob.glob(save_folder + '/**/scores.pkl', recursive=True)

    results = []
    for save_file in tqdm(save_files):
        try:
            # Context manager guarantees the handle is closed even if unpickling fails.
            with open(save_file, 'rb') as fh:
                results.append(pickle.load(fh))
        except Exception as err:
            # Best effort: report and continue. `Exception` (not a bare except)
            # lets KeyboardInterrupt / SystemExit propagate.
            print('Error loading file: ', save_file, repr(err))

    df = pd.DataFrame.from_records(results)
    del results  # drop the raw records before the summary below

    # Show all rows so the per-experiment counts are not truncated.
    pd.set_option('display.max_rows', None)
    print(df.groupby(['exp_name']).size())

    return df

# Load every stored result and derive the columns used by the plots below.
df = load_scores(results_folder)

# Flag rows whose task id is in the binary list; all other rows are handled
# as the non-binary case below.
df['binary'] = df['taskid'].isin(binary_task_ids)

# Rescale duration by 1/60 (presumably seconds -> minutes; confirm against the
# code that writes scores.pkl).
df['duration'] = df['duration'] / 60.0

# Single score column: AUROC for binary rows, negated log loss otherwise.
# Vectorized with np.where instead of a row-wise apply; it yields the same
# values and also works on an empty frame.
df['selected_score'] = np.where(df['binary'], df['auroc'], -df['logloss'])

# Collapse scorer-specific experiment names onto their shared labels.
df['grouped_exp_name'] = df['exp_name'].replace(rename_exp_keys)

df

In [None]:
def make_scatter_and_diff_plot(df, m1, m2, metric, x='grouped_exp_name', log_scale=False, save_folder=None, name=None, binary_hue=False, binary_task_ids=binary_task_ids):
    """Compare two groups task-by-task on their mean ``metric``.

    Draws one figure with two panels: a scatter of the per-task mean of
    ``metric`` for ``m1`` (x axis) against ``m2`` (y axis) with a red line
    through (0, 0) and (1, 1), and a histogram of the per-task difference
    ``m2 - m1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'taskid'``, ``x`` and ``metric``.
    m1, m2 : str
        Values of column ``x`` to compare; they become columns of the pivot.
    metric : str
        Column that is averaged per (task, group).
    x : str
        Column that identifies the group.
    log_scale : bool
        Use log axes on the scatter panel.
    save_folder : str or None
        When given, the figure is written to
        ``{save_folder}/{name}_{metric}_scatter.svg``.
    name : str or None
        Prefix of the saved file name.
    binary_hue : bool
        Colour scatter points by whether the task id is in ``binary_task_ids``.
    binary_task_ids : list
        Task ids flagged as binary for the hue.

    Returns
    -------
    None
    """
    # Per-(task, group) summary. groupby/pivot build new frames, so the
    # caller's DataFrame is never modified and no defensive copy is needed.
    df_summary = df.groupby(['taskid', x])[metric].describe().reset_index()

    # One row per task, one column per group. 'mean' as a string selects the
    # same aggregation as np.mean without the pandas FutureWarning.
    piv = df_summary.pivot_table(index=x, columns='taskid', values='mean', aggfunc='mean').T.reset_index()
    piv['binary'] = piv['taskid'].isin(binary_task_ids)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 4))

    hue = 'binary' if binary_hue else None
    sns.scatterplot(data=piv, x=m1, y=m2, hue=hue, ax=ax1)
    # y = x reference line: points above it are tasks where m2's mean exceeds m1's.
    ax1.axline([0, 0], [1, 1], c='r')
    ax1.set_aspect('equal', adjustable='box')
    ax1.set_xlabel(m1)
    ax1.set_ylabel(m2)
    ax1.set_title(f'Mean {metric}')
    if log_scale:
        ax1.set_yscale('log')
        ax1.set_xscale('log')
    if binary_hue:
        # Place the legend outside the axes.
        ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    diffs = piv[m2] - piv[m1]
    ax2.hist(diffs)
    ax2.set_xlabel(f'{m2} - {m1} Mean {metric}')
    ax2.set_ylabel('count')
    fig.tight_layout()

    if save_folder is not None:
        fig.savefig(f'{save_folder}/{name}_{metric}_scatter.svg', bbox_inches='tight')


# Binary tasks only: TPOT1 vs TPOT2 on mean AUROC.
make_scatter_and_diff_plot(
    df[df['binary']],
    m1='TPOT1',
    m2='TPOT2',
    metric='auroc',
    log_scale=False,
    save_folder=save_folder,
    name='Binary',
)

# Log-loss comparison on the non-binary tasks (currently disabled).
#make_scatter_and_diff_plot(df[df['binary']==False], m1 = 'TPOT1', m2 = 'TPOT2',  metric='logloss', log_scale=True, save_folder=save_folder, name='Multiclass')

# All tasks: TPOT1 vs TPOT2 on mean duration, coloured by the binary flag.
make_scatter_and_diff_plot(
    df,
    m1='TPOT1',
    m2='TPOT2',
    metric='duration',
    log_scale=False,
    save_folder=save_folder,
    name='duration',
    binary_hue=True,
)

In [None]:
def _save_figure(fig, save_folder, filename):
    """Write ``fig`` to ``{save_folder}/{filename}`` unless ``save_folder`` is None."""
    if save_folder is not None:
        fig.savefig(f'{save_folder}/{filename}', bbox_inches='tight')


def _overlay_strip_plot(df, df_summary, metric, x, hue, order, hue_order, log_scale, palette=None):
    """Strip plot of the raw ``metric`` values with the group means drawn on top as diamonds."""
    fig, ax = plt.subplots()
    shared = dict(x=x, hue=hue, order=order, hue_order=hue_order, palette=palette,
                  ax=ax, dodge=True, jitter=False)
    sns.stripplot(y=metric, data=df, alpha=0.5, size=5, **shared)
    sns.stripplot(y='mean', data=df_summary, marker='D', size=6, **shared)
    ax.set_ylabel(metric, fontsize=16)
    ax.set_xlabel('')
    if log_scale:
        ax.set_yscale('log')
    # Keep only the first half of the legend entries so the two overlaid
    # layers are not listed twice.
    handles, labels = ax.get_legend_handles_labels()
    n = len(labels) // 2
    ax.legend(handles[:n], labels[:n], bbox_to_anchor=(1.05, 1), loc='upper left')
    return fig


def _summary_strip_plot(df_summary, x, y, ylabel, order, hue_order, palette=None, invert_y=False):
    """Strip plot of one per-(task, group) summary column, hued by task id."""
    fig, ax = plt.subplots()
    sns.stripplot(x=x, y=y, data=df_summary, order=order, hue='taskid',
                  hue_order=hue_order, palette=palette, ax=ax)
    ax.set_ylabel(ylabel, fontsize=16)
    if invert_y:
        ax.invert_yaxis()
    ax.set_xlabel('')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    return fig


def make_strip_plots(df, metric, x='exp_name', log_scale=False, smaller_is_better=True, save_folder=None, name=None, taskid_palette=None):
    """Draw five figures summarising ``metric`` per group (``x``) and per task.

    The figures are, in order: raw values and means grouped by ``x``; raw
    values and means grouped by task; the per-(task, group) standard
    deviation; the per-task dense rank of each group's mean; and a box plot
    of those ranks. Rank axes are inverted so rank 1 is at the top.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'taskid'``, ``x`` and ``metric``. It is copied, and
        ``'taskid'`` is cast to ``str`` on the copy.
    metric : str
        Column to plot.
    x : str
        Column identifying the group.
    log_scale : bool
        Use a log y axis on the two raw-value figures.
    smaller_is_better : bool
        Passed as ``ascending`` to the rank, so rank 1 is the smallest mean
        when True and the largest mean when False.
    save_folder : str or None
        When given, figures are saved there as
        ``{name}_{metric}_by_exp_name.svg``, ``{name}_{metric}_by_taskid.svg``,
        ``{name}_std.svg``, ``{name}_rank.svg`` and ``{name}_rank_boxplot.svg``.
    name : str or None
        Prefix of the saved file names.
    taskid_palette : dict or other seaborn palette, optional
        Colours for the figures whose hue is the task id. Dict keys are
        converted with ``str`` to match the string task ids used for
        plotting. Task ids without an entry are drawn in ``'gray'``. Any
        non-dict value is passed to seaborn unchanged.

    Returns
    -------
    None
    """
    df = df.copy(deep=True)
    df['taskid'] = df['taskid'].astype(str)

    df_summary = df.groupby(['taskid', x])[metric].describe().reset_index()
    # Dense rank of each group's mean within a task.
    df_summary['mean_rank'] = df_summary.groupby(['taskid'])['mean'].rank(ascending=smaller_is_better, method='dense')

    # Groups ordered by their mean-of-means (descending sort, then reversed).
    mean_sorted_exp_names = df_summary.groupby([x])['mean'].describe().reset_index().sort_values('mean', ascending=False)[x].values.tolist()[::-1]
    # Tasks ordered by their mean-of-means, largest first.
    mean_sorted_task_id = df_summary.groupby(['taskid'])['mean'].describe().reset_index().sort_values('mean', ascending=False)['taskid'].values.tolist()

    # Normalise the palette: task ids are strings here, and seaborn requires a
    # dict palette to cover every hue level.
    palette = None
    if isinstance(taskid_palette, dict):
        colours_by_str_id = {str(task_id): colour for task_id, colour in taskid_palette.items()}
        palette = {task_id: colours_by_str_id.get(task_id, 'gray') for task_id in mean_sorted_task_id}
    elif taskid_palette is not None:
        palette = taskid_palette

    # mean scores by exp_name
    fig = _overlay_strip_plot(df, df_summary, metric, x=x, hue='taskid',
                              order=mean_sorted_exp_names, hue_order=mean_sorted_task_id,
                              log_scale=log_scale, palette=palette)
    _save_figure(fig, save_folder, f'{name}_{metric}_by_exp_name.svg')

    # mean scores by taskid (hue is the group here, so the task palette does not apply)
    fig = _overlay_strip_plot(df, df_summary, metric, x='taskid', hue=x,
                              order=mean_sorted_task_id, hue_order=mean_sorted_exp_names,
                              log_scale=log_scale)
    _save_figure(fig, save_folder, f'{name}_{metric}_by_taskid.svg')

    # std
    fig = _summary_strip_plot(df_summary, x=x, y='std', ylabel=f'{metric} STD',
                              order=mean_sorted_exp_names, hue_order=mean_sorted_task_id,
                              palette=palette)
    _save_figure(fig, save_folder, f'{name}_std.svg')

    # rank
    fig = _summary_strip_plot(df_summary, x=x, y='mean_rank', ylabel=f'{metric} Rank',
                              order=mean_sorted_exp_names, hue_order=mean_sorted_task_id,
                              palette=palette, invert_y=True)
    _save_figure(fig, save_folder, f'{name}_rank.svg')

    # rank box plot
    fig, ax = plt.subplots()
    sns.boxplot(x=x, y='mean_rank', data=df_summary, order=mean_sorted_exp_names, ax=ax)
    ax.set_ylabel(f'{metric} Rank', fontsize=16)
    ax.invert_yaxis()
    ax.set_xlabel('')
    _save_figure(fig, save_folder, f'{name}_rank_boxplot.svg')


# Binary tasks: AUROC per grouped experiment (larger is better).
make_strip_plots(
    df[df['binary']],
    x='grouped_exp_name',
    metric='auroc',
    log_scale=False,
    smaller_is_better=False,
    save_folder=save_folder,
    name='Binary',
)
# Remaining tasks: log loss per grouped experiment on a log axis (smaller is better).
make_strip_plots(
    df[~df['binary']],
    x='grouped_exp_name',
    metric='logloss',
    log_scale=True,
    smaller_is_better=True,
    save_folder=save_folder,
    name='Multiclass',
)

In [None]:
# All tasks together on the combined `selected_score` column (larger is better).
make_strip_plots(
    df,
    metric='selected_score',
    x='grouped_exp_name',
    log_scale=False,
    smaller_is_better=False,
    save_folder=save_folder,
    name='Both',
    taskid_palette=taskid_palette,
)

In [None]:
from itertools import product

def make_wins_heatmap(df, metric, save_folder=None, rel_margin=0.01):
    """Heatmap of how often one algorithm's mean ``metric`` beats another's.

    For every ordered pair (row, col) of values in ``'grouped_exp_name'``,
    counts the tasks on which the row algorithm's mean ``metric`` is at least
    the column algorithm's mean plus a relative margin::

        row >= col + abs(col) * rel_margin

    Larger ``metric`` values therefore count as wins. Tasks where either mean
    is missing never count, because comparisons with NaN are False.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'taskid'``, ``'grouped_exp_name'`` and ``metric``.
    metric : str
        Column that is averaged per (task, algorithm).
    save_folder : str or None
        When given, the figure is saved as ``{save_folder}/{metric}_wins.svg``.
    rel_margin : float
        Relative margin a win must exceed (default 0.01, i.e. 1%). The
        previous hard-coded factor of 1.01 required the row to exceed the
        column by 101% of its magnitude, which made wins practically
        impossible.

    Returns
    -------
    None
    """
    fig = plt.figure(figsize=(10, 10))

    algorithms = df['grouped_exp_name'].unique()

    # Mean metric per (task, algorithm), pivoted to algorithms x tasks.
    df_summary = df.groupby(['taskid', 'grouped_exp_name'])[metric].describe().reset_index()
    df_piv = df_summary.pivot(index='grouped_exp_name', columns='taskid', values='mean')

    wins = pd.DataFrame(0, index=algorithms, columns=algorithms, dtype=int)
    for row_alg, col_alg in product(algorithms, repeat=2):
        col_means = df_piv.loc[col_alg]
        # abs() keeps the margin pointing "upwards" for negative scores too.
        threshold = col_means + col_means.abs() * rel_margin
        wins.loc[row_alg, col_alg] = int((df_piv.loc[row_alg] >= threshold).sum())

    ax = sns.heatmap(data=wins, annot=True, cmap='Blues', cbar=False, square=True, annot_kws={'size': 16})
    ax.xaxis.tick_top()  # x axis on top
    ax.xaxis.set_label_position('top')
    plt.xticks(rotation=90)
    ax.tick_params(axis='both', which='major', labelsize=16)
    plt.yticks(rotation=0)
    fig.suptitle(f'Mean {metric}: Count of where row>=col+abs(col*{rel_margin:g})', fontsize=16)
    if save_folder is not None:
        plt.savefig(f'{save_folder}/{metric}_wins.svg', bbox_inches='tight')
    
# Pairwise win counts between the grouped experiments on `selected_score`.
make_wins_heatmap(
    df,
    metric='selected_score',
    save_folder=save_folder,
)