In [1]:
# Example analysis for a batch of experiments
# We found the functionality below to be the most useful in practice
# It automatically provides an overview of the trade-offs for each design dimension
# Import from the public IPython.display module: importing these names from
# IPython.core.display is deprecated and emits a DeprecationWarning.
from IPython.display import display, HTML

# Widen the notebook container to 95% of the page so wide figures and tables fit.
display(HTML("<style>.container { width:95% !important; }</style>"))
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import copy
from scipy.stats import rankdata
from matplotlib.ticker import MaxNLocator

# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Global seaborn look: 'ticks' style with the large 'poster' context.
sns.set(style='ticks',context='poster')
# Allow up to 500 columns / rows when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
# Print NumPy arrays with 3 decimals, 200-character lines and fixed-point notation.
np.set_printoptions(precision=3, linewidth=200, suppress=True)


def list_exclude(a, b):
    """Return the items of ``a`` that do not occur in ``b``, keeping their order."""
    kept = []
    for candidate in a:
        if candidate in b:
            continue
        kept.append(candidate)
    return kept

def chunks(lst, n):
    """Lazily yield consecutive slices of ``lst``, each holding at most ``n`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    total = len(lst)
    for start in range(0, total, n):
        stop = start + n
        yield lst[start:stop]
        
# Result-file columns that are never treated as design dimensions:
# bookkeeping/identifier columns first, then every reported metric with its std.
column_exclude_options = [
    'format', 'name', 'task', 'trans', 'feature', 'label',
    'epoch',
    'loss', 'loss_std',
    'params',
    'time_iter', 'time_iter_std',
    'accuracy', 'accuracy_std',
    'precision', 'precision_std',
    'recall', 'recall_std',
    'f1', 'f1_std',
    'auc', 'auc_std',
]

def name_mapping(name):
    """Translate a configuration key into its display label.

    Keys without an entry in the table are returned unchanged.
    """
    # Extend this table with labels for your own custom configuration keys.
    display_names = {
        'act': 'Activation',
        'bn': 'Batch Normalization',
        'drop': 'Dropout',
        'agg': 'Aggregation',
        'l_mp': 'MP layers',
        'l_pre': 'Pre-process layers',
        'l_post': 'Post-process layers',
        'stage': 'Layer connectivity',
        'lr': 'Learning rate',
        'batch': 'Batch size',
        'optim': 'Optimizer',
        'epoch': 'Training epochs',
        'direct': 'Direction',
        'head': 'Multi-task heads',
        'l_final': 'Att final',
        'l_type': 'layer_type',
        'l_finalbn': 'Final BN',
        'task': 'Task',
        'subgraph': 'subgraph',
        'margin': 'margin',
        'order': 'order',
        'norm': 'norm',
    }
    return display_names.get(name, name)

def get_acc(df_pivot, name, ax, plot_type='performance', has_y=True, rank_resolution=0.001, verbose=False):
    """Draw one panel comparing the options of a single design dimension.

    Args:
        df_pivot: Pivot table whose columns are the options of ``name`` and
            whose rows are the remaining configurations; cells hold the metric.
            Missing cells are filled with the minimum of their column.
        name: Key of the design dimension; its display label comes from
            ``name_mapping``.
        ax: Matplotlib axes to draw on.
        plot_type: ``'performance'`` (violin plot of the metric values),
            ``'rank_bar'`` (bar plot of the per-row ranking) or
            ``'rank_violin'`` (violin plot of the per-row ranking). Any other
            value draws nothing; only the font sizes are applied.
        has_y: When False the y-axis label is left empty.
        rank_resolution: Values within this distance of the row median are
            snapped to the median before ranking, so they tie with each other.
        verbose: Unused; kept so existing callers keep working.
    """
    label = name_mapping(name)
    accs_np = df_pivot.fillna(df_pivot.min()).values.round(4)
    options = df_pivot.columns.values

    ranks_raw = {'Model ID': [], 'Accuracy': [], 'Acc. Ranking': [], label: []}

    for i, row in enumerate(accs_np):
        # rankdata ranks in ascending order, so negate the row to give the
        # highest metric value rank 1. The negation also copies the row.
        rank_base = -row
        med = np.median(rank_base)
        # Treat values within rank_resolution of the median as tied with it.
        rank_base[np.abs(rank_base - med) <= rank_resolution] = med
        rank = rankdata(rank_base, method='min')
        for acc, rnk, option in zip(row, rank, options):
            ranks_raw['Model ID'].append(i)
            ranks_raw['Accuracy'].append(acc)
            ranks_raw['Acc. Ranking'].append(rnk)
            ranks_raw[label].append(option)

    ranks_raw = pd.DataFrame(data=ranks_raw)
    with sns.color_palette("muted"):
        if plot_type == 'performance':
            sns.violinplot(x=label, y="Accuracy", inner="box", data=ranks_raw, cut=0, ax=ax)
            ax.set_xlabel('', fontsize=48)
            # NOTE(review): the axis is labelled 'AUC Dist.' regardless of which
            # metric the pivot table holds - confirm this is intended.
            ax.set_ylabel('AUC Dist.' if has_y else '', fontsize=48)
        elif plot_type == 'rank_bar':
            sns.barplot(x=label, y="Acc. Ranking", data=ranks_raw, ax=ax)
            ax.set_ylim(bottom=1)
            ax.set_yticks([1, 2])
            ax.set_xlabel('', fontsize=48)
            ax.set_ylabel('Rank Average' if has_y else '', fontsize=48)
        elif plot_type == 'rank_violin':
            sns.violinplot(x=label, y="Acc. Ranking", inner="box", data=ranks_raw, cut=0, ax=ax)
            ax.set_ylim(bottom=1)
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))
            ax.set_ylabel('Rank Dist.' if has_y else '', fontsize=48)
        ax.xaxis.label.set_size(48)
        ax.yaxis.label.set_size(48)
        # Tick.label was removed from Matplotlib; tick_params sets the size of
        # the major tick labels on both axes without touching Tick objects.
        ax.tick_params(axis='both', which='major', labelsize=40)

            
def plot_single(df, options_chunk, options, metric, rank_resolution):
    """Show one figure per chunk of design dimensions, three panels per dimension.

    Args:
        df: Results table containing ``metric`` and every column in ``options``.
        options_chunk: Iterable of lists of dimension names; each list becomes
            one figure and may hold at most 6 names (one grid column each).
        options: All design dimensions; for each plotted dimension the others
            form the pivot-table index.
        metric: Column whose mean is aggregated into the pivot table.
        rank_resolution: Forwarded to ``get_acc``.
    """
    plot_types = ['performance', 'rank_bar', 'rank_violin']
    col = 6
    row = len(plot_types)
    for names in options_chunk:
        f, axes = plt.subplots(nrows=row, ncols=col, figsize=(48, 14))
        for i, name in enumerate(names):
            # Dimension names are plain labels, so a shallow copy is enough.
            name_others = list(options)
            name_others.remove(name)
            # Pass the aggregation by name: handing np.mean to pandas raises a
            # FutureWarning because its dispatch to the built-in mean is changing.
            df_pivot = pd.pivot_table(df, values=metric, index=name_others, columns=[name], aggfunc='mean')
            for j, plot_type in enumerate(plot_types):
                get_acc(df_pivot, name, axes[j, i], plot_type, has_y=True, rank_resolution=rank_resolution)
        # A short (typically the last) chunk leaves grid columns unused; hide
        # them instead of showing empty axes.
        for unused_ax in axes[:, len(names):].ravel():
            unused_ax.set_visible(False)
        plt.tight_layout()
        plt.subplots_adjust(wspace=0.5, hspace=0.2)
        # To save the figure: f.savefig('figs/{}.png'.format(metric), dpi=150, bbox_inches='tight')
        plt.show()

def plot_analysis(fname, division='test', dataset=None, metric='accuracy', rank_resolution=0.001, f=None, filter_rm=None):
    """Load an aggregated results CSV and plot the trade-off of each design dimension.

    Args:
        fname: Experiment directory name under ``../results_experiments``.
        division: Name of the CSV file (without extension) inside ``agg``.
        dataset: ``None`` plots every value of the ``name`` column separately,
            ``'all'`` plots all rows together, any other value plots only the
            rows whose ``name`` equals it.
        metric: Column to aggregate and plot.
        rank_resolution: Forwarded to the plotting helpers.
        f: Optional ``{column: value}`` filter of rows to keep; a list, tuple
            or set value keeps rows matching any of its members.
        filter_rm: Optional ``{column: value}`` filter of rows to drop, with
            the same value conventions as ``f``.
    """
    # Alternative layout: '../run/results/{}/agg/{}.csv'.format(fname, division)
    results_file_path = '../results_experiments/{}/agg/{}.csv'.format(fname, division)
    df = pd.read_csv(results_file_path)
    # Fill missing cells with 0, then unify alternative spellings of option values.
    df = df.fillna(0).replace('skipconcat', 'skipcat').replace('add', 'sum')

    collection_types = (list, tuple, set)

    if f is not None:
        for key, val in f.items():
            if isinstance(val, collection_types):
                df = df[df[key].isin(val)]
            else:
                df = df[df[key] == val]

    if filter_rm is not None:
        for key, val in filter_rm.items():
            if isinstance(val, collection_types):
                df = df[~df[key].isin(val)]
            else:
                df = df[df[key] != val]

    # Create and filter design dimensions: keep only the columns for which the
    # pivot table does not collapse to a single option.
    options_raw = list_exclude(list(df.columns), column_exclude_options)
    options = []
    for name in options_raw:
        column_temp = list(options_raw)
        column_temp.remove(name)
        # 'mean' instead of np.mean avoids the pandas FutureWarning about
        # passing NumPy callables as aggfunc.
        df_pivot = pd.pivot_table(df, values=metric, index=column_temp, columns=[name], aggfunc='mean')
        if len(df_pivot.columns) != 1:
            options.append(name)
    options_chunk = list(chunks(options, 6))
    print(division, dataset, options_chunk)

    if dataset == 'all':
        plot_single(df, options_chunk, options, metric, rank_resolution)
        return

    dataset_names = df['name'].unique() if dataset is None else [dataset]
    for dataset_name in dataset_names:
        df_dataset = df[df['name'] == dataset_name]
        print('Dataset: {}'.format(dataset_name))
        plot_single(df_dataset, options_chunk, options, metric, rank_resolution)



  from IPython.core.display import display, HTML


In [5]:
layers = ['gin', 'gat', 'gcn']
layers_cuda = ['gin']
archs = ['cpu', 'cuda']
datasets = ['PROTEINS', 'ENZYMES', 'DD', 'NCI1', 'BZR_MD', 'COX2_MD']

datasets_cpu = ['BZR_MD','COX2_MD', 'ENZYMES']
datasets_cuda = ['PROTEINS', 'DD', 'NCI1']

# For every (layer, dataset) pair, collect the 'dilapos_gnn' row with the
# highest validation accuracy.
res_our = []
for layer in layers:
    # Read each layer's aggregate file once instead of once per dataset.
    df_layer = pd.read_csv(f'../../DL-project_results/tu_base_grid_{layer}_pos_cpu/agg/val_best.csv')
    for dataset in datasets_cpu:
        df = df_layer[df_layer['dataset'] == dataset]
        dilated = df[df['type'] == 'dilapos_gnn'].sort_values('accuracy', ascending=False)
        # Guard on the frame that is actually indexed: a dataset can have rows
        # but none of type 'dilapos_gnn', and .iloc[0] would raise IndexError.
        if dilated.empty:
            continue
        res_our.append({'arch': 'our', 'dataset': dataset, 'layer': layer, 'accuracy': dilated['accuracy'].iloc[0], 'accuracy_std': dilated['accuracy_std'].iloc[0], 'pos': dilated['pos'].iloc[0]})


for layer in layers_cuda:
    df_layer = pd.read_csv(f'../../DL-project_results/tu_base_grid_{layer}_pos_cuda/agg/val_best.csv')
    for dataset in datasets_cuda:
        df = df_layer[df_layer['dataset'] == dataset]
        dilated = df[df['type'] == 'dilapos_gnn'].sort_values('accuracy', ascending=False)
        # Same guard as above: skip datasets without any 'dilapos_gnn' row.
        if dilated.empty:
            continue
        res_our.append({'arch': 'our', 'dataset': dataset, 'layer': layer, 'accuracy': dilated['accuracy'].iloc[0], 'accuracy_std': dilated['accuracy_std'].iloc[0], 'pos': dilated['pos'].iloc[0]})


# Display the collected rows grouped by layer.
pd.DataFrame(res_our).sort_values('layer')

Unnamed: 0,arch,dataset,layer,accuracy,accuracy_std,pos
3,our,BZR_MD,gat,0.8194,0.0438,False
4,our,COX2_MD,gat,0.8567,0.0775,False
5,our,ENZYMES,gat,0.7467,0.0476,True
6,our,BZR_MD,gcn,0.8097,0.0488,True
7,our,COX2_MD,gcn,0.7967,0.0752,True
8,our,ENZYMES,gcn,0.6633,0.0476,False
0,our,BZR_MD,gin,0.8,0.0573,False
1,our,COX2_MD,gin,0.81,0.0789,True
2,our,ENZYMES,gin,0.7333,0.0401,False
9,our,PROTEINS,gin,0.8027,0.0352,False


In [3]:
# Show the single highest-accuracy PROTEINS row of the val_best results
# for the selected layer.
layer = 'gin'
d = pd.read_csv(f'../../DL-project_results/tu_base_grid_{layer}_cuda/agg/val_best.csv')
d.loc[d['dataset'] == 'PROTEINS'].sort_values(by='accuracy', ascending=False).iloc[:1]

Unnamed: 0,d,type,dataset,layer,edges,k1,k2,h,batch_size,lr,batchnorm,act,dropout,gpool,scheduler,step_size,step_gamma,epoch,ckpt_epoch,ckpt_epoch_std,loss,loss_std,params,time_iter,time_iter_std,accuracy,accuracy_std,precision,precision_std,recall,recall_std,f1,f1_std,auc,auc_std,layers_mp
196,cuda,dilated_gnn,PROTEINS,ginconv_paper,False,3.0,1.0,64,32,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,56.4,40.1577,0.6277,0.1513,30723.0,0.2892,0.0106,0.7973,0.0358,0.7892,0.0517,0.7123,0.088,0.7454,0.057,0.8139,0.0397,


In [4]:
# Load and display the aggregated test_best results; `layer` must already be
# defined by a previously executed cell.
pd.read_csv(f'../../DL-project_results/tu_base_grid_{layer}_cuda/agg/test_best.csv')

Unnamed: 0,d,type,dataset,layer,edges,k1,k2,h,batch_size,lr,batchnorm,act,dropout,gpool,scheduler,step_size,step_gamma,epoch,ckpt_epoch,ckpt_epoch_std,loss,loss_std,params,time_iter,time_iter_std,accuracy,accuracy_std,precision,precision_std,recall,recall_std,f1,f1_std,auc,auc_std,layers_mp
0,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,32,32,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,33.8,30.288,1.1503,0.5832,6371.0,0.3876,0.0185,0.7273,0.0544,0.679,0.0773,0.6323,0.1066,0.65,0.073,0.8049,0.0553,
1,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,32,32,0.01,True,relu,0.5,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,67.9,56.3923,1.5219,0.4147,6371.0,0.3363,0.0091,0.7342,0.033,0.7172,0.0729,0.5775,0.0661,0.6357,0.053,0.8054,0.0419,
2,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,32,128,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,49.8,35.8408,1.1421,0.2872,6371.0,0.4273,0.0077,0.7461,0.0419,0.7441,0.0717,0.585,0.1184,0.6453,0.0733,0.8178,0.0401,
3,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,32,128,0.01,True,relu,0.5,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,99.4,102.8049,1.4404,0.7343,6371.0,0.3898,0.0243,0.7308,0.0424,0.726,0.0869,0.5501,0.0898,0.6198,0.0719,0.819,0.0437,
4,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,64,32,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,62.9,51.2434,1.6644,0.7528,18883.0,0.2295,0.0072,0.7342,0.0277,0.7171,0.0799,0.602,0.1118,0.6417,0.0567,0.8158,0.0424,
5,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,64,32,0.01,True,relu,0.5,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,79.2,62.8742,2.2017,1.2272,18883.0,0.4216,0.0299,0.7376,0.0335,0.7226,0.0723,0.5779,0.0764,0.6379,0.06,0.8166,0.0435,
6,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,64,128,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,37.3,25.2984,1.0012,0.2763,18883.0,0.2345,0.01,0.7308,0.0422,0.7049,0.0851,0.5948,0.0642,0.6409,0.0488,0.8102,0.0497,
7,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,1.0,64,128,0.01,True,relu,0.5,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,60.4,49.2812,1.7538,0.9511,18883.0,0.2779,0.011,0.7282,0.0238,0.7411,0.0868,0.5275,0.0808,0.6075,0.0479,0.8202,0.0395,
8,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,2.0,32,32,0.01,True,relu,0.0,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,51.1,53.3862,1.6745,0.7853,8645.0,0.3166,0.0146,0.7624,0.0388,0.7355,0.0974,0.6748,0.0869,0.6954,0.0513,0.8345,0.047,
9,cuda,dilated_gnn,DD,ginconv_paper,False,1.0,2.0,32,32,0.01,True,relu,0.5,concat_across_sum_of_layers,step_lr_epochs,50,0.5,val_best,42.6,52.2517,1.4962,1.1974,8645.0,0.2828,0.0069,0.7393,0.0589,0.6933,0.1061,0.6913,0.0902,0.6823,0.0593,0.8124,0.0649,
