In [1]:
import pandas as pd
import glob
import re

In [2]:
def read_results(path):
    pattern = "results/(.*?).csv"
    df = pd.read_csv(path)
    text = re.search(pattern, path).group(1).split('_')
    df['model'] = text[0]
    df['text_encoding'] = text[1]
    return df

In [3]:
paths = glob.glob('results/*.csv')

In [4]:
df = pd.concat(list(map(read_results,paths)), axis=0, ignore_index=True)

In [5]:
df_avg = round(df.groupby(['text_encoding','model']).mean(),2)
df_avg = df_avg.reindex(['GAT', 'GraphUNet', 'AGNN', 'SAGE','GIN','GCN'], level='model')
df_avg = df_avg.reindex(['tfidf', 'd2v', 'bert'], level='text_encoding')
df_avg.to_csv('../../results/linkpred_results_mean.csv')
df_avg

Unnamed: 0_level_0,Unnamed: 1_level_0,auc,ap
text_encoding,model,Unnamed: 2_level_1,Unnamed: 3_level_1
tfidf,GAT,0.79,0.78
tfidf,GraphUNet,0.79,0.77
tfidf,AGNN,0.83,0.78
tfidf,SAGE,0.85,0.87
tfidf,GIN,0.87,0.88
tfidf,GCN,0.87,0.89
d2v,GAT,0.79,0.77
d2v,GraphUNet,0.79,0.76
d2v,AGNN,0.83,0.8
d2v,SAGE,0.85,0.87


In [6]:
df_sd = round(df.groupby(['text_encoding','model']).std(),2)
df_sd = df_sd.reindex(['GAT', 'GraphUNet', 'AGNN', 'SAGE','GIN','GCN'], level='model')
df_sd = df_sd.reindex(['tfidf', 'd2v', 'bert'], level='text_encoding')
df_sd.to_csv('../../results/linkpred_results_std.csv')
df_sd

Unnamed: 0_level_0,Unnamed: 1_level_0,auc,ap
text_encoding,model,Unnamed: 2_level_1,Unnamed: 3_level_1
tfidf,GAT,0.0,0.01
tfidf,GraphUNet,0.03,0.04
tfidf,AGNN,0.01,0.02
tfidf,SAGE,0.0,0.01
tfidf,GIN,0.01,0.01
tfidf,GCN,0.0,0.0
d2v,GAT,0.0,0.01
d2v,GraphUNet,0.03,0.03
d2v,AGNN,0.02,0.02
d2v,SAGE,0.0,0.01


In [56]:
df = df_avg.join(df_sd, lsuffix='avg', rsuffix='std')

df['AUC'] = df.apply(lambda x: '{} ({})'.format(x.aucavg, x.aucstd),1)
df['AP'] = df.apply(lambda x: '{} ({})'.format(x.apavg, x.apstd),1)
df = df.loc[:,['AUC','AP']]
df.to_csv('../../results/linkpred_results.csv')
df

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC,AP
text_encoding,model,Unnamed: 2_level_1,Unnamed: 3_level_1
tfidf,GAT,0.79 (0.0),0.78 (0.01)
tfidf,GraphUNet,0.79 (0.03),0.77 (0.04)
tfidf,AGNN,0.83 (0.01),0.78 (0.02)
tfidf,SAGE,0.85 (0.0),0.87 (0.01)
tfidf,GIN,0.87 (0.01),0.88 (0.01)
tfidf,GCN,0.87 (0.0),0.89 (0.0)
d2v,GAT,0.79 (0.0),0.77 (0.01)
d2v,GraphUNet,0.79 (0.03),0.76 (0.03)
d2v,AGNN,0.83 (0.02),0.8 (0.02)
d2v,SAGE,0.85 (0.0),0.87 (0.01)


In [55]:
print(df.to_latex())

\begin{tabular}{llll}
\toprule
     &     &          AUC &           AP \\
text\_encoding & model &              &              \\
\midrule
tfidf & GAT &   0.79 (0.0) &  0.78 (0.01) \\
     & GraphUNet &  0.79 (0.03) &  0.77 (0.04) \\
     & AGNN &  0.83 (0.01) &  0.78 (0.02) \\
     & SAGE &   0.85 (0.0) &  0.87 (0.01) \\
     & GIN &  0.87 (0.01) &  0.88 (0.01) \\
     & GCN &   0.87 (0.0) &   0.89 (0.0) \\
d2v & GAT &   0.79 (0.0) &  0.77 (0.01) \\
     & GraphUNet &  0.79 (0.03) &  0.76 (0.03) \\
     & AGNN &  0.83 (0.02) &   0.8 (0.02) \\
     & SAGE &   0.85 (0.0) &  0.87 (0.01) \\
     & GIN &  0.87 (0.01) &  0.89 (0.02) \\
     & GCN &   0.86 (0.0) &   0.88 (0.0) \\
bert & GAT &  0.78 (0.01) &  0.76 (0.01) \\
     & GraphUNet &  0.79 (0.03) &  0.76 (0.06) \\
     & AGNN &  0.84 (0.02) &  0.79 (0.02) \\
     & SAGE &   0.87 (0.0) &   0.89 (0.0) \\
     & GIN &  0.87 (0.02) &  0.88 (0.02) \\
     & GCN &  0.91 (0.01) &   0.91 (0.0) \\
\bottomrule
\end{tabular}

