In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# --- Notebook configuration -------------------------------------------------
# `dataset` picks which result files are analysed; edit it to switch datasets.
dataset = "elliptic"
# When False, the intrinsic-only baseline file is left out of both lists and
# the remaining file names carry the "no_intrinsic" tag instead of "intrinsic".
use_intrinsic = True
intrinsic_str = "no_intrinsic" if not use_intrinsic else "intrinsic"

# Build the TD and TI lists in one pass: both follow the same naming scheme and
# differ only in the trailing suffix. Each list starts with the intrinsic-only
# baseline (when enabled), followed by one file per embedding/GNN method.
files_with_results_TD, files_with_results_TI = (
    ([f'res/{dataset}_intrinsic_{setting}.csv'] if use_intrinsic else [])
    + [
        f'res/{dataset}_{method}_{intrinsic_str}_{setting}.csv'
        for method in ('positional', 'deepwalk', 'node2vec', 'gcn', 'sage', 'gat', 'gin')
    ]
    for setting in ('TD', 'TI')
)

In [None]:
# Result files of the unsupervised experiments.
# NOTE: running this cell rebinds `files_with_results_TD` / `files_with_results_TI`,
# so every later cell analyses the unsupervised results instead of the lists
# built in the configuration cell. Skip it to keep the original lists.
files_with_results_TD = [
    'res/'+dataset+'_intrinsic_unsupervised_'+intrinsic_str+'_TD.csv',
    'res/'+dataset+'_positional_unsupervised_'+intrinsic_str+'_TD.csv',
    'res/'+dataset+'_deepwalk_unsupervised_'+intrinsic_str+'_TD.csv',
    # Fixed: the directory separator after 'res' was missing, which produced a
    # path like 'reselliptic_node2vec_...' that could never be found.
    'res/'+dataset+'_node2vec_unsupervised_'+intrinsic_str+'_TD.csv'
]

files_with_results_TI = [
    'res/'+dataset+'_intrinsic_unsupervised_'+intrinsic_str+'_TI.csv',
    'res/'+dataset+'_positional_unsupervised_'+intrinsic_str+'_TI.csv',
    'res/'+dataset+'_deepwalk_unsupervised_'+intrinsic_str+'_TI.csv',
    'res/'+dataset+'_node2vec_unsupervised_'+intrinsic_str+'_TI.csv'
]

In [None]:
# For every TI result file, print the mean and standard deviation (pandas
# default for `Series.std`) of its 'AUC' and 'AP' columns, formatted as
# LaTeX math cells separated by '&'.
for file in files_with_results_TI:
    try:
        df = pd.read_csv("../"+file)
        AUC_mean = df['AUC'].mean()
        AUC_std = df['AUC'].std()
        AP_mean = df['AP'].mean()
        AP_std = df['AP'].std()
        # '\\pm' emits a literal backslash; a bare '\p' is an invalid escape
        # sequence that newer Python versions warn about.
        print(f'{file} ${AUC_mean:.4f} \\pm {AUC_std:.4f}$ & ${AP_mean:.4f} \\pm {AP_std:.4f}$')
    except Exception as err:
        # Stay best-effort (a missing file must not stop the loop), but do not
        # swallow KeyboardInterrupt/SystemExit and do report the actual cause.
        print(f'Error reading file {file}: {err!r}')

In [None]:
# For every TD result file, print the mean and standard deviation of each
# column as a LaTeX math cell, one column per line.
for file in files_with_results_TD:
    print('------------------')
    print(f'{file}')
    try:
        df = pd.read_csv("../"+file)
        for col in df.columns:
            # Skip 'Unnamed: 0' -- presumably the index column written by
            # `DataFrame.to_csv`; confirm against the code that saves the files.
            if col != 'Unnamed: 0':
                df_mean = df[col].mean()
                df_std = df[col].std()
                # '\\pm' emits a literal backslash; a bare '\p' is an invalid
                # escape sequence that newer Python versions warn about.
                print(f'{col} ${df_mean:.4f} \\pm {df_std:.4f}$')
    except Exception as err:
        # Stay best-effort (a missing file must not stop the loop), but do not
        # swallow KeyboardInterrupt/SystemExit and do report the actual cause.
        print(f'Error reading file {file}: {err!r}')

In [None]:
# Load the third TI result file on its own and tag every row with the method
# name, taken as the second '_'-separated token of the file path.
file = files_with_results_TI[2]
df = pd.read_csv("../" + file).assign(Method=file.split('_')[1])


In [None]:
# Preview the first rows of `df`, including the added 'Method' column.
df.head()

In [None]:
# Load every TI result file into its own DataFrame, tagging the rows with the
# method name so the frames can be stacked and grouped later.
df_list = []
for file in files_with_results_TI:
    try:
        df = pd.read_csv("../"+file)
        # The method is the second '_'-separated token of the path; this relies
        # on the dataset name itself containing no underscore.
        df['Method'] = file.split('_')[1]
        df_list.append(df)
    except Exception as err:
        # Stay best-effort (a missing file must not stop the loop), but do not
        # swallow KeyboardInterrupt/SystemExit and do report the actual cause.
        print(f'Error reading file {file}: {err!r}')

In [None]:
# Display the list of loaded frames (one entry per file that was read successfully).
df_list

In [None]:
# Stack the per-method frames into one long frame with a fresh 0..n-1 index.
# Every failed read is only printed by the loading loop, so an empty list means
# nothing was loaded; fail with a message that names the real cause instead of
# pandas' generic "No objects to concatenate".
if not df_list:
    raise ValueError(
        'No result files were loaded; check the paths in files_with_results_TI'
    )
result = pd.concat(df_list, ignore_index=True)
result.head()

In [None]:
# NOTE(review): the later plotting cells rely on this import; consider moving
# it to the import cell at the top of the notebook.
import seaborn as sns

# Box plot of the 'AUC' column of `result`, one box per method, saved as a PDF.
title_plot = f'AUC-ROC for {dataset} dataset'
if not use_intrinsic:
    title_plot += ' without intrinsic features'

sns.boxplot(
    x='Method',
    y='AUC',
    hue='Method',
    data=result,
    palette='tab10',
    showmeans=True,
    meanprops={
        'marker': '*',
        'markerfacecolor': 'xkcd:steel',
        'markeredgecolor': '.3',
        'markersize': 10,
    },
    # The original dict listed 'linewidth' twice (2, then 3); only the last
    # value ever took effect, so the dead entry is dropped.
    medianprops={'color': 'black', 'label': '_median_', 'linewidth': 3},
)
plt.title(title_plot)
plt.xlabel('Method')
plt.ylabel('AUC-ROC')
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True, which="both", ls="--", c='gray', alpha=0.3)
# Save before plt.show(): the figure may be cleared once it has been shown.
plt.savefig(f'../res/{dataset}_AUC_ROC_{intrinsic_str}.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Box plot of the 'AP' column of `result`, one box per method, saved as a PDF.
title_plot = f'AUC-PR for {dataset} dataset'
if not use_intrinsic:
    title_plot += ' without intrinsic features'

sns.boxplot(
    x='Method',
    y='AP',
    hue='Method',
    data=result,
    palette='tab10',
    showmeans=True,
    meanprops={
        'marker': '*',
        'markerfacecolor': 'xkcd:steel',
        'markeredgecolor': '.3',
        'markersize': 10,
    },
    # The original dict listed 'linewidth' twice (2, then 3); only the last
    # value ever took effect, so the dead entry is dropped.
    medianprops={'color': 'black', 'label': '_median_', 'linewidth': 3},
)
plt.title(title_plot)
plt.xlabel('Method')
plt.ylabel('AUC-PR')
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True, which="both", ls="--", c='gray', alpha=0.3)
# Save before plt.show(): the figure may be cleared once it has been shown.
plt.savefig(f'../res/{dataset}_AUC_PR_{intrinsic_str}.pdf', bbox_inches='tight')
plt.show()

In [None]:
# 2x2 summary figure built from the TI result files: one column per dataset,
# one row per metric, each panel a box plot of that metric grouped by method.
#
# NOTE: this cell rebinds the notebook-level names `dataset`, `use_intrinsic`,
# `intrinsic_str`, `files_with_results_TI`, `df_list` and `result`; re-run the
# configuration cell before re-running earlier cells.
fig, ax = plt.subplots(2, 2, figsize=(10, 6))
# Only the "with intrinsic features" setting is drawn in this figure.
use_intrinsic = True
intrinsic_str = "intrinsic" if use_intrinsic else "no_intrinsic"
metric_list = ['AUC', 'AP']
dataset_list = ['elliptic', 'ibm']
# Human-readable metric names used in the panel titles.
metric_titles = {'AUC': 'AUC-ROC', 'AP': 'AUC-PR'}

for i, dataset in enumerate(dataset_list):
    files_with_results_TI = [
        'res/'+dataset+'_intrinsic_TI.csv',
        'res/'+dataset+'_positional_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_deepwalk_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_node2vec_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_gcn_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_sage_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_gat_'+intrinsic_str+'_TI.csv',
        'res/'+dataset+'_gin_'+intrinsic_str+'_TI.csv'
    ]

    if not use_intrinsic:
        # Without intrinsic features there is no intrinsic-only baseline file.
        files_with_results_TI = files_with_results_TI[1:]

    # Load the files once per dataset: the data does not depend on the metric,
    # so reading inside the metric loop doubled the I/O and the error output.
    df_list = []
    for file in files_with_results_TI:
        try:
            df = pd.read_csv("../"+file)
            # The method is the second '_'-separated token of the path; this
            # relies on the dataset name itself containing no underscore.
            df['Method'] = file.split('_')[1]
            df_list.append(df)
        except Exception as err:
            # Stay best-effort, but do not swallow KeyboardInterrupt/SystemExit
            # and do report the actual cause.
            print(f'Error reading file {file}: {err!r}')

    if not df_list:
        raise ValueError(f'No result files could be loaded for dataset {dataset}')
    result = pd.concat(df_list, ignore_index=True)

    for j, metric in enumerate(metric_list):
        title_plot = f'{metric_titles[metric]} for {dataset} dataset'
        if not use_intrinsic:
            title_plot += ' without intrinsic features'

        sns.boxplot(
            ax=ax[j, i],
            x='Method',
            y=metric,
            hue='Method',
            data=result,
            palette='tab10',
            showmeans=True,
            meanprops={
                'marker': '*',
                'markerfacecolor': 'xkcd:steel',
                'markeredgecolor': '.3',
                'markersize': 10,
            },
            # The original dict listed 'linewidth' twice (2, then 3); only the
            # last value ever took effect, so the dead entry is dropped.
            medianprops={'color': 'black', 'label': '_median_', 'linewidth': 3},
        )
        ax[j, i].set_title(title_plot)
        ax[j, i].tick_params(axis='x', rotation=45)
        ax[j, i].grid(True, which="both", ls="--", c='gray', alpha=0.3)
plt.tight_layout()
plt.savefig('../res/AUC_ROC_PR.pdf', bbox_inches='tight')