# 1. ROSMAP ALMANAC

## 1.1 ROSMAP ALMANAC Pearson Correlation

In [1]:
# Hard-coded test results for the decoder experiments: one list per model,
# five values each.
# NOTE(review): presumably one value per cross-validation fold / repeated run,
# and presumably an accuracy-like score in [0, 1] — confirm against the
# training logs, since the notebook does not record where these came from.
gcn_decoder_test_list = [0.6, 0.64, 0.64, 0.52, 0.5714285714285714]
gat_decoder_test_list = [0.64, 0.64, 0.64, 0.72, 0.5]
unimp_decoder_test_list = [0.68, 0.6, 0.6, 0.64, 0.5714285714285714]
mixhop_decoder_test_list = [0.6, 0.6, 0.6, 0.56, 0.5]
pna_decoder_test_list = [0.6, 0.56, 0.56, 0.6, 0.5714285714285714]
gin_decoder_test_list = [0.48, 0.4, 0.52, 0.52, 0.5714285714285714]
# M3Net is the reference model: the t-test cells compare every list above
# against this one.
m3net_decoder_test_list = [0.68, 0.68, 0.64, 0.76, 0.6071428571428571]


In [2]:
import scipy.stats as stats
# Baseline models that are compared against M3Net, keyed by display name.
# Insertion order is the order in which results are printed and exported.
decoder_lists = dict(
    GCN=gcn_decoder_test_list,
    GAT=gat_decoder_test_list,
    UniMP=unimp_decoder_test_list,
    MixHop=mixhop_decoder_test_list,
    PNA=pna_decoder_test_list,
    GIN=gin_decoder_test_list,
)

# Welch's t-test (equal_var=False: no equal-variance assumption) of M3Net
# versus each baseline; only the p-value is reported.
for name, baseline_scores in decoder_lists.items():
    welch_result = stats.ttest_ind(
        m3net_decoder_test_list, baseline_scores, equal_var=False
    )
    p_value = welch_result.pvalue
    print(f"{name} vs M3Net: p-value = {p_value:.6f}")

GCN vs M3Net: p-value = 0.049651
GAT vs M3Net: p-value = 0.332947
UniMP vs M3Net: p-value = 0.124477
MixHop vs M3Net: p-value = 0.014849
PNA vs M3Net: p-value = 0.017237
GIN vs M3Net: p-value = 0.001883


In [3]:
# Recompute the M3Net-vs-baseline Welch t-test p-values and export them as a
# two-column CSV table (one row per baseline model).
import pandas as pd

p_values = [
    stats.ttest_ind(m3net_decoder_test_list, baseline_scores, equal_var=False).pvalue
    for baseline_scores in decoder_lists.values()
]

df = pd.DataFrame({"Model": list(decoder_lists), "p-value": p_values})
df.to_csv("ROSMAP_decoder_p_values.csv", index=False)

In [4]:
# Summarise every model's test results as mean and standard deviation, then
# present them as percentage strings in a model-by-statistic table.
import numpy as np
import pandas as pd


def _mean_and_std(values):
    """Return ``(mean, std)`` of ``values`` using NumPy's defaults (std has ddof=0)."""
    return np.mean(values), np.std(values)


def _as_percent(value):
    """Format a fraction as a percentage string with two decimal places."""
    return "{:.2%}".format(value)


# Numeric summaries, one (avg, std) pair per model.
gcn_avg, gcn_std = _mean_and_std(gcn_decoder_test_list)
gat_avg, gat_std = _mean_and_std(gat_decoder_test_list)
unimp_avg, unimp_std = _mean_and_std(unimp_decoder_test_list)
mixhop_avg, mixhop_std = _mean_and_std(mixhop_decoder_test_list)
pna_avg, pna_std = _mean_and_std(pna_decoder_test_list)
gin_avg, gin_std = _mean_and_std(gin_decoder_test_list)
m3net_avg, m3net_std = _mean_and_std(m3net_decoder_test_list)

# The same summaries rendered as percentage strings for the comparison table.
gcn_avg_str, gcn_std_str = _as_percent(gcn_avg), _as_percent(gcn_std)
gat_avg_str, gat_std_str = _as_percent(gat_avg), _as_percent(gat_std)
unimp_avg_str, unimp_std_str = _as_percent(unimp_avg), _as_percent(unimp_std)
mixhop_avg_str, mixhop_std_str = _as_percent(mixhop_avg), _as_percent(mixhop_std)
pna_avg_str, pna_std_str = _as_percent(pna_avg), _as_percent(pna_std)
gin_avg_str, gin_std_str = _as_percent(gin_avg), _as_percent(gin_std)
m3net_avg_str, m3net_std_str = _as_percent(m3net_avg), _as_percent(m3net_std)

# Models are the rows; "avg" and "std" are the columns.
rosmap_model_comparison_table = pd.DataFrame(
    {
        "avg": [
            gcn_avg_str,
            gat_avg_str,
            unimp_avg_str,
            mixhop_avg_str,
            pna_avg_str,
            gin_avg_str,
            m3net_avg_str,
        ],
        "std": [
            gcn_std_str,
            gat_std_str,
            unimp_std_str,
            mixhop_std_str,
            pna_std_str,
            gin_std_str,
            m3net_std_str,
        ],
    },
    index=["GCN", "GAT", "UniMP", "MIXHOP", "PNA", "GIN", "M3NETFLOW"],
)

display(rosmap_model_comparison_table)


Unnamed: 0,avg,std
GCN,59.43%,4.53%
GAT,62.80%,7.11%
UniMP,61.83%,3.78%
MIXHOP,57.20%,3.92%
PNA,57.83%,1.82%
GIN,49.83%,5.71%
M3NETFLOW,67.34%,5.12%


# 2. Combine the results from all datasets

In [5]:
# Build the cross-dataset comparison table: models as rows, one column per
# dataset, each cell formatted as "avg ± std".
#
# The "avg" and "std" columns are deliberately NOT dropped from
# rosmap_model_comparison_table. Dropping them in place made this cell fail
# with a KeyError when run a second time, because the columns it reads were
# already gone. Assigning the "ROSMAP" column is safe to repeat: it simply
# overwrites the column with the same values.
rosmap_model_comparison_table["ROSMAP"] = (
    rosmap_model_comparison_table["avg"]
    + " ± "
    + rosmap_model_comparison_table["std"]
)

# Only the combined column goes into the final table; further datasets can be
# added to the list passed to pd.concat.
model_comparison_table = pd.concat(
    [rosmap_model_comparison_table[["ROSMAP"]]], axis=1
)
display(model_comparison_table)
model_comparison_table.to_csv("./model_comparison_table.csv")

Unnamed: 0,ROSMAP
GCN,59.43% ± 4.53%
GAT,62.80% ± 7.11%
UniMP,61.83% ± 3.78%
MIXHOP,57.20% ± 3.92%
PNA,57.83% ± 1.82%
GIN,49.83% ± 5.71%
M3NETFLOW,67.34% ± 5.12%
