In [1]:
import pandas as pd
import itertools
import os

### Results for Group Separation and Monotonicity Experiments

In [2]:
# Directory that holds the experiment result CSV files. Set the
# SIMILARITYBENCH_RESULTS_PATH environment variable to point the notebook at
# another location; the original machine-specific directory stays the default
# so existing setups keep working unchanged.
EXPERIMENT_RESULTS_PATH = os.environ.get(
    "SIMILARITYBENCH_RESULTS_PATH",
    'C:/Users/Tobias/Eigene Dokumente/Research/similaritybench/experiments/results',
)

# Maps each experiment name to the comparison type that appears in the names
# of its result files (see the *_DF_FILE_NAME helpers).
COMPARISON_TYPE_DICT = {
    "label_test": "group_separation",
    "layer_test": "monotonicity",
    "augmentation_test": "group_separation",
    "shortcut_test": "group_separation",
}

def AGG_DF_FILE_NAME(experiment, comparison_type, dataset):
    """Build the result file name ``<experiment>_<comparison_type>_<dataset>.csv``."""
    stem = "_".join(format(part) for part in (experiment, comparison_type, dataset))
    return stem + ".csv"

def FULL_DF_FILE_NAME(experiment, comparison_type, dataset):
    """Build the result file name ``<experiment>_<comparison_type>_<dataset>_full.csv``."""
    return "{}_{}_{}_full.csv".format(experiment, comparison_type, dataset)

In [3]:
def get_pivot_table(experiment, dataset):
    """Read one experiment's full result CSV and reshape it to a wide table.

    Args:
        experiment: Key of ``COMPARISON_TYPE_DICT``; it also selects the
            comparison type that is part of the file name.
        dataset: Dataset name used in the file name.

    Returns:
        DataFrame indexed by ``similarity_measure`` whose column levels are
        ``representation_dataset``, ``architecture`` and ``quality_measure``,
        filled from the ``value`` column. No aggregation is performed, so
        ``DataFrame.pivot`` raises if an index/column combination occurs more
        than once in the file.
    """
    file_name = FULL_DF_FILE_NAME(experiment, COMPARISON_TYPE_DICT[experiment], dataset)
    results = pd.read_csv(os.path.join(EXPERIMENT_RESULTS_PATH, file_name))

    selected = results[
        ["similarity_measure", "quality_measure", "value", "architecture", "representation_dataset"]
    ]
    return selected.pivot(
        index="similarity_measure",
        columns=["representation_dataset", "architecture", "quality_measure"],
        values="value",
    )

In [4]:
def get_agg_pivot_table(experiment, datasets):
    """Load the full result files of several datasets and pivot their mean values.

    Args:
        experiment: Key of ``COMPARISON_TYPE_DICT``; it also selects the
            comparison type that is part of each file name.
        datasets: Iterable of dataset names. A single name may also be given
            as a plain string, which is treated as a one-element list instead
            of being iterated character by character.

    Returns:
        DataFrame indexed by ``similarity_measure`` whose column levels are
        ``representation_dataset``, ``architecture`` and ``quality_measure``.
        Each cell is the mean of the ``value`` entries that share that
        combination across all loaded files.

    Raises:
        ValueError: If ``datasets`` is empty.
        KeyError: If ``experiment`` is not a key of ``COMPARISON_TYPE_DICT``
            or a result file lacks one of the required columns.
    """
    # A bare string is itself iterable; wrap it so "cora" means ["cora"].
    if isinstance(datasets, str):
        datasets = [datasets]
    datasets = list(datasets)
    if not datasets:
        raise ValueError("get_agg_pivot_table requires at least one dataset name")

    comparison_type = COMPARISON_TYPE_DICT[experiment]
    needed_columns = ["similarity_measure", "quality_measure", "value", "architecture", "representation_dataset"]

    frames = [
        pd.read_csv(
            os.path.join(EXPERIMENT_RESULTS_PATH, FULL_DF_FILE_NAME(experiment, comparison_type, dataset))
        ).loc[:, needed_columns]
        for dataset in datasets
    ]
    combined = pd.concat(frames, axis=0)
    return combined.pivot_table(
        index="similarity_measure",
        columns=["representation_dataset", "architecture", "quality_measure"],
        values="value",
        aggfunc="mean",
    )

In [5]:
# Mean label_test values per similarity measure for the three datasets.
get_agg_pivot_table(experiment="label_test", datasets=["cora", "flickr", "ogbn-arxiv"])

representation_dataset,cora,cora,cora,cora,cora,cora,flickr,flickr,flickr,flickr,flickr,flickr,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv
architecture,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE
quality_measure,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate
similarity_measure,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
AlignedCosineSimilarity,0.277452,0.447,0.296727,0.3768,0.281042,0.4118,0.141449,0.4726,0.461096,0.1504,0.277374,0.489,0.462207,0.198,0.471147,0.2198,0.436345,0.3446
CKA,0.274583,0.4866,0.273912,0.4736,0.274771,0.4686,0.129954,0.4808,0.342134,0.1608,0.43229,0.2202,0.726076,0.054,0.865779,0.0166,0.831647,0.0266
ConcentricityDifference,0.183074,0.3736,0.136865,0.4066,0.162598,0.3874,0.115978,0.4802,0.125012,0.4692,0.291945,0.2494,0.41341,0.167,0.327325,0.1946,0.569148,0.1006
DistanceCorrelation,0.274829,0.4788,0.281172,0.427,0.289221,0.3882,0.121305,0.474,0.421418,0.131,0.312483,0.3532,0.659332,0.0702,0.848036,0.021,0.803045,0.0352
EigenspaceOverlapScore,0.281568,0.382,0.272522,0.4294,0.273769,0.4544,0.128491,0.4742,0.275875,0.4822,0.284937,0.4234,0.275617,0.4944,0.272975,0.4874,0.276387,0.4606
Gulp,0.28136,0.3846,0.27677,0.4158,0.274428,0.4518,0.111432,0.4738,0.103242,0.5408,0.285205,0.4222,0.282807,0.4716,0.138971,0.4734,0.277413,0.4472
HardCorrelationMatch,0.2707,0.4956,0.271883,0.479,0.27331,0.4734,0.167077,0.3592,0.383127,0.1364,0.339107,0.2926,0.361708,0.3126,0.539895,0.1426,0.548372,0.1502
JaccardSimilarity,0.277164,0.4368,0.271703,0.4868,0.273186,0.4854,0.124418,0.4094,0.326535,0.2468,0.313439,0.316,0.351645,0.2138,0.683794,0.0728,0.288993,0.4846
LinearRegression,0.277083,0.4362,0.28261,0.4044,0.287879,0.4182,0.138999,0.464,0.157891,0.5308,0.340979,0.3042,0.302403,0.4556,0.283791,0.4028,0.369216,0.3278
MagnitudeDifference,0.165867,0.3734,0.11468,0.4668,0.176162,0.4248,0.104497,0.5308,0.455037,0.1564,0.3125,0.2016,0.176992,0.385,0.160646,0.3824,0.190985,0.388


In [6]:
# Mean shortcut_test values per similarity measure for the three datasets.
get_agg_pivot_table(experiment="shortcut_test", datasets=["cora", "flickr", "ogbn-arxiv"])

representation_dataset,cora,cora,cora,cora,cora,cora,flickr,flickr,flickr,flickr,flickr,flickr,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv
architecture,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE
quality_measure,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate
similarity_measure,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
AlignedCosineSimilarity,0.135916,0.4744,0.153279,0.3646,0.280349,0.3718,0.35997,0.338,0.299808,0.2008,0.527227,0.0864,0.566886,0.1088,0.383656,0.1406,0.944316,0.0084
CKA,0.217533,0.3512,0.13675,0.3678,0.285521,0.298,0.117855,0.4684,0.123737,0.4634,0.46545,0.1062,0.579315,0.0796,0.444151,0.1314,0.873149,0.0322
ConcentricityDifference,0.131348,0.4986,0.134717,0.4794,0.192789,0.382,0.154738,0.3804,0.137792,0.522,0.458561,0.1086,0.391127,0.1934,0.303171,0.327,0.711618,0.0552
DistanceCorrelation,0.209859,0.3594,0.146246,0.3452,0.306975,0.2786,0.128919,0.4312,0.175935,0.33,0.460491,0.1126,0.564012,0.0904,0.44317,0.132,0.884731,0.0266
EigenspaceOverlapScore,0.08615,0.5648,0.109566,0.479,0.159225,0.4606,0.281368,0.404,0.140166,0.3292,0.319582,0.2624,0.411037,0.1702,0.244117,0.2108,0.527321,0.0588
Gulp,0.089878,0.5516,0.106936,0.4816,0.157116,0.4536,0.16114,0.356,0.110683,0.533,0.277657,0.4064,0.411573,0.1682,0.225199,0.2304,0.462335,0.103
HardCorrelationMatch,0.103171,0.48,0.109084,0.4346,0.17646,0.378,0.332267,0.2788,0.208394,0.2648,0.546456,0.0892,0.442824,0.1052,0.216439,0.2564,0.703211,0.0278
JaccardSimilarity,0.171477,0.3262,0.162074,0.3418,0.303141,0.2994,0.308633,0.285,0.430379,0.119,0.385761,0.1678,0.508573,0.1324,0.455164,0.1236,0.971198,0.0032
LinearRegression,0.169483,0.4172,0.134019,0.4044,0.257117,0.363,0.159012,0.4262,0.192686,0.3388,0.356119,0.2138,0.500928,0.1018,0.343531,0.2012,0.561853,0.069
MagnitudeDifference,0.120688,0.4904,0.112121,0.5182,0.1533,0.4052,0.13329,0.4308,0.127935,0.4626,0.569959,0.1374,0.383388,0.1804,0.162774,0.3482,0.296801,0.1894


In [7]:
# Mean layer_test values per similarity measure for the three datasets.
get_agg_pivot_table(experiment="layer_test", datasets=["cora", "flickr", "ogbn-arxiv"])

representation_dataset,cora,cora,cora,cora,cora,cora,flickr,flickr,flickr,flickr,flickr,flickr,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv,ogbn-arxiv
architecture,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE
quality_measure,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate
similarity_measure,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
AlignedCosineSimilarity,0.810646,0.1,0.947177,0.054762,0.888639,0.090476,0.48585,0.246429,0.522619,0.394048,0.812619,0.170238,0.443469,0.245238,0.933435,0.085714,0.66398,0.227381
CKA,0.997449,0.004762,0.972143,0.032143,0.982687,0.030952,0.464558,0.314286,0.360068,0.353571,0.895816,0.119048,0.85068,0.115476,0.743946,0.104762,0.949932,0.07381
ConcentricityDifference,0.640306,0.122619,0.717585,0.214286,0.229694,0.440476,0.609074,0.160714,0.344524,0.469048,0.056769,0.494048,0.387612,0.24881,0.545442,0.325,0.421122,0.413095
DistanceCorrelation,0.998469,0.003571,0.988639,0.005952,0.998299,0.004762,0.431565,0.340476,0.644694,0.232143,0.992483,0.015476,0.887959,0.083333,0.728027,0.115476,0.923673,0.103571
EigenspaceOverlapScore,1.0,0.0,1.0,0.0,1.0,0.0,0.792223,0.089286,0.975816,0.015476,0.994218,0.010714,0.906219,0.082143,0.785884,0.052381,0.989694,0.021429
Gulp,0.719048,0.091667,0.930612,0.028571,1.0,0.0,0.23898,0.397619,0.441429,0.289286,0.802619,0.077381,0.348707,0.369048,0.855374,0.113095,0.994558,0.014286
HardCorrelationMatch,0.905782,0.066667,0.914456,0.047619,0.929592,0.078571,0.642925,0.204762,0.515204,0.291667,0.836667,0.167857,0.898435,0.114286,0.929252,0.082143,0.877993,0.125
JaccardSimilarity,0.998299,0.005952,1.0,0.0,0.99966,0.00119,0.942211,0.059524,0.985238,0.02381,0.964626,0.055952,0.986054,0.020238,0.983844,0.027381,0.947279,0.07619
LinearRegression,0.878776,0.045238,0.996803,0.007143,1.0,0.0,0.237517,0.39881,0.359184,0.344048,0.693469,0.090476,0.24932,0.383333,0.967925,0.02619,0.99966,0.00119
MagnitudeDifference,0.930306,0.083333,0.345,0.409524,0.573129,0.344048,0.58585,0.361905,0.673367,0.264286,0.498095,0.358333,0.324592,0.313095,0.477415,0.305952,0.568435,0.163095


In [23]:
# Mean layer_test (monotonicity) values for cora only, built step by step so
# the intermediate frames `df` and `data` stay available for inspection.
path = os.path.join(
    EXPERIMENT_RESULTS_PATH,
    FULL_DF_FILE_NAME("layer_test", "monotonicity", "cora"),
)
df = pd.read_csv(path)
data = df[["similarity_measure", "quality_measure", "value", "architecture", "representation_dataset"]]
data.pivot_table(
    index="similarity_measure",
    columns=["representation_dataset", "architecture", "quality_measure"],
    values="value",
    aggfunc="mean",
)

representation_dataset,cora,cora,cora,cora,cora,cora
architecture,GAT,GAT,GCN,GCN,GraphSAGE,GraphSAGE
quality_measure,correlation,violation_rate,correlation,violation_rate,correlation,violation_rate
similarity_measure,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
AlignedCosineSimilarity,0.810646,0.1,0.947177,0.054762,0.888639,0.090476
CKA,0.997449,0.004762,0.972143,0.032143,0.982687,0.030952
ConcentricityDifference,0.640306,0.122619,0.717585,0.214286,0.229694,0.440476
DistanceCorrelation,0.998469,0.003571,0.988639,0.005952,0.998299,0.004762
EigenspaceOverlapScore,1.0,0.0,1.0,0.0,1.0,0.0
Gulp,0.719048,0.091667,0.930612,0.028571,1.0,0.0
HardCorrelationMatch,0.905782,0.066667,0.914456,0.047619,0.929592,0.078571
JaccardSimilarity,0.998299,0.005952,1.0,0.0,0.99966,0.00119
LinearRegression,0.878776,0.045238,0.996803,0.007143,1.0,0.0
MagnitudeDifference,0.930306,0.083333,0.345,0.409524,0.573129,0.344048


In [42]:
# Unaggregated label_test table for cora.
get_pivot_table(experiment="label_test", dataset="cora")

representation_dataset,cora,cora,cora,cora,cora,cora
architecture,GCN,GCN,GAT,GAT,GraphSAGE,GraphSAGE
quality_measure,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC
similarity_measure,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
AlignedCosineSimilarity,0.3768,0.296727,0.447,0.277452,0.4118,0.281042
CKA,0.4736,0.273912,0.4866,0.274583,0.4686,0.274771
ConcentricityDifference,0.4066,0.136865,0.3736,0.183074,0.3874,0.162598
DistanceCorrelation,0.427,0.281172,0.4788,0.274829,0.3882,0.289221
EigenspaceOverlapScore,0.4294,0.272522,0.382,0.281568,0.4544,0.273769
Gulp,0.4158,0.27677,0.3846,0.28136,0.4518,0.274428
HardCorrelationMatch,0.479,0.271883,0.4956,0.2707,0.4734,0.27331
JaccardSimilarity,0.4868,0.271703,0.4368,0.277164,0.4854,0.273186
LinearRegression,0.5956,0.096252,0.5638,0.126923,0.5818,0.108976
MagnitudeDifference,0.4668,0.11468,0.3734,0.165867,0.4248,0.176162


In [25]:
# Unaggregated label_test table for flickr.
get_pivot_table(experiment="label_test", dataset="flickr")

architecture,GCN,GCN,GAT,GAT,GraphSAGE,GraphSAGE
quality_measure,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC
similarity_measure,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AlignedCosineSimilarity,0.1504,0.461096,0.4726,0.141449,0.489,0.277374
CKA,0.1608,0.342134,0.4808,0.129954,0.2202,0.43229
ConcentricityDifference,0.4692,0.125012,0.4802,0.115978,0.2494,0.291945
DistanceCorrelation,0.131,0.421418,0.474,0.121305,0.3532,0.312483
EigenspaceOverlapScore,0.4822,0.275875,0.4742,0.128491,0.4234,0.284937
Gulp,0.5408,0.103242,0.4738,0.111432,0.4222,0.285205
HardCorrelationMatch,0.1364,0.383127,0.3592,0.167077,0.2926,0.339107
JaccardSimilarity,0.2468,0.326535,0.4094,0.124418,0.316,0.313439
LinearRegression,0.4692,0.219668,0.536,0.101931,0.6958,0.084728
MagnitudeDifference,0.1564,0.455037,0.5308,0.104497,0.2016,0.3125


In [32]:
# Unaggregated shortcut_test table for cora.
get_pivot_table(experiment="shortcut_test", dataset="cora")

architecture,GCN,GCN,GAT,GAT,GraphSAGE,GraphSAGE
quality_measure,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC
similarity_measure,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AlignedCosineSimilarity,0.3646,0.153279,0.4744,0.135916,0.3718,0.280349
CKA,0.3678,0.13675,0.3512,0.217533,0.298,0.285521
ConcentricityDifference,0.4794,0.134717,0.4986,0.131348,0.382,0.192789
DistanceCorrelation,0.3452,0.146246,0.3594,0.209859,0.2786,0.306975
EigenspaceOverlapScore,0.479,0.109566,0.5648,0.08615,0.4606,0.159225
Gulp,0.4816,0.106936,0.5516,0.089878,0.4536,0.157116
HardCorrelationMatch,0.4346,0.109084,0.48,0.103171,0.378,0.17646
JaccardSimilarity,0.3418,0.162074,0.3262,0.171477,0.2994,0.303141
LinearRegression,0.5956,0.084588,0.5828,0.102017,0.637,0.080087
MagnitudeDifference,0.5182,0.112121,0.4904,0.120688,0.4052,0.1533


In [34]:
# Unaggregated shortcut_test table for flickr.
get_pivot_table(experiment="shortcut_test", dataset="flickr")

architecture,GCN,GCN,GAT,GAT,GraphSAGE,GraphSAGE
quality_measure,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC
similarity_measure,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AlignedCosineSimilarity,0.2008,0.299808,0.338,0.35997,0.0864,0.527227
CKA,0.4634,0.123737,0.4684,0.117855,0.1062,0.46545
ConcentricityDifference,0.522,0.137792,0.3804,0.154738,0.1086,0.458561
DistanceCorrelation,0.33,0.175935,0.4312,0.128919,0.1126,0.460491
EigenspaceOverlapScore,0.3292,0.140166,0.404,0.281368,0.2624,0.319582
Gulp,0.533,0.110683,0.356,0.16114,0.4064,0.277657
HardCorrelationMatch,0.2648,0.208394,0.2788,0.332267,0.0892,0.546456
JaccardSimilarity,0.119,0.430379,0.285,0.308633,0.1678,0.385761
LinearRegression,0.6612,0.079394,0.5738,0.091982,0.7862,0.066711
MagnitudeDifference,0.4626,0.127935,0.4308,0.13329,0.1374,0.569959


In [31]:
# Unaggregated shortcut_test table for ogbn-arxiv.
get_pivot_table(experiment="shortcut_test", dataset="ogbn-arxiv")

architecture,GCN,GCN,GAT,GAT,GraphSAGE,GraphSAGE
quality_measure,violation_rate,AUPRC,violation_rate,AUPRC,violation_rate,AUPRC
similarity_measure,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AlignedCosineSimilarity,0.1406,0.383656,0.1088,0.566886,0.0084,0.944316
CKA,0.1314,0.444151,0.0796,0.579315,0.0322,0.873149
ConcentricityDifference,0.327,0.303171,0.1934,0.391127,0.0552,0.711618
DistanceCorrelation,0.132,0.44317,0.0904,0.564012,0.0266,0.884731
EigenspaceOverlapScore,0.2108,0.244117,0.1702,0.411037,0.0588,0.527321
Gulp,0.2304,0.225199,0.1682,0.411573,0.103,0.462335
HardCorrelationMatch,0.2564,0.216439,0.1052,0.442824,0.0278,0.703211
JaccardSimilarity,0.1236,0.455164,0.1324,0.508573,0.0032,0.971198
LinearRegression,0.7988,0.06179,0.8982,0.055951,0.931,0.054
MagnitudeDifference,0.3482,0.162774,0.1804,0.383388,0.1894,0.296801
