In [None]:
import pandas as pd
from repsim.benchmark.paths import EXPERIMENT_RESULTS_PATH

## Group Separation

In [None]:
# path = "/root/similaritybench/experiments/results/nlp_aug_mnli_full.csv"
# path = "/root/similaritybench/experiments/results/nlp_sc_mnli_full.csv"


# Group-separation result files as (CSV path, scenario abbreviation, dataset) triples.
groupsep_sources = [
    ("/root/similaritybench/experiments/results/nlp_aug_sst2.csv", "aug", "sst2"),
    ("/root/similaritybench/experiments/results/nlp_mem_sst2.csv", "mem", "sst2"),
    ("/root/similaritybench/experiments/results/nlp_shortcut_sst2.csv", "sc", "sst2"),
    ("/root/similaritybench/experiments/results/nlp_shortcut_mnli.csv", "sc", "mnli"),
    ("/root/similaritybench/experiments/results/nlp_aug_mnli.csv", "aug", "mnli"),
    ("/root/similaritybench/experiments/results/nlp_mem_mnli.csv", "mem", "mnli"),
]
# Raw CSV column name -> column label used in the tables below.
proper_name = {"quality_measure": "Similarity Measure", "AUPRC": "AUPRC", "violation_rate": "Violation Rate"}

cleaned_dfs = []
for path, setting, dataset in groupsep_sources:
    print(setting, dataset)
    df = pd.read_csv(path)
    # NOTE(review): the first two rows are skipped -- presumably they are not data rows; confirm against the CSVs.
    data = df.loc[2:].copy().reset_index(drop=True)
    print(data.columns)
    # Raises KeyError if the file carries a column that is not listed in `proper_name`.
    data.columns = [proper_name[col] for col in data.columns]
    data["Architecture"] = "BERT-Base"
    # Cast by replacing the columns. Assigning through `.loc[:, col] = ...` writes into the
    # existing column and can leave it as object dtype, which silently defeats the cast.
    data = data.astype({"Violation Rate": float, "AUPRC": float})
    # Long format: one row per (measure, quality metric) pair.
    data = data.melt(
        id_vars=["Similarity Measure", "Architecture"],
        value_vars=["Violation Rate", "AUPRC"],
        var_name="Quality Metric",
        value_name="Score",
    )
    data["Setting"] = setting
    data["Dataset"] = dataset

    cleaned_dfs.append(data)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
data.head()

In [None]:
# One LaTeX table per scenario: measures as rows, (quality metric, dataset) as columns.
for setting in data["Setting"].unique():
    setting_rows = data.loc[data["Setting"] == setting]
    pivot = setting_rows.pivot(
        index="Similarity Measure",
        columns=["Quality Metric", "Dataset"],
        values="Score",
    )
    print(pivot.to_latex())

In [None]:
# Wide table over all scenarios: one row per measure, one column per (metric, setting, dataset).
pivot = pd.pivot_table(
    data,
    values="Score",
    index="Similarity Measure",
    columns=["Quality Metric", "Setting", "Dataset"],
)
pivot


In [None]:
# Build the Styler through the DataFrame accessor. `pd.io.formats.style` is only reachable
# as an attribute once that submodule has been imported, and this notebook imports it
# explicitly only in a later section, so the attribute path can fail on a fresh kernel.
styled = pivot.style.format(precision=3).set_caption(
    "Ability to separate groups of BERT representations in different settings."
)
styled

In [None]:
# Render to LaTeX and shade alternating lines.
latex_lines = styled.to_latex(hrules=True, position="t", label="tab:nlp_groupsep").split("\n")
# The last four lines are left untouched; among the others, every even-indexed line from
# index 12 on gets a \rowcolor prefix (presumably 12 is the first body row -- confirm).
shaded = [
    (r"\rowcolor{Gray}" if i >= 12 and i % 2 == 0 else "") + line
    for i, line in enumerate(latex_lines[:-4])
]
latex_str = "\n".join(shaded + latex_lines[-4:])
print(latex_str)

In [None]:
# Keep the group-separation frame under its own name; `data` is reassigned by the cells below.
groupsep_data = data.copy()

## Output Correlation

In [None]:
# Output-correlation result files as (CSV path, scenario abbreviation, dataset) triples.
correlation_sources = [
    ("/root/similaritybench/experiments/results/correlation_nlp_aug_mnli_full.csv", "aug", "mnli"),
    ("/root/similaritybench/experiments/results/correlation_nlp_aug_sst2_full.csv", "aug", "sst2"),
    ("/root/similaritybench/experiments/results/correlation_nlp_mem_mnli_full.csv", "mem", "mnli"),
    ("/root/similaritybench/experiments/results/correlation_nlp_mem_sst2_full.csv", "mem", "sst2"),
    ("/root/similaritybench/experiments/results/correlation_nlp_sc_mnli_full.csv", "sc", "mnli"),
    ("/root/similaritybench/experiments/results/correlation_nlp_sc_sst2_full.csv", "sc", "sst2"),
]

cleaned_dfs = []
for path, setting, dataset in correlation_sources:
    # Tag every row with the scenario and dataset its file belongs to.
    cleaned_dfs.append(pd.read_csv(path, index_col=0).assign(Setting=setting, Dataset=dataset))

# Stack all files and switch to the display column names used by the tables.
data = (
    pd.concat(cleaned_dfs)
    .reset_index(drop=True)
    .rename(
        columns={
            "functional_similarity_measure": "Functional Similarity Measure",
            "similarity_measure": "Representational Similarity Measure",
            "quality_measure": "Quality Measure",
        }
    )
)

In [None]:
# Spearman rows only; measures as rows, one column per (functional measure, quality measure, setting, dataset).
spearman_rows = data.loc[data["Quality Measure"] == "spearmanr"]
pivot = pd.pivot_table(
    spearman_rows,
    values="corr",
    index="Representational Similarity Measure",
    columns=["Functional Similarity Measure", "Quality Measure", "Setting", "Dataset"],
)
# The absolute-value step is currently disabled, so signed correlations are shown.
# pivot = pivot.apply(abs)
pivot = pivot.sort_values(by="Representational Similarity Measure")
pivot

In [None]:
# Build the Styler through the DataFrame accessor. `pd.io.formats.style` is only reachable
# as an attribute once that submodule has been imported, and this notebook imports it
# explicitly only in a later section, so the attribute path can fail on a fresh kernel.
# NOTE(review): the caption says "Absolute correlation", but the `abs` step in the cell that
# builds `pivot` is commented out -- confirm which of the two is intended.
styled = pivot.style.format(precision=3).set_caption(
    "Absolute correlation between representational and functional similarity for BERT models."
)
latex_lines = styled.to_latex(hrules=True, position="t", label="tab:nlp_outputcorr").split("\n")
# The last four lines are left untouched; among the others, every even-indexed line from
# index 12 on gets a \rowcolor prefix (presumably 12 is the first body row -- confirm).
shaded = [
    (r"\rowcolor{Gray}" if i >= 12 and i % 2 == 0 else "") + line
    for i, line in enumerate(latex_lines[:-4])
]
latex_str = "\n".join(shaded + latex_lines[-4:])
print(latex_str)


In [None]:
# Quality measures to include; alternatives are kept below for quick switching.
qmeasures = ["kendalltau"]
# qmeasures = ["pearsonr"]
# qmeasures = ["kendalltau", "spearmanr"]
selected_rows = data.loc[data["Quality Measure"].isin(qmeasures)]
pivot = selected_rows.pivot_table(
    values="corr",
    index="Representational Similarity Measure",
    columns=["Quality Measure", "Setting", "Dataset", "Functional Similarity Measure", "architecture"],
)
# Absolute correlations, rows ordered by measure name.
pivot = pivot.apply(abs).sort_values(by="Representational Similarity Measure")
# Kendall correlation between the columns of the pivot.
pivot.corr(method="kendall")
# pivot

In [None]:
# Distinct values of the "Quality Measure" column in the current `data` frame.
data["Quality Measure"].unique()

In [None]:
# Keep the correlation frame under its own name; the combined-table cell below reads `corr_data`.
corr_data = data.copy()


## Combined Table (with preliminary values)

In [None]:
# Which slice of the output-correlation results goes into the combined table.
corr_setting = "aug"
corr_qmeasure = "spearmanr"
corr_funcsim = "JSD"
corr_dataset = "mnli"

# Explicit copy: the columns added below must not be written onto a slice of `corr_data`
# (that triggers SettingWithCopyWarning and makes it ambiguous which frame is modified).
merge_corr = corr_data.loc[
    (corr_data["Functional Similarity Measure"] == corr_funcsim)
    & (corr_data["Quality Measure"] == corr_qmeasure)
    & (corr_data["Setting"] == corr_setting)
    & (corr_data["Dataset"] == corr_dataset)
].copy()

# Bring the correlation rows onto the column layout of the group-separation frame.
full_corr_setting = corr_funcsim + corr_dataset + corr_setting
merge_corr["Score"] = merge_corr["corr"]
merge_corr["Similarity Measure"] = merge_corr["Representational Similarity Measure"]
merge_corr["Setting"] = full_corr_setting
merge_corr["Architecture"] = "BERT-Base"
merge_corr["Quality Metric"] = corr_qmeasure

# Group-separation rows for one dataset and one quality metric.
group_dataset = "mnli"
group_qmeasure = "AUPRC"
merge_groups = groupsep_data.loc[
    (groupsep_data["Dataset"] == group_dataset)
    & (groupsep_data["Quality Metric"] == group_qmeasure)
]

merged = pd.concat([merge_groups, merge_corr], axis=0)
merged


In [None]:
# Full similarity-measure name -> short label used in the tables.
# Applied with `Series.map`, so any name missing here becomes NaN in the mapped column.
measure_to_abbrv = {
    "AlignedCosineSimilarity": "AlignCos",
    "CKA": "CKA",
    "ConcentricityDifference": "ConcDiff",
    "DistanceCorrelation": "DistCorr",
    "EigenspaceOverlapScore": "EOS",
    "GeometryScore": "GS",
    "Gulp": "GULP",
    "HardCorrelationMatch": "HardCorr",
    "IMDScore": "IMD",
    "JaccardSimilarity": "Jaccard",
    "LinearRegression": "LinReg",
    "MagnitudeDifference": "MagDiff",
    "OrthogonalAngularShapeMetricCentered": "AngShape",
    "OrthogonalProcrustesCenteredAndNormalized": "OrthProc",
    "PWCCA": "PWCCA",
    "PermutationProcrustes": "PermProc",
    "ProcrustesSizeAndShapeDistance": "ProcDist",
    "RSA": "RSA",
    "RSMNormDifference": "RSMDiff",
    "RankSimilarity": "RankSim",
    "SVCCA": "SVCCA",
    "SecondOrderCosineSimilarity": "2nd-Cos",
    "SoftCorrelationMatch": "SoftCorr",
    "UniformityDifference": "UnifDiff"
}


In [None]:
# Placeholder rows for the other modalities: the NLP results are duplicated and relabelled,
# so the table layout can be previewed before real vision/graph numbers exist.
fake_vision_data = merged.copy()
fake_vision_data["Architecture"] = "ResNetX"
fake_vision_data["Dataset"] = "ImageNet100"

fake_graph_data = merged.copy()
fake_graph_data["Architecture"] = "GraphSage"
fake_graph_data["Dataset"] = "ogbn-arxiv"

table_data = pd.concat([merged, fake_vision_data, fake_graph_data])
table_data["Similarity Measure"] = table_data["Similarity Measure"].map(measure_to_abbrv)

# Flip the sign of the output-correlation scores.
# NOTE(review): presumably so that larger means better, as for AUPRC -- confirm.
is_corr_row = table_data.Setting == full_corr_setting
table_data.loc[is_corr_row, "Score"] = -1 * table_data.loc[is_corr_row, "Score"]

pivot = pd.pivot_table(
    table_data,
    index="Similarity Measure",
    columns=["Setting", "Quality Metric", "Architecture", "Dataset"],
    values="Score",
)
pivot = pivot.sort_values(by="Similarity Measure")

# Build the Styler through the DataFrame accessor. `pd.io.formats.style` is only reachable
# as an attribute once that submodule has been imported, and this notebook imports it
# explicitly only in a later section, so the attribute path can fail on a fresh kernel.
styled = pivot.style.format(precision=3).set_caption("Results overview for selected datasets and models.")

latex_lines = styled.to_latex(hrules=True, position="t", label="tab:result_overview").split("\n")
# The last four lines are left untouched; among the others, every even-indexed line from
# index 12 on gets a \rowcolor prefix (presumably 12 is the first body row -- confirm).
shaded = [
    (r"\rowcolor{Gray}" if i >= 12 and i % 2 == 0 else "") + line
    for i, line in enumerate(latex_lines[:-4])
]
latex_str = "\n".join(shaded + latex_lines[-4:])
print(latex_str)


## Combined Table (Paper Version)

In [None]:
import pandas as pd
from pathlib import Path
import re
import pandas.io.formats.style


# Full similarity-measure name -> short label used in the paper tables and plots.
# Applied with `Series.map`, so any name missing here becomes NaN in the mapped column.
measure_to_abbrv = {
    "AlignedCosineSimilarity": "AlignCos",
    "CKA": "CKA",
    "ConcentricityDifference": "ConcDiff",
    "DistanceCorrelation": "DistCorr",
    "EigenspaceOverlapScore": "EOS",
    "GeometryScore": "GS",
    "Gulp": "GULP",
    "HardCorrelationMatch": "HardCorr",
    "IMDScore": "IMD",
    "JaccardSimilarity": "Jaccard",
    "LinearRegression": "LinReg",
    "MagnitudeDifference": "MagDiff",
    "OrthogonalAngularShapeMetricCentered": "AngShape",
    "OrthogonalProcrustesCenteredAndNormalized": "OrthProc",
    "PWCCA": "PWCCA",
    "PermutationProcrustes": "PermProc",
    "ProcrustesSizeAndShapeDistance": "ProcDist",
    "RSA": "RSA",
    "RSMNormDifference": "RSMDiff",
    "RankSimilarity": "RankSim",
    "SVCCA": "SVCCA",
    "SecondOrderCosineSimilarity": "2nd-Cos",
    "SoftCorrelationMatch": "SoftCorr",
    "UniformityDifference": "UnifDiff"
}


In [None]:
# Load every NLP result file; scenario and dataset are parsed from the file name.
cleaned_dfs = []
nlp_root = Path("/root/similaritybench/experiments/paper_results/nlp")
# Dataset token must be delimited by underscores on both sides.
pattern = r'(?<=_)sst2(?=_)|(?<=_)mnli(?=_)'
# `iterdir()` yields entries in arbitrary order and includes directories: sort for a
# reproducible row order and skip anything that is not a regular file.
for path in sorted(p for p in nlp_root.iterdir() if p.is_file()):
    df = pd.read_csv(path, index_col=0)
    # The scenario is the file-name prefix before the first underscore.
    setting = path.name.split("_")[0]

    match = re.search(pattern, path.name)
    if match is None:
        # Explicit error instead of `assert`, which is stripped under `python -O`.
        raise ValueError(f"Cannot infer the dataset from file name {path.name!r}")
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
nlp_data = data


In [None]:
# Load every graph result file; scenario and dataset are parsed from the file name.
cleaned_dfs = []
root = Path("/root/similaritybench/experiments/paper_results/graph")
# File-name token -> scenario abbreviation used throughout the notebook.
pattern_to_setting = {
    "augmentation": "aug",
    "label_test": "mem",
    "layer_test": "mono",
    "output_correlation": "correlation",
    "shortcut": "sc",
}
setting_pattern = r"augmentation|label_test|layer_test|output_correlation|shortcut"
# Dataset token must be delimited by underscores on both sides.
dataset_pattern = r"(?<=_)cora(?=_)|(?<=_)flickr(?=_)|(?<=_)ogbn-arxiv(?=_)"
# `iterdir()` yields entries in arbitrary order and includes directories: sort for a
# reproducible row order and skip anything that is not a regular file.
for path in sorted(p for p in root.iterdir() if p.is_file()):
    df = pd.read_csv(path, index_col=0)

    match = re.search(setting_pattern, path.name)
    if match is None:
        # Previously this surfaced as an AttributeError on `None.group`.
        raise ValueError(f"Cannot infer the setting from file name {path.name!r}")
    setting = pattern_to_setting[match.group(0)]

    match = re.search(dataset_pattern, path.name)
    if match is None:
        raise ValueError(f"Cannot infer the dataset from file name {path.name!r}")
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
graph_data = data

In [None]:
# Load every vision result file; scenario and dataset are parsed from the file name.
cleaned_dfs = []
root = Path("/root/similaritybench/experiments/paper_results/vision")
# File-name token -> scenario abbreviation used throughout the notebook.
pattern_to_setting = {
    "augmentation": "aug",
    "label_test": "mem",
    "layer_test": "mono",
    "correlation": "correlation",
    "shortcut": "sc",
}
setting_pattern = r"augmentation|label_test|layer_test|correlation|shortcut"
# Dataset token must be delimited by underscores on both sides.
dataset_pattern = r"(?<=_)in100(?=_)"
# `iterdir()` yields entries in arbitrary order and includes directories: sort for a
# reproducible row order and skip anything that is not a regular file.
for path in sorted(p for p in root.iterdir() if p.is_file()):
    df = pd.read_csv(path, index_col=0)

    match = re.search(setting_pattern, path.name)
    if match is None:
        # Previously this surfaced as an AttributeError on `None.group`.
        raise ValueError(f"Cannot infer the setting from file name {path.name!r}")
    setting = pattern_to_setting[match.group(0)]

    match = re.search(dataset_pattern, path.name)
    if match is None:
        raise ValueError(f"Cannot infer the dataset from file name {path.name!r}")
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
vision_data = data

In [None]:
# Sanity check: which dataset labels were parsed from the vision file names.
vision_data.Dataset.unique()

In [None]:
data = pd.concat([nlp_data, graph_data, vision_data])
print(data.columns)

data = data.rename(
    columns={
        "functional_similarity_measure": "Functional Similarity Measure",
        "similarity_measure": "Representational Similarity Measure",
        "quality_measure": "Quality Measure",
    }
)

# Correlation rows keep their score in "corr"; copy it into the shared "value" column.
is_corr = data.Setting == "correlation"
data.loc[is_corr, "value"] = data.loc[is_corr, "corr"]

# Correlation against accuracy differences becomes its own scenario.
data.loc[is_corr & (data["Functional Similarity Measure"] == "AbsoluteAccDiff"), "Setting"] = "acc_corr"

# Keep exactly one evaluation per scenario. All conditions are row-wise and read columns
# that are no longer modified, so the individual drops can be merged into one mask.
quality = data["Quality Measure"]
drop = (
    ((data.Setting == "correlation") & (data["Functional Similarity Measure"] != "JSD"))
    | (data.Setting.isin(["aug", "mem", "sc"]) & (quality != "AUPRC"))
    | (data.Setting.isin(["correlation", "acc_corr"]) & (quality != "spearmanr"))
    | (data.Setting.isin(["mono"]) & (quality != "correlation"))
)
data = data.loc[~drop]


def beautify_df(data):
    """Relabel a combined results frame for presentation.

    Maps the values of "Representational Similarity Measure", "architecture", "domain",
    "Dataset", "Setting" and "Quality Measure" to their display labels, converts
    violation rates into conformity rates, renames the columns to their short table
    headers and adds a "Type" column that groups the scenarios.

    The input frame is left untouched; all changes are made on a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns listed above plus "value". Values without an entry in
        the respective mapping become NaN (``Series.map`` semantics).

    Returns
    -------
    tuple
        ``(relabelled_frame, column_order)`` where ``column_order`` is the list of
        scenario labels in the order intended for the table columns.
    """
    # Work on a copy: the original mutated the caller's frame in place and then returned a
    # different (renamed) object, so calling the function twice on the same input mapped
    # already-mapped labels to NaN. The copy also avoids SettingWithCopyWarning when the
    # caller passes a filtered slice.
    data = data.copy()

    data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(
        measure_to_abbrv
    )
    data.loc[:, "architecture"] = data["architecture"].map(
        {
            "BERT-L": "BERT",
            "GCN": "GCN",
            "GAT": "GAT",
            "GraphSAGE": "SAGE",
            "VGG11": "VGG11",
            "VGG19": "VGG19",
            "ResNet18": "RNet18",
            "ResNet34": "RNet34",
            "ResNet101": "RNet101",
            "ViT_B32": "ViT_B32",
            "ViT_L32": "ViT_L32",
        }
    )
    data.loc[:, "domain"] = data["domain"].map({"NLP": "Text", "GRAPHS": "Graph", "VISION": "Vision"})
    data.loc[:, "Dataset"] = data["Dataset"].map(
        {
            "mnli_aug_rate0": "MNLI",
            "mnli_mem_rate0": "MNLI",
            "mnli": "MNLI",
            "sst2_sc_rate0558": "SST2",
            "sst2_mem_rate0": "SST2",
            "mnli_sc_rate0354": "MNLI",
            "sst2_aug_rate0": "SST2",
            "sst2": "SST2",
            "flickr": "flickr",
            "ogbn-arxiv": "arXiv",
            "cora": "Cora",
            "in100": "IN100"
        }
    )
    data.loc[:, "Setting"] = data["Setting"].map(
        {
            "aug": "Augmentation",
            "mem": "Random Labels",
            "correlation": "JSD Corr.",
            "acc_corr": "Acc Corr.",
            "mono": "Layer Mono.",
            "sc": "Shortcuts",
        }
    )
    column_order = ["Acc Corr.", "JSD Corr.", "Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
    # NOTE(review): a full-slice `.loc` assignment writes into the existing column, so the
    # column may keep its previous dtype instead of becoming categorical. The caller
    # reorders the pivot with `column_order` explicitly, so the ordering does not depend
    # on this -- confirm before relying on the categorical dtype.
    data.loc[:, "Setting"] = pd.Categorical(
        data["Setting"],
        categories=column_order,
        ordered=True,
    )
    data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
        {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
    )
    # Violation rate -> conformity rate (1 - x). Relies on the relabelling just above.
    data.loc[data["Quality Measure"] == "Conformity Rate", "value"] = (
        1 - data.loc[data["Quality Measure"] == "Conformity Rate", "value"]
    )  # must be run in conjunction with the above renaming

    data = data.rename(
        columns={
            "domain": "Modality",
            "architecture": "Arch.",
            "Representational Similarity Measure": "Sim Meas.",
            "Quality Measure": "Eval.",
            "Setting": "Scenario",
        }
    )
    # Top-level grouping of the scenarios for the table header.
    data.loc[data.Scenario.isin(["Acc Corr.", "JSD Corr."]), "Type"] = "Grounding by Prediction"
    data.loc[data.Scenario.isin(["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]), "Type"] = (
        "Grounding by Design"
    )
    return data, column_order


data, column_order = beautify_df(data)

# Data selection for the overview table: rows whose dataset and architecture are both listed.
overview_datasets = ["MNLI", "flickr", "IN100"]
overview_archs = ["SAGE", "BERT", "RNet18"]
idx = data["Dataset"].isin(overview_datasets) & data["Arch."].isin(overview_archs)

# Measures as rows; the column levels run from scenario type down to architecture.
pivot = (
    data.loc[idx]
    .pivot_table(
        values="value",
        index="Sim Meas.",
        columns=["Type", "Scenario", "Eval.", "Modality", "Dataset", "Arch."],
    )
    .sort_values(by="Sim Meas.")
    # Fix the left-to-right order of scenarios, then of the two scenario types.
    .reindex(column_order, axis="columns", level="Scenario")
    .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
)
pivot

In [None]:
styled = pd.io.formats.style.Styler(pivot, caption="Full Results.", precision=2)

# Bold the maximum of every column.
# Variants that were tried: highlight_quantile(q_left=0.86, ...) for the top 3, and
# props="textcolor{red}:--rwrap;" for a red top-1.
latex_lines = (
    styled.highlight_max(props="textbf:--rwrap;", axis=0)
    .to_latex(label="tab:result_overview", position="t", hrules=True)
    .split("\n")
)

# ----- Manual modifications --------
# All line indices below depend on the exact layout `to_latex` produces for this pivot.

# Center headers: turn {r} into {c} on lines 5-7 (use only 5 and 6 if there is no type row).
pattern = r'\{r\}'
replacement = r'{c}'
latex_lines = [
    re.sub(pattern, replacement, line) if 5 <= i <= 7 else line
    for i, line in enumerate(latex_lines)
]

# Remove measure row (index 10 instead if there is no type row).
del latex_lines[11]

# Add vertical bars: keep the first 17 characters of line 3 and append six "|rrr" groups.
# NOTE(review): presumably line 3 is the tabular column specification -- confirm.
latex_lines[3] = latex_lines[3][:17] + "|rrr" * 6 + "}"

# Make every second row gray; the last four lines are left untouched.
shaded = [
    (r"\rowcolor{Gray}" if i >= 12 and i % 2 == 0 else "") + line
    for i, line in enumerate(latex_lines[:-4])
]
latex_str = "\n".join(shaded + latex_lines[-4:])
print(latex_str)


### NLP Tables

In [None]:
# Reload the NLP results from disk so this section does not depend on the filtered frames above.
cleaned_dfs = []
nlp_root = Path("/root/similaritybench/experiments/paper_results/nlp")
# Dataset token must be delimited by underscores on both sides.
pattern = r'(?<=_)sst2(?=_)|(?<=_)mnli(?=_)'
# `iterdir()` yields entries in arbitrary order and includes directories: sort for a
# reproducible row order and skip anything that is not a regular file.
for path in sorted(p for p in nlp_root.iterdir() if p.is_file()):
    df = pd.read_csv(path, index_col=0)
    # The scenario is the file-name prefix before the first underscore.
    setting = path.name.split("_")[0]

    match = re.search(pattern, path.name)
    if match is None:
        # Explicit error instead of `assert`, which is stripped under `python -O`.
        raise ValueError(f"Cannot infer the dataset from file name {path.name!r}")
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
data = data.rename(columns={"functional_similarity_measure": "Functional Similarity Measure", "similarity_measure": "Representational Similarity Measure", "quality_measure": "Quality Measure"})

# Correlation rows keep their score in "corr"; copy it into the shared "value" column.
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

# Of the correlation evaluations only Spearman is reported.
idx = (data.Setting.isin(["correlation", "acc_corr"])) & (data["Quality Measure"] != "spearmanr")
# Explicit copy: the frame is modified further below, which must not happen on a slice
# of the unfiltered frame (SettingWithCopyWarning).
data = data.loc[~idx].copy()

# One scenario per functional similarity measure, e.g. "correlationJSD".
idx = data.Setting == "correlation"
data.loc[idx, "Setting"] = data.loc[idx, "Setting"] + data.loc[idx, "Functional Similarity Measure"]
print(data.Setting.unique())

# Filters of the overview table that are deliberately not applied here:
# idx = (data.Setting == "correlation") & (data["Functional Similarity Measure"] != "JSD")
# data = data.loc[~idx]

# idx = (data.Setting.isin(["aug", "mem", "sc"])) & (data["Quality Measure"] != "AUPRC")
# data = data.loc[~idx]

# idx = (data.Setting.isin(["mono"])) & (data["Quality Measure"] != "correlation")
# data = data.loc[~idx]

# Relabel values for display; entries missing from a mapping become NaN.
data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(
        measure_to_abbrv
)
data.loc[:, "architecture"] = data["architecture"].map(
    {"BERT-L": "BERT", "GCN": "GCN", "GAT": "GAT", "GraphSAGE": "SAGE"}
)
data.loc[:, "domain"] = data["domain"].map({"NLP": "Text", "GRAPHS": "Graph", "VISION": "Vision"})
data.loc[:, "Dataset"] = data["Dataset"].map(
    {
        "mnli_aug_rate0": "MNLI",
        "mnli_mem_rate0": "MNLI",
        "mnli": "MNLI",
        "sst2_sc_rate0558": "SST2",
        "sst2_mem_rate0": "SST2",
        "mnli_sc_rate0354": "MNLI",
        "sst2_aug_rate0": "SST2",
        "sst2": "SST2",
        "flickr": "flickr",
        "ogbn-arxiv": "arXiv",
        "cora": "Cora",
    }
)

data.loc[:, "Setting"] = data["Setting"].map(
    {
        "aug": "Augmentation",
        "mem": "Random Labels",
        "correlationJSD": "JSD Corr.",
        "correlationAbsoluteAccDiff": "Acc Corr.",
        "correlationDisagreement": "Disagr. Corr.",
        "mono": "Layer Mono.",
        "sc": "Shortcuts",
    }
)
column_order = ["Acc Corr.", "JSD Corr.", "Disagr. Corr.", "Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
# NOTE(review): a full-slice `.loc` assignment writes into the existing column, so the
# column may keep its previous dtype instead of becoming categorical. The pivot cell
# below reorders with `column_order` explicitly, so the ordering does not depend on it.
data.loc[:, "Setting"] = pd.Categorical(
    data["Setting"],
    categories=column_order,
    ordered=True,
)

data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
    {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
)
# Violation rate -> conformity rate (1 - x). Relies on the relabelling just above.
data.loc[data["Quality Measure"] == "Conformity Rate", "value"] = 1 - data.loc[data["Quality Measure"] == "Conformity Rate", "value"]  # must be run in conjunction with the above renaming

data = data.rename(
    columns={
        "domain": "Modality",
        "architecture": "Arch.",
        "Representational Similarity Measure": "Sim Meas.",
        "Quality Measure": "Eval.",
        "Setting": "Scenario",
    }
)

# Top-level grouping of the scenarios for the table header.
data.loc[data.Scenario.isin(["Acc Corr.", "JSD Corr.", "Disagr. Corr."]), "Type"] = "Grounding by Prediction"
data.loc[data.Scenario.isin(["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]), "Type"] = (
    "Grounding by Design"
)

In [None]:
# Text modality only.
idx = data.Modality == "Text"

# Measures as rows; the column levels run from scenario type down to architecture.
# (An alternative level order with "Eval." before "Scenario" was also tried.)
pivot = (
    data.loc[idx]
    .pivot_table(
        values="value",
        index="Sim Meas.",
        columns=["Type", "Scenario", "Eval.", "Modality", "Dataset", "Arch."],
    )
    .sort_values(by=["Sim Meas."])
    # Fix the left-to-right order of scenarios, then of the two scenario types.
    .reindex(column_order, axis="columns", level="Scenario")
    .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
)
pivot

In [None]:
styled = pd.io.formats.style.Styler(pivot, caption="Full Results.", precision=2)

# Bold the maximum of every column.
# Variants that were tried: highlight_quantile(q_left=0.86, ...) for the top 3, and
# props="textcolor{red}:--rwrap;" for a red top-1.
latex_lines = (
    styled.highlight_max(props="textbf:--rwrap;", axis=0)
    .to_latex(label="tab:result_overview", position="t", hrules=True)
    .split("\n")
)

# ----- Manual modifications --------
# All line indices below depend on the exact layout `to_latex` produces for this pivot.

# Center headers: turn {r} into {c} on lines 5-7 (use only 5 and 6 if there is no type row).
pattern = r'\{r\}'
replacement = r'{c}'
latex_lines = [
    re.sub(pattern, replacement, line) if 5 <= i <= 7 else line
    for i, line in enumerate(latex_lines)
]

# Remove measure row (index 10 instead if there is no type row).
del latex_lines[11]

# Vertical bars are not added for this table (disabled):
# latex_lines[3] = latex_lines[3][:17] + "|rrr" * 6 + "}"

# Make every second row gray; the last four lines are left untouched.
shaded = [
    (r"\rowcolor{Gray}" if i >= 12 and i % 2 == 0 else "") + line
    for i, line in enumerate(latex_lines[:-4])
]
latex_str = "\n".join(shaded + latex_lines[-4:])
print(latex_str)


## Rankplots

In [None]:
import seaborn as sns

In [None]:
data = pd.concat([nlp_data, graph_data, vision_data])
data = data.rename(columns={"functional_similarity_measure": "Functional Similarity Measure", "similarity_measure": "Representational Similarity Measure", "quality_measure": "Quality Measure"})
# The three frames each carry their own 0..n index; rebuild a unique one.
data = data.reset_index()

# Correlation rows keep their score in "corr"; copy it into the shared "value" column.
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

# Keep one evaluation per scenario family.
idx = data["Quality Measure"].isin(["AUPRC", "spearmanr", "correlation"])
# Explicit copy: the frame is modified further below, which must not happen on a slice
# of the unfiltered frame (SettingWithCopyWarning).
data = data.loc[idx].copy()

# One scenario per functional similarity measure, e.g. "correlationJSD".
idx = data.Setting == "correlation"
data.loc[idx, "Setting"] = data.loc[idx, "Setting"] + data.loc[idx, "Functional Similarity Measure"]

# Every scenario except "mono" gets the constant model label "agg", so that "model" can be
# used as a grouping key below. The "mono" rows keep the "model" value they came with.
idx = ~(data.Setting == "mono")
data.loc[idx, "model"] = "agg"

# Rank the measures within each (domain, scenario, dataset, architecture, model) group;
# rank 1 is the highest value.
data["rank"] = data.groupby(["domain", "Setting", "Dataset", "architecture", "model"], as_index=True)["value"].rank(ascending=False)

# Relabel values for display; entries missing from a mapping become NaN.
data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(
    measure_to_abbrv
)
data.loc[:, "architecture"] = data["architecture"].map(
    {
        "BERT-L": "BERT",
        "GCN": "GCN",
        "GAT": "GAT",
        "GraphSAGE": "SAGE",
        "VGG11": "VGG11",
        "VGG19": "VGG19",
        "ResNet18": "RNet18",
        "ResNet34": "RNet34",
        "ResNet101": "RNet101",
        "ViT_B32": "ViT_B32",
        "ViT_L32": "ViT_L32",
    }
)
data.loc[:, "domain"] = data["domain"].map({"NLP": "Language", "GRAPHS": "Graph", "VISION": "Vision"})
data.loc[:, "Dataset"] = data["Dataset"].map(
    {
        "mnli_aug_rate0": "MNLI",
        "mnli_mem_rate0": "MNLI",
        "mnli": "MNLI",
        "sst2_sc_rate0558": "SST2",
        "sst2_mem_rate0": "SST2",
        "mnli_sc_rate0354": "MNLI",
        "sst2_aug_rate0": "SST2",
        "sst2": "SST2",
        "flickr": "flickr",
        "ogbn-arxiv": "arXiv",
        "cora": "Cora",
        "in100": "IN100"
    }
)
data.loc[:, "Setting"] = data["Setting"].map(
    {
        "aug": "Augmentation",
        "mem": "Random Labels",
        "correlationJSD": "JSD Corr.",
        "correlationAbsoluteAccDiff": "Acc Corr.",
        "correlationDisagreement": "Disagr. Corr.",
        "acc_corr": "Acc Corr.",
        "mono": "Layer Mono.",
        "sc": "Shortcuts",
    }
)

data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
    {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
)
# Violation rate -> conformity rate (1 - x). The quality-measure filter above keeps no
# "violation_rate" rows, so this currently matches nothing; it is kept in case the filter changes.
data.loc[data["Quality Measure"] == "Conformity Rate", "value"] = (
    1 - data.loc[data["Quality Measure"] == "Conformity Rate", "value"]
)  # must be run in conjunction with the above renaming

data = data.rename(
    columns={
        "domain": "Modality",
        "architecture": "Arch.",
        "Representational Similarity Measure": "Sim Meas.",
        "Quality Measure": "Eval.",
        "Setting": "Scenario",
    }
)

data = data.sort_values(by=["Sim Meas."])

sns.set_theme("paper", style="white", font_scale=1.5)


# Bar plot of ranks per measure, one panel per modality.
sns.catplot(data=data, x="rank", y="Sim Meas.", hue="Modality", kind="bar", height=10, aspect=0.5, col="Modality")


In [None]:
# Mean and median rank of every measure within each modality.
avg_ranks = (
    data.groupby(["Modality", "Sim Meas."])["rank"]
    .agg(avg_rank="mean", med_rank="median")
    .reset_index()
)
avg_ranks

In [None]:
# Attach the per-modality rank summaries and order the rows by median rank.
plot_data = data.merge(avg_ranks).sort_values(by=["med_rank"])

# Fixed colour per modality.
modality_palette = {"Language": "C1", "Vision": "C2", "Graph": "C0"}

# One box plot per modality with the measures on the y axis (vertical layout).
# A horizontal variant (x="Sim Meas.", y="rank", height=5, aspect=2, x tick labels rotated
# by 40 degrees) was saved as "../../figs/aggregated_hor_{mod}.pdf".
for mod in plot_data.Modality.unique():
    modality_rows = plot_data[plot_data.Modality == mod]
    g = sns.catplot(
        data=modality_rows,
        kind="box",
        x="rank",
        y="Sim Meas.",
        hue="Modality",
        col="Modality",
        palette=modality_palette,
        height=10,
        aspect=0.5,
        legend=False,
    )

    g.savefig(f"../../figs/aggregated_ver_{mod}.pdf", bbox_inches="tight")