# Printing and Plotting Results

Contains three sections:
1. How to create a summary table.
2. How to embed the similarity measures with t-SNE and MDS based on their scores.
3. How to create plots showing the ranks of the similarity measures.

## Summary Table

This section of the notebook creates the overview table in our paper. The code can easily be adjusted to output more detailed tables as well.

In [None]:
import re
from pathlib import Path

import pandas as pd
import pandas.io.formats.style

import matplotlib.pyplot as plt

from repsim.benchmark.paths import BASE_PATH


# Short display label for every representational similarity measure. The keys are
# the measure names found in the `similarity_measure` column of the result frames.
measure_to_abbrv = dict(
    AlignedCosineSimilarity="AlignCos",
    CKA="CKA",
    ConcentricityDifference="ConcDiff",
    DistanceCorrelation="DistCorr",
    EigenspaceOverlapScore="EOS",
    GeometryScore="GS",
    Gulp="GULP",
    HardCorrelationMatch="HardCorr",
    IMDScore="IMD",
    JaccardSimilarity="Jaccard",
    LinearRegression="LinReg",
    MagnitudeDifference="MagDiff",
    OrthogonalAngularShapeMetricCentered="AngShape",
    OrthogonalProcrustesCenteredAndNormalized="OrthProc",
    PWCCA="PWCCA",
    PermutationProcrustes="PermProc",
    ProcrustesSizeAndShapeDistance="ProcDist",
    RSA="RSA",
    RSMNormDifference="RSMDiff",
    RankSimilarity="RankSim",
    SVCCA="SVCCA",
    SecondOrderCosineSimilarity="2nd-Cos",
    SoftCorrelationMatch="SoftCorr",
    UniformityDifference="UnifDiff",
    RTD="RTD",
)

# Abbreviated measures grouped by family. The flat (abbreviation, family) records
# built from this grouping are merged onto the result frames as "Measure Type".
_measures_by_family = {
    "Alignment": ["AlignCos", "HardCorr", "AngShape", "LinReg", "OrthProc", "PermProc", "ProcDist", "SoftCorr"],
    "RSM": ["EOS", "CKA", "DistCorr", "GULP", "RSA", "RSMDiff"],
    "Statistic": ["MagDiff", "ConcDiff", "UnifDiff"],
    "Topology": ["GS", "IMD", "RTD"],
    "Neighbors": ["Jaccard", "RankSim", "2nd-Cos"],
    "CCA": ["PWCCA", "SVCCA"],
}
measure_types = [
    (abbrv, family) for family, members in _measures_by_family.items() for abbrv in members
]

# Order in which the measure families appear in the tables.
measure_type_order = ["CCA", "Alignment", "RSM", "Neighbors", "Topology", "Statistic"]


Step 1: Load all results.

In [2]:
# cleaned_dfs = []
# nlp_root = BASE_PATH /"paper_results" / "nlp"
# for path in nlp_root.glob("*.csv"):
#     df = pd.read_csv(path, index_col=0)
#     setting = path.name.split("_")[0]

#     pattern = r'(?<=_)sst2(?=_)|(?<=_)mnli(?=_)'
#     match = re.search(pattern, path.name)
#     assert match is not None
#     dataset = match.group(0)

#     df["Setting"] = setting
#     df["Dataset"] = dataset
#     cleaned_dfs.append(df)

# data = pd.concat(cleaned_dfs).reset_index(drop=True)
# nlp_data = data


In [3]:
# Load every NLP result CSV and tag each frame with the token, setting and dataset
# that are encoded in its file name.
cleaned_dfs = []
nlp_root = BASE_PATH / "paper_results" / "nlp_iclr"
# sorted(): the order in which glob yields files is not guaranteed, so fix it to keep
# the row order of the concatenated frame (and any order-dependent step such as a
# later drop_duplicates) reproducible across machines.
for path in sorted(nlp_root.glob("*.csv")):
    df = pd.read_csv(path, index_col=0)
    # The setting is the file-name prefix before the first underscore.
    setting = path.name.split("_")[0]

    pattern = r'(?<=_)sst2(?=_)|(?<=_)mnli(?=_)'
    match = re.search(pattern, path.name)
    assert match is not None, f"Cannot infer the dataset from file name {path.name!r}"
    dataset = match.group(0)

    # The token is the last underscore-separated part of the name, without extension.
    token = path.name.split("_")[-1].split(".")[0]

    if "smollm" in path.name:
        # not true, but we want to group standard non-aggregated token results for the llm with the cls token results for bert and albert
        token = "cls"

    df["Token"] = token
    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
nlp_data = data


In [4]:
# Load every graph result CSV (skipping backups) and tag each frame with the
# setting and dataset that are encoded in its file name.
cleaned_dfs = []
root = BASE_PATH / "paper_results" / "graph"
# File-name keyword -> short setting name used throughout the notebook.
# Defined once, since it does not change between files.
pattern_to_setting = {
    "augmentation": "aug",
    "label_test": "mem",
    "layer_test": "mono",
    "output_correlation": "correlation",
    "shortcut": "sc",
}
# sorted(): the order in which glob yields files is not guaranteed; fixing it keeps
# the row order of the concatenated frame reproducible.
for path in sorted(root.glob("*.csv")):
    if path.name.endswith("backup.csv"):
        continue

    df = pd.read_csv(path, index_col=0)
    pattern = r"augmentation|label_test|layer_test|output_correlation|shortcut"
    match = re.search(pattern, path.name)
    # Fail with the offending file name instead of an AttributeError on None.
    assert match is not None, f"Cannot infer the setting from file name {path.name!r}"
    setting = pattern_to_setting[match.group(0)]

    pattern = r"(?<=_)cora(?=_)|(?<=_)flickr(?=_)|(?<=_)ogbn-arxiv(?=_)"
    match = re.search(pattern, path.name)
    assert match is not None, f"Cannot infer the dataset from file name {path.name!r}"
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
graph_data = data

In [None]:
# Sanity check of the Cora output-correlation results (Spearman rows only).
_cora_spearman = (
    (graph_data.representation_dataset == "cora")
    & (graph_data.Setting == "correlation")
    & (graph_data.quality_measure == "spearmanr")
)
# Row counts per (architecture, functional measure, similarity measure). This is not
# the last expression of the cell, so it is evaluated but not displayed.
graph_data[_cora_spearman].groupby(
    ["architecture", "functional_similarity_measure", "similarity_measure"]
).count()
# Displayed output: the AlignedCosineSimilarity rows of the GCN architecture.
graph_data[
    _cora_spearman
    & (graph_data.similarity_measure == "AlignedCosineSimilarity")
    & (graph_data.architecture == "GCN")
]

In [6]:
# Load every vision result CSV and tag each frame with the setting and dataset
# that are encoded in its file name.
cleaned_dfs = []
root = BASE_PATH / "paper_results" / "vision"
# File-name keyword -> setting name (identity here; kept parallel to the graph loader).
# Defined once, since it does not change between files.
pattern_to_setting = {
    "aug": "aug",
    "mem": "mem",
    "mono": "mono",
    "correlation": "correlation",
    "sc": "sc",
}
# sorted(): the order in which glob yields files is not guaranteed; fixing it keeps
# the row order of the concatenated frame reproducible.
for path in sorted(root.glob("*.csv")):
    df = pd.read_csv(path, index_col=0)
    # NOTE(review): the keywords are not anchored, so the left-most occurrence anywhere
    # in the file name wins (e.g. an "sc" inside another word) — confirm the naming scheme.
    pattern = r"aug|mem|mono|correlation|sc"
    match = re.search(pattern, path.name)
    # Fail with the offending file name instead of an AttributeError on None.
    assert match is not None, f"Cannot infer the setting from file name {path.name!r}"
    setting = pattern_to_setting[match.group(0)]

    pattern = r"(?<=_)in100(?=_)|(?<=_)c100(?=_)"
    match = re.search(pattern, path.name)
    assert match is not None, f"Cannot infer the dataset from file name {path.name!r}"
    dataset = match.group(0)

    df["Setting"] = setting
    df["Dataset"] = dataset
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
vision_data = data

## Clean and Pivot

Step 2: Combine data into a big dataframe, clean up column names etc., and select data to be shown in table.

In [None]:
# ----------------------------------------------------------------------------------------------------------------------
# Combine data
# ----------------------------------------------------------------------------------------------------------------------
# Stack the three domains. The index is not reset, so index labels repeat across domains.
data = pd.concat([nlp_data, graph_data, vision_data])
print(data.columns)

data = data.rename(
    columns={
        "functional_similarity_measure": "Functional Similarity Measure",
        "similarity_measure": "Representational Similarity Measure",
        "quality_measure": "Quality Measure",
    }
)

# For the correlation setting the score shown in the table is the "corr" column.
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

# Correlation with accuracy differences becomes its own setting ("acc_corr") ...
idx = (data.Setting == "correlation") & (data["Functional Similarity Measure"] == "AbsoluteAccDiff")
data.loc[idx, "Setting"] = "acc_corr"

# ----------------------------------------------------------------------------------------------------------------------
# Exclude data not to be shown in table.
# ----------------------------------------------------------------------------------------------------------------------
# ... and of the remaining "correlation" rows only those against JSD are kept.
idx = (data.Setting == "correlation") & (data["Functional Similarity Measure"] != "JSD")
data = data.loc[~idx]

# Augmentation / random labels / shortcuts are reported via AUPRC only.
idx = (data.Setting.isin(["aug", "mem", "sc"])) & (data["Quality Measure"] != "AUPRC")
data = data.loc[~idx]

# Both correlation settings are reported via Spearman correlation only.
idx = (data.Setting.isin(["correlation", "acc_corr"])) & (data["Quality Measure"] != "spearmanr")
data = data.loc[~idx]

# Layer monotonicity is reported via the "correlation" quality measure
# (the violation-rate alternative is kept below for reference).
# idx = (data.Setting.isin(["mono"])) & (data["Quality Measure"] != "violation_rate")
idx = (data.Setting.isin(["mono"])) & (data["Quality Measure"] != "correlation")
data = data.loc[~idx]

# Drop mean-token results. Rows without a Token value (NaN) do not match and are kept.
idx = (data.Token.isin(["mean"]))
data = data.loc[~idx]


# ----------------------------------------------------------------------------------------------------------------------
# Clean up names etc.
# ----------------------------------------------------------------------------------------------------------------------


def beautify_df(data):
    """Map raw identifiers to display names and add grouping columns for the tables.

    The measure, architecture, domain, dataset, setting and quality-measure columns
    are mapped to their display names (values missing from a mapping become NaN),
    the columns are renamed to their table headers, and the "Measure Type" and
    "Type" columns are added.

    Args:
        data: Combined results with the columns "Representational Similarity Measure",
            "architecture", "domain", "Dataset", "Setting", "Quality Measure" and "value".

    Returns:
        A tuple ``(data, column_order)``: the beautified frame and the display order
        of the tests (values of the "Test" column).
    """
    # Work on a copy so the caller's frame is not modified in place (this also avoids
    # SettingWithCopy warnings when a filtered slice is passed in).
    data = data.copy()

    data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(
        measure_to_abbrv
    )
    data.loc[:, "architecture"] = data["architecture"].map(
        {
            "smollm2-1.7b": "SmolLM2",
            "albert-base-v2": "ALBERT",
            "BERT-L": "BERT",
            "GCN": "GCN",
            "GAT": "GAT",
            "GraphSAGE": "SAGE",
            "VGG11": "VGG11",
            "VGG19": "VGG19",
            "ResNet18": "RNet18",
            "ResNet34": "RNet34",
            "ResNet101": "RNet101",
            "ViT_B32": "ViT_B32",
            "ViT_L32": "ViT_L32",
            "PGNN": "P-GNN",
        }
    )
    data.loc[:, "domain"] = data["domain"].map({"NLP": "Text", "GRAPHS": "Graph", "VISION": "Vision"})
    data.loc[:, "Dataset"] = data["Dataset"].map(
        {
            "mnli_aug_rate0": "MNLI",
            "mnli_mem_rate0": "MNLI",
            "mnli": "MNLI",
            "sst2_sc_rate0558": "SST2",
            "sst2_mem_rate0": "SST2",
            "sst2_sft": "SST2",
            "sst2_sft_sc_rate0558": "SST2",
            "mnli_sc_rate0354": "MNLI",
            "sst2_aug_rate0": "SST2",
            "sst2": "SST2",
            "flickr": "flickr",
            "ogbn-arxiv": "arXiv",
            "cora": "Cora",
            "in100": "IN100",
            "c100": "CIFAR100",
        }
    )
    data.loc[:, "Setting"] = data["Setting"].map(
        {
            "aug": "Augmentation",
            "mem": "Random Labels",
            "correlation": "JSD Corr.",
            "acc_corr": "Acc Corr.",
            "mono": "Layer Mono.",
            "sc": "Shortcuts",
        }
    )
    column_order = ["Acc Corr.", "JSD Corr.", "Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
    # NOTE(review): assigning through .loc[:, ...] may keep the column's existing dtype
    # instead of making it categorical — confirm if the category order is relied upon.
    data.loc[:, "Setting"] = pd.Categorical(
        data["Setting"],
        categories=column_order,
        ordered=True,
    )
    data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
        {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
    )
    # Turn violation rates into conformity rates (1 - rate).
    is_conformity = data["Quality Measure"] == "Conformity Rate"
    data.loc[is_conformity, "value"] = (
        1 - data.loc[is_conformity, "value"]
    )  # must be run in conjunction with the above renaming

    data = data.rename(
        columns={
            "domain": "Domain",
            "architecture": "Arch.",
            "Representational Similarity Measure": "Sim Meas.",
            "Quality Measure": "Eval.",
            "Setting": "Test",
        }
    )
    # Attach the measure family of every (abbreviated) measure.
    data = pd.merge(data, pd.DataFrame.from_records(measure_types, columns=["Sim Meas.", "Measure Type"]), how="left", on="Sim Meas.")
    data.loc[:, "Measure Type"] = pd.Categorical(data["Measure Type"], categories=measure_type_order, ordered=True)
    # Group the tests into the two grounding types used as top-level table header.
    data.loc[data.Test.isin(["Acc Corr.", "JSD Corr."]), "Type"] = "Grounding by Prediction"
    data.loc[data.Test.isin(["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]), "Type"] = (
        "Grounding by Design"
    )
    return data, column_order


data, column_order = beautify_df(data)

# ----------------------------------------------------------------------------------------------------------------------
# Create aggregated overview table
# ----------------------------------------------------------------------------------------------------------------------
# The overview only shows one dataset/architecture pair per domain.
idx = data["Dataset"].isin(["MNLI", "flickr", "IN100"]) & data["Arch."].isin(["SAGE", "BERT", "RNet18"])

# Rows: measures grouped by family; columns: one per test/evaluation/domain/dataset/architecture.
pivot = (
    pd.pivot_table(
        data.loc[idx],
        index=["Measure Type", "Sim Meas."],
        columns=["Type", "Test", "Eval.", "Domain", "Dataset", "Arch."],
        values="value",
    )
    .sort_values(by=["Measure Type", "Sim Meas."], axis="index")
    .reindex(measure_type_order, axis="index", level=0)
    .reindex(column_order, axis="columns", level="Test")
    .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
)
pivot

### Turn values into strings

In [None]:
# Flatten the numeric pivot into long form; the values end up in the column labelled 0.
unpivot = pivot.unstack().unstack().dropna().reset_index()
# Column 1 holds the same values rendered as strings with two decimals.
unpivot.loc[:, 1] = unpivot.loc[:, 0].map(lambda x: f"{round(x, ndigits=2):.2f}")

# Rebuild the table from the string column and restore the measure-family order.
pivot = unpivot.pivot(
    index=["Measure Type", "Sim Meas."],
    columns=["Type", "Test", "Eval.", "Domain", "Dataset", "Arch."],
    values=1,
).reindex(measure_type_order, axis="index", level=0)

pivot

In [None]:
# Highlight the best values by bolding
for column in pivot.columns:
    col = pivot.loc[:, column].astype("float")
    idx = col == col.max()
    pivot.loc[idx, column] = pivot.loc[idx, column].apply(lambda s: r"\textbf{" + s + "}")
pivot

### Significance Indicators

In [None]:
# Same dataset/architecture selection as the overview table, restricted to the two
# correlation tests.
idx = (
    data["Dataset"].isin(["MNLI", "flickr", "IN100"])
    & data["Arch."].isin(["SAGE", "BERT", "RNet18"])
    & data.Test.isin(["Acc Corr.", "JSD Corr."])
)
data_corr = data.loc[idx].copy()


def pval_str(pval):
    r"""Return the LaTeX significance superscript for a p-value.

    ``**`` is used for p <= 0.01 and ``*`` for p <= 0.05. Everything else, including
    NaN and non-float inputs, gets an invisible placeholder of the same width.
    """
    no_marker = r"$^{\phantom{**}}$"
    if not isinstance(pval, float):
        return no_marker
    if pval <= 0.01:
        return r"$^{**}$"
    return r"$^{*\phantom{*}}$" if pval <= 0.05 else no_marker

def significance_via_text_style(pval):
    r"""Return the ``[prefix, suffix]`` LaTeX wrapper that marks significance by underlining.

    p <= 0.01 is underlined twice, p <= 0.05 once; otherwise both parts are empty.
    """
    depth = 2 if pval <= 0.01 else 1 if pval <= 0.05 else 0
    return [r"\underline{" * depth, r"}" * depth]

# Two-decimal value followed by the significance marker of its p-value.
# (significance_via_text_style offers an alternative rendering with underlines.)
_value_text = data_corr["value"].apply(lambda x: f"{round(x, ndigits=2):.2f}")
data_corr["val_comb"] = _value_text + data_corr["pval"].apply(pval_str)

# Pivot the combined strings into the layout of the overview table.
pivot_corr = data_corr.pivot(
    index=["Measure Type", "Sim Meas."],
    columns=["Type", "Test", "Eval.", "Domain", "Dataset", "Arch."],
    values=["val_comb"],
)
pivot_corr = pivot_corr.sort_values(by=["Measure Type", "Sim Meas."])
pivot_corr = pivot_corr.reindex(measure_type_order, axis="index", level=0)
pivot_corr = pivot_corr.reindex(column_order, axis="columns", level="Test")
pivot_corr = pivot_corr.reindex(["Graph", "Text", "Vision"], axis="columns", level="Domain")
# Drop the outer "val_comb" column level again.
pivot_corr = pivot_corr.loc[:, "val_comb"]

def floatify(s: str) -> str:
    r"""Return the numeric prefix of a table cell.

    Turns a string like '-0.10$^{\phantom{**}}$' into '-0.10'. A string without a '$'
    is returned unchanged (previously its last character was cut off, because
    ``str.find`` returns -1 when nothing is found). Non-string inputs, e.g. NaN for
    empty cells, are passed through unchanged.
    """
    if not isinstance(s, str):
        return s
    # partition() yields the whole string as the first part when "$" is absent.
    return s.partition("$")[0]

def separate_significance_indicator(s: str) -> str:
    r"""Return the part of a table cell from the first '$' onwards.

    Turns a string like '-0.10$^{\phantom{**}}$' into '$^{\phantom{**}}$'. A string
    without a '$' yields '' (previously its last character was returned, because
    ``str.find`` returns -1 when nothing is found). Non-string inputs, e.g. NaN for
    empty cells, are passed through unchanged.
    """
    if not isinstance(s, str):
        return s
    _, dollar, rest = s.partition("$")
    return dollar + rest

# Bold the largest correlation of every column while keeping its significance marker.
for column in pivot_corr.columns:
    cells = pivot_corr.loc[:, column]
    numbers = cells.map(floatify).astype("float")
    markers = cells.map(separate_significance_indicator)
    is_best = numbers == numbers.max()
    bolded = numbers.map(lambda x: r"\textbf{" + f"{x:.2f}" + "}") + markers
    # Only the rows selected by is_best are overwritten; values are aligned on the index.
    pivot_corr.loc[is_best, column] = bolded

pivot_corr

In [None]:
# pivot.loc[:, ("Grounding by Prediction")].astype("str", copy=False)
# pivot.loc[:, ("Grounding by Prediction", "Acc Corr.", "Spearman", "Graph", "flickr", "SAGE")] = pivot.loc[:, ("Grounding by Prediction", "Acc Corr.", "Spearman", "Graph", "flickr", "SAGE")].astype("str")
# pivot.loc[:, ("Grounding by Prediction")].dtypes

# Replace the "Grounding by Prediction" columns with the strings that carry the
# significance markers. (The parentheses in the original key did not create a tuple.)
pivot.loc[:, "Grounding by Prediction"] = pivot_corr
pivot

Step 3: Convert into latex table.

In [None]:
# Render the (already string-valued) overview pivot as a LaTeX table and patch the
# generated source line by line. The hard-coded line numbers and offsets below refer
# to the layout of the generated LaTeX and must be adapted if the table shape changes.
styled = pd.io.formats.style.Styler(
    pivot,
    precision=2,
)

# Highlight top value
# (unused alternative; the best values were already bolded in the cells above)
# latex_str = styled.highlight_max(axis=0, props="textbf:--rwrap;").to_latex(
#     hrules=True,
#     position="t",
#     label="tab:result_overview",
# )
latex_str = styled.to_latex(hrules=True, position="t", label="tab:result_overview",)


# ----- Manual modifications --------
# From here on latex_str is a list of lines; it is joined back into one string at the end.
latex_str = latex_str.split("\n")

# Center headers: replace the {r} column spec by {c} on lines 5-7 only.
pattern = r"\{r\}"
replacement = r"{c}"
latex_str = [re.sub(pattern, replacement, line) if i in [5, 6, 7] else line for i, line in enumerate(latex_str)]

# Remove measure row (line 11). All later line indices shift by one after this pop.
latex_str.pop(11)

# Add vertical bars: keep the first 18 characters of line 2 and append six "|rrr" groups.
line_no = 2
# line_no = 3
mod_line = latex_str[line_no][:18] + "".join(["|rrr"] * 6) + "}"
latex_str[line_no] = mod_line

# Make the left-most cells white: prefix every second line, starting at line 11,
# with \cellcolor{white}. The last four lines are left untouched.
latex_str = [
    r"\cellcolor{white}" + line if i >= 11 and (i - 11) % 2 == 0 else line for i, line in enumerate(latex_str[:-4])
] + latex_str[-4:]
latex_str = "\n".join(latex_str)
print(latex_str)

## TSNE

In [None]:
# Start again from the raw per-domain results.
data = pd.concat([nlp_data, graph_data, vision_data])

# For the correlation setting the score is stored in the "corr" column.
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

# Split the correlation setting by the functional similarity measure it refers to.
for func_measure, setting_name in [
    ("AbsoluteAccDiff", "acc_corr"),
    ("JSD", "jsd_corr"),
    ("Disagreement", "disagr_corr"),
]:
    idx = (data.Setting == "correlation") & (data["functional_similarity_measure"] == func_measure)
    data.loc[idx, "Setting"] = setting_name

data.loc[:, "similarity_measure"] = data["similarity_measure"].map(measure_to_abbrv)

# RTD and IMD are left out of the embeddings.
data = data.loc[~data.similarity_measure.isin(["RTD", "IMD"])]
# Attach the measure family of every measure.
data = pd.merge(
    data,
    pd.DataFrame.from_records(measure_types, columns=["similarity_measure", "Measure Type"]),
    how="left",
    on="similarity_measure",
)

data.head(2)

In [None]:
def plot_tsne_mds(data: pd.DataFrame, suptitle=None):
    """Embed the similarity measures in 2D with t-SNE and MDS and plot both side by side.

    Every measure is represented by the vector of its mean ``value`` over all
    (quality measure, architecture, representation dataset, token, setting, dataset)
    combinations found in ``data``. Combinations without a value are filled with 0.

    Args:
        data: Long-format results with the columns "Measure Type", "similarity_measure",
            "value", "quality_measure", "architecture", "representation_dataset",
            "Token", "Setting" and "Dataset".
        suptitle: Optional figure title; it is also printed together with the number
            of embedded measures.
    """
    from sklearn.manifold import MDS, TSNE

    data = data.copy()
    data = data.sort_values(by=["Measure Type", "similarity_measure"])
    # One row per measure, one column per experiment; all-NaN rows are dropped.
    raw_scores = data.pivot_table(
        index=["Measure Type", "similarity_measure"],
        columns=["quality_measure", "architecture", "representation_dataset", "Token", "Setting", "Dataset"],
        values="value",
        dropna=False,
    ).dropna(axis="index", how="all").fillna(0)
    # Order the rows by measure family, then keep only the measure name as index.
    raw_scores = raw_scores.reindex(measure_type_order, axis="index", level=0).reset_index(level=0, drop=True)
    print(len(raw_scores), suptitle)

    # Create TSNE embedding
    # NOTE(review): perplexity=3 presumably requires more than 3 measures in `data` — confirm.
    tsne = TSNE(n_components=2, perplexity=3, random_state=42)
    tsne_scores = tsne.fit_transform(raw_scores)

    # Create MDS embedding
    mds = MDS(n_components=2, random_state=42)
    mds_scores = mds.fit_transform(raw_scores)

    # One colormap per measure type; unknown types fall back to Greys.
    color_maps = {
        "CCA": plt.cm.Greys,
        "Alignment": plt.cm.Purples,
        "Neighbors": plt.cm.Greens,
        "RSM": plt.cm.Reds,
        "Topology": plt.cm.Oranges,
        "Statistic": plt.cm.copper,
    }

    # Create a mapping of measures to colors based on their type
    unique_measures = raw_scores.index
    measure_to_type = data.groupby("similarity_measure")["Measure Type"].first()
    colors = []
    for measure in unique_measures:
        measure_type = measure_to_type[measure]
        # Get number of measures of this type
        n_measures_of_type = (measure_to_type == measure_type).sum()
        # Get position of this measure within its type (label slice, includes `measure`)
        pos_in_type = (measure_to_type[:measure] == measure_type).sum()
        # Create color gradient within type
        color = color_maps.get(measure_type, plt.cm.Greys)(0.3 + (0.7 * pos_in_type / n_measures_of_type))
        colors.append(color)

    # Create subplot with TSNE and MDS side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Same scatter-plus-label rendering for both embeddings.
    for ax, scores, title in [(ax1, tsne_scores, "t-SNE"), (ax2, mds_scores, "MDS")]:
        for i, measure in enumerate(unique_measures):
            ax.scatter(scores[i, 0], scores[i, 1], c=[colors[i]], label=measure)
            ax.text(scores[i, 0], scores[i, 1], measure, fontsize=8)
        ax.set_title(title)

    if suptitle is not None:
        fig.suptitle(suptitle)

    # The legend is attached to the right-hand (MDS) axes, outside the plot area.
    ax2.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
    fig.tight_layout()
    plt.show()


# One figure for all results, then one per domain.
plot_tsne_mds(data, suptitle="All domains")
for domain_key, title in [("NLP", "NLP"), ("GRAPHS", "Graphs"), ("VISION", "Vision")]:
    plot_tsne_mds(data.loc[data.domain == domain_key], suptitle=title)

## Rankplots

Requires the imports, constants, and data-loading cells (Step 1) at the top of the notebook to have been run.

In [15]:
import seaborn as sns

# Global seaborn theme for the rank plots: "paper" context, white style, fonts scaled by 1.5.
sns.set_theme("paper", style="white", font_scale=1.5)


Combine data similarly to before, but do not filter out specific parts.

In [16]:
# Combine all results and rank the similarity measures within every experiment.
data = pd.concat([nlp_data, graph_data, vision_data])
data = data.rename(
    columns={
        "functional_similarity_measure": "Functional Similarity Measure",
        "similarity_measure": "Representational Similarity Measure",
        "quality_measure": "Quality Measure",
    }
)
# Give every row a unique index label (the concatenated frames repeat their labels).
data = data.reset_index()

# For the correlation setting the score is stored in the "corr" column.
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

idx = data["Quality Measure"].isin(["AUPRC", "spearmanr", "correlation"])
data = data.loc[idx]

# Split the correlation setting by functional similarity measure, e.g. "correlationJSD".
idx = data.Setting == "correlation"
data.loc[idx, "Setting"] = data.loc[idx, "Setting"] + data.loc[idx, "Functional Similarity Measure"]

# Only the layer-monotonicity rows keep their own model identifier.
idx = ~(data.Setting == "mono")
data.loc[idx, "model"] = "agg"

# Missing tokens get a placeholder so they form a regular group key below.
idx = data.Token.isna()
data.loc[idx, "Token"] = "NA"

# Mean-token results are intentionally kept here:
# idx = data.Token.isin(["mean"])
# data = data.loc[~idx]

# Rank 1 = highest value within one experiment.
data["rank"] = data.groupby(["domain", "Setting", "Dataset", "architecture", "model", "Token"], as_index=True)["value"].rank(
    ascending=False
)

# combine layer mono results to equally weight experiments
# Average the per-model ranks of every measure. transform("mean") returns a Series on
# the index of the selected rows, so each row receives the mean of its own group.
# The previous `.mean().reset_index()` produced a frame with a fresh 0..n index and
# without the measure among the group keys, so its values could not line up with the
# rows they were assigned to.
idx = data.model != "agg"
mono_keys = ["domain", "Setting", "Dataset", "architecture", "Token", "Representational Similarity Measure"]
data.loc[idx, "rank"] = data.loc[idx].groupby(mono_keys)["rank"].transform("mean")
# Keep a single row per measure and experiment (the first of the averaged duplicates).
data = data.drop_duplicates(subset=["domain", "Setting", "Dataset", "architecture", "Representational Similarity Measure", "Functional Similarity Measure", "Quality Measure"])

# Map raw identifiers to display names. Values missing from a mapping become NaN.
data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(measure_to_abbrv)
data.loc[:, "architecture"] = data["architecture"].map(
    {
        "smollm2-1.7b": "SmolLM2",
        "albert-base-v2": "ALBERT",
        "BERT-L": "BERT",
        "GCN": "GCN",
        "GAT": "GAT",
        "GraphSAGE": "SAGE",
        "VGG11": "VGG11",
        "VGG19": "VGG19",
        "ResNet18": "RNet18",
        "ResNet34": "RNet34",
        "ResNet101": "RNet101",
        "ViT_B32": "ViT_B32",
        "ViT_L32": "ViT_L32",
        # Same entry as in beautify_df; without it PGNN rows would be mapped to NaN.
        "PGNN": "P-GNN",
    }
)
data.loc[:, "domain"] = data["domain"].map({"NLP": "Language", "GRAPHS": "Graph", "VISION": "Vision"})
data.loc[:, "Dataset"] = data["Dataset"].map(
    {
        "mnli_aug_rate0": "MNLI",
        "mnli_mem_rate0": "MNLI",
        "mnli": "MNLI",
        "sst2_sc_rate0558": "SST2",
        "sst2_mem_rate0": "SST2",
        "sst2_sft": "SST2",
        "sst2_sft_sc_rate0558": "SST2",
        "mnli_sc_rate0354": "MNLI",
        "sst2_aug_rate0": "SST2",
        "sst2": "SST2",
        "flickr": "flickr",
        "ogbn-arxiv": "arXiv",
        "cora": "Cora",
        "in100": "IN100",
        "c100": "CIFAR100",
    }
)
# The "correlation<FunctionalMeasure>" keys are the concatenated setting names
# built when the correlation setting was split by functional similarity measure.
data.loc[:, "Setting"] = data["Setting"].map(
    {
        "aug": "Augmentation",
        "mem": "Random Labels",
        "correlationJSD": "JSD Corr.",
        "correlationAbsoluteAccDiff": "Acc Corr.",
        "correlationDisagreement": "Disagr. Corr.",
        "acc_corr": "Acc Corr.",
        "mono": "Layer Mono.",
        "sc": "Shortcuts",
    }
)

data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
    {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
)
# Turn violation rates into conformity rates (1 - rate).
data.loc[data["Quality Measure"] == "Conformity Rate", "value"] = (
    1 - data.loc[data["Quality Measure"] == "Conformity Rate", "value"]
)  # must be run in conjunction with the above renaming

data = data.rename(
    columns={
        "domain": "Modality",
        "architecture": "Arch.",
        "Representational Similarity Measure": "Sim Meas.",
        "Quality Measure": "Eval.",
        "Setting": "Scenario",
    }
)

data = data.sort_values(by=["Sim Meas."])

### Summary

Rank measures.

In [None]:
# Mean and median rank of every measure within each modality.
avg_ranks = (
    data.groupby(["Modality", "Sim Meas."])["rank"]
    .agg(["mean", "median"])
    .reset_index()
    .rename(columns={"mean": "avg_rank", "median": "med_rank"})
)
avg_ranks

Create plots.

In [None]:
# Attach the per-modality rank summaries and order the rows by median rank.
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])
# Attach the measure family used for coloring the boxes.
plot_data = pd.merge(plot_data, pd.DataFrame.from_records(measure_types, columns=["Sim Meas.", "Measure Type"]), how="left", on="Sim Meas.")

# One panel per modality in a single figure.
fig, axes = plt.subplots(1, 3, sharey=False, figsize=(7*0.8*3, 7))
fig

for i, mod in enumerate(["Graph", "Language", "Vision"]):
# for i, mod in enumerate(plot_data.Modality.unique()):
    # Earlier variant with one separate catplot figure per modality:
    # g = sns.catplot(
    #     data=plot_data[plot_data.Modality == mod],
    #     x="rank",
    #     y="Sim Meas.",
    #     hue="Measure Type",
    #     # hue="Modality",
    #     kind="box",
    #     height=7,
    #     aspect=0.8,
    #     col="Modality",
    #     palette="colorblind",
    #     # palette={"Language": "C1", "Vision": "C2", "Graph": "C0"},
    #     legend=True if mod=="Vision" else False
    #     # legend=False
    # )
    # ax = g.axes[0, 0]

    ax = axes[i]
    sns.boxplot(
        data=plot_data[plot_data.Modality == mod],
        x="rank",
        y="Sim Meas.",
        hue="Measure Type",
        hue_order=["Neighbors", "RSM", "Alignment", "Topology", "CCA", "Statistic"],
        # palette={
        #     "Neighbors": "C0",
        #     "RSM": "C1",
        #     "Alignment": "C2",
        #     "Topology": "C3",
        #     "CCA": "C4",
        #     "Statistic": "C5",
        # },
        palette="colorblind",
        # Only the last panel carries the legend.
        legend=True if mod=="Vision" else False,
        ax=ax,
        # whis=(5.,95.)
    )
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    ax.set_xlabel("Rank")
    ax.set_ylabel("Similarity Measures")

    fig.tight_layout()

    if mod == "Graph":
        ax.set_title("Graphs")
    else:
        ax.set_title(mod)

    if mod == "Vision":
        # Move the legend to the right of the last panel.
        sns.move_legend(ax, loc="right", bbox_to_anchor=(1.45,0.5))
    # g.savefig(BASE_PATH / "figs" / f"aggregated_ver_{mod}.pdf", bbox_inches="tight")
    # NOTE(review): this saves the whole three-panel figure once per iteration, under a
    # modality-specific name. The files written in earlier iterations therefore contain
    # panels that are still empty — confirm whether a single save after the loop is intended.
    fig.savefig(BASE_PATH / "figs" / f"aggregated_ver_{mod}.pdf", bbox_inches="tight")

In [None]:
# One rank boxplot per (modality, architecture) combination that has data.
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])
for mod in plot_data.Modality.unique():
    for arch in plot_data["Arch."].unique():
        subdata = plot_data[(plot_data.Modality == mod) & (plot_data["Arch."] == arch)]
        if subdata.empty:
            continue

        g = sns.catplot(
            data=subdata,
            x="rank",
            y="Sim Meas.",
            hue="Modality",
            kind="box",
            height=7,
            aspect=0.8,
            col="Arch.",
            palette={"Language": "C1", "Vision": "C2", "Graph": "C0"},
            legend=False,
        )
        ax = g.axes[0, 0]
        ax.set_xlabel("Rank")
        ax.set_ylabel("Similarity Measures")
        ax.set_title(f"{arch} {mod}")

In [None]:
# List the architecture display names present in the ranked data.
data["Arch."].unique()

In [None]:
# Rank boxplots for two vision architecture families. The rank summaries are
# recomputed on the rows of each family only.
for archs, fname in [
    (["VGG11", "VGG19"], "VGGs"),
    (["RNet18", "RNet34", "RNet101"], "ResNets"),
]:
    avg_ranks = (
        data[data["Arch."].isin(archs)]
        .groupby(["Modality", "Sim Meas."])["rank"]
        .agg(["mean", "median"])
        .reset_index()
        .rename(columns={"mean": "avg_rank", "median": "med_rank"})
    )

    plot_data = pd.merge(data, avg_ranks).sort_values(by="med_rank")
    subdata = plot_data[(plot_data.Modality == "Vision") & (plot_data["Arch."].isin(archs))]

    g = sns.catplot(
        data=subdata,
        x="rank",
        y="Sim Meas.",
        hue="Modality",
        kind="box",
        height=7,
        aspect=0.8,
        col="Modality",
        palette={"Language": "C1", "Vision": "C2", "Graph": "C0"},
        legend=False,
    )
    ax = g.axes[0, 0]
    ax.set_xlabel("Rank")
    ax.set_ylabel("Similarity Measures")
    ax.set_title(f"{fname}")

    out_path = BASE_PATH / "figs" / f"aggregated_ver_{fname}.pdf"
    g.savefig(out_path, bbox_inches="tight")
    print(out_path)

### Per Test

Uses only the AUPRC and Spearman results.

In [None]:
# Check which evaluation measures remain in the ranked data, then preview the frame.
print(data["Eval."].unique())
data.head()

In [None]:
# Mean and median rank of every measure per modality and scenario.
avg_ranks = (
    data.groupby(["Modality", "Sim Meas.", "Scenario"])["rank"]
    .agg(["mean", "median"])
    .reset_index()
    .rename(columns={"mean": "avg_rank", "median": "med_rank"})
)
avg_ranks

In [20]:
from IPython.display import display

In [None]:
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])
# Preview the rows of the first (scenario, modality) combination only.
# No per-scenario figure is drawn or saved here.
for test in plot_data.Scenario.unique()[:1]:
    for mod in plot_data.Modality.unique()[:1]:
        subplot_data = plot_data[(plot_data.Scenario == test) & (plot_data.Modality == mod)]
        display(subplot_data.head(2))

In [None]:
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])

# Group the scenarios into the two grounding types used as top-level table header.
_prediction_tests = ["Acc Corr.", "JSD Corr.", "Disagr. Corr."]
_design_tests = ["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
plot_data.loc[plot_data.Scenario.isin(_prediction_tests), "Type"] = "Grounding by Prediction"
plot_data.loc[plot_data.Scenario.isin(_design_tests), "Type"] = "Grounding by Design"
column_order = _prediction_tests + _design_tests

_pivot_columns = ["Type", "Scenario", "Eval.", "Modality"]


def _aggregate_to_string_pivot(aggfunc):
    """Aggregate `value` per measure and column group and render the result as strings.

    Returns the numeric pivot, its long form (numbers in column 0, rounded strings
    in column 1) and the pivot rebuilt from the string column.
    """
    numeric = (
        pd.pivot_table(
            plot_data,
            index="Sim Meas.",
            columns=_pivot_columns,
            values="value",
            aggfunc=aggfunc,
        )
        .sort_values(by="Sim Meas.")
        .reindex(column_order, axis="columns", level="Scenario")
        .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
    )
    long_form = numeric.unstack().reset_index()  # values will be in col "0"
    long_form.loc[:, 1] = long_form.loc[:, 0].apply(lambda x: str(round(x, 2)))
    as_text = long_form.pivot(index="Sim Meas.", columns=_pivot_columns, values=1)
    return numeric, long_form, as_text


# -------------------------------------------
# Pivot with mean values, converted to strings for the combination with the stddev.
pivot, unpivot, pivot_mean = _aggregate_to_string_pivot("mean")

# -----------------------------------------------
# Pivot with stddev values.
pivot, unpivot, pivot_std = _aggregate_to_string_pivot("std")

# -----------------------------------------
# Combine mean with stddev
pivot_comb = pivot_mean + r"$\pm$" + pivot_std



# -----------------------------------------
# Bold best metric scores
def floatify(s: str) -> str:
    r"""Return the part of a formatted table cell that precedes the first '$'.

    Turns a string like '-0.10$\pm$0.2' into '-0.10'. The result is still a string;
    callers convert it to a float themselves.

    If `s` contains no '$', it is returned unchanged. (Slicing with the -1 that
    `str.find` returns in that case would silently drop the last character.)
    """
    return s.partition("$")[0]

def separate_significance_indicator(s: str) -> str:
    r"""Return the suffix of a formatted table cell, starting at the first '$'.

    Turns a string like '-0.10$^{\phantom{**}}$' into '$^{\phantom{**}}$'.

    If `s` contains no '$', the empty string is returned. (Slicing with the -1 that
    `str.find` returns in that case would yield the last character of the number.)
    """
    _, dollar, rest = s.partition("$")
    return dollar + rest


# For every column, wrap the entry (or entries, on ties) with the highest mean in \textbf{}.
for column in pivot_comb.columns:
    cells = pivot_comb.loc[:, column]
    means = cells.map(floatify).astype("float")
    suffixes = cells.map(separate_significance_indicator)  # the "$\pm$<std>" part

    # Bold versions are built for the whole column; only the best rows are written back
    # (the assignment aligns on the index).
    bold_means = r"\textbf{" + means.map("{:.2f}".format) + "}"
    bold_suffixes = r"\textbf{" + suffixes + "}"
    is_best = means == means.max()
    pivot_comb.loc[is_best, column] = bold_means + bold_suffixes

pivot_comb

In [None]:
# Render the combined mean/stddev table as a LaTeX table.
styled = pd.io.formats.style.Styler(pivot_comb, precision=2)
latex_str = styled.to_latex(hrules=True, position="t", label="tab:results_agg_per_test")

# ----- Manual modifications --------
# Make every second row gray, starting at line index 12 of the rendered output and leaving
# the last four lines untouched.
# NOTE(review): 12 and 4 are presumably the header and footer sizes of this particular
# table layout -- re-check them whenever the column levels change.
first_body_line = 12
n_footer_lines = 4

latex_lines = latex_str.split("\n")
shaded_lines = []
for i, line in enumerate(latex_lines[:-n_footer_lines]):
    if i >= first_body_line and (i - first_body_line) % 2 == 0:
        line = r"\rowcolor{Gray}" + line
    shaded_lines.append(line)

latex_str = "\n".join(shaded_lines + latex_lines[-n_footer_lines:])
print(latex_str)

In [None]:
prediction_scenarios = ["Acc Corr.", "JSD Corr.", "Disagr. Corr."]
design_scenarios = ["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
column_order = prediction_scenarios + design_scenarios

# Attach the per-scenario rank summaries to the raw results and label each scenario's type.
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])
plot_data.loc[plot_data["Scenario"].isin(prediction_scenarios), "Type"] = "Grounding by Prediction"
plot_data.loc[plot_data["Scenario"].isin(design_scenarios), "Type"] = "Grounding by Design"

# -------------------------------------------
# Create pivot with mean ranks; modality is the outermost column level here.
pivot = (
    pd.pivot_table(
        plot_data,
        index="Sim Meas.",
        columns=["Modality", "Type", "Scenario", "Eval."],
        values="rank",
        aggfunc="mean",
    )
    .sort_values(by="Sim Meas.")
    .reindex(column_order, axis="columns", level="Scenario")
    .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
)
pivot


In [None]:
# Print one LaTeX table of mean ranks per modality.
for mod in data.Modality.unique():
    print(mod)
    subdata = pivot.loc[:, mod]
    styled = pd.io.formats.style.Styler(
        subdata,
        precision=1,
    )

    # Highlight top value: bold the minimum of every column. `highlight_min` registers
    # the style on `styled` itself, so a single render afterwards is sufficient (an extra
    # intermediate `to_latex` call would only produce output that is thrown away).
    styled = styled.highlight_min(axis=0, props="textbf:--rwrap;")
    latex_str = styled.to_latex(hrules=True, position="t", label="tab:results_agg_per_test")

    # ----- Manual modifications --------
    latex_str = latex_str.split("\n")

    # Remove measure row
    latex_str.pop(7)

    # Remove eval measure row
    latex_str.pop(6)

    # Make every second row gray (from line index 7 on, leaving the last four lines as-is)
    latex_str = [
        r"\rowcolor{Gray}" + line if i >= 7 and (i - 7) % 2 == 0 else line for i, line in enumerate(latex_str[:-4])
    ] + latex_str[-4:]
    latex_str = "\n".join(latex_str)
    print(latex_str)

Table with ranks as the index and the metrics as entries.

"rank" column: rank of the measure, grouped by ["domain", "Setting", "Dataset", "architecture", "model"]

In [26]:
# plot_data = pd.merge(data.copy(), data.groupby(["Modality", "Scenario", "Sim Meas."])["rank"].mean().groupby(["Modality", "Scenario",]).rank().reset_index().rename(columns={"rank": "rank_per_test_and_mod"}))
# display(plot_data)
# plot_data.drop_duplicates(subset=["Modality", "Scenario", "Sim Meas."])

# for mod in data.Modality.unique():
#     subdata = plot_data.loc[plot_data.Modality == mod]
#     pd.pivot(subdata, index="rank_per_test_and_mod", columns=["Scenario"], values="Sim Meas.")
# # data.groupby(["Modality", "Scenario"])["avg_rank"].agg(["mean", "median"]).reset_index()

# # avg_ranks.groupby(["Modality", "Sim Meas.", "Scenario"])["avg_rank"].rank()

## Rebuttal PGNN tables

In [None]:
# Work on a renamed copy of `graph_data`, so the source frame is left untouched.
data = graph_data.copy().rename(
    columns={
        "functional_similarity_measure": "Functional Similarity Measure",
        "similarity_measure": "Representational Similarity Measure",
        "quality_measure": "Quality Measure",
    }
)

# For correlation tests the reported value is taken from the "corr" column.
is_correlation = data.Setting == "correlation"
data.loc[is_correlation, "value"] = data.loc[is_correlation, "corr"]

# Give accuracy- and disagreement-based correlation tests their own setting keys; rows that
# remain "correlation" are labelled "JSD Corr." by the setting map below.
for functional_measure, setting_key in (("AbsoluteAccDiff", "acc_corr"), ("Disagreement", "disagr_corr")):
    is_match = (data.Setting == "correlation") & (data["Functional Similarity Measure"] == functional_measure)
    data.loc[is_match, "Setting"] = setting_key

# Raw identifiers -> display names, one mapping per column. Values without an entry become NaN.
value_maps = {
    "Representational Similarity Measure": measure_to_abbrv,
    "architecture": {
        "BERT-L": "BERT",
        "GCN": "GCN",
        "GAT": "GAT",
        "GraphSAGE": "SAGE",
        "VGG11": "VGG11",
        "VGG19": "VGG19",
        "ResNet18": "RNet18",
        "ResNet34": "RNet34",
        "ResNet101": "RNet101",
        "ViT_B32": "ViT_B32",
        "ViT_L32": "ViT_L32",
        "PGNN": "P-GNN",
    },
    "domain": {"NLP": "Text", "GRAPHS": "Graph", "VISION": "Vision"},
    "Dataset": {
        "mnli_aug_rate0": "MNLI",
        "mnli_mem_rate0": "MNLI",
        "mnli": "MNLI",
        "sst2_sc_rate0558": "SST2",
        "sst2_mem_rate0": "SST2",
        "mnli_sc_rate0354": "MNLI",
        "sst2_aug_rate0": "SST2",
        "sst2": "SST2",
        "flickr": "flickr",
        "ogbn-arxiv": "arXiv",
        "cora": "Cora",
        "in100": "IN100",
    },
    "Setting": {
        "aug": "Augmentation",
        "mem": "Random Labels",
        "correlation": "JSD Corr.",
        "acc_corr": "Acc Corr.",
        "disagr_corr": "Disagr. Corr.",
        "mono": "Layer Mono.",
        "sc": "Shortcuts",
    },
}
for col_name, mapping in value_maps.items():
    data.loc[:, col_name] = data[col_name].map(mapping)

column_order = ["Acc Corr.", "Disagr. Corr.", "JSD Corr.", "Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
data.loc[:, "Setting"] = pd.Categorical(data["Setting"], categories=column_order, ordered=True)

data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
    {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
)
# "violation_rate" was just renamed to "Conformity Rate", so its values are flipped to 1 - value.
# This must be run in conjunction with the renaming above.
is_conformity = data["Quality Measure"] == "Conformity Rate"
data.loc[is_conformity, "value"] = 1 - data.loc[is_conformity, "value"]

data = data.rename(
    columns={
        "domain": "Modality",
        "architecture": "Arch.",
        "Representational Similarity Measure": "Sim Meas.",
        "Quality Measure": "Eval.",
        "Setting": "Test",
    }
)

# Label every test with the kind of grounding it belongs to.
data.loc[data.Test.isin(["Acc Corr.", "Disagr. Corr.", "JSD Corr."]), "Type"] = "Grounding by Prediction"
data.loc[data.Test.isin(["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]), "Type"] = "Grounding by Design"

# Keep only Cora rows that have an evaluation measure and belong to one of the four tests below.
keep_rows = (
    data["Eval."].notna()
    & (data["Dataset"] == "Cora")
    & data["Test"].isin(["Acc Corr.", "Disagr. Corr.", "JSD Corr.", "Layer Mono."])
)
data = data[keep_rows]

pivot_columns = ["Type", "Test", "Eval.", "Modality", "Dataset", "Arch."]
pivot = (
    pd.pivot_table(data, index="Sim Meas.", columns=pivot_columns, values="value")
    .sort_values(by="Sim Meas.")
    .reindex(column_order, axis="columns", level="Test")
    .reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
    .reindex(["GAT", "GCN", "SAGE", "P-GNN"], axis="columns", level="Arch.")
)

# turn vals into string: long format -> two-decimal strings -> wide format again
unpivot = pivot.unstack().reset_index()  # values will be in col "0"
unpivot[1] = unpivot[0].map(lambda x: f"{round(x, 2):.2f}")
pivot = unpivot.pivot(index="Sim Meas.", columns=pivot_columns, values=1)


# Wrap the largest value of every column (all of them on ties) in \textbf{}.
for column in pivot.columns:
    numeric = pivot.loc[:, column].astype("float")
    is_best = numeric == numeric.max()
    pivot.loc[is_best, column] = r"\textbf{" + pivot.loc[is_best, column] + "}"


# add significance indicators: work on a separate copy of the Cora correlation-test rows
is_cora = data["Dataset"].isin(["Cora"])
is_correlation_test = data.Test.isin(["Acc Corr.", "Disagr. Corr.", "JSD Corr."])
data_corr = data.loc[is_cora & is_correlation_test].copy()


def pval_str(pval):
    r"""Return the LaTeX significance marker for a p-value.

    '$^{**}$' for p <= 0.01, '$^{*\phantom{*}}$' for p <= 0.05, and an invisible
    placeholder of the same width otherwise. Anything that is not a float (e.g. None
    or a string), as well as NaN, also gets the placeholder.
    """
    no_marker = r"$^{\phantom{**}}$"
    if not isinstance(pval, float):
        return no_marker
    if pval <= 0.01:
        return r"$^{**}$"
    if pval <= 0.05:
        return r"$^{*\phantom{*}}$"
    return no_marker

# Cell text for the correlation tests: two-decimal value followed by its significance marker.
formatted_values = data_corr["value"].map(lambda x: f"{round(x, ndigits=2):.2f}")
significance_markers = data_corr["pval"].map(pval_str)
data_corr["val_comb"] = formatted_values + significance_markers

# `pivot` needs unique (index, columns) combinations, so duplicates on these keys are dropped first.
dedup_keys = [
    "Sim Meas.",
    "Eval.",
    "model",
    "Modality",
    "Arch.",
    "representation_dataset",
    "identifier",
    "Test",
    "Dataset",
    "Functional Similarity Measure",
    "Type",
]
corr_columns = ["Type", "Test", "Eval.", "Modality", "Dataset", "Arch."]

pivot_corr = data_corr.drop_duplicates(subset=dedup_keys).pivot(
    index="Sim Meas.",
    columns=corr_columns,
    values=["val_comb"],
)
pivot_corr = pivot_corr.sort_values(by="Sim Meas.")
pivot_corr = pivot_corr.reindex(column_order, axis="columns", level="Test")
pivot_corr = pivot_corr.reindex(["Graph", "Text", "Vision"], axis="columns", level="Modality")
# `values` was given as a list, which adds a top column level; select it away again.
pivot_corr = pivot_corr.loc[:, "val_comb"]
display(pivot_corr.head(3))


def floatify(s: str) -> str:
    r"""Return the part of a formatted table cell that precedes the first '$'.

    Turns a string like '-0.10$^{\phantom{**}}$' into '-0.10'. The result is still a
    string; callers convert it to a float themselves.

    If `s` contains no '$', it is returned unchanged. (Slicing with the -1 that
    `str.find` returns in that case would silently drop the last character.)
    """
    return s.partition("$")[0]

def separate_significance_indicator(s: str) -> str:
    r"""Return the suffix of a formatted table cell, starting at the first '$'.

    Turns a string like '-0.10$^{\phantom{**}}$' into '$^{\phantom{**}}$'.

    If `s` contains no '$', the empty string is returned. (Slicing with the -1 that
    `str.find` returns in that case would yield the last character of the number.)
    """
    _, dollar, rest = s.partition("$")
    return dollar + rest


# Bold the highest correlation of every column; the significance marker stays outside \textbf{}.
for column in pivot_corr.columns:
    cells = pivot_corr.loc[:, column]
    numeric = cells.map(floatify).astype("float")
    markers = cells.map(separate_significance_indicator)
    is_best = numeric == numeric.max()
    # Built for the whole column; the assignment aligns on the index and only writes the best rows.
    pivot_corr.loc[is_best, column] = r"\textbf{" + numeric.map("{:.2f}".format) + "}" + markers


# Replace the "Grounding by Prediction" part of the table with the marker-annotated strings.
pivot.loc[:, "Grounding by Prediction"] = pivot_corr
pivot

In [None]:
# Sanity check: how many rows of `data_corr` survive de-duplication on the pivot keys.
# A bare tuple expression would be discarded here because it is not the last expression
# of the cell, so the counts are printed explicitly. They are printed as a LaTeX comment
# ("%") so the line is harmless if it is copied together with the table.
dedup_keys = [
    "Sim Meas.",
    "Eval.",
    "model",
    "Modality",
    "Arch.",
    "representation_dataset",
    "identifier",
    "Test",
    "Dataset",
    "Functional Similarity Measure",
    "Type",
]
n_rows = len(data_corr)
n_unique_rows = len(data_corr.drop_duplicates(subset=dedup_keys))
print(f"% data_corr rows: {n_rows}, after drop_duplicates: {n_unique_rows}")

latex_str = pivot.to_latex()
print(latex_str)

## Survey Paper Summary

In [None]:
# Display the current `data` frame for inspection.
data

In [None]:
# Mean and median rank of every similarity measure within each scenario.
avg_ranks = (
    data.groupby(["Scenario", "Sim Meas."])["rank"]
    .agg(avg_rank="mean", med_rank="median")
    .reset_index()
)
avg_ranks

In [None]:
# Largest rank per similarity measure in the "JSD Corr." scenario.
max_rank_per_measure = data.groupby(["Scenario", "Sim Meas."])["rank"].max()
max_rank_per_measure.loc["JSD Corr."]

In [None]:
# Wide view: one row per similarity measure, one column per scenario, cells hold the average rank.
pd.pivot_table(avg_ranks, index="Sim Meas.", columns="Scenario", values="avg_rank")

In [32]:
# Persist the rank summary; the file is written relative to the current working directory.
avg_ranks.to_csv(path_or_buf="ranks_per_test.csv")

In [None]:
# Box plots of the median rank per similarity measure: one facet per scenario, coloured by modality.
plot_data = pd.merge(data, avg_ranks).sort_values(by=["med_rank"])
modality_palette = {"Language": "C1", "Vision": "C2", "Graph": "C0"}
g = sns.catplot(
    data=plot_data,
    kind="box",
    x="med_rank",
    y="Sim Meas.",
    hue="Modality",
    col="Scenario",
    palette=modality_palette,
    height=7,
    aspect=0.8,
    legend=False,
)
# Only the first facet gets explicit axis labels.
ax = g.axes[0, 0]
ax.set(xlabel="Rank", ylabel="Similarity Measures")