# Printing and Plotting Results

## Load all data

In [144]:
import re
from pathlib import Path

import pandas as pd
import pandas.io.formats.style
from IPython.display import display

from repsim.benchmark.paths import BASE_PATH

# Maps the full similarity-measure names (as stored in the result files' `similarity_measure`
# column) to the short labels shown in the generated tables.
measure_to_abbrv = {
    "AlignedCosineSimilarity": "AlignCos",
    "CKA": "CKA",
    "ConcentricityDifference": "ConcDiff",
    "DistanceCorrelation": "DistCorr",
    "EigenspaceOverlapScore": "EOS",
    "GeometryScore": "GS",
    "Gulp": "GULP",
    "HardCorrelationMatch": "HardCorr",
    "IMDScore": "IMD",
    "JaccardSimilarity": "Jaccard",
    "LinearRegression": "LinReg",
    "MagnitudeDifference": "MagDiff",
    "OrthogonalAngularShapeMetricCentered": "AngShape",
    "OrthogonalProcrustesCenteredAndNormalized": "OrthProc",
    "PWCCA": "PWCCA",
    "PermutationProcrustes": "PermProc",
    "ProcrustesSizeAndShapeDistance": "ProcDist",
    "RSA": "RSA",
    "RSMNormDifference": "RSMDiff",
    "RankSimilarity": "RankSim",
    "SVCCA": "SVCCA",
    "SecondOrderCosineSimilarity": "2nd-Cos",
    "SoftCorrelationMatch": "SoftCorr",
    "UniformityDifference": "UnifDiff"
}


Step 1: Load all results.

In [145]:
# Read every NLP result CSV and tag its rows with the test setting and the dataset,
# both of which are encoded in the file name.
nlp_root = BASE_PATH / "paper_results" / "nlp"
dataset_regex = re.compile(r'(?<=_)sst2(?=_)|(?<=_)mnli(?=_)')

cleaned_dfs = []
for path in nlp_root.glob("*.csv"):
    frame = pd.read_csv(path, index_col=0)

    # The dataset name appears as an underscore-delimited token of the file name.
    dataset_match = dataset_regex.search(path.name)
    assert dataset_match is not None

    # The setting is the first underscore-separated token of the file name.
    frame["Setting"] = path.name.split("_")[0]
    frame["Dataset"] = dataset_match.group(0)
    cleaned_dfs.append(frame)

nlp_data = pd.concat(cleaned_dfs).reset_index(drop=True)
data = nlp_data


In [146]:
# Read every graph result CSV (skipping backups) and tag its rows with the test setting
# and the dataset, both of which are encoded in the file name.
root = BASE_PATH / "paper_results" / "graph"

# File-name fragment identifying the experiment -> short setting key used by later cells.
pattern_to_setting = {
    "augmentation": "aug",
    "label_test": "mem",
    "layer_test": "mono",
    "output_correlation": "correlation",
    "shortcut": "sc",
}
setting_regex = re.compile(r"augmentation|label_test|layer_test|output_correlation|shortcut")
dataset_regex = re.compile(r"(?<=_)cora(?=_)|(?<=_)flickr(?=_)|(?<=_)ogbn-arxiv(?=_)")

cleaned_dfs = []
for path in root.glob("*.csv"):
    if path.name.endswith("backup.csv"):
        continue

    # Fail with a readable message instead of an AttributeError on `None.group`.
    setting_match = setting_regex.search(path.name)
    assert setting_match is not None, f"Cannot infer the test setting from file name {path.name!r}"
    dataset_match = dataset_regex.search(path.name)
    assert dataset_match is not None, f"Cannot infer the dataset from file name {path.name!r}"

    df = pd.read_csv(path, index_col=0)
    df["Setting"] = pattern_to_setting[setting_match.group(0)]
    df["Dataset"] = dataset_match.group(0)
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
graph_data = data

In [None]:
# Sanity check of the Cora output-correlation results (Spearman rows only).
cora_spearman = graph_data[
    (graph_data.representation_dataset == "cora")
    & (graph_data.Setting == "correlation")
    & (graph_data.quality_measure == "spearmanr")
]

# Only the last expression of a cell is rendered automatically, so the row counts per
# combination must be displayed explicitly.
display(cora_spearman.groupby(["architecture", "functional_similarity_measure", "similarity_measure"]).count())

# Rows for a single measure/architecture pair.
cora_spearman[
    (cora_spearman.similarity_measure == "AlignedCosineSimilarity") & (cora_spearman.architecture == "GCN")
]

In [148]:
# Read every vision result CSV and tag its rows with the test setting and the dataset,
# both of which are encoded in the file name.
root = BASE_PATH / "paper_results" / "vision"

# The matched fragment already is the short setting key used by later cells.
# NOTE(review): the search is unanchored, so e.g. "sc" would also match inside a longer
# word of a file name — confirm the file naming scheme rules that out.
setting_regex = re.compile(r"aug|mem|mono|correlation|sc")
dataset_regex = re.compile(r"(?<=_)in100(?=_)")

cleaned_dfs = []
for path in root.glob("*.csv"):
    # Fail with a readable message instead of an AttributeError on `None.group`.
    setting_match = setting_regex.search(path.name)
    assert setting_match is not None, f"Cannot infer the test setting from file name {path.name!r}"
    dataset_match = dataset_regex.search(path.name)
    assert dataset_match is not None, f"Cannot infer the dataset from file name {path.name!r}"

    df = pd.read_csv(path, index_col=0)
    df["Setting"] = setting_match.group(0)
    df["Dataset"] = dataset_match.group(0)
    cleaned_dfs.append(df)

data = pd.concat(cleaned_dfs).reset_index(drop=True)
vision_data = data

In [None]:
# ----------------------------------------------------------------------------------------------------------------------
# Combine data
# ----------------------------------------------------------------------------------------------------------------------
# Each per-domain frame carries its own 0..n index; ignore_index gives the combined frame
# unique row labels so later label-based assignments are unambiguous.
data = pd.concat([nlp_data, graph_data, vision_data], ignore_index=True)
print(data.columns)


data = data.rename(
    columns={
        "functional_similarity_measure": "Functional Similarity Measure",
        "similarity_measure": "Representational Similarity Measure",
        "quality_measure": "Quality Measure",
    }
)

# Copy values from the correlation experiment into the same column that holds the result scores of the other experiments
idx = data.Setting == "correlation"
data.loc[idx, "value"] = data.loc[idx, "corr"]

# Exclude Kendall's tau and Pearson's r evaluations of the output correlation experiments. We only show Spearman's r.
idx = (data.Setting == "correlation") & (data["Quality Measure"] != "spearmanr")
# Explicit copy: the filtered frame is modified below and must not be a view-like slice.
data = data.loc[~idx].copy()

# Update the setting to be able to distinguish correlation results with different functional similarity measures easily
idx = data.Setting == "correlation"
data.loc[idx, "Setting"] = data.loc[idx, "Setting"] + data.loc[idx, "Functional Similarity Measure"]


# ----------------------------------------------------------------------------------------------------------------------
# Clean up names etc.
# ----------------------------------------------------------------------------------------------------------------------


def beautify_df(data):
    """Replace raw identifiers with display names and add the test-type column.

    The frame is expected to contain the columns "Representational Similarity Measure",
    "architecture", "domain", "Dataset", "Setting", "Quality Measure" and "value".
    Identifiers that are missing from one of the lookup tables below become NaN, because
    ``Series.map`` with a dict yields NaN for unmapped keys.

    Rows evaluated with ``violation_rate`` are relabelled "Conformity Rate" and their
    value is replaced by ``1 - value``.

    Args:
        data: Combined results frame. It is not modified; all changes are made on a copy.

    Returns:
        A tuple ``(beautified, column_order)``: the beautified frame, with columns renamed
        to "Modality", "Arch.", "Sim Meas.", "Eval." and "Test" and a new "Type" column,
        and the list of test names in the order used to arrange table columns.
    """
    # Work on a copy so the caller's frame is left untouched.
    data = data.copy()

    data.loc[:, "Representational Similarity Measure"] = data["Representational Similarity Measure"].map(
        measure_to_abbrv
    )
    data.loc[:, "architecture"] = data["architecture"].map(
        {
            "BERT-L": "BERT",
            "GCN": "GCN",
            "GAT": "GAT",
            "GraphSAGE": "SAGE",
            "PGNN": "PGNN",
            "VGG11": "VGG11",
            "VGG19": "VGG19",
            "ResNet18": "RNet18",
            "ResNet34": "RNet34",
            "ResNet101": "RNet101",
            "ViT_B32": "ViT B32",
            "ViT_L32": "ViT L32",
        }
    )
    data.loc[:, "domain"] = data["domain"].map({"NLP": "Text", "GRAPHS": "Graph", "VISION": "Vision"})
    data.loc[:, "Dataset"] = data["Dataset"].map(
        {
            "mnli_aug_rate0": "MNLI",
            "mnli_mem_rate0": "MNLI",
            "mnli": "MNLI",
            "sst2_sc_rate0558": "SST2",
            "sst2_mem_rate0": "SST2",
            "mnli_sc_rate0354": "MNLI",
            "sst2_aug_rate0": "SST2",
            "sst2": "SST2",
            "flickr": "Flickr",
            "ogbn-arxiv": "OGBN-Arxiv",
            "cora": "Cora",
            "in100": "IN100",
        }
    )
    data.loc[:, "Setting"] = data["Setting"].map(
        {
            "aug": "Augmentation",
            "mem": "Random Labels",
            "correlationJSD": "JSD Corr.",
            "correlationAbsoluteAccDiff": "Acc. Corr.",
            "correlationDisagreement": "Disagr. Corr.",
            "mono": "Layer Mono.",
            "sc": "Shortcuts",
        }
    )
    column_order = ["Acc. Corr.", "JSD Corr.", "Disagr. Corr.", "Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]
    # NOTE(review): depending on the pandas version, assigning a Categorical through
    # `.loc[:, col]` may keep the column's existing dtype instead of making it categorical.
    # `column_order` is returned so callers can order columns explicitly — confirm before
    # relying on the categorical dtype.
    data.loc[:, "Setting"] = pd.Categorical(
        data["Setting"],
        categories=column_order,
        ordered=True,
    )
    data.loc[:, "Quality Measure"] = data["Quality Measure"].map(
        {"violation_rate": "Conformity Rate", "AUPRC": "AUPRC", "spearmanr": "Spearman", "correlation": "Spearman"}
    )
    # Must run after the renaming above: the mask selects the former violation-rate rows,
    # whose value is flipped to 1 - value.
    is_conformity = data["Quality Measure"] == "Conformity Rate"
    data.loc[is_conformity, "value"] = 1 - data.loc[is_conformity, "value"]

    data = data.rename(
        columns={
            "domain": "Modality",
            "architecture": "Arch.",
            "Representational Similarity Measure": "Sim Meas.",
            "Quality Measure": "Eval.",
            "Setting": "Test",
        }
    )
    data.loc[data.Test.isin(["Acc. Corr.", "JSD Corr.", "Disagr. Corr."]), "Type"] = "Grounding by Prediction"
    data.loc[data.Test.isin(["Random Labels", "Shortcuts", "Augmentation", "Layer Mono."]), "Type"] = (
        "Grounding by Design"
    )
    return data, column_order


# Rebind `data` to the beautified frame; `column_order` is the test order used to arrange table columns later.
data, column_order = beautify_df(data)


In [None]:
# Show the combined, beautified results frame.
data

## Helper Functions for Tables

In [151]:
def pval_str(pval):
    r"""Return the LaTeX superscript that marks the significance of a p-value.

    ``pval <= 0.01`` gives two stars and ``pval <= 0.05`` gives one star padded with a
    phantom star. Anything else, including NaN and values that are not ``float``
    instances, gives an invisible two-star placeholder.
    """
    placeholder = r"$^{\phantom{**}}$"
    if not isinstance(pval, float):
        return placeholder
    if pval <= 0.01:
        return r"$^{**}$"
    return r"$^{*\phantom{*}}$" if pval <= 0.05 else placeholder

def floatify(s: str) -> str:
    r"""Turn a string like '-0.10$^{\phantom{**}}$' into '-0.10'.

    Returns everything before the first '$'. A string without any '$' is returned
    unchanged (slicing with the -1 from ``str.find`` would drop its last character).
    """
    return s.partition("$")[0]

def separate_significance_indicator(s: str) -> str:
    r"""Turn a string like '-0.10$^{\phantom{**}}$' into '$^{\phantom{**}}$'.

    Returns everything from the first '$' onwards. A string without any '$' has no
    indicator, so the empty string is returned (slicing with the -1 from ``str.find``
    would return its last character).
    """
    marker_start = s.find("$")
    return s[marker_start:] if marker_start != -1 else ""

## Language Tables

In [None]:
# Select language results
datasets = ["MNLI", "SST2"]
archs = ["BERT"]
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs)
tests_with_pvals = ["Acc. Corr.", "JSD Corr.", "Disagr. Corr."]
column_levels = ["Type", "Test", "Eval.", "Modality", "Dataset", "Arch."]


# Create pivot table: one row per similarity measure, one column per test configuration
pivot = pd.pivot_table(
    data.loc[idx],  # type: ignore
    index="Sim Meas.",
    columns=column_levels,
    values="value",
)
pivot = pivot.sort_values(by="Sim Meas.")
pivot = pivot.reindex(column_order, axis="columns", level="Test")
pivot = pivot.reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
display(pivot.head())

# Turn values into strings for manipulation with significance markers
unpivot = pivot.unstack().reset_index()  # values will be in col "0"
unpivot.loc[:, 1] = unpivot.loc[:, 0].apply(lambda x: f"{round(x, 2):.2f}")  # formatted strings go to col "1"
pivot = unpivot.pivot(index="Sim Meas.", columns=column_levels, values=1)
display(pivot.head(3))

# Highlight the best values by bolding (every entry equal to the column maximum)
for column in pivot.columns:
    col = pivot.loc[:, column].astype("float")
    is_best = col == col.max()
    pivot.loc[is_best, column] = pivot.loc[is_best, column].apply(lambda s: r"\textbf{" + s + "}")
display(pivot.head(3))


# Add significance markers
# 1) select data that should get markers
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs) & data.Test.isin(tests_with_pvals)
data_corr = data.loc[idx].copy()

# 2) Create new column with value and marker
data_corr["val_comb"] = data_corr["value"].apply(lambda x: f"{round(x, ndigits=2):.2f}") + data_corr["pval"].apply(
    pval_str
)
display(data_corr.head(3))

# 3) Create pivot table for values with markers that can be inserted into the main pivot table
pivot_corr = (
    data_corr.pivot(
        index="Sim Meas.",
        columns=column_levels,
        values=["val_comb"],
    )
    .sort_values(by="Sim Meas.")
    .reindex(column_order, axis="columns", level="Test")
    .loc[:, "val_comb"]
)
display(pivot_corr.head())

# 4) Highlight the best scores by bolding; the marker is split off first and re-attached after the bold value
for column in pivot_corr.columns:
    col = pivot_corr.loc[:, column].apply(floatify).astype("float")
    identifiers = pivot_corr.loc[:, column].apply(separate_significance_indicator)
    is_best = col == col.max()
    new_col = col.apply(lambda x: f"{x:.2f}").apply(lambda s: r"\textbf{" + s + "}") + identifiers
    pivot_corr.loc[is_best, column] = new_col


# Insert into main pivot
pivot.loc[:, "Grounding by Prediction"] = pivot_corr
display(pivot.head())


# Convert into latex file
styled = pd.io.formats.style.Styler(
    pivot,
    precision=2,
)

latex_str = styled.to_latex(
    hrules=True,
    position="h",
    label="tab:nlp_results",
    caption=r"\emph{Results of Test 1-6 for the language domain}. In all cases, we use BERT models.",
    column_format="l||rr|rr|rr||rr|rr|rr|rr|rr|rr|rr|rr",
)

# ----- Manual modifications --------
# All row numbers below are hard-coded positions in the generated LaTeX and therefore
# depend on the layout produced by `to_latex` for this table.
lines = latex_str.split("\n")

# Add opening of resizebox
lines = lines[:3] + [r"\resizebox{\linewidth}{!}{"] + lines[3:]

# Center headers
pattern = r"\{r\}"
replacement = r"{c}"
lines = [re.sub(pattern, replacement, line) if row in [6, 7, 8, 9] else line for row, line in enumerate(lines)]

# Remove measure row
lines.pop(12)

# Remove modality row
lines.pop(9)

# Remove Arch. row
lines.pop(10)

# Make every second row gray
first_row_with_gray = 11
final_rows_to_exclude = 4
lines = [
    r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
    for row, line in enumerate(lines[:-final_rows_to_exclude])
] + lines[-final_rows_to_exclude:]

# Add closing of resizebox
lines = lines[:-2] + [r"}"] + lines[-2:]

latex_str = "\n".join(lines)
print(latex_str)

# Make sure the output directory exists so the write does not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)
with open("tables/nlp_everything.tex", "w") as f:
    f.write(latex_str)

## Vision Tables

In [None]:
# Select vision results
datasets = ["IN100"]
archs = ["RNet18", "RNet34", "RNet101", "VGG11", "VGG19", "ViT B32", "ViT L32"]
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs)
tests_with_pvals = ["Acc. Corr.", "JSD Corr.", "Disagr. Corr."]
column_levels = ["Type", "Test", "Eval.", "Modality", "Dataset", "Arch."]


# Create pivot table: one row per similarity measure, one column per test configuration
pivot = pd.pivot_table(
    data.loc[idx],  # type: ignore
    index="Sim Meas.",
    columns=column_levels,
    values="value",
)
pivot = pivot.sort_values(by="Sim Meas.")
pivot = pivot.reindex(column_order, axis="columns", level="Test")
pivot = pivot.reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
display(pivot.head(3))

# Turn values into strings for manipulation with significance markers
unpivot = pivot.unstack().reset_index()  # values will be in col "0"
unpivot.loc[:, 1] = unpivot.loc[:, 0].apply(lambda x: f"{round(x, 2):.2f}")  # formatted strings go to col "1"
pivot = unpivot.pivot(
    index="Sim Meas.",
    columns=column_levels,
    values=1,
)
display(pivot.head(3))

# Highlight the best values by bolding (every entry equal to the column maximum)
for column in pivot.columns:
    col = pivot.loc[:, column].astype("float")
    is_best = col == col.max()
    pivot.loc[is_best, column] = pivot.loc[is_best, column].apply(lambda s: r"\textbf{" + s + "}")
display(pivot.head(3))


# Add significance markers
# 1) select data that should get markers
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs) & data.Test.isin(tests_with_pvals)
data_corr = data.loc[idx].copy()

# 2) Create new column with value and marker
data_corr["val_comb"] = data_corr["value"].apply(lambda x: f"{round(x, ndigits=2):.2f}") + data_corr["pval"].apply(
    pval_str
)
display(data_corr.head(3))

# 3) Create pivot table for values with markers that can be inserted into the main pivot table
pivot_corr = (
    data_corr.pivot(
        index="Sim Meas.",
        columns=column_levels,
        values=["val_comb"],
    )
    .sort_values(by="Sim Meas.")
    .reindex(column_order, axis="columns", level="Test")
    .loc[:, "val_comb"]
)
display(pivot_corr.head())

# 4) Highlight the best scores by bolding; the marker is split off first and re-attached after the bold value
for column in pivot_corr.columns:
    col = pivot_corr.loc[:, column].apply(floatify).astype("float")
    identifiers = pivot_corr.loc[:, column].apply(separate_significance_indicator)
    is_best = col == col.max()
    new_col = col.apply(lambda x: f"{x:.2f}").apply(lambda s: r"\textbf{" + s + "}") + identifiers
    pivot_corr.loc[is_best, column] = new_col


# Insert into main pivot
pivot.loc[:, "Grounding by Prediction"] = pivot_corr

# Fix order of models
pivot = pivot.reindex(archs, axis="columns", level="Arch.")

display(pivot.head())

### Test 1

In [None]:
sub = pivot.loc[:, ("Grounding by Prediction", "Acc. Corr.")]
caption = (
    r"\emph{Vision - Test 1: Correlation to accuracy difference.}"
    " Full results of the correlation between the similarity measures and the absolute accuracy differences for all architectures."
)

# Convert into latex file
styled = pd.io.formats.style.Styler(
    sub,
    precision=2,
)

latex_str = styled.to_latex(
    hrules=True,
    position="h",
    label="tab:vision_results_test_1",
    caption=caption,
    column_format="c||ccccccc",
)

# ----- Manual modifications --------
# All row numbers below are hard-coded positions in the generated LaTeX and therefore
# depend on the layout produced by `to_latex` for this table.
lines = latex_str.split("\n")

# Add opening of resizebox and centering
lines = lines[:3] + [r"\centering", r"\resizebox{0.7\linewidth}{!}{"] + lines[3:]

# Center headers
pattern = r"\{r\}"
replacement = r"{c}"
lines = [re.sub(pattern, replacement, line) if row in [7, 8, 9] else line for row, line in enumerate(lines)]

# Remove measure row
lines.pop(11)

# Remove modality row
lines.pop(8)

# Remove Arch. row
lines.pop(8)

# Add Test row
lines = lines[:7] + [r"Test & \multicolumn{7}{c}{Accuracy Correlation} \\"] + lines[7:]

# Make every second row gray
first_row_with_gray = 11
final_rows_to_exclude = 4
lines = [
    r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
    for row, line in enumerate(lines[:-final_rows_to_exclude])
] + lines[-final_rows_to_exclude:]

# Add closing of resizebox
lines = lines[:-2] + [r"}"] + lines[-2:]

latex_str = "\n".join(lines)

# Make sure the output directory exists so the write does not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)
with open("tables/vision_test_1.tex", "w") as f:
    f.write(latex_str)

### Test 2

In [None]:
sub = pivot.loc[:, ("Grounding by Prediction", ["JSD Corr.", "Disagr. Corr."])]
caption = (
    r"\emph{Vision - Test 2: Correlation to Output difference.} "
    "Full results of the correlation between the similarity measures and the output differences for all architectures."
)

# Convert into latex file
styled = pd.io.formats.style.Styler(
    sub,
    precision=2,
)

latex_str = styled.to_latex(
    hrules=True,
    position="h",
    label="tab:vision_results_test_2",
    caption=caption,
    column_format="c||ccccccc|ccccccc",
)

# ----- Manual modifications --------
# All row numbers below are hard-coded positions in the generated LaTeX and therefore
# depend on the layout produced by `to_latex` for this table.
lines = latex_str.split("\n")

# Add opening of resizebox and centering
lines = lines[:3] + [r"\centering", r"\resizebox{\linewidth}{!}{"] + lines[3:]

# Center headers
pattern = r"\{r\}"
replacement = r"{c}"
lines = [re.sub(pattern, replacement, line) if row in [7, 8, 9] else line for row, line in enumerate(lines)]

# Remove measure row
lines.pop(13)

# Remove modality row
lines.pop(10)

# Remove Arch. row
lines.pop(10)

# Make every second row gray
first_row_with_gray = 12
final_rows_to_exclude = 4
lines = [
    r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
    for row, line in enumerate(lines[:-final_rows_to_exclude])
] + lines[-final_rows_to_exclude:]

# Add closing of resizebox
lines = lines[:-2] + [r"}"] + lines[-2:]

latex_str = "\n".join(lines)

# Make sure the output directory exists so the write does not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)
with open("tables/vision_test_2.tex", "w") as f:
    f.write(latex_str)

### Test 3 - 6

In [None]:
captions = [
    (
        r"\emph{Vision - Test 3: Random Labels.} "
    ),
    (
        r"\emph{Vision - Test 4: Shortcut Affinity.} "
        "Full results of the similarity measures for distinguishing model groups trained with shortcuts of various correlation to the image label."
    ),
    (
        r"\emph{Vision - Test 5: Augmentation.} "
        "Full results of the similarity measures for distinguishing model groups based on different additive gaussian noise augmentation used during training."
    ),
    (
        r"\emph{Vision - Test 6: Monotonicity.} "
        "Full results of the similarity measures for the monotonicity test."
    ),
]
columns = [
    ["Random Labels"],
    ["Shortcuts"],
    ["Augmentation"],
    ["Layer Mono."],
]

# Make sure the output directory exists so the writes do not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)

# One table per test; `test_number` runs from 3 to 6 and is used in the label and file name.
for test_number, column, caption in zip(range(3, 7), columns, captions):
    sub = pivot.loc[:, ("Grounding by Design", column)]

    display(sub.head())

    # Convert into latex file
    styled = pd.io.formats.style.Styler(
        sub,
        precision=2,
    )

    latex_str = styled.to_latex(
        hrules=True,
        position="h",
        label=f"tab:vision_results_test_{test_number}",
        caption=caption,
        column_format="c||ccccccc|ccccccc",
    )

    # ----- Manual modifications --------
    # All row numbers below are hard-coded positions in the generated LaTeX and therefore
    # depend on the layout produced by `to_latex` for this table.
    lines = latex_str.split("\n")

    # Add opening of resizebox and centering
    lines = lines[:3] + [r"\centering", r"\resizebox{\linewidth}{!}{"] + lines[3:]

    # Center headers
    pattern = r"\{r\}"
    replacement = r"{c}"
    lines = [re.sub(pattern, replacement, line) if row in [7, 8, 9] else line for row, line in enumerate(lines)]

    # Remove measure row
    lines.pop(13)

    # Remove modality row
    lines.pop(10)

    # Remove Arch. row
    lines.pop(10)

    # Make every second row gray
    first_row_with_gray = 12
    final_rows_to_exclude = 4
    lines = [
        r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
        for row, line in enumerate(lines[:-final_rows_to_exclude])
    ] + lines[-final_rows_to_exclude:]

    # Add closing of resizebox
    lines = lines[:-2] + [r"}"] + lines[-2:]

    latex_str = "\n".join(lines)

    with open(f"tables/vision_test_{test_number}.tex", "w") as f:
        f.write(latex_str)

## Graph Tables

In [None]:
# Select graph results
datasets = ["Cora", "Flickr", "OGBN-Arxiv"]
archs = ["GCN", "SAGE", "GAT", "PGNN"]
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs)
tests_with_pvals = ["Acc. Corr.", "JSD Corr.", "Disagr. Corr."]
column_levels = ["Type", "Test", "Eval.", "Modality", "Dataset", "Arch."]


# Create pivot table: one row per similarity measure, one column per test configuration
pivot = pd.pivot_table(
    data.loc[idx],  # type: ignore
    index="Sim Meas.",
    columns=column_levels,
    values="value",
)
pivot = pivot.sort_values(by="Sim Meas.")
pivot = pivot.reindex(column_order, axis="columns", level="Test")
pivot = pivot.reindex(["Grounding by Prediction", "Grounding by Design"], axis="columns", level="Type")
display(pivot.head(3))

# Turn values into strings for manipulation with significance markers
unpivot = pivot.unstack().reset_index()  # values will be in col "0"
unpivot.loc[:, 1] = unpivot.loc[:, 0].apply(lambda x: f"{round(x, 2):.2f}")  # formatted strings go to col "1"
pivot = unpivot.pivot(
    index="Sim Meas.",
    columns=column_levels,
    values=1,
)
display(pivot.head(3))

# Highlight the best values by bolding (every entry equal to the column maximum)
for column in pivot.columns:
    col = pivot.loc[:, column].astype("float")
    is_best = col == col.max()
    pivot.loc[is_best, column] = pivot.loc[is_best, column].apply(lambda s: r"\textbf{" + s + "}")
display(pivot.head(3))


# Add significance markers
# 1) select data that should get markers
idx = data["Dataset"].isin(datasets) & data["Arch."].isin(archs) & data.Test.isin(tests_with_pvals)
data_corr = data.loc[idx].copy()

# 2) Create new column with value and marker
data_corr["val_comb"] = data_corr["value"].apply(lambda x: f"{round(x, ndigits=2):.2f}") + data_corr["pval"].apply(
    pval_str
)
display(data_corr.head(3))

# 3) Create pivot table for values with markers that can be inserted into the main pivot table
pivot_corr = (
    data_corr.pivot(
        index="Sim Meas.",
        columns=column_levels,
        values=["val_comb"],
    )
    .sort_values(by="Sim Meas.")
    .reindex(column_order, axis="columns", level="Test")
    .loc[:, "val_comb"]
)
display(pivot_corr.head())

# 4) Highlight the best scores by bolding; the marker is split off first and re-attached after the bold value
for column in pivot_corr.columns:
    col = pivot_corr.loc[:, column].apply(floatify).astype("float")
    identifiers = pivot_corr.loc[:, column].apply(separate_significance_indicator)
    is_best = col == col.max()
    new_col = col.apply(lambda x: f"{x:.2f}").apply(lambda s: r"\textbf{" + s + "}") + identifiers
    pivot_corr.loc[is_best, column] = new_col


# Insert into main pivot
pivot.loc[:, "Grounding by Prediction"] = pivot_corr

# Fix order of models
pivot = pivot.reindex(archs, axis="columns", level="Arch.")

display(pivot.head())

### Test 1

In [None]:
sub = pivot.loc[:, ("Grounding by Prediction", "Acc. Corr.")]
caption = (
    r"\emph{Graph - Test 1: Correlation to accuracy difference.}"
)

# Convert into latex file
styled = pd.io.formats.style.Styler(
    sub,
    precision=2,
)

latex_str = styled.to_latex(
    hrules=True,
    position="h",
    label="tab:graph_results_test_1",
    caption=caption,
    column_format="l||rrrr|rrr|rrr||",
)

# ----- Manual modifications --------
# All row numbers below are hard-coded positions in the generated LaTeX and therefore
# depend on the layout produced by `to_latex` for this table.
lines = latex_str.split("\n")

# Add opening of resizebox and centering
lines = lines[:3] + [r"\centering", r"\resizebox{0.7\linewidth}{!}{"] + lines[3:]

# Center headers
pattern = r"\{r\}"
replacement = r"{c}"
lines = [re.sub(pattern, replacement, line) if row in [7, 8, 9] else line for row, line in enumerate(lines)]

# Remove measure row
lines.pop(11)

# Remove modality row
lines.pop(8)

# Add Test row
lines = lines[:7] + [r"Test & \multicolumn{10}{c}{Accuracy Correlation} \\"] + lines[7:]

# Make every second row gray
first_row_with_gray = 12
final_rows_to_exclude = 4
lines = [
    r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
    for row, line in enumerate(lines[:-final_rows_to_exclude])
] + lines[-final_rows_to_exclude:]

# Add closing of resizebox
lines = lines[:-2] + [r"}"] + lines[-2:]

latex_str = "\n".join(lines)

# Make sure the output directory exists so the write does not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)
with open("tables/graph_test_1.tex", "w") as f:
    f.write(latex_str)

### Test 2

In [159]:
sub = pivot.loc[:, ("Grounding by Prediction", ["JSD Corr.", "Disagr. Corr."])]
caption = (
    r"\emph{Graph - Test 2: Correlation to Output difference.} "
    "Full results of the correlation between the similarity measures and the output differences for all architectures."
)

# Convert into latex file
styled = pd.io.formats.style.Styler(
    sub,
    precision=2,
)

latex_str = styled.to_latex(
    hrules=True,
    position="h",
    label="tab:graph_results_test_2",
    caption=caption,
    column_format="l||rrrr|rrr|rrr||rrrr|rrr|rrr",
)

# ----- Manual modifications --------
# All row numbers below are hard-coded positions in the generated LaTeX and therefore
# depend on the layout produced by `to_latex` for this table.
lines = latex_str.split("\n")

# Add opening of resizebox and centering
lines = lines[:3] + [r"\centering", r"\resizebox{\linewidth}{!}{"] + lines[3:]

# Center headers
pattern = r"\{r\}"
replacement = r"{c}"
lines = [
    re.sub(pattern, replacement, line) if row in [6, 7, 8, 9, 10, 11] else line for row, line in enumerate(lines)
]

# Remove measure row
lines.pop(13)

# Remove modality row
lines.pop(10)

# Make every second row gray
first_row_with_gray = 13
final_rows_to_exclude = 4
lines = [
    r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
    for row, line in enumerate(lines[:-final_rows_to_exclude])
] + lines[-final_rows_to_exclude:]

# Add closing of resizebox
lines = lines[:-2] + [r"}"] + lines[-2:]

latex_str = "\n".join(lines)

# Make sure the output directory exists so the write does not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)
with open("tables/graph_test_2.tex", "w") as f:
    f.write(latex_str)

### Test 3 - 6

In [None]:
captions = [
    (
        r"\emph{Graph - Test 3: Random Labels.} "
    ),
    (
        r"\emph{Graph - Test 4: Shortcut Affinity.} "
    ),
    (
        r"\emph{Graph - Test 5: Augmentation.} "
    ),
    (
        r"\emph{Graph - Test 6: Monotonicity.} "
    ),
]
columns = [
    ["Random Labels"],
    ["Shortcuts"],
    ["Augmentation"],
    ["Layer Mono."],
]

# Make sure the output directory exists so the writes do not fail on a fresh checkout.
Path("tables").mkdir(parents=True, exist_ok=True)

# One table per test; `test_number` runs from 3 to 6 and is used in the label and file name.
for test_number, column, caption in zip(range(3, 7), columns, captions):
    sub = pivot.loc[:, ("Grounding by Design", column)]

    display(sub.head())

    # Convert into latex file
    styled = pd.io.formats.style.Styler(
        sub,
        precision=2,
    )

    latex_str = styled.to_latex(
        hrules=True,
        position="h",
        label=f"tab:graph_results_test_{test_number}",
        caption=caption,
        # The Random Labels table uses a different column layout than the other three tests.
        column_format="c||ccc|ccc|ccc|ccc|ccc|ccc" if "Random Labels" in column else "c||cccc|ccc|ccc|cccc|ccc|ccc",
    )

    # ----- Manual modifications --------
    # All row numbers below are hard-coded positions in the generated LaTeX and therefore
    # depend on the layout produced by `to_latex` for this table.
    lines = latex_str.split("\n")

    # Add opening of resizebox and centering
    lines = lines[:3] + [r"\centering", r"\resizebox{\linewidth}{!}{"] + lines[3:]

    # Center headers
    pattern = r"\{r\}"
    replacement = r"{c}"
    lines = [
        re.sub(pattern, replacement, line) if row in [7, 8, 9, 10, 11, 12] else line
        for row, line in enumerate(lines)
    ]

    # Remove measure row
    lines.pop(13)

    # Remove modality row
    lines.pop(10)

    # The Arch. row is intentionally kept in the graph tables.

    # Make every second row gray
    first_row_with_gray = 13
    final_rows_to_exclude = 4
    lines = [
        r"\rowcolor{Gray}" + line if row >= first_row_with_gray and (row - first_row_with_gray) % 2 == 0 else line
        for row, line in enumerate(lines[:-final_rows_to_exclude])
    ] + lines[-final_rows_to_exclude:]

    # Add closing of resizebox
    lines = lines[:-2] + [r"}"] + lines[-2:]

    latex_str = "\n".join(lines)

    with open(f"tables/graph_test_{test_number}.tex", "w") as f:
        f.write(latex_str)