In [2]:
# --- Notebook parameters (defaults; override before running the cells below) ---

# Where plots are written. None means: a "report" directory created next to
# `dataframe_path`.
report_folder = None

# CSV file holding the distance table that is loaded with pandas.
dataframe_path = "default_path"

# Optional list of "ComparisonLevel" values to keep. When given, rows are
# filtered to these values and the per-level summary follows this order.
list_comparison_levels = None

# Position of the layer to analyse within the detected layer columns.
# Negative values index from the end, so -1 selects the last layer column.
idx_layer_used = -1

In [None]:
import re
from IPython.display import display, Markdown
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import pathlib

In [None]:
# Resolve the output directory: default to "<folder of the CSV>/report" when
# no explicit report folder was supplied, otherwise use the given location.
if report_folder is None:
    report_folder = pathlib.Path(dataframe_path).parent / "report"
else:
    report_folder = pathlib.Path(report_folder)
report_folder.mkdir(parents=True, exist_ok=True)

# Load the distance table.
df = pd.read_csv(dataframe_path)

# Layer columns are the ones whose header has the form "<digits>: <anything>".
pattern = re.compile(r"^\d+: .*$")
layers_names = list(filter(pattern.match, df.columns))

# Optionally restrict the analysis to the requested comparison levels.
if list_comparison_levels is not None:
    df = df[df["ComparisonLevel"].isin(list_comparison_levels)]

# Highest valid (zero-based) layer index; only used for plot titles.
num_layers = len(layers_names) - 1

display(
    Markdown(
        f"# Analysis for dataset ***{dataframe_path}***\n, LayerIdx: ***{idx_layer_used}***"
    )
)

# Fail with an explicit message instead of a bare "list index out of range"
# when the CSV contains no column that looks like a layer column.
if not layers_names:
    raise IndexError(
        f"No layer columns of the form '<index>: <name>' found in {dataframe_path}"
    )
name_layer_used = layers_names[idx_layer_used]

# Aggregate only the layer columns. The frame also carries non-numeric
# columns (e.g. the "MatchingLevels" sample identifier); asking pandas >= 2.0
# for the mean/std of those raises TypeError, whereas older versions merely
# dropped them silently.
layers_by_level = df.groupby("ComparisonLevel")[layers_names]
mean_distances = layers_by_level.mean()
mean_distances_std = layers_by_level.std()

# One value per comparison level for the selected layer (a Series, so no
# transpose is needed).
mean_distances_t_targeted_layer = mean_distances[name_layer_used]
mean_distances_t_targeted_layer_std = mean_distances_std[name_layer_used]

# Present the levels in the order the user asked for rather than the sorted
# order produced by groupby.
if list_comparison_levels is not None:
    mean_distances_t_targeted_layer = mean_distances_t_targeted_layer.reindex(
        list_comparison_levels
    )
    mean_distances_t_targeted_layer_std = mean_distances_t_targeted_layer_std.reindex(
        list_comparison_levels
    )

In [None]:
# Bar chart of the mean distance per comparison level for the selected layer,
# with the per-level standard deviation drawn as error bars.
fig, ax = plt.subplots(figsize=(5, 6))
ax.bar(
    mean_distances_t_targeted_layer.index,
    mean_distances_t_targeted_layer.values,
    yerr=mean_distances_t_targeted_layer_std,
    color=["blue", "orange"],
)
ax.set_title(
    f"Average Euclidean Distance for Layer {idx_layer_used}/{num_layers}: {name_layer_used}"
)
ax.set_xlabel("Comparison Level")
ax.set_ylabel("Average Euclidean Distance")

# Persist the figure in the report folder before showing it.
aggregated_plot_path = (
    pathlib.Path(report_folder) / "barplot_comparion_level_aggregated.png"
)
fig.savefig(str(aggregated_plot_path), facecolor="white")
plt.show()

In [None]:
from statsmodels.stats.anova import AnovaRM

# Repeated-measures ANOVA on the selected layer's distance:
#   - each "MatchingLevels" value is treated as one subject,
#   - "ComparisonLevel" is the within-subject factor,
#   - multiple observations per subject/level cell are averaged first.
anova_model = AnovaRM(
    data=df,
    depvar=name_layer_used,
    subject="MatchingLevels",
    within=["ComparisonLevel"],
    aggregate_func="mean",
)
r = anova_model.fit()

print(r.anova_table)

In [None]:
# Per-sample view: mean/std of the selected layer's distance for every
# (comparison level, sample) pair, drawn as grouped bars.

# Rows without a sample identifier cannot be grouped; groupby would drop them
# anyway, so remove them up front to keep the identifier check below clean.
samples = df.dropna(subset=["MatchingLevels"])

# Sample identifiers may look like "[12]" or "12". When every identifier is a
# plain number once the brackets are stripped, use integers so that samples
# are ordered numerically (2 before 10) instead of lexicographically.
# `astype(str)` makes this work even when read_csv already parsed the column
# as integers, in which case the `.str` accessor alone would raise.
sample_ids = samples["MatchingLevels"].astype(str).str.strip("[]")
if sample_ids.str.isdigit().all():
    samples = samples.assign(MatchingLevels=sample_ids.astype(int))

# Aggregate only the layer columns: averaging arbitrary non-numeric columns
# raises TypeError on pandas >= 2.0.
per_sample = samples.groupby(["ComparisonLevel", "MatchingLevels"])[layers_names]
grouped = per_sample.mean()
std_dev = per_sample.std()

# Rows: comparison level, columns: sample (sorted by unstack).
last_layer = grouped[name_layer_used].unstack()
last_layer_std = std_dev[name_layer_used].unstack()

# Rows: sample, columns: comparison level -> one bar group per sample.
last_layer_transposed = last_layer.transpose()

# Widen the figure with the number of samples so bar groups stay readable.
base_width = 10
additional_width_per_bar = 0.5
num_bars = len(last_layer_transposed)
fig_width = base_width + num_bars * additional_width_per_bar

fig, ax = plt.subplots(figsize=(fig_width, 6))
ax.set_facecolor("white")
fig.set_facecolor("white")

last_layer_transposed.plot(
    kind="bar", yerr=last_layer_std.transpose(), capsize=0, ax=ax
)

ax.set_title(
    f"Average Euclidean Distance for Layer {idx_layer_used}/{num_layers}: {name_layer_used}"
)
ax.set_xlabel("Sample Name")
ax.set_ylabel("Average Euclidean Distance")
ax.legend(title="Comparison Level")

fig.savefig(
    str(pathlib.Path(report_folder) / "barplot_comparion_level_all_samples.png"),
    facecolor="white",
)
plt.show()