In [None]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
from PIL import Image
from scipy.optimize import linear_sum_assignment

In [None]:
# Input and output locations, relative to the notebook's working directory
results_dir, data_dir = Path("./results"), Path("./data")
# Exported figures are written to a sub-directory of the results directory
fig_dir = results_dir.joinpath("figures")

In [None]:
# Create the figure output directory together with any missing parents;
# an already existing directory is not an error.
fig_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Display names for the clustering methods, keyed by the result-file stem
rename_methods = dict(
    leiden="Leiden HVG",
    spatial_leiden="SpatialLeiden HVG",
    leiden_svg="Leiden SVG",
    spatial_leiden_svg="SpatialLeiden SVG",
)

In [None]:
# Per-sample scale factor, keyed by sample ID (as a string).
# plot_patient divides the value by 65 to obtain image pixels per micrometre,
# i.e. each value is treated as the pixel length of 65 um in the H&E image.
# NOTE(review): presumably `spot_diameter_fullres` from Space Ranger's
# scalefactors_json.json -- confirm the provenance of these numbers.
scale_factors = {
    "151676": 96.38356730700866,
    "151675": 96.39297012575229,
    "151674": 96.3779209567421,
    "151673": 96.40082438014726,
    "151672": 96.366076971651,
    "151671": 96.38644715313141,
    "151670": 96.36089958505312,
    "151669": 96.37890036828857,
    "151510": 96.8900012037373,
    "151509": 97.24346019692305,
    "151508": 96.36956627416598,
    "151507": 96.37511184430237,
}

In [None]:
def remove_tick_and_label(ax):
    """Blank out tick labels and axis labels of ``ax`` and hide its left/bottom ticks."""
    blank = {"xticklabels": [], "yticklabels": [], "xlabel": None, "ylabel": None}
    ax.set(**blank)
    ax.tick_params(left=False, bottom=False)


def scatter_labels(df, name, ax, title="top"):
    """Draw the spots of ``df`` on ``ax``, coloured by the label column ``name``.

    Marker styling is taken from the module-level ``scatter_kwargs`` dict, which
    is looked up at call time and therefore must be defined before the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "x", "y" and ``name``.
    name : str
        Column used as hue; also used as the panel caption.
    ax : matplotlib.axes.Axes
        Target axes.
    title : {"top", "left"} or anything else
        "top" writes ``name`` as the axes title, "left" as the y-axis label;
        any other value (e.g. ``None``) adds no caption.
    """
    x_max = df["x"].max()
    y_max = df["y"].max()

    sns.scatterplot(df, x="x", y="y", hue=name, ax=ax, **scatter_kwargs)
    # Equal aspect; y limits are reversed so the origin sits at the top left
    ax.set(aspect=1, xlim=(0, x_max), ylim=(y_max, 0))
    # Replace the automatic hue legend by an empty, frameless one
    ax.legend([], [], frameon=False)
    remove_tick_and_label(ax)

    # The caption goes on after the labels were cleared above
    if title == "left":
        ax.set_ylabel(name)
    elif title == "top":
        ax.set_title(name)

# Supplementary Figure 1

In [None]:
# Sample used for Supplementary Figure 1; the name selects both the
# sub-directory of `weightratio_impact` results and the data sub-directory
# that holds the spot coordinates.
sample = "Br8100_151673"


def format_weight(f):
    """Format a weight for use in a label, dropping a redundant fractional part.

    Parameters
    ----------
    f : str or float
        The weight, e.g. ``"2.0"``, ``"0.5"`` or ``2.0``. Must be accepted by
        ``float()``.

    Returns
    -------
    str
        The integer spelling (``"2"``) when the value is integral, otherwise
        the text of ``f`` as given (non-string input is converted with ``str``).

    Raises
    ------
    ValueError
        If ``f`` is a string that ``float()`` cannot parse.
    """
    value = float(f)
    if value.is_integer():
        return str(int(value))
    # Always return text: the result is concatenated into label strings, and
    # returning a float unchanged would make that concatenation fail.
    return str(f)


# One record per clustering result of the chosen sample: (file stem, path)
files = pd.DataFrame(
    [(f.stem, f) for f in results_dir.glob(f"weightratio_impact/{sample}/*.tsv")],
    columns=["method", "file"],
)

# The weight is encoded in the stem as a "_w<number>" suffix; stems without it
# get NaN. It stays text for now so the label shows it as written in the name.
files["weight"] = files["method"].str.extract("_w([0-9.]+)", expand=False)

# Keep only the part in front of the first "_w" and map it to a display name.
# Taking element 0 of the split (instead of expanding into columns) also works
# when no stem, or a stem more than once, contains "_w".
files["method"] = files["method"].str.split("_w", n=1).str[0].replace(rename_methods)

# Append the weight ratio to the SpatialLeiden HVG labels. Rows without a
# weight are left alone, otherwise their label would turn into NaN.
weighted_hvg = files["method"].eq("SpatialLeiden HVG") & files["weight"].notna()
files.loc[weighted_hvg, "method"] = (
    files.loc[weighted_hvg, "method"]
    + " (1:"
    + files.loc[weighted_hvg, "weight"].map(format_weight)
    + ")"
)

# Series of file paths indexed by display name, ordered by increasing weight
# with the unweighted methods first.
files = (
    files.set_index("method")
    .astype({"weight": float})
    .sort_values("weight", na_position="first")["file"]
)

In [None]:
# Spot coordinates, shifted so that the smallest value of every column is 0
data = pd.read_table(data_dir / sample / "coordinates.tsv", index_col=0)
data -= data.min(axis=0)

# Append one categorical label column per clustering result. The merge is an
# inner join on the index, so spots missing from a label file are dropped.
for name, file in files.items():
    labels_df = pd.read_table(file, index_col=0, dtype="category")
    labels_df.columns = [name]
    data = data.merge(labels_df, left_index=True, right_index=True)

# match clusters
# Every label column is relabelled to agree as much as possible with the column
# to its left: the assignment maximises the number of spots on which the two
# labelings coincide. The first two columns are taken to be the coordinates.
label_columns = list(data.columns[2:])
for reference, target in zip(label_columns, label_columns[1:]):
    contingency_table = pd.crosstab(data[target], data[reference])
    row_ind, col_ind = linear_sum_assignment(contingency_table, maximize=True)
    renamed = data[target].cat.rename_categories(
        dict(zip(contingency_table.index[row_ind], contingency_table.columns[col_ind]))
    )
    # Write back by column label with an explicit category order (reference
    # order first, unmatched labels last). Identical labels then occupy
    # identical category positions in both columns -- which is what decides the
    # hue colour -- independent of how pandas treats positional `iloc` writes
    # into a categorical column.
    reference_order = list(data[reference].cat.categories)
    unmatched = [c for c in renamed.cat.categories if c not in reference_order]
    data[target] = renamed.cat.set_categories(reference_order + unmatched)

In [None]:
# Marker styling picked up by scatter_labels (it reads this module-level name)
scatter_kwargs = dict(s=8, linewidth=0)


fig, axs = plt.subplots(nrows=3, ncols=3, sharex=True, sharey=True, figsize=(7.5, 9))

# Fill the 3x3 grid row by row, one panel per label column
for panel, name in enumerate(data.columns[2:]):
    row, col = divmod(panel, 3)
    scatter_labels(data, name, axs[row, col])

fig.tight_layout(h_pad=2)
fig.savefig(fig_dir / "FigS1.pdf", dpi=600)

# Patient Plots

In [None]:
def _paths_by_sample(pattern, column):
    """Return a one-column frame of the paths matching ``pattern`` below the data directory, indexed by the name of each file's parent directory."""
    records = [(path.parent.name, path) for path in data_dir.glob(pattern)]
    return pd.DataFrame(records, columns=["sample", column]).set_index("sample")


# Per-sample input files found below the data directory
groundtruth = _paths_by_sample("*/labels.tsv", "groundtruth")
image_files = _paths_by_sample("*/H_E.tiff", "image")
coordinate_files = _paths_by_sample("*/coordinates.tsv", "coordinates")

# Per-sample result files: {sample directory name: {file stem: path}}
results = {
    folder.name: {table.stem: table for table in folder.glob("*.tsv")}
    for folder in results_dir.glob("Br*")
}

# One row per sample, one column per result file, then the input files joined
# on the sample name (inner joins) and the method columns renamed for display
files = pd.DataFrame(results).T
for per_sample_paths in (groundtruth, image_files, coordinate_files):
    files = files.merge(per_sample_paths, left_index=True, right_index=True)
files = files.rename(columns=rename_methods)

# Split the row labels at "_" into a (patient, sample) MultiIndex
files.index = files.index.str.split("_", expand=True).set_names(["patient", "sample"])

In [None]:
# Larger axes titles and axis labels for the patient figures
# (`mpl` is imported together with the other dependencies in the first cell)
mpl.rcParams.update({"axes.titlesize": "large", "axes.labelsize": "large"})

# Marker styling for the patient figures; scatter_labels reads this
# module-level name at call time
scatter_kwargs = {"s": 6, "linewidth": 0}

In [None]:
# disable DecompressionBombWarning
# Setting the limit to None switches off Pillow's image-size check entirely,
# so only trusted image files should be opened from here on.
Image.MAX_IMAGE_PIXELS = None


# One millimetre expressed in inches (1 inch = 2.54 cm).
# NOTE(review): not referenced by any of the cells visible here.
mm = 1 / 2.54 / 10


def plot_patient(metadata, patient):
    """Plot the H&E image and every cluster labeling for each sample of a patient.

    The figure has one column per sample. The top row shows the H&E image,
    cropped to the bounding box of the spot coordinates, with a 1 mm scale bar;
    the rows below show the spots coloured by ground truth and by each method.

    Parameters
    ----------
    metadata : pandas.DataFrame
        File table, expected to be indexed by (patient, sample). It needs the
        columns "image" and "coordinates"; every other column must hold the path
        of a label file and is read. The columns listed in ``label_rows`` below
        must be among them.
    patient : str
        First-level index value selecting the samples to plot.

    Returns
    -------
    matplotlib.figure.Figure
        The figure, after ``tight_layout`` has been applied.

    Raises
    ------
    KeyError
        If ``patient`` is not in the index or a sample has no entry in the
        module-level ``scale_factors``.
    """
    # Label columns shown below the image row, top to bottom
    label_rows = (
        "groundtruth",
        "Leiden HVG",
        "Leiden SVG",
        "SpatialLeiden HVG",
        "SpatialLeiden SVG",
        "SpaGCN",
        "BayesSpace",
    )

    metadata = metadata.loc[patient, :].sort_index()
    n_samples = len(metadata)

    # One column per sample (2.5 in wide, i.e. 10 in for four samples);
    # squeeze=False keeps `axs` two-dimensional even for a single sample.
    fig, axs = plt.subplots(
        nrows=1 + len(label_rows),
        ncols=n_samples,
        sharex="col",
        sharey="col",
        figsize=(2.5 * n_samples, 20),
        squeeze=False,
    )

    for i, (sample, sample_files) in enumerate(metadata.iterrows()):
        # The scale factor is treated as the pixel length of 65 um
        px_per_um = scale_factors[sample] / 65

        # Read the pixels inside a context manager so the file handle is released
        with Image.open(sample_files.pop("image")) as img:
            image = np.array(img)
        data = pd.read_table(sample_files.pop("coordinates"), index_col=0)

        # All remaining entries are label files; inner merges on the spot index
        # keep only the spots present in every one of them.
        for name, file in sample_files.items():
            labels_df = pd.read_table(file, index_col=0)
            labels_df.columns = [name]
            labels_df[name] = labels_df[name].astype("category")

            data = data.merge(labels_df, left_index=True, right_index=True)

        # Crop the image to the bounding box of the spots and shift the
        # coordinates so they refer to the cropped image.
        x_crop = (data["x"].min(), data["x"].max())
        y_crop = (data["y"].min(), data["y"].max())

        image = image[slice(*y_crop), slice(*x_crop)]
        data = data.assign(
            x=lambda df: df["x"] - x_crop[0], y=lambda df: df["y"] - y_crop[0]
        )

        image_ax = axs[0, i]
        _ = image_ax.imshow(image)
        image_ax.set(title=sample)
        remove_tick_and_label(image_ax)
        # Bar length is given in data (pixel) units: 1000 um = 1 mm
        scale_bar = AnchoredSizeBar(
            image_ax.transData,
            px_per_um * 1_000,
            r"1 mm",
            loc="lower right",
            frameon=False,
        )
        image_ax.add_artist(scale_bar)

        # Only the first column carries the row captions (as y-axis labels)
        if i == 0:
            image_ax.set_ylabel("H & E")
            title = "left"
        else:
            title = None

        for label_ax, label_name in zip(axs[1:, i], label_rows):
            scatter_labels(data, label_name, label_ax, title)

    fig.tight_layout()

    return fig

## Supplementary Figure 2 (Br5292)

In [None]:
# Plot all samples of patient Br5292, redo the layout with extra vertical
# padding between the rows and export the figure as PDF.
SFig2 = plot_patient(files, "Br5292")
SFig2.tight_layout(h_pad=2)
SFig2.savefig(fig_dir / "FigS2.pdf", dpi=600)

## Supplementary Figure 3 (Br5595)

In [None]:
# Plot all samples of patient Br5595, redo the layout with extra vertical
# padding between the rows and export the figure as PDF.
SFig3 = plot_patient(files, "Br5595")
SFig3.tight_layout(h_pad=2)
SFig3.savefig(fig_dir / "FigS3.pdf", dpi=600)

## Supplementary Figure 4 (Br8100)

In [None]:
# Plot all samples of patient Br8100, redo the layout with extra vertical
# padding between the rows and export the figure as PDF.
SFig4 = plot_patient(files, "Br8100")
SFig4.tight_layout(h_pad=2)
SFig4.savefig(fig_dir / "FigS4.pdf", dpi=600)