In [None]:
import scanpy as sc
import numpy as np
from tqdm.notebook import tqdm
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Folder that holds the integrated AnnData objects read and written by this notebook.
# Set the TIMECOURSE_OUTPUT_FOLDER environment variable to point the notebook at a
# different location without editing the code; the original path is the default.
output_folder = os.environ.get(
    "TIMECOURSE_OUTPUT_FOLDER", r"D:/amonell/timecourse_final/analysis/cleaned"
)

In [None]:
# Load the integrated object that is sub-clustered below.
ad_sp = sc.read(
    filename=os.path.join(output_folder, "concatenated_integrated.h5ad"),
)

In [None]:
def get_celltype(celltype, ad_sp, resolution=1.2):
    """Extract one leiden cluster and re-cluster it on its own.

    Parameters
    ----------
    celltype
        Label in ``ad_sp.obs["leiden"]`` of the cluster to extract.
    ad_sp
        AnnData object with a ``leiden`` column in ``obs`` and an ``X_scVI``
        representation (used to build the neighbor graph).
    resolution
        Resolution passed to ``sc.tl.leiden``; defaults to the value the
        notebook originally hard-coded (1.2).

    Returns
    -------
    A new AnnData object restricted to the selected cluster, with neighbors,
    leiden clustering and UMAP recomputed on it. ``sc.tl.leiden`` is called
    without ``key_added``, so the new labels are read back from
    ``obs["leiden"]`` by the cells that follow.
    """
    # Subsetting yields a view of ad_sp; take an explicit copy so the scanpy
    # calls below work on a standalone object instead of modifying a view.
    ctype = ad_sp[ad_sp.obs["leiden"].isin([celltype])].copy()
    sc.pp.neighbors(ctype, use_rep="X_scVI")
    sc.tl.leiden(ctype, resolution=resolution)
    sc.tl.umap(ctype)
    return ctype


def reunite_with_ad(ad_sp, subset_ad, celltype):
    """Write the sub-cluster labels of one cluster back into ``ad_sp.obs["Sub_leiden"]``.

    Parameters
    ----------
    ad_sp
        Full object; must have ``leiden`` and ``Sub_leiden`` columns in ``obs``.
    subset_ad
        Object holding the re-clustered cells; its ``obs["leiden"]`` provides the
        new label for every cell of ``ad_sp`` whose ``leiden`` equals ``celltype``.
    celltype
        The ``leiden`` label of the cluster that was sub-clustered.

    Returns
    -------
    The same ``ad_sp`` object, with ``obs["Sub_leiden"]`` updated in place.

    Raises
    ------
    KeyError
        If a cell of the selected cluster is missing from ``subset_ad.obs``.
    """
    obs = ad_sp.obs
    in_cluster = (obs["leiden"] == celltype).to_numpy()

    # Work on plain Python objects: a categorical Sub_leiden column would not
    # accept labels that are not among its categories yet.
    merged = obs["Sub_leiden"].astype(object).to_numpy(copy=True)

    # Look all cells of the cluster up at once instead of one .loc call per row.
    member_cells = obs.index[in_cluster]
    merged[in_cluster] = (
        subset_ad.obs.loc[member_cells, "leiden"].astype(object).to_numpy()
    )

    ad_sp.obs["Sub_leiden"] = merged
    return ad_sp

In [None]:
# Overview of the coarse leiden clusters on the "mde" embedding.
sc.pl.embedding(
    ad_sp,
    color=["leiden"],
    basis="mde",
    cmap="Blues",
)

In [None]:
# Seed Sub_leiden with the coarse leiden labels; reunite_with_ad later replaces the
# labels of individual clusters with their sub-cluster labels.
# NOTE: re-running this cell overwrites Sub_leiden and therefore discards any
# sub-cluster labels that were already merged in.
ad_sp.obs["Sub_leiden"] = ad_sp.obs["leiden"]

##### The following code required a lot of manual investigation and external tools such as CellTypist to annotate cell types. If you are not interested in the details of the cell type annotation, please skip the following code and use our cell type annotations in the 'integrated_celltyped.h5ad' object.

We include the MiguelCellTyping.xlsx file in this directory. This file was ultimately used to assign cell types to all of our sub-leiden clusters.

The following code was used to subcluster the leiden clusters that we wanted to define with higher granularity.

In [None]:
# Label (as a string) of the coarse leiden cluster to sub-cluster in the cells below.
# NOTE(review): the `subclustered_clusters` list further down names several clusters,
# so presumably this value was changed and the following cells re-run once per
# cluster — confirm.
leiden_to_subset = "12"

In [None]:
# Re-cluster only the cells of the selected leiden cluster.
mac = get_celltype(celltype=leiden_to_subset, ad_sp=ad_sp)

In [None]:
# Show the new sub-clusters of the selected cluster on the "mde" embedding.
sc.pl.embedding(
    mac,
    color=["leiden"],
    basis="mde",
    size=0.1,
    vmax=1,
)

In [None]:
# Prefix every sub-cluster label with the parent cluster id ("<parent>_<sub>").
# Labels that already carry the prefix are left alone, so re-running this cell
# does not produce doubled prefixes such as "12_12_0".
mac.obs["leiden"] = [
    label
    if label.startswith(leiden_to_subset + "_")
    else leiden_to_subset + "_" + label
    for label in mac.obs["leiden"]
]

In [None]:
# Copy the prefixed sub-cluster labels back into the Sub_leiden column of the full object.
ad_sp = reunite_with_ad(ad_sp=ad_sp, subset_ad=mac, celltype=leiden_to_subset)

In [None]:
# Check the merged labels on the "mde" embedding of the full object.
sc.pl.embedding(
    ad_sp,
    color=["Sub_leiden"],
    basis="mde",
)

In [None]:
# Persist the object together with its Sub_leiden labels.
ad_sp.write_h5ad(os.path.join(output_folder, "integrated_clustered.h5ad"))

### Make matrixplots for main clusters and subclusters

In [None]:
# Reload the clustered object written above.
ad_sp = sc.read(
    filename=os.path.join(output_folder, "integrated_clustered.h5ad"),
)

In [None]:
# Folder for the matrixplot / clustermap images. It is derived from output_folder so
# the base path is defined in one place, and created up front because the savefig
# calls below do not create missing directories.
save_location = os.path.join(output_folder, "celltype_matrixplots")
os.makedirs(save_location, exist_ok=True)

In [None]:
# Request 300 dpi for the figures produced below.
sc.set_figure_params(dpi=300)

In [None]:
# Build the matrixplot object for all genes grouped by Sub_leiden without showing it;
# only its `values_df` table is used, as input for a clustered heatmap.
s = sc.pl.matrixplot(
    ad_sp,
    var_names=ad_sp.var.index.values,
    groupby="Sub_leiden",
    standard_scale="var",
    swap_axes=True,
    show=False,
    return_fig=True,
)
g = sns.clustermap(
    s.values_df,
    figsize=(80, 30),
    cmap="viridis",
    row_cluster=True,
    col_cluster=True,
    yticklabels=True,
    xticklabels=True,
)
# Rows and columns stay in clustered order, but the dendrograms and grid are hidden.
g.ax_heatmap.grid(False)
g.ax_col_dendrogram.set_visible(False)
g.ax_row_dendrogram.set_visible(False)
plt.savefig(os.path.join(save_location, "all_subclusters.png"))

In [None]:
# Same clustered heatmap as for the sub-clusters, but grouped by the original
# leiden labels.
s = sc.pl.matrixplot(
    ad_sp,
    var_names=ad_sp.var.index.values,
    groupby="leiden",
    standard_scale="var",
    swap_axes=True,
    show=False,
    return_fig=True,
)
g = sns.clustermap(
    s.values_df,
    figsize=(80, 30),
    cmap="viridis",
    row_cluster=True,
    col_cluster=True,
    yticklabels=True,
    xticklabels=True,
)
# Rows and columns stay in clustered order, but the dendrograms and grid are hidden.
g.ax_heatmap.grid(False)
g.ax_col_dendrogram.set_visible(False)
g.ax_row_dendrogram.set_visible(False)
plt.savefig(os.path.join(save_location, "all_original_leiden.png"))

In [None]:
# Original leiden cluster ids for which a per-cluster heatmap of their Sub_leiden
# groups is drawn in the loop below.
subclustered_clusters = ["1", "3", "5", "6", "7", "8", "9", "10", "11", "12"]

In [None]:
# One clustered heatmap per original leiden cluster, comparing its Sub_leiden groups.
for sub in subclustered_clusters:
    cluster_ad = ad_sp[ad_sp.obs.leiden.isin([sub])]
    if cluster_ad.n_obs == 0:
        # A cluster id that is absent from the data would make matrixplot fail.
        print(f"leiden {sub}: no cells found, skipping")
        continue

    s = sc.pl.matrixplot(
        cluster_ad,
        groupby="Sub_leiden",
        var_names=ad_sp.var.index.values,
        swap_axes=True,
        standard_scale="var",
        return_fig=True,
        show=False,
    )
    values = s.values_df
    g = sns.clustermap(
        values,
        # Hierarchical clustering needs at least two observations along an axis;
        # a cluster with a single Sub_leiden group would otherwise raise here.
        col_cluster=values.shape[1] > 1,
        row_cluster=values.shape[0] > 1,
        cmap="viridis",
        xticklabels=True,
        yticklabels=True,
        figsize=(80, 30),
    )
    g.ax_row_dendrogram.set_visible(False)
    g.ax_col_dendrogram.set_visible(False)
    g.ax_heatmap.grid(False)
    plt.savefig(os.path.join(save_location, f"leiden{sub}_subclusters.png"))
    # Close each figure so the large canvases do not accumulate in memory.
    plt.close()

In [None]:
def plot_topic_scatter(adata, topic_column, max_cells=10000, out_dir=None):
    """Plot, for every label in ``topic_column``, where its cells lie spatially.

    One figure is produced per unique label: all cells in light gray, the cells
    carrying the label in red. Each figure is saved as
    ``leiden<label>_subclusters.png``, shown, and closed.

    Parameters
    ----------
    adata
        AnnData object with ``topic_column`` in ``obs`` and coordinates in
        ``obsm["X_spatial"]`` (columns 0 and 1 are used as x and y).
    topic_column
        Name of the ``obs`` column holding the labels to highlight.
    max_cells
        Only the first ``max_cells`` cells are plotted (default 10000, the value
        the notebook originally hard-coded). Pass ``None`` to plot all cells.
    out_dir
        Folder the images are written to. Defaults to the ``location_images``
        sub-folder of the notebook-level ``save_location``. It is created if
        it does not exist.
    """
    if out_dir is None:
        out_dir = os.path.join(save_location, "location_images")
    os.makedirs(out_dir, exist_ok=True)

    if max_cells is not None:
        adata = adata[:max_cells, :]

    # Extract data for plotting
    topics = adata.obs[topic_column]
    spatial_coords = adata.obsm["X_spatial"]

    # Create a separate plot for each label
    for topic in topics.unique():
        mask = (topics == topic).to_numpy()

        fig, ax = plt.subplots(figsize=(10, 5), dpi=150)
        # Background: every cell, so the highlighted group is seen in context.
        ax.scatter(
            spatial_coords[:, 0],
            spatial_coords[:, 1],
            color="lightgray",
            label="Other Cells",
            s=4,
            linewidths=0.1,
        )
        ax.scatter(
            spatial_coords[mask, 0],
            spatial_coords[mask, 1],
            color="red",
            label=f"Subcluster: {topic}",
            alpha=0.8,
            s=4,
            linewidths=0.1,
        )

        # Add labels and legend for the current plot
        ax.set_xlabel("X Spatial")
        ax.set_ylabel("Y Spatial")
        ax.set_title(f"Spatial Distribution of Subcluster: {topic}")
        ax.legend()
        ax.grid(False)

        # Save the plot with a unique filename for each label
        fig.savefig(os.path.join(out_dir, f"leiden{topic}_subclusters.png"))
        plt.show()
        plt.close(fig)


# Create the output folder (and any missing parents). exist_ok=True tolerates an
# already existing folder, while real failures such as a permission error are
# still raised instead of being swallowed by a bare except.
os.makedirs(os.path.join(save_location, "location_images"), exist_ok=True)
plot_topic_scatter(ad_sp[ad_sp.obs["batch"] == "day7_SI_DMSO"], "Sub_leiden")

### Read in the cell type annotations from MiguelCellTyping.xlsx

In [None]:
# Start again from the clustered object before attaching the annotations.
ad_sp = sc.read(
    filename=os.path.join(output_folder, "integrated_clustered.h5ad"),
)

In [None]:
# Annotation sheet: the header is taken from row 1 and the index from column 1
# (both zero-based).
annotations = pd.read_excel(
    io="D:/amonell/timecourse_final/MiguelCellTyping.xlsx",
    header=1,
    index_col=1,
)

In [None]:
# Keep only the four annotation levels that are attached to the cells.
annotations = annotations.loc[
    :, ["Class", "Type", "Subtype", "Immunocentric_Type"]
]

In [None]:
# Convert the index labels to strings so they can be matched against the
# Sub_leiden labels in the merge below.
annotations.index = pd.Index(annotations.index.to_numpy().astype(str))

In [None]:
# Attach the annotation columns to every cell via its Sub_leiden label.
# - Annotation columns left over from a previous run are dropped first, so
#   re-running this cell does not create suffixed "_x"/"_y" duplicates.
# - validate="many_to_one" raises if a label occurs more than once in the
#   annotation index, which would otherwise duplicate cells in the result.
ad_sp.obs = ad_sp.obs.drop(columns=annotations.columns, errors="ignore").merge(
    annotations,
    left_on="Sub_leiden",
    right_index=True,
    how="left",
    validate="many_to_one",
)

In [None]:
# Color the "mde" embedding by the merged Subtype annotation.
sc.pl.embedding(
    ad_sp,
    color="Subtype",
    basis="mde",
    size=0.05,
)

In [None]:
# Flag cells whose Subtype is a placeholder ("Undetermined", including the
# misspelled variant) or missing. Missing values are tested with isna() instead of
# relying on `None` inside the isin() list to match the NaN left by the left merge.
# NOTE(review): despite its name, this column is True for cells WITHOUT a usable
# annotation; the name is kept unchanged for compatibility.
ad_sp.obs["annotated"] = (
    ad_sp.obs["Subtype"].isin(["Undetermined", "Undertermined"])
    | ad_sp.obs["Subtype"].isna()
)

In [None]:
# Store the flag as integers: 1 where it was True, 0 otherwise.
ad_sp.obs["annotated"] = ad_sp.obs["annotated"].eq(True).astype("int64").to_numpy()

In [None]:
# set_figure_params takes no AnnData argument; the object that used to be passed
# here ended up in its first parameter, the `scanpy` flag. Only the figure size
# is meant to be changed.
sc.set_figure_params(figsize=(10, 10))
sc.pl.embedding(
    ad_sp, basis="mde", color=["annotated", "Sub_leiden"], legend_loc="on data"
)

In [None]:
# Drop cells whose Subtype is a placeholder ("Undetermined", including the
# misspelled variant) or missing. Missing values are tested with isna() instead of
# relying on `None` inside the isin() list to match NaN.
after_filtering = ad_sp[
    ~(
        ad_sp.obs["Subtype"].isin(["Undetermined", "Undertermined"])
        | ad_sp.obs["Subtype"].isna()
    )
]

In [None]:
# Keep only the cells that have a Class annotation.
after_filtering = after_filtering[after_filtering.obs["Class"].notna()]

In [None]:
# One panel per annotation level, stacked in a single column, and saved to disk.
sc.pl.embedding(
    after_filtering,
    color=["Class", "Type", "Subtype", "Immunocentric_Type"],
    basis="mde",
    ncols=1,
    size=0.4,
    save="cell_annotations.png",
)

In [None]:
# Write the annotated, filtered object.
after_filtering.write_h5ad(os.path.join(output_folder, "integrated_celltyped.h5ad"))