In [None]:
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
import seaborn as sns
from scipy.stats import gaussian_kde
import matplotlib.colors as clr
from matplotlib.patches import Polygon
import ucell


# Suppress every Python warning for the rest of the session. Note that this also hides
# deprecation notices and any warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
# Eleven-colour palette running from teal-blue (#3A9AB2) through yellow to red (#F11B00).
zissou = [
    "#3A9AB2",
    "#6FB2C1",
    "#91BAB6",
    "#A5C2A3",
    "#BDC881",
    "#DCCB4E",
    "#E3B710",
    "#E79805",
    "#EC7A05",
    "#EF5703",
    "#F11B00",
]

# Continuous colormap interpolated through the palette (low values teal-blue, high values red).
colormap = clr.LinearSegmentedColormap.from_list("Zissou", zissou)
# Same palette in reverse order. It gets its own name (matplotlib's "_r" suffix convention)
# so the two colormaps can be told apart by their `name` attribute.
colormap_r = clr.LinearSegmentedColormap.from_list("Zissou_r", zissou[::-1])

In [None]:
# Load the AnnData object for the analysis. The path is relative to the notebook's
# working directory.
adata = sc.read_h5ad("../data/adata/timecourse.h5ad")

In [None]:
# Batches to keep. Only the keys are used in the cells shown here (to subset `adata`);
# the "x"/"y" values are not read in this section - presumably per-sample coordinates,
# TODO confirm against their consumer.
batches = {
    # "day6_SI": {"x": 6200, "y": 6200},
    # "day6_SI_r2": {"x": 5800, "y": 5500},
    # "day8_SI_Ctrl": {"x": 2400, "y": 2400},
    # "day8_SI_r2": {"x": 3200, "y": 1500},
    # "day30_SI": {"x": 6400, "y": 2400},
    # "day30_SI_r2": {"x": 6200, "y": 6200},
    "day90_SI": {"x": 2400, "y": 2400},
    "day90_SI_r2": {"x": 1200, "y": 6200},
}

# Subset anndata to the day 90 timepoint to make the calculation faster.
# Slicing an AnnData returns a view; the following cells modify the object in place
# (normalisation, new obs columns), so take an explicit copy instead of relying on the
# implicit view-to-copy conversion.
adata = adata[adata.obs.batch.isin(list(batches))].copy()

In [None]:
# Scale every cell to a total of 10,000 and log1p-transform the result.
# The return values are not captured, i.e. both calls are used for their in-place effect
# on `adata`; re-running this cell would therefore transform the data a second time.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
def import_kurd_et_al_signature(
    p_cutoff=0.01,
    lfc_cutoff=1,
):
    """
    Load the cluster 3 / cluster 29 gene signatures from Kurd et al.

    Early precursors and molecular determinants of tissue-resident memory
    CD8+ T lymphocytes revealed by single-cell RNA sequencing
    -----------------------------------------------------
    Table S5 contains DE genes from cluster 3 and cluster 29
    - cluster 3 is the more stem-like population
    - cluster 29 is the more effector-like population

    Parameters:
    - p_cutoff (float): Significance threshold; rows are kept when their
      "log10pval" is below log10(p_cutoff).
    - lfc_cutoff (float): Absolute log2 fold-change threshold applied to "log2fold".

    Returns:
    - dict: {"cluster_3": genes with log2fold > lfc_cutoff,
             "cluster_29": genes with log2fold < -lfc_cutoff}
    """
    df = pd.read_excel("../data/signatures/kurd.xlsx", skiprows=1)
    significant = df[df["log10pval"] < np.log10(p_cutoff)]

    up = significant.loc[significant["log2fold"] > lfc_cutoff, "Gene"].to_list()
    # The threshold must be mirrored for the opposite direction. Comparing against
    # +lfc_cutoff would put every significant gene with a fold change below +1
    # (including weakly up-regulated ones) into the cluster 29 signature.
    down = significant.loc[significant["log2fold"] < -lfc_cutoff, "Gene"].to_list()

    return {
        "cluster_3": up,
        "cluster_29": down,
    }


# Read the Milner et al. gene lists from their text files
def import_milner_et_al_signatures():
    """
    Heterogenous Populations of Tissue-Resident CD8+ T Cells Are
    Generated in Response to Infection and Malignancy
    --------------------------------------------------
    Each signature file is read without a header row and its first column
    is returned as the gene list.

    Returns:
    - dict: {"Blimp1": [...], "Id3": [...]}
    """
    signature_files = {
        "Blimp1": "../data/signatures/blimp.txt",
        "Id3": "../data/signatures/id3.txt",
    }
    return {
        name: pd.read_csv(path, header=None)[0].to_list()
        for name, path in signature_files.items()
    }


# Combine two dicts into a new dict; on duplicate keys the value from dict2 wins.
# Neither input is modified.
def Merge(dict1, dict2):
    merged = dict(dict1)
    merged.update(dict2)
    return merged


# One dict holding all four signatures: "cluster_3" and "cluster_29" (Kurd et al.)
# plus "Blimp1" and "Id3" (Milner et al.).
signatures = Merge(import_kurd_et_al_signature(), import_milner_et_al_signatures())

In [None]:
def scatter_with_gaussian_kde_weights(ax, x, y, weights, exp, **kwargs):
    """
    Scatter the points (x, y) and colour each one by a weighted gaussian kde
    evaluated at that same point.

    Parameters:
    - ax (matplotlib ax): The ax on which to plot the scatter plot.
    - x (np.array): The x values used for both the kde and the scatter.
    - y (np.array): The y values used for both the kde and the scatter.
    - weights (np.array): Per-point weights for the kde.
    - exp (number): Exponent applied to the weights before the kde is fitted.
    - **kwargs: Additional keyword arguments to pass to the scatter function.

    Returns:
    - None
    """
    coords = np.vstack([x, y])
    # Fit the density on the points and evaluate it at those very points
    density = gaussian_kde(coords, weights=weights**exp)
    point_colors = density(coords)

    ax.scatter(x, y, c=point_colors, **kwargs)


# Custom biexponential transformation of x coordinates:
#   a * exp(b * (x - w)) - c * exp(-d * (x - w)) + f
# Accepts a scalar or any array-like and returns the transformed values.
def transformation(x, a=0.1, b=0.1, c=0.5, d=2.5, f=4, w=1):
    shifted = np.array(x) - w
    rising = a * np.exp(b * shifted)
    decaying = c * np.exp(-d * shifted)
    return rising - decaying + f


def draw_gates(ax, gates, transformation, type="edge"):
    """
    Draws IMAP gates on a matplotlib ax.

    Parameters:
    - ax (matplotlib ax): The ax on which to draw the gates.
    - gates (dict): Gate name -> specification with "edges" (list of [x, y]
      vertices), "label_position" ({"x", "y"}) and "fill" (face colour).
    - transformation (function): Applied to the x value of every vertex and of
      the label position; y values are used unchanged.
    - type (str): "fill" draws filled polygons without outline or label;
      "edge" draws the outline plus the gate name. Any other value draws nothing.

    Returns:
    - None
    """
    for name, spec in gates.items():
        # Only the first coordinate (x) is transformed
        vertices = [
            [transformation(vertex[0])] + vertex[1:] for vertex in spec["edges"]
        ]

        if type == "fill":
            ax.add_patch(Polygon(vertices, facecolor=spec["fill"], edgecolor="none"))
            continue
        if type != "edge":
            continue

        ax.add_patch(Polygon(vertices, facecolor="none", edgecolor="#222222"))
        label = spec["label_position"]
        ax.text(
            transformation(label["x"]),
            label["y"],
            name,
            fontsize=6,
            color="#222222",
        )

In [None]:
# Coordinates of the gates.
# Each gate maps its name to:
#   "edges"          - polygon vertices as [x, y]; x is given on the untransformed
#                      scale (draw_gates / classify_cells apply `transformation` to
#                      x only), y is used as-is
#   "label_position" - where draw_gates writes the gate name (x is transformed too)
#   "fill"           - face colour used by draw_gates(type="fill"); 8-digit hex, i.e.
#                      the last two digits are the alpha channel
#   "stroke"         - not read by draw_gates or classify_cells in this notebook
gates = {
    "Top": {
        "edges": [
            [0.15, 0.5],
            [0.6, 0.7],
            [0.8, 0.7],
            [0.8, 1.03],
            [0.15, 1.03],
        ],
        "label_position": {"x": 0.16, "y": 0.9},
        "fill": "#3A9AB224",
        "stroke": "#3A9AB2",
    },
    # The upper boundary of "Crypt" sits 0.02 below the lower boundary of "Top"
    "Crypt": {
        "edges": [
            [0.15, 0.48],
            [0.6, 0.68],
            [0.8, 0.68],
            [0.8, 0.25],
            [0.2, 0],
            [0.15, 0],
        ],
        "label_position": {"x": 0.16, "y": 0.05},
        "fill": "#F11B0024",
        "stroke": "#F11B00",
    },
    # Unlike the other gates, this vertex list repeats its first point at the end
    "Muscularis": {
        "edges": [[0.22, 0], [0.8, 0.23], [6, 0.23], [6, 0], [0.22, 0]],
        "label_position": {"x": 0.6, "y": 0.05},
        "fill": "#BDC88135",
        "stroke": "#BDC881",
    },
}

In [None]:
# Draw a grid of IMAP panels: one column per score in `genes`, one row per batch
def plot_imaps_ucell(
    adata,
    batches,
    genes,
    ax_ticks=[0.15, 0.3, 0.6, 1, 6],
    transformation=transformation,
    gates=gates,
    dpi=100,
    exp=2,
):
    """
    Plots IMAPs weighted by the per-cell values named in "genes".

    Every panel shows the gates (filled), weighted density contours, a scatter of
    the cells coloured by a weighted gaussian kde, and the gate outlines on top.

    Parameters:
    - adata (anndata): The anndata object containing the cells for IMAP plotting.
      Its obs must provide "batch", "epithelial_distance", "crypt_villi_axis" and
      one column per entry of `genes`. The object is not modified.
    - batches (iterable of str): Batch names to plot, one row each.
    - genes (list): Names of obs columns used as weights, one column each. The
      special value "Distribution" weights every cell equally.
    - ax_ticks (list): The x-axis ticks, given on the untransformed scale.
    - transformation (function): The transformation to apply to the x-axis.
    - gates (dict): The gates to draw on the IMAP.
    - dpi (int): The dpi of the plot.
    - exp (int): Exponent applied to the weights for the scatter colouring kde.

    Returns:
    - None
    """
    n_rows, n_cols = len(batches), len(genes)
    fig = plt.figure(figsize=(3 * n_cols, 3 * n_rows), dpi=dpi)

    # Work on a private copy of the cell metadata. Writing the transformed column
    # into `adata.obs` would modify the caller's object and, when a view is passed
    # in, force anndata to materialise a full copy of it.
    obs = adata.obs.copy()
    obs["epithelial_distance_transformed"] = transformation(obs["epithelial_distance"])

    # The per-batch subsets do not depend on the plotted score, so build them once
    obs_by_batch = {bt: obs[obs["batch"] == bt] for bt in batches}

    for col, gene in enumerate(genes):
        print("Plotting value: " + str(gene))
        for row, bt in enumerate(batches):
            sub_obs = obs_by_batch[bt]

            if gene == "Distribution":
                gene_expression = np.ones(len(sub_obs))
            else:
                gene_expression = np.array(sub_obs[gene])

            # Subplot indices are 1-based and run row by row
            ax = fig.add_subplot(n_rows, n_cols, row * n_cols + 1 + col)

            # Draw gates filled in background
            draw_gates(ax, gates=gates, transformation=transformation, type="fill")

            # Draw the density lines
            sns.kdeplot(
                data=sub_obs,
                x="epithelial_distance_transformed",
                y="crypt_villi_axis",
                ax=ax,
                weights=gene_expression,
                color="#444444",
                linewidths=0.5,
            )

            # Colored scatter plot
            scatter_with_gaussian_kde_weights(
                ax=ax,
                x=sub_obs["epithelial_distance_transformed"],
                y=sub_obs["crypt_villi_axis"],
                weights=gene_expression,
                exp=exp,
                s=5,
                cmap="viridis" if gene == "Distribution" else colormap,
            )

            # Ticks are placed at the transformed positions but labelled with the
            # untransformed values
            ax.set_xticks(transformation(ax_ticks))
            ax.set_xticklabels(ax_ticks)

            # Label the axes
            ax.set_xlabel("Epithelial Axis")
            ax.set_ylabel(f"{bt}\nCrypt-Villi Axis")

            ax.set_ylim(-0.02, 1.05)

            # Only the top row carries the score name as title
            ax.set_title(f"{gene}" if row == 0 else "")

            # Gate outlines and labels go on top of the data
            draw_gates(ax, gates=gates, transformation=transformation)

    fig.tight_layout()

In [None]:
# Histogram of the number of genes with a non-zero value per cell, restricted to the
# cells whose Subtype is "Cd8_T-Cell_P14".
# NOTE(review): presumably used to judge the rank cutoff of the scoring below - confirm.
ax = sns.histplot(np.sum(adata.X > 0, axis=1)[adata.obs["Subtype"] == "Cd8_T-Cell_P14"])
ax.set_xlabel("Number of expressed genes")

In [None]:
# Score every signature with UCell, using a fixed seed. The plotting and quantification
# cells below read obs columns named "UCell_<signature name>" - presumably written by
# this call; TODO confirm the column naming and the meaning of maxRank in the ucell docs.
ucell.add_scores(adata, signatures, maxRank=60, seed=42)

Figure 2j

In [None]:
# IMAPs of the two Kurd et al. signature scores for the "Cd8_T-Cell_P14" cells.
# Only batch "day90_SI" is plotted; the replicate "day90_SI_r2" is not included.
plot_imaps_ucell(
    adata[adata.obs["Subtype"] == "Cd8_T-Cell_P14"],
    ["day90_SI"],
    genes=[
        "UCell_cluster_3",
        "UCell_cluster_29",
    ],
)

Extended data figure 2d

In [None]:
# IMAPs of the two Milner et al. signature scores for the "Cd8_T-Cell_P14" cells.
# Only batch "day90_SI" is plotted; the replicate "day90_SI_r2" is not included.
plot_imaps_ucell(
    adata[adata.obs["Subtype"] == "Cd8_T-Cell_P14"],
    ["day90_SI"],
    genes=[
        "UCell_Id3",
        "UCell_Blimp1",
    ],
)

## Quantification

In [None]:
# Cells of subtype "Cd8_T-Cell_P14" only. The following cells write new obs columns
# into this object, so take an explicit copy rather than a view of `adata`.
adata_p14 = adata[adata.obs["Subtype"] == "Cd8_T-Cell_P14"].copy()

In [None]:
def classify_cells(adata, gates, transformation=transformation):
    """
    Assign cells to IMAP gates with a spatial join.

    Side effects on `adata.obs`: the column "epithelial_distance_transformed" is
    (re)computed and the column "gate" is set to False for every cell.

    Parameters:
    - adata (anndata): The anndata object containing the cells to classify.
    - gates (dict): Gate name -> specification; only the "edges" vertices are used.
    - transformation (function): Applied to the cells' epithelial distance and to
      the x value of every gate vertex.

    Returns:
    - result (geopandas dataframe): Left spatial join of the cell points with the
      gate polygons (the polygons are indexed by gate name).
    """
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import geopandas as gpd

    # Always go through `adata.obs` here: writing to the obs of a view replaces it
    adata.obs["epithelial_distance_transformed"] = transformation(
        adata.obs["epithelial_distance"]
    )
    adata.obs["gate"] = False

    print("Creating polygons")
    # Only the x coordinate of each vertex is transformed
    gate_shapes = gpd.GeoSeries(
        {
            name: Polygon(
                [[transformation(vertex[0])] + vertex[1:] for vertex in spec["edges"]]
            )
            for name, spec in gates.items()
        }
    )
    gate_frame = gpd.GeoDataFrame({"gates": gate_shapes}, geometry="gates")

    print("Creating cells")
    cell_points = gpd.GeoSeries.from_xy(
        adata.obs["epithelial_distance_transformed"], adata.obs["crypt_villi_axis"]
    )
    cell_frame = gpd.GeoDataFrame({"cells": cell_points}, geometry="cells")

    print("Joining cells and polygons")
    return gpd.sjoin(cell_frame, gate_frame, how="left")


def get_mean_expression(adata, genes):
    """
    Average the requested values per (batch, gate) group and return them in long form.

    Parameters:
    - adata (AnnData): Annotated data matrix; its obs must contain "batch" and "gate".
    - genes (list): Keys to average (anything `sc.get.obs_df` can resolve).

    Returns:
    - pd.DataFrame: One row per batch, gate and gene with the columns
      "batch", "Day", "gate", "group_size", "gene" and "expression".
      "Day" is the first run of digits found in the batch name (as a string) and
      "group_size" is the number of cells in the group.
    """
    import re

    group_cols = ["batch", "gate"]
    per_cell = sc.get.obs_df(adata, keys=genes + group_cols)

    by_group = per_cell.groupby(group_cols)
    summary = by_group.mean().join(by_group.size().rename("group_size")).reset_index()

    first_number = re.compile(r"\d+")
    summary["Day"] = [first_number.findall(name)[0] for name in summary["batch"]]

    # Wide (one column per gene) -> long (one row per gene)
    return summary.melt(
        id_vars=["batch", "Day", "gate", "group_size"],
        var_name="gene",
        value_name="expression",
    )


def get_scaled_mean_expression(adata, genes):
    """
    Calculate the min-max scaled mean expression of specified genes for each batch
    and gate in the provided AnnData object.

    The group means from `get_mean_expression` are rescaled per gene to [0, 1]:
    (value - gene minimum) / (gene maximum - gene minimum). A gene whose group means
    are all identical yields NaN (0 / 0).

    Parameters:
    - adata (AnnData): Annotated data matrix with observations (rows) and variables (columns).
    - genes (list): A list of gene names for which scaled mean expression is calculated.

    Returns:
    - pd.DataFrame: Same rows, row order, columns and index as the output of
      `get_mean_expression`, with "expression" replaced by the scaled values.
    """
    grouped = get_mean_expression(adata=adata, genes=genes)

    # Use transform instead of groupby.apply with a mutating function: the result of
    # apply depends on the pandas version (newer versions prepend the group key to the
    # index, reorder the rows by gene and may drop the "gene" column), which changes
    # what the downstream pivot / plotting cells receive.
    per_gene = grouped.groupby("gene")["expression"]
    gene_min = per_gene.transform("min")
    gene_max = per_gene.transform("max")
    grouped["expression"] = (grouped["expression"] - gene_min) / (gene_max - gene_min)

    return grouped

In [None]:
# Spatially join the cells with the gate polygons and store the result per cell.
# "index_right" is the index of the matched polygon, i.e. the gate name (the polygons
# are indexed by gate name in classify_cells); cells outside every gate presumably end
# up with a missing value because of the left join - confirm.
classification = classify_cells(adata_p14, gates)
adata_p14.obs["gate"] = classification["index_right"]

In [None]:
# Per-gene min-max scaled mean of the two Kurd et al. signature scores for every
# (batch, gate) group, in long form.
df = get_scaled_mean_expression(adata_p14, ["UCell_cluster_3", "UCell_cluster_29"])
df

In [None]:
# Wide view for reading the numbers: one row per (gene, batch), one column per gate
df.pivot(columns="gate", values="expression", index=["gene", "batch"])

In [None]:
# One facet per signature score, with independent y axes; the "Muscularis" gate is
# left out. Bars summarise the scaled values per gate, and the swarm overlay shows
# the individual rows (one per batch) coloured by batch.
g = sns.FacetGrid(df[df["gate"] != "Muscularis"], col="gene", col_wrap=4, sharey=False)
g.map_dataframe(
    sns.barplot,
    x="gate",
    y="expression",
    capsize=0.2,
    edgecolor="black",
    linewidth=0.5,
    errwidth=1,
    color="#3A9AB2",
)
g.map_dataframe(sns.swarmplot, x="gate", y="expression", hue="batch")