In [None]:
# Install UCell straight from its GitHub repository. No tag or commit is given
# in the URL, so pip installs whatever the repository's default branch holds
# at the time the cell is run.
%pip install git+https://github.com/maximilian-heeg/UCell.git

In [None]:
import numpy as np
import scanpy as sc

In [None]:
# Human dataset; its gene index is used further down to restrict the mouse
# genes, and its cells receive the final subtype labels.
human_h5ad = "../data/adata/human.h5ad"
adata_human = sc.read(human_h5ad)

# Create the mouse signatures

In [None]:
# Load the mouse time course, then keep only the two day-90 batches and the
# genes whose upper-cased symbol is present in the human dataset.
adata_mouse = sc.read_h5ad("../data/adata/timecourse.h5ad")

is_day90_batch = adata_mouse.obs.batch.isin(["day90_SI", "day90_SI_r2"])
is_shared_gene = adata_mouse.var.index.str.upper().isin(adata_human.var.index)

# Slicing an AnnData yields a view. Copy it so that the index rename here and
# the in-place normalisation that follows operate on an independent object
# instead of implicitly converting a view.
adata_mouse = adata_mouse[is_day90_batch, is_shared_gene].copy()

# Upper-case the mouse gene symbols so they match the human gene names.
adata_mouse.var.index = adata_mouse.var.index.str.upper()

In [None]:
# Scale every cell to the same total (1e4), then apply log1p. Neither return
# value is used: both calls update `adata_mouse` itself.
target_counts_per_cell = 1e4
sc.pp.normalize_total(adata_mouse, target_sum=target_counts_per_cell)
sc.pp.log1p(adata_mouse)

In [None]:
# Coordinates of the gates
#
# One entry per gate, keyed by the gate name. `classify_cells` reads only the
# "edges" of each gate:
#   * every edge is an [x, y] polygon vertex, given as a list;
#   * x is passed through `transformation` before the polygon is built and is
#     matched against the transformed `epithelial_distance` of a cell;
#   * y is used as-is and matched against `crypt_villi_axis`.
# "label_position", "fill" and "stroke" are not read by the code visible in
# this notebook -- presumably plotting metadata (label anchor and RGBA/RGB hex
# colours); TODO confirm against the plotting code.
gates_mouse = {
    "Top": {
        "edges": [
            [0.15, 0.5],
            [0.6, 0.7],
            [0.8, 0.7],
            [0.8, 1.03],
            [0.15, 1.03],
        ],
        "label_position": {"x": 0.16, "y": 0.9},
        "fill": "#3A9AB244",
        "stroke": "#3A9AB2",
    },
    "Crypt": {
        "edges": [
            [0.15, 0.48],
            [0.6, 0.68],
            [0.8, 0.68],
            [0.8, 0.25],
            [0.2, 0],
            [0.15, 0],
        ],
        "label_position": {"x": 0.16, "y": 0.05},
        "fill": "#F11B0044",
        "stroke": "#F11B00",
    },
    "Muscularis": {
        # Unlike the other two gates, this vertex list repeats its first point
        # at the end (explicitly closed ring).
        "edges": [[0.22, 0], [0.8, 0.23], [6, 0.23], [6, 0], [0.22, 0]],
        "label_position": {"x": 0.6, "y": 0.05},
        "fill": "#BDC88155",
        "stroke": "#BDC881",
    },
}

In [None]:
def filter_adata_expressed_in_n_cells(adata, percent=0.05):
    """Keep only the columns (genes) detected in more than ``percent`` of rows (cells).

    A gene counts as detected in a cell when its entry in ``adata.X`` is
    strictly greater than zero.

    Parameters
    ----------
    adata
        Object exposing an ``X`` matrix (dense NumPy array or SciPy sparse
        matrix) and supporting ``adata[:, mask]`` column selection, such as an
        AnnData. Rows are assumed to be cells and columns genes.
    percent
        Threshold expressed as a fraction between 0 and 1 (``0.05`` = 5 %).
        A gene is kept only if its detected fraction is strictly above it.

    Returns
    -------
    The result of ``adata[:, keep]``; the input object is not modified.
    """
    detected = adata.X > 0
    # For sparse input the per-gene mean comes back as a 1 x n_genes
    # ``np.matrix``; flatten it so the mask is always a plain 1-D boolean array
    # regardless of how ``X`` is stored.
    fraction_detected = np.asarray(np.mean(detected, axis=0)).ravel()
    keep = fraction_detected > percent
    return adata[:, keep]


# Restrict to the cells annotated as "Cd8_T-Cell_P14" and drop rarely detected
# genes. Both steps return views, and `obs` columns are written to this object
# later, so materialise it once here with an explicit copy.
is_p14_cd8 = adata_mouse.obs.Subtype.isin(["Cd8_T-Cell_P14"])
adata_mouse_cd8 = filter_adata_expressed_in_n_cells(adata_mouse[is_p14_cd8]).copy()

In [None]:
# Custom biexponential transformation. Maybe not needed for IF data
def transformation(x, a=0.1, b=0.5, c=1, d=3, f=4, w=1):
    """Evaluate ``a * exp(b * (x - w)) - c * exp(-d * (x - w)) + f`` element-wise.

    ``x`` may be a scalar or any array-like; it is converted with ``np.array``
    before the formula is applied, so the result is a NumPy value of the same
    shape as ``x``.
    """
    shifted = np.array(x) - w
    growing_term = a * np.exp(b * shifted)
    decaying_term = c * np.exp(-d * shifted)
    return growing_term - decaying_term + f


def classify_cells(adata, gates, transformation=transformation):
    """
    Classify cells based on the gates.

    Each gate is turned into a polygon and every cell into a point
    (transformed ``epithelial_distance``, ``crypt_villi_axis``); the two are
    then spatially joined.

    Parameters
    ----------
    adata
        Object whose ``obs`` table has the columns ``epithelial_distance`` and
        ``crypt_villi_axis``.
    gates
        Mapping of gate name to a dict with an ``"edges"`` entry: a sequence of
        ``(x, y)`` vertices (lists or tuples). Only ``x`` is passed through
        ``transformation``; the remaining coordinates are used unchanged.
    transformation
        Callable applied to the cells' ``epithelial_distance`` and to the x
        coordinate of every gate vertex.

    Returns
    -------
    The left spatial join of the cell points with the gate polygons. The
    caller reads the matched gate name from its ``index_right`` column. Because
    it is a left join, a cell matching several gates appears once per gate.

    Side effects
    ------------
    Writes ``adata.obs["epithelial_distance_transformed"]`` and sets
    ``adata.obs["gate"]`` to ``False``.
    """
    from shapely.geometry.polygon import Polygon
    import geopandas as gpd

    adata.obs["epithelial_distance_transformed"] = transformation(
        adata.obs["epithelial_distance"]
    )
    adata.obs["gate"] = False

    print("Creating polygons")
    polygons = {}
    for gate_name, gate in gates.items():
        # Apply transformation to x values. Unpacking (instead of list
        # concatenation) lets vertices be tuples as well as lists.
        vertices = [[transformation(x), *rest] for x, *rest in gate["edges"]]
        polygons[gate_name] = Polygon(vertices)
    polygons = gpd.GeoSeries(polygons)
    gpd_poly = gpd.GeoDataFrame({"gates": polygons}, geometry="gates")

    print("Creating cells")
    cells = gpd.GeoSeries.from_xy(
        adata.obs["epithelial_distance_transformed"], adata.obs["crypt_villi_axis"]
    )
    gpd_cells = gpd.GeoDataFrame({"cells": cells}, geometry="cells")

    print("Joining cells and polygons")
    result = gpd.sjoin(
        gpd_cells,
        gpd_poly,
        how="left",
    )
    return result


classification = classify_cells(adata_mouse_cd8, gates_mouse)

# The join returns one row per (cell, gate) match. If gates overlapped, a cell
# would appear more than once and the column assignment below would fail with
# an unrelated-looking reindexing error, so check for that explicitly.
if not classification.index.is_unique:
    raise ValueError(
        "Some cells fall into more than one gate; the gates must not overlap."
    )

# Gate name per cell (missing for cells outside every gate), aligned on the
# cell index.
adata_mouse_cd8.obs["gate"] = classification["index_right"]

In [None]:
# Keep only the cells gated as "Crypt" or "Top". The selection is copied
# because the differential-expression results are stored on this object
# afterwards, and a plain selection would only be a view.
in_crypt_or_top = adata_mouse_cd8.obs.gate.isin(["Crypt", "Top"])
adata_mouse_cd8 = adata_mouse_cd8[in_crypt_or_top].copy()

In [None]:
# Rank genes between the gates with the Wilcoxon method; the results are read
# back from `adata_mouse_cd8.uns["rank_genes_groups"]` in the next step.
sc.tl.rank_genes_groups(
    adata_mouse_cd8,
    groupby="gate",
    method="wilcoxon",
)

In [None]:
# `names` holds the ranked genes as a structured array: one record per rank,
# one field per group. Take the 15 top-ranked records and transpose so that
# each row of `signature_genes` is the gene list of one group.
ranked_names = adata_mouse_cd8.uns["rank_genes_groups"]["names"]
signature_genes = np.array([list(record) for record in ranked_names[:15]]).T

# Read the group order from the result's own field names instead of assuming
# it, so every gene list is paired with the gate it was actually computed for.
signature_names = list(ranked_names.dtype.names)
signature_regions = dict(zip(signature_names, signature_genes))

signature_regions

### Humans

In [None]:
# Human cells with `peyers == 0` and `CD8_column == 1`. Copied because score
# and label columns are added to `adata.obs` afterwards, and a plain selection
# of `adata_human` would only be a view.
is_cd8_outside_peyers = (adata_human.obs.peyers == 0) & (
    adata_human.obs.CD8_column == 1
)
adata = adata_human[is_cd8_outside_peyers].copy()

In [None]:
# Upper-cased gene lists per region, taken row by row from `signature_genes`.
signature_names = ["Crypt", "Top"]
signature_regions_human = {
    region: [gene.upper() for gene in signature_genes[row]]
    for row, region in enumerate(signature_names)
}

In [None]:
import ucell

# Score every cell in `adata` for both signatures; the scores are plotted below
# from the `UCell_Crypt` and `UCell_Top` columns.
ucell.add_scores(
    adata,
    signatures=signature_regions_human,
    maxRank=15,
)

In [None]:
# Show both signature scores on the "mde" embedding.
ucell_score_columns = ["UCell_Crypt", "UCell_Top"]
sc.pl.embedding(adata, basis="mde", color=ucell_score_columns)

In [None]:
import scipy.stats as stats
import pandas as pd

In [None]:
# Standardise each score across cells, then flag the cells whose "Top" z-score
# is larger than their "Crypt" z-score.
top_zscore = stats.zscore(adata.obs["UCell_Top"])
crypt_zscore = stats.zscore(adata.obs["UCell_Crypt"])
top_greater_than_crypt = top_zscore > crypt_zscore

In [None]:
# Store the flag as a categorical column. `np.asarray` is used instead of
# `.values` so this works whether the z-score comparison produced a pandas
# Series or a plain NumPy array.
adata.obs["Effector_Enriched"] = pd.Categorical(np.asarray(top_greater_than_crypt))

In [None]:
# Show the effector-enriched flag on the "mde" embedding.
sc.pl.embedding(
    adata,
    color=["Effector_Enriched"],
    basis="mde",
)

In [None]:
# (cell id, effector-enriched flag) for every scored CD8 cell.
pairs = list(zip(adata.obs.index.values, adata.obs.Effector_Enriched.values))

In [None]:
# Lookup table: cell id -> effector-enriched flag.
cd8_dict = {cell_id: is_effector for cell_id, is_effector in pairs}

In [None]:
# Flag for every cell of the full human dataset: the stored value for scored
# CD8 cells, the string "None" for all others. A direct dictionary lookup with
# a default replaces rebuilding and scanning the key list once per cell.
effector_like = [
    cd8_dict.get(cell_id, "None") for cell_id in adata_human.obs.index.values
]

In [None]:
# Attach the per-cell flag to the full human dataset. The list was built in the
# order of `adata_human.obs.index`, so it lines up row by row; values are
# booleans for scored CD8 cells and the string "None" otherwise.
adata_human.obs["Effector_like"] = effector_like

In [None]:
# Relabel CD8 cells by their effector flag; every other cell keeps its
# existing `Subtype`. Any CD8 cell whose flag is not True -- including the
# unscored ones carrying "None" -- is labelled "CD8AB+ Stem Like".
new_subtypes = []
human_obs = adata_human.obs
for cd8_flag, effector_flag, subtype in zip(
    human_obs["CD8_column"].values,
    human_obs["Effector_like"].values,
    human_obs["Subtype"].values,
):
    if cd8_flag != 1:
        new_subtypes.append(subtype)
    elif effector_flag == True:
        new_subtypes.append("CD8AB+ Effector Like")
    else:
        new_subtypes.append("CD8AB+ Stem Like")

In [None]:
# Store the relabelled subtypes (CD8 cells split into effector-like and
# stem-like) as a categorical column on the full human dataset.
adata_human.obs["Subtype_split_cd8"] = pd.Categorical(new_subtypes)

In [None]:
# Display the resulting set of subtype labels as a quick check.
adata_human.obs["Subtype_split_cd8"].cat.categories

In [None]:
# Keep only the cells whose `peyers` value is below 1.
peyers_below_one = adata_human.obs.peyers < 1
adata_human = adata_human[peyers_below_one]

In [None]:
import os

# Create the output folder for the exported arrays. Attempting the creation
# and handling "already exists" avoids the gap between checking for the folder
# and creating it.
try:
    os.makedirs("tmp")
except FileExistsError:
    print("Folder 'tmp' already exists.")
else:
    print("Folder 'tmp' created successfully!")

In [None]:
# Export the expression matrix, gene names, batch labels, x/y coordinates and
# the split subtype labels as individual .npy files.
exports = {
    "tmp/X.npy": adata_human.X,
    "tmp/var.npy": adata_human.var.index.values,
    "tmp/batch.npy": adata_human.obs.batch.values,
    "tmp/x_coord.npy": adata_human.obs.x.values,
    "tmp/y_coord.npy": adata_human.obs.y.values,
    "tmp/type.npy": adata_human.obs["Subtype_split_cd8"].values,
}
for npy_path, array in exports.items():
    np.save(npy_path, array)