In [None]:
from spatialdata_io import visium_hd

In [None]:
import pandas as pd
import seaborn as sns
import scanpy as sc
import numpy as np

In [None]:
import infercnvpy as cnv

In [None]:
import squidpy as sq

In [None]:
from tqdm.notebook import tqdm

In [None]:
from statannotations.Annotator import Annotator

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal styling to an axes object, in place.

    The top and right spines are hidden, the bottom and left spines are drawn
    with a linewidth of 1.5, and tick marks are kept only along the bottom
    edge while both bottom and left tick labels stay visible.

    Parameters
    ----------
    ax
        Axes-like object exposing ``spines`` and ``tick_params``.

    Returns
    -------
    None
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_style = dict(
        axis="both",
        which="both",
        bottom=True,
        top=False,
        left=False,
        labelbottom=True,
        labelleft=True,
    )
    ax.tick_params(**tick_style)

    for emphasised_side in ("bottom", "left"):
        ax.spines[emphasised_side].set_linewidth(1.5)

In [None]:
# Load the Visium HD output directory into a SpatialData object.
# NOTE(review): placeholder path - point it at the local dataset before running.
sdata = visium_hd(path="/add/path/data/Lung_VisiumHD_10X")

In [None]:
# De-duplicate the variable (gene) names of every table held by `sdata`.
for table in sdata.tables.values():
    table.var_names_make_unique()

In [None]:
# Display the SpatialData object's summary as the cell output.
sdata

In [None]:
import matplotlib.pyplot as plt
import spatialdata_plot

# Render the hires image in the "downscaled_hires" coordinate system on an explicit axes.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
hires_image = sdata.pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
hires_image.pl.show(ax=ax, coordinate_systems="downscaled_hires", title="Full image")

# Create metacells

In [None]:
# Work on a copy of the "square_008um" table so the table stored in `sdata` is not modified.
adata= sdata["square_008um"].copy()

In [None]:
# Boolean gene-set flags; they are passed as `qc_vars` to calculate_qc_metrics below.
# mitochondrial genes: names starting with "MT-"
adata.var["mt"] = adata.var_names.str.startswith("MT-")
# ribosomal genes: names starting with "RPS" or "RPL"
adata.var["ribo"] = adata.var_names.str.startswith(("RPS", "RPL"))
# hemoglobin genes: names starting with "HB" followed by a character other than "(", "P" or ")"
# (inside [...] the parentheses are literal characters, not a group)
adata.var["hb"] = adata.var_names.str.contains("^HB[^(P)]")

In [None]:
# Compute QC metrics in place, including the mt/ribo/hb gene sets flagged above
# (the resulting `pct_counts_mt` column is used later in the notebook).
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=True
)

In [None]:
# Filter genes first (min_cells=100), then bins (min_counts=50); both modify `adata` in place.
sc.pp.filter_genes(adata, min_cells=100)
sc.pp.filter_cells(adata, min_counts=50)

In [None]:
# Keep the raw counts in a layer before X is normalised; the metacell sums below read this layer.
adata.layers["counts"] = adata.X.copy()
# Total-count normalisation followed by log1p, both with default settings.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# PCA with default settings.
sc.tl.pca(adata)

# Neighbour graph with default settings; the Leiden clustering below runs after it.
sc.pp.neighbors(adata)

In [None]:
# Very high resolution (300): these clusters are not interpreted directly, they are
# summed into "metacells" in the next cell.
sc.tl.leiden(adata, flavor="igraph", resolution=300, directed=False, n_iterations=2)

In [None]:
# Aggregate bins into metacells: one row per Leiden cluster holding the summed raw counts.
# A single sparse indicator-matrix product replaces the per-cluster boolean masking of
# `adata`, whose cost grew with (number of clusters x number of bins).
from scipy import sparse

# Cluster labels in order of first appearance, i.e. the same row order as iterating
# over `adata.obs.leiden.unique()`.
cluster_codes, cluster_labels = pd.factorize(adata.obs["leiden"].astype(str))
counts = adata.layers["counts"]

# indicator[k, i] == 1 when bin i belongs to cluster k, so `indicator @ counts`
# yields the per-cluster column sums of the raw counts.
indicator = sparse.csr_matrix(
    (np.ones(adata.n_obs, dtype=counts.dtype), (cluster_codes, np.arange(adata.n_obs))),
    shape=(len(cluster_labels), adata.n_obs),
)
summed_counts = indicator @ counts
if sparse.issparse(summed_counts):
    summed_counts = summed_counts.toarray()

# Rows are named after the cluster label and columns after the genes, as before.
metacelled = sc.AnnData(
    pd.DataFrame(np.asarray(summed_counts), index=cluster_labels, columns=adata.var_names)
)

In [None]:
# Same gene-set flags as for `adata`, now on the metacell object (used as `qc_vars` below).
# mitochondrial genes: names starting with "MT-"
metacelled.var["mt"] = metacelled.var_names.str.startswith("MT-")
# ribosomal genes: names starting with "RPS" or "RPL"
metacelled.var["ribo"] = metacelled.var_names.str.startswith(("RPS", "RPL"))
# hemoglobin genes: names starting with "HB" followed by a character other than "(", "P" or ")"
# (inside [...] the parentheses are literal characters, not a group)
metacelled.var["hb"] = metacelled.var_names.str.contains("^HB[^(P)]")

In [None]:
# QC metrics per metacell, computed in place; `pct_counts_mt` is thresholded further down.
sc.pp.calculate_qc_metrics(
    metacelled, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=True
)

In [None]:
# Keep the summed raw counts in a layer before X is normalised.
metacelled.layers["counts"] = metacelled.X.copy()
# Total-count normalisation followed by log1p, both with default settings.
sc.pp.normalize_total(metacelled)
sc.pp.log1p(metacelled)

# PCA with default settings.
sc.tl.pca(metacelled)

# Neighbour graph with default settings; UMAP and Leiden below are run after it.
sc.pp.neighbors(metacelled)

In [None]:
# UMAP embedding of the metacells, used by the sc.pl.umap plots below.
sc.tl.umap(metacelled)

In [None]:
# Cluster the metacells at resolution 1; the `leiden` labels are mapped to cell types below.
sc.tl.leiden(metacelled, flavor="igraph", resolution=1, directed=False, n_iterations=2)

In [None]:
# Metacell UMAP coloured by Leiden cluster.
sc.pl.umap(metacelled, color="leiden")

## Assign cell types

In [None]:
# High-level lineage signatures, three marker genes per lineage.
# markers from https://www.nature.com/articles/s41591-023-02327-2
high_level_ct = dict(
    Epithelial=["FXYD3", "EPCAM", "ELF3"],
    Endothelial=["CLDN5", "ECSCR", "CLEC14A"],
    Immune=["CD53", "PTPRC", "CORO1A"],
    Stromal=["COL1A2", "DCN", "MFAP4"],
    Neuroendocrine=["CELF3", "SLC6A17", "CDK5R2"],
)

In [None]:
# Score every metacell for each lineage signature; scores are stored as "<lineage>_score".
for lineage, marker_genes in high_level_ct.items():
    sc.tl.score_genes(metacelled, gene_list=marker_genes, score_name=f"{lineage}_score")

In [None]:
# Metacell UMAP panels (two per row): Leiden clusters followed by the five lineage scores.
sc.pl.umap(metacelled, color=["leiden",'Epithelial_score', 'Endothelial_score', 
                         'Immune_score', 'Stromal_score', 'Neuroendocrine_score'],ncols=2)

In [None]:
# One figure per lineage score: distribution of the score within each Leiden cluster.
score_columns = [
    'Epithelial_score',
    'Immune_score',
    'Endothelial_score',
    "Stromal_score",
    "Neuroendocrine_score",
]
for score in score_columns:
    fig, ax = plt.subplots(1, 1)
    # Draw on the axes created for this score (it is also the current axes).
    sns.boxplot(data=metacelled.obs, x="leiden", y=score, ax=ax)

In [None]:
# Z-score each lineage score across metacells (column-wise mean and standard deviation).
lineage_score_columns = [
    'Epithelial_score',
    'Immune_score',
    'Endothelial_score',
    "Stromal_score",
    "Neuroendocrine_score",
]
raw_scores = metacelled.obs[lineage_score_columns].copy()
std_score = (raw_scores - raw_scores.mean()) / raw_scores.std()

In [None]:
# Average the standardised scores within each Leiden cluster, then take for every cluster
# the name of the score column with the highest mean (idxmax across columns).
mapping = pd.concat([metacelled.obs["leiden"],std_score],axis=1).groupby("leiden").mean().idxmax(axis=1)

In [None]:
# Display the cluster -> best-scoring column assignment.
mapping

In [None]:
# Drop the last 6 characters (the "_score" suffix) and convert to a {cluster: lineage} dict.
# NOTE(review): `mapping` is rebound from a Series to a dict, so re-running this cell alone fails.
mapping = mapping.str[:-6].to_dict()

In [None]:
# Translate each metacell's Leiden label into its lineage name.
# `Series.replace` on a categorical column is deprecated in pandas (it will stop renaming
# categories), so the labels are converted to strings and looked up with `map`; the result
# is stored as a categorical again so downstream grouping and plotting behave as before.
putative_ct = metacelled.obs["leiden"].astype(str).map(mapping).astype("category")

In [None]:
# Store the putative lineage per metacell; used as `reference_key` for CNV inference below.
metacelled.obs["cell_type"] = putative_ct

In [None]:
# Integer flag: 1 where pct_counts_mt is above 15, otherwise 0.
metacelled.obs["HighMT"] = (metacelled.obs.pct_counts_mt>15).astype(int)

In [None]:
# Metacell UMAP coloured by putative cell type and by the high-MT flag, side by side.
sc.pl.umap(metacelled, color=["cell_type","HighMT"],ncols=2)

## CNV inference to clean cell types and find malignant cells

In [None]:
# Gene position table, re-indexed by its "gene_name" column.
# NOTE(review): placeholder path - replace with the real location of the CSV.
gencode = pd.read_csv("/add/path/here/auxiliary_data/gencode_v41_positions.csv",index_col=0).set_index("gene_name")
# Keep only the first row for each repeated gene name.
gencode = gencode.loc[~gencode.index.duplicated()]

In [None]:
# Rename the columns positionally.
# NOTE(review): assumes the CSV has exactly these five columns in this order - confirm.
gencode.columns = ["chromosome", "start", "end", "strand", "gene_id"]

In [None]:
# Attach the genomic position columns to `.var`, aligned on gene name.
# A left join keeps every gene and the existing row order; genes missing from `gencode`
# get NaN positions. Dropping any previously attached position columns first makes the
# cell safe to re-run (the earlier concat appended a duplicate set of columns each time).
metacelled.var = (
    metacelled.var
    .drop(columns=gencode.columns, errors="ignore")
    .join(gencode)
)

In [None]:
# Run infercnvpy on the metacells, using those labelled Immune, Stromal or Endothelial
# in `cell_type` as the reference categories.
cnv.tl.infercnv(metacelled, 
                reference_key="cell_type", 
                reference_cat=["Immune","Stromal","Endothelial"])

In [None]:
# Chromosome heatmap of the CNV result, rows grouped by putative cell type.
cnv.pl.chromosome_heatmap(metacelled, groupby="cell_type")

In [None]:
# infercnvpy's PCA, neighbour graph and Leiden clustering (resolution 0.2);
# the clustering is read back below as the `cnv_leiden` column.
cnv.tl.pca(metacelled)
cnv.pp.neighbors(metacelled)
cnv.tl.leiden(metacelled, flavor="igraph", resolution=0.2, directed=False, n_iterations=2)

In [None]:
# Chromosome heatmap grouped by CNV cluster and cell type, with a dendrogram.
cnv.pl.chromosome_heatmap(metacelled, groupby=["cnv_leiden","cell_type"], dendrogram=True)

In [None]:
# CNV UMAP embedding and per-metacell CNV score (`cnv_score` is plotted and summarised below).
cnv.tl.umap(metacelled)
cnv.tl.cnv_score(metacelled)

In [None]:
# 2x2 grid of CNV-UMAP panels: CNV cluster, CNV score and cell type.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(11, 11))
# Only three panels are drawn, so the fourth axes is switched off.
ax4.axis("off")
cnv.pl.umap(
    metacelled,
    color="cnv_leiden",
    legend_loc="on data",
    legend_fontoutline=2,
    ax=ax1,
    show=False,
)
cnv.pl.umap(metacelled, color="cnv_score", ax=ax2, show=False)
# The last call omits show=False, unlike the two calls above.
cnv.pl.umap(metacelled, color="cell_type", ax=ax3)

In [None]:
# Median CNV score per CNV cluster; the Healthy/Malignant labels in the next cell are
# assigned to these cluster ids by hand.
metacelled.obs[["cnv_leiden","cnv_score"]].groupby("cnv_leiden").median()

In [None]:
# Manual annotation of the CNV clusters (ids as produced by cnv.tl.leiden above).
# NOTE(review): the ids are hard-coded; a cluster id missing from this dict keeps its raw
# label, so re-check the dict whenever the clustering changes.
cnv_cluster_status = {"0": "Healthy", "1": "Malignant", "2": "Malignant", "3": "Healthy"}
# `Series.replace` on a categorical column is deprecated in pandas, so the labels are
# converted to strings before replacing and turned back into a categorical afterwards.
metacelled.obs["CNV_status"] = (
    metacelled.obs["cnv_leiden"].astype(str).replace(cnv_cluster_status).astype("category")
)

In [None]:
# Combine the expression-based cell type with the CNV status:
#   Epithelial  + Malignant -> "Malignant epithelial"
#   Epithelial  + Healthy   -> "Healthy epithelial"
#   other types + Malignant -> "Uncertain"
#   other types + Healthy   -> unchanged
# The column is converted to plain strings *before* replacing, because `Series.replace`
# on a categorical column is deprecated in pandas.
metacelled.obs["refined_celltype"] = (
    metacelled.obs["cell_type"].astype(str).replace({"Epithelial": "Malignant epithelial"})
)

is_epithelial = metacelled.obs["cell_type"] == "Epithelial"
is_malignant = metacelled.obs["CNV_status"] == "Malignant"
is_healthy = metacelled.obs["CNV_status"] == "Healthy"

# CNV-positive metacells that are not epithelial cannot be assigned confidently.
metacelled.obs.loc[is_malignant & ~is_epithelial, "refined_celltype"] = "Uncertain"
# Epithelial metacells in CNV-negative clusters are treated as healthy epithelium.
metacelled.obs.loc[is_healthy & is_epithelial, "refined_celltype"] = "Healthy epithelial"

In [None]:
# Metacell UMAP coloured by refined cell type with a fixed colour per label;
# show=False so the returned axes can be restyled and saved.
ax = sc.pl.umap(metacelled, color=["refined_celltype"], palette={"Endothelial": "tab:blue", "Immune": "tab:orange",
                                                            "Stromal": "tab:red", "Malignant epithelial": "tab:green", 
                                                            "Healthy epithelial": "tab:brown", "Uncertain": "tab:purple"},
                                                            ncols=2, show=False)
ax.set_title("Cell type")
pretty_ax(ax)
ax.figure.set_size_inches(5,3)
# NOTE(review): placeholder output path - adjust before running.
ax.figure.savefig("/add/path/here/figures/lung_FFPE_celltypes.png", dpi=200, bbox_inches="tight")

In [None]:
# Propagate the metacell annotation back to the individual bins: each bin's high-resolution
# Leiden label is the name of its metacell. A dictionary lookup via `map` replaces
# `Series.replace` on the categorical column, which is deprecated in pandas and slow when
# the dictionary has one entry per metacell.
metacell_to_celltype = metacelled.obs["refined_celltype"].to_dict()
adata.obs["refined_celltype"] = (
    adata.obs["leiden"].astype(str).map(metacell_to_celltype).astype("category")
)

# Going back to single-spot resolution

In [None]:
# Copy the per-bin annotations onto the table stored in `sdata`. Assignment aligns on the
# obs index, so bins that were filtered out of `adata` receive NaN.
table_008um = sdata['square_008um']
table_008um.obs["cell_type"] = adata.obs.refined_celltype
table_008um.obs["pct_counts_mt"] = adata.obs.pct_counts_mt

# Bin the MT percentage into labelled ranges. include_lowest=True closes the first interval
# on the left so that bins with exactly 0% MT counts fall into "0-5" instead of becoming NaN.
table_008um.obs["MT-RNA bins"] = pd.cut(
    table_008um.obs["pct_counts_mt"],
    [0, 5, 10, 15, 20, 25, 100],
    labels=["0-5", "5-10", "10-15", "15-20", "20-25", "25+"],
    include_lowest=True,
)

# NOTE(review): placeholder output path - adjust before running.
table_008um.write("/add/path/here/processed_data/lung-visium-hd_square_008um.h5ad")

In [None]:
# Replace the in-memory table with the copy read back from the file written above.
# NOTE(review): placeholder path - must match the path used when writing.
sdata['square_008um'] = sc.read_h5ad("/add/path/here/processed_data/lung-visium-hd_square_008um.h5ad")

In [None]:
# Cell types ordered by increasing median % MT counts, excluding "Uncertain".
# observed=True makes the categorical grouping explicit (the implicit default is deprecated
# in pandas) and keeps labels without any bins out of the order; errors="ignore" avoids a
# KeyError when no bin is labelled "Uncertain".
median_mt_by_type = (
    sdata['square_008um'].obs[["cell_type", "pct_counts_mt"]]
    .groupby("cell_type", observed=True)
    .median()
    .sort_values("pct_counts_mt")
)
ct_order = median_mt_by_type.drop("Uncertain", errors="ignore").index

In [None]:
# Box plot of % MT counts per cell type with Mann-Whitney comparisons against
# "Malignant epithelial".
fig, ax = plt.subplots(1,1,figsize=(4,2))
# No `ax=` is passed; the plot is expected to land on the axes created just above.
sns.boxplot(data=sdata['square_008um'].obs, x="cell_type", y="pct_counts_mt", 
            order=ct_order, palette={"Endothelial": "tab:blue", "Immune": "tab:orange",
                                                            "Stromal": "tab:red", "Malignant epithelial": "tab:green", 
                                                            "Healthy epithelial": "tab:brown", "Uncertain": "tab:purple"})
ax.set_xlabel("")
ax.set_ylabel("% MT counts")

ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)

# Compare "Malignant epithelial" with every other cell type present in `ct_order`.
pairs = []
for ct in np.setdiff1d(ct_order,["Malignant epithelial"]):
     pairs.append(("Malignant epithelial",ct))
annot = Annotator(
        ax,
        pairs=pairs,
        data=sdata['square_008um'].obs, x="cell_type", y="pct_counts_mt", order=ct_order
    )
# comparisons_correction=None: no multiple-testing correction is applied.
annot.configure(
    test="Mann-Whitney",
    loc="inside",
    text_format="star",
    show_test_name=False,
    verbose=2,
    comparisons_correction=None,
    fontsize=10,
)
annot.apply_test()
# The y-range is fixed to 0-30 after the tests are computed and before the annotations are drawn.
ax.set_ylim([0,30])
_, test_results = annot.annotate()  
# NOTE(review): placeholder output path - adjust before running.
fig.savefig("/add/path/here/figures/lung_FFPE_distcounts.svg", dpi=200, bbox_inches="tight")

In [None]:
from tqdm.notebook import tqdm
import matplotlib.cm as cm
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import spatialdata_plot
import matplotlib.patches as patches
from spatialdata import bounding_box_query

# Modify the viridis colormap so that the top color is a green (better visible on the
# H&E pink) and so that the value 0 maps to a transparent color.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
viridis = plt.get_cmap("viridis", 256)
# sampling only 0-0.8 of the range (instead of 0-1.0) truncates the colormap
colors = viridis(np.linspace(0, 0.8, 256))

# uniform alpha of 0.8 for every entry ...
colors[:, -1] = 0.8
# ... except the first entry (value 0), which becomes fully transparent white
colors[0, :] = [1.0, 1.0, 1.0, 0.0]

new_cmap = LinearSegmentedColormap.from_list("truncated_viridis", colors)

In [None]:
# Scan the slide in bb_w x bb_h tiles ("downscaled_hires" units) and colour every retained
# tile by the median % MT counts of its malignant epithelial bins.
bb_w = 100
bb_h = 100

xcoords = np.arange(0, 3250, bb_w)
ycoords = np.arange(0, 3250, bb_h)

total_patches = []   # one Rectangle per retained tile, drawn over the image in a later cell
median_values = {}   # "x0-x1_y0-y1" -> per-cell-type median pct_counts_mt for that tile

for i in tqdm(range(len(xcoords) - 1)):
    for j in range(len(ycoords) - 1):
        sdata_small = sdata.query.bounding_box(
            min_coordinate=[xcoords[i], ycoords[j]],
            max_coordinate=[xcoords[i + 1], ycoords[j + 1]],
            axes=("x", "y"),
            target_coordinate_system="downscaled_hires",
        )
        # Skip tiles whose query result holds no bin table.
        if "square_008um" not in sdata_small:
            continue
        tile_obs = sdata_small['square_008um'].obs
        # Require at least 100 bins in the tile ...
        if tile_obs.shape[0] < 100:
            continue
        # ... of which at least 25 are malignant epithelial. Counting with a boolean mask
        # cannot raise KeyError when the label is absent from the tile, unlike
        # value_counts().loc[...].
        if (tile_obs["cell_type"] == "Malignant epithelial").sum() < 25:
            continue

        median_pct_counts = (
            tile_obs[["cell_type", "pct_counts_mt"]]
            .groupby("cell_type", observed=False)
            .median()
            .fillna(0)
        )
        median_values[f"{xcoords[i]}-{xcoords[i+1]}_{ycoords[j]}-{ycoords[j+1]}"] = median_pct_counts

        # The colour scale saturates at a median of 10% MT counts.
        malignant_median = median_pct_counts.loc["Malignant epithelial", "pct_counts_mt"]
        rect = patches.Rectangle(
            (xcoords[i], ycoords[j]),
            bb_w,
            bb_h,
            linewidth=0.1,
            edgecolor="white",
            facecolor=new_cmap(min(1, malignant_median / 10)),
        )
        total_patches.append(rect)

In [None]:
def crop0(x):
    """Crop `x` to the (0, 0)-(3000, 3000) window in "downscaled_hires" coordinates."""
    return bounding_box_query(
        x,
        min_coordinate=[0, 0],
        max_coordinate=[3000, 3000],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


fig, ax = plt.subplots(1, 1, figsize=(10, 5))
cropped_image = crop0(sdata).pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
cropped_image.pl.show(ax=ax, coordinate_systems="downscaled_hires", title="Full image")

# Overlay the per-tile rectangles collected during the tile scan.
for tile_patch in total_patches:
    ax.add_patch(tile_patch)


In [None]:
def crop0(x):
    """Crop `x` to the (0, 0)-(3000, 3000) window in "downscaled_hires" coordinates."""
    return bounding_box_query(
        x,
        min_coordinate=[0, 0],
        max_coordinate=[3000, 3000],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Same cropped image as in the tile overlay, without the rectangles, for comparison.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
cropped_image = crop0(sdata).pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
cropped_image.pl.show(ax=ax, coordinate_systems="downscaled_hires", title="Full image")

In [None]:
# Display the colormap used to colour the tiles.
new_cmap

In [None]:
# Table of tiles x cell types holding the median % MT counts; show the ten tiles with the
# highest value for malignant epithelial bins.
tile_medians = pd.concat(median_values).unstack().droplevel(0, axis=1).fillna(0)
tile_medians.sort_values("Malignant epithelial", ascending=False).head(10)

In [None]:
# Clip the MT percentage to the range 0-15 for plotting.
sdata["square_008um"].obs["trunc_pct_counts_mt"] = sdata["square_008um"].obs.pct_counts_mt.clip(0,15)

# Missing labels become the string "nan" after astype(str); they are relabelled
# "Poor quality" before the column is converted back to a categorical.
sdata["square_008um"].obs.cell_type = sdata["square_008um"].obs.cell_type.astype(str).replace({"nan": "Poor quality"}).astype("category")

# Region 1

In [None]:
from spatialdata import bounding_box_query

# Region 1 window: x 1700-2000, y 2000-2300 in "downscaled_hires" coordinates.
# (An earlier candidate window was x 750-1000, y 1000-1250.)
def crop0(x):
    """Crop `x` to the Region 1 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[1700, 2000],
        max_coordinate=[2000, 2300],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Labels and colours are matched by position.
cell_type_groups = ["Endothelial", "Immune", "Malignant epithelial", "Stromal",
                    "Healthy epithelial", "Uncertain", "Poor quality"]
cell_type_colors = ["tab:blue", "tab:orange", "tab:green", "tab:red",
                    "tab:brown", "tab:purple", "white"]

(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="cell_type",
        groups=cell_type_groups,
        palette=cell_type_colors,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="Cell type", figsize=(10, 10))
)

In [None]:
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import numpy as np
from matplotlib.colors import LinearSegmentedColormap

# Modify the viridis colormap so that the top color is a green (better visible on the
# H&E pink) and so that the value 0 maps to a transparent color.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
viridis = plt.get_cmap("viridis", 256)
# sampling only 0-0.8 of the range (instead of 0-1.0) truncates the colormap
colors = viridis(np.linspace(0, 0.8, 256))
# uniform alpha of 0.7 for every entry ...
colors[:, -1] = 0.7
# ... except the first entry (value 0), which becomes fully transparent white
colors[0, :] = [1.0, 1.0, 1.0, 0.0]

new_cmap = LinearSegmentedColormap.from_list("truncated_viridis", colors)


# Region 1 window: x 1700-2000, y 2000-2300 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 1 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[1700, 2000],
        max_coordinate=[2000, 2300],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="trunc_pct_counts_mt",
        cmap=new_cmap,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="% MT-RNA", figsize=(10, 10))
)

In [None]:
from spatialdata import bounding_box_query

# Region 1 window: x 1700-2000, y 2000-2300 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 1 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[1700, 2000],
        max_coordinate=[2000, 2300],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Image only, without any bin overlay.
region_image = crop0(sdata).pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
region_image.pl.show(coordinate_systems="downscaled_hires", title="H&E", figsize=(10, 10))

# Region 2

In [None]:
from spatialdata import bounding_box_query
# Region 2 window: x 2200-2500, y 2600-2900 in "downscaled_hires" coordinates.
# (An earlier candidate window was x 700-1000, y 600-900.)
def crop0(x):
    """Crop `x` to the Region 2 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[2200, 2600],
        max_coordinate=[2500, 2900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Labels and colours are matched by position.
cell_type_groups = ["Endothelial", "Immune", "Malignant epithelial", "Stromal",
                    "Healthy epithelial", "Uncertain", "Poor quality"]
cell_type_colors = ["tab:blue", "tab:orange", "tab:green", "tab:red",
                    "tab:brown", "tab:purple", "white"]

(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="cell_type",
        groups=cell_type_groups,
        palette=cell_type_colors,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="Cell type", figsize=(10, 10))
)

In [None]:
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import numpy as np
from matplotlib.colors import LinearSegmentedColormap

# Modify the viridis colormap so that the top color is a green (better visible on the
# H&E pink) and so that the value 0 maps to a transparent color.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
viridis = plt.get_cmap("viridis", 256)
# sampling only 0-0.8 of the range (instead of 0-1.0) truncates the colormap
colors = viridis(np.linspace(0, 0.8, 256))
# uniform alpha of 0.7 for every entry ...
colors[:, -1] = 0.7
# ... except the first entry (value 0), which becomes fully transparent white
colors[0, :] = [1.0, 1.0, 1.0, 0.0]

new_cmap = LinearSegmentedColormap.from_list("truncated_viridis", colors)


# Region 2 window: x 2200-2500, y 2600-2900 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 2 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[2200, 2600],
        max_coordinate=[2500, 2900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="trunc_pct_counts_mt",
        cmap=new_cmap,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="% MT-RNA", figsize=(10, 10))
)

In [None]:
from spatialdata import bounding_box_query
# Region 2 window: x 2200-2500, y 2600-2900 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 2 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[2200, 2600],
        max_coordinate=[2500, 2900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Image only, without any bin overlay.
region_image = crop0(sdata).pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
region_image.pl.show(coordinate_systems="downscaled_hires", title="H&E", figsize=(10, 10))

# Region 3

In [None]:
from spatialdata import bounding_box_query
# Region 3 window: x 200-500, y 1600-1900 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 3 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[200, 1600],
        max_coordinate=[500, 1900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Labels and colours are matched by position.
cell_type_groups = ["Endothelial", "Immune", "Malignant epithelial", "Stromal",
                    "Healthy epithelial", "Uncertain", "Poor quality"]
cell_type_colors = ["tab:blue", "tab:orange", "tab:green", "tab:red",
                    "tab:brown", "tab:purple", "white"]

(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="cell_type",
        groups=cell_type_groups,
        palette=cell_type_colors,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="Cell type", figsize=(10, 10))
)

In [None]:
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import numpy as np
from matplotlib.colors import LinearSegmentedColormap

# Modify the viridis colormap so that the top color is a green (better visible on the
# H&E pink) and so that the value 0 maps to a transparent color.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
viridis = plt.get_cmap("viridis", 256)
# sampling only 0-0.8 of the range (instead of 0-1.0) truncates the colormap
colors = viridis(np.linspace(0, 0.8, 256))
# uniform alpha of 0.7 for every entry ...
colors[:, -1] = 0.7
# ... except the first entry (value 0), which becomes fully transparent white
colors[0, :] = [1.0, 1.0, 1.0, 0.0]

new_cmap = LinearSegmentedColormap.from_list("truncated_viridis", colors)


# Region 3 window: x 200-500, y 1600-1900 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 3 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[200, 1600],
        max_coordinate=[500, 1900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


(
    crop0(sdata)
    .pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
    .pl.render_shapes(
        "Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_square_008um",
        color="trunc_pct_counts_mt",
        cmap=new_cmap,
    )
    .pl.show(coordinate_systems="downscaled_hires", title="% MT-RNA", figsize=(10, 10))
)

In [None]:
from spatialdata import bounding_box_query
# Region 3 window: x 200-500, y 1600-1900 in "downscaled_hires" coordinates.
def crop0(x):
    """Crop `x` to the Region 3 bounding box."""
    return bounding_box_query(
        x,
        min_coordinate=[200, 1600],
        max_coordinate=[500, 1900],
        axes=("x", "y"),
        target_coordinate_system="downscaled_hires",
    )


# Image only, without any bin overlay.
region_image = crop0(sdata).pl.render_images("Visium_HD_Human_Lung_Cancer_HD_Only_Experiment1_hires_image")
region_image.pl.show(coordinate_systems="downscaled_hires", title="H&E", figsize=(10, 10))