In [None]:
from pathlib import Path
import anndata as ad
import scanpy as sc
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import geopandas as gpd
from geopandas.tools import sjoin
import json
import pandas as pd
import stlearn as st
st.settings.set_figure_params(dpi=300)
from sklearn import metrics
import scipy.stats as stats
# import hvplot.pandas 

In [None]:
# stimage custom 
import sys
# Make the local STimage checkout importable: the directory that CONTAINS the
# `stimage` package has to be on sys.path for `from stimage._utils import ...`.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
file = Path("/scratch/imb/Xiao/STimage/stimage").resolve() # path to src code
parent = file.parent
# Guarded so that re-running this cell does not pile up duplicate entries.
if str(parent) not in sys.path:
    sys.path.append(str(parent))

from tqdm import tqdm

from stimage._utils import gene_plot

import pickle
import stlearn as st


In [None]:
def pred_to_sc_adata(adata, scale=10):
    """Return a copy of ``adata`` prepared for image-based spatial plotting.

    The conversion is done on a copy, so the object passed in is not modified.
    On that copy the function:

    * stores columns 0 and 1 of ``obsm["spatial"]``, floor-divided by
      ``scale``, as ``obs["imagerow"]`` and ``obs["imagecol"]``;
    * swaps columns 0 and 1 of ``obsm["spatial"]``;
    * shrinks the ``'fulres'`` image of the library named in the first row of
      ``obs['library_id']`` by ``scale`` and stores it as the ``'hires'`` image;
    * resets ``scalefactors`` and ``metadata`` for that library and sets
      ``use_quality`` to ``"hires"``.

    Parameters
    ----------
    adata : AnnData
        Must provide ``obs['library_id']``, ``obsm["spatial"]`` and
        ``uns["spatial"][library_id]['images']['fulres']``.
    scale : int, default 10
        Positive integer down-scaling factor applied to the coordinates and
        to the image; ``tissue_hires_scalef`` is recorded as ``1 / scale``.

    Returns
    -------
    AnnData
        The converted copy.

    Raises
    ------
    ValueError
        If ``scale`` is smaller than 1.

    Notes
    -----
    The column swap is not idempotent: passing the returned object through
    this function again swaps the coordinates back.
    """
    if scale < 1:
        raise ValueError(f"scale must be a positive integer, got {scale!r}")

    # Work on a copy so the caller's object is never left half-converted.
    adata = adata.copy()
    library_id = adata.obs['library_id'].values[0]

    # Image-space coordinates are taken BEFORE the columns are swapped.
    adata.obs["imagerow"] = adata.obsm["spatial"][:, 0] // scale
    adata.obs["imagecol"] = adata.obsm["spatial"][:, 1] // scale
    # Fancy indexing on the right-hand side yields a copy, so this is a safe swap.
    adata.obsm["spatial"][:, [0, 1]] = adata.obsm["spatial"][:, [1, 0]]

    library = adata.uns["spatial"][library_id]
    img = Image.fromarray(library['images']['fulres'])
    img = img.resize((img.size[0] // scale, img.size[1] // scale))

    library['scalefactors'] = {}
    library['metadata'] = {}
    library['use_quality'] = "hires"
    library['scalefactors']["tissue_hires_scalef"] = 1 / scale
    # Diameters stay at the original fixed value of 10; they are not derived
    # from `scale` - TODO confirm that this is intended.
    library['scalefactors']["fiducial_diameter_fullres"] = 10
    library['scalefactors']["spot_diameter_fullres"] = 10
    library['images']['hires'] = np.array(img)
    return adata

In [None]:
# Storage locations used by this notebook (absolute, machine-specific paths).
BASE_PATH = Path('/scratch/imb/Xiao/')
# Prediction .h5ad files; spelled as a full absolute path, but it lies under BASE_PATH.
PRED_PATH = Path('/scratch/imb/Xiao/Q2051/STimage_project/TCGA_pred')
# Annotation files; read further down as f"{TCGA_ID}.geojson".
ANNO_PATH = BASE_PATH / 'Q2051/STimage_project/STimage_dataset/RAW/TCGA_annotation_2'
# Output folder for tables/figures; created here if it does not exist yet.
OUT_PATH = BASE_PATH / 'STimage/development/TCGA_test/PLOTS'
OUT_PATH.mkdir(parents=True, exist_ok=True)

In [None]:
# NOTE: on a top-to-bottom run this list is immediately replaced by the
# SAMPLE_LIST assignment in the next cell, so these samples are not analysed
# unless that cell is skipped.
SAMPLE_LIST = [
    "TCGA-BH-A0C7-01B-01-TSA.deec957f-1264-4169-945e-24d4bd1988cd",
    "TCGA-BH-A0C7-01Z-00-DX1.C70D358E-C48F-4F69-86CE-3218E9C95837",
    "TCGA-A7-A0CD-01A-01-BSA.a48267de-7bf0-409f-a8b5-9712f2e06a90",
]

In [None]:
# Active sample list: each entry is "<TCGA slide id>.<suffix>"; the sample
# cell below picks the first entry.
SAMPLE_LIST = [
    "TCGA-3C-AALJ-01Z-00-DX1.777C0957-255A-42F0-9EEB-A3606BCF0C96",
    "TCGA-3C-AALJ-01Z-00-DX2.62DFE56B-B84C-40F9-9625-FCB55767B70D",
    "TCGA-A2-A3KD-01A-01-TSA.6CE03E54-350A-4AA7-A249-E55FF073FCB2",
]

In [None]:
# Analyse the first sample of the list; TCGA_ID is the part of the sample
# name before the first '.' and is used for annotation/output file names.
SAMPLE = SAMPLE_LIST[0]
TCGA_ID = SAMPLE.split('.', maxsplit=1)[0]

In [None]:
# Load the prediction AnnData stored for the selected sample.
pred_file = PRED_PATH / f"pred_{SAMPLE}.h5ad"
adata = ad.read_h5ad(pred_file)

In [None]:
# Convert the loaded predictions with pred_to_sc_adata (defined above).
# NOTE: not idempotent - the helper swaps the two obsm["spatial"] columns, so
# running this cell a second time on the reassigned `adata` swaps them back.
# Re-run the loading cell before re-running this one.
adata = pred_to_sc_adata(adata)

In [None]:
adata.to_df()

In [None]:
adata.uns["spatial"]["TCGA_svs"]["images"]["fulres"].shape

In [None]:
ANNO_PATH

In [None]:
# Read annotation Json file and convert to geopandas dataframe
# GeoJSON is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the locale default of open().
with open(ANNO_PATH / f"{TCGA_ID}.geojson", encoding="utf-8") as f:
    anno = json.load(f)
# Only the parsed dict is needed, so the frame is built after the file is closed.
gdf_anno = gpd.GeoDataFrame.from_features(anno["features"])
# gdf_anno = pd.concat([gdf_anno.drop(['classification'], axis=1), gdf_anno['classification'].apply(pd.Series)], axis=1)

In [None]:
gdf_anno

In [None]:
# gdf_anno["name"] = "Tumour"
# adata.obsm["spatial_40x"] = adata.obsm["spatial"].copy()
# adata.obsm["spatial_40x"][:,0] = adata.obsm["spatial"][:,0]
# adata.obsm["spatial_40x"][:,1] = adata.obsm["spatial"][:,1]

In [None]:
# Doubled copy of the first two coordinate columns, used to match the spots
# against the annotation polygons (presumably 40x annotations vs. 20x
# coordinates, judging by the variable names - verify).
coords_40x = adata.obsm["spatial"].copy()
coords_40x[:, [0, 1]] *= 2
adata.obsm["spatial_40x"] = coords_40x

In [None]:
# Turn every spot coordinate into a shapely Point, indexed by observation name
spot_xy = adata.obsm["spatial_40x"]
spot_points = [Point(x, y) for x, y in zip(spot_xy[:, 0], spot_xy[:, 1])]
pnts = gpd.GeoDataFrame(geometry=spot_points, index=adata.obs_names)

In [None]:
pnts

In [None]:
# Label each spot with the annotation polygon it falls into. The left join
# keeps spots that match no polygon; a spot matching several polygons yields
# several rows, of which only the first is kept.
joined = sjoin(pnts, gdf_anno, how="left")
is_repeat = joined.index.duplicated(keep='first')
pnts_join = joined.loc[~is_repeat]

In [None]:
pnts_join

In [None]:
# add the annotation to visium anndata object
adata_anno = adata[pnts_join.index].copy()
# Fill the missing labels BEFORE assigning the column: calling
# `obs["annotation"].fillna(..., inplace=True)` works on a temporary column
# object and does not update `obs` under pandas Copy-on-Write (it already
# emits a FutureWarning on pandas 2.x), which would leave NaN labels behind.
annotation = pnts_join["name"].fillna("Others")
# Assigned as a Series, so values are aligned to the observations by index.
adata_anno.obs["annotation"] = annotation.astype("category")

In [None]:
# Halve the annotation geometries about the origin (inverse of the x2 applied
# to the spot coordinates for "spatial_40x").
# NOTE(review): geopandas documents `.scale` as returning a GeoSeries, so the
# attribute columns (e.g. "name") are presumably not carried over - confirm
# before using this object for anything but plotting.
gdf_anno_20x = gdf_anno.scale(xfact=0.5, yfact=0.5, origin=(0,0))
# gdf_anno_20x = gdf_anno

In [None]:
# fig, ax = plt.subplots()
# gdf_anno_20x.plot(ax=ax)
# plt.imshow(adata.uns["spatial"]["TCGA_svs"]["images"]["fulres"])
# fig.savefig(OUT_PATH / f"{TCGA_ID}_annotation.pdf")

In [None]:
OUT_PATH

In [None]:
# st.pl.cluster_plot(adata_anno, use_label="annotation",fname=str(OUT_PATH / f"{TCGA_ID}_annotation_tile.pdf"),dpi=300)

In [None]:
# From here on `adata` is the annotated object.
# NOTE: this rebinds `adata`; earlier cells that read `adata` will see the
# annotated copy if they are re-run after this cell.
adata = adata_anno.copy()

In [None]:
# Dimensionality reduction with scanpy defaults: PCA, then the neighbourhood
# graph, then a UMAP embedding. The clustering cell below runs after these.
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)


In [None]:
# Louvain clustering at resolution 0.3; labels are written to obs["clusters"]
# and are compared as strings ("0", "1", ...) in the cells below.
sc.tl.louvain(adata, resolution=0.3, key_added="clusters")

In [None]:
# st.pl.cluster_plot(adata, use_label="clusters",fname=str(OUT_PATH / f"{TCGA_ID}_annotation_cluster.pdf"),dpi=300)

In [None]:
adata.obs["annotation"].unique()

In [None]:
# Reduce both labelings to booleans for the 2x2 association test below.
# NOTE(review): the cluster ids are hard-coded - presumably chosen by looking
# at the clusters of this particular sample; re-check after changing SAMPLE
# or the clustering resolution.
selected_clusters = ["0", "1", "3"]
tumour_labels = ['In-situ', 'Invasive']
adata.obs["clusters_"] = adata.obs["clusters"].isin(selected_clusters)
adata.obs["annotation_"] = adata.obs["annotation"].isin(tumour_labels)

In [None]:
# Cross-tabulate the two boolean labels (rows: clusters_, columns:
# annotation_) and keep the raw counts as a NumPy array for the test below.
crosstab_df = pd.crosstab(index=adata.obs["clusters_"], columns=adata.obs["annotation_"])
contingency_table = crosstab_df.to_numpy()

In [None]:
pd.crosstab(adata.obs["clusters_"] , adata.obs["annotation_"])

In [None]:
# Fisher's exact test for association between the selected clusters and the
# tumour annotation, computed on the contingency table built above.
fisher_result = stats.fisher_exact(contingency_table)
odds_ratio, p_value = fisher_result
print('Odds ratio:', odds_ratio)
print('P-value:', p_value)

In [None]:
# Rank genes for every Louvain cluster with the 't-test_overestim_var' method;
# the result is read back with sc.get.rank_genes_groups_df in the next cell.
sc.tl.rank_genes_groups(adata, groupby='clusters', method='t-test_overestim_var')

In [None]:
# Ranking table for cluster '0' only (hard-coded group), ordered from the
# highest to the lowest log fold change.
result = sc.get.rank_genes_groups_df(adata, group='0')
result_sorted = result.sort_values(by='logfoldchanges', ascending=False)

In [None]:
# Save the sorted table; note it only contains the group selected above
# (cluster '0'), although the file name does not mention the cluster.
result_sorted.to_csv(OUT_PATH / f"{TCGA_ID}_annotation_cluster_DEG.csv")

In [None]:
result_sorted.iloc[0:20,:]

In [None]:
result_sorted.query('names == "ESR1"')

In [None]:
# st.pl.gene_plot(adata, gene_symbols="ESR1",size=3, cell_alpha=1, fname=str(OUT_PATH / f"{TCGA_ID}_ESR1.pdf"),dpi=300)

In [None]:
result_sorted.query('names == "GATA3"')

In [None]:
# st.pl.gene_plot(adata, gene_symbols="GATA3",size=3, cell_alpha=1, fname=str(OUT_PATH / f"{TCGA_ID}_GATA3.pdf"),dpi=300)

In [None]:
# st.pl.gene_plot(adata, gene_symbols="VEGFA",size=3, cell_alpha=1, fname=str(OUT_PATH / f"{TCGA_ID}_VEGFA.pdf"),dpi=300)

In [None]:
# st.pl.gene_plot(adata, gene_symbols="MPPED1",size=3, cell_alpha=1, fname=str(OUT_PATH / f"{TCGA_ID}_MPPED1.pdf"),dpi=300)

In [None]:
OUT_PATH