In [None]:
import matplotlib.pyplot as plt
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import celltypist

In [None]:
# Record the package versions of this session alongside the analysis output.
import session_info
session_info.show()

In [None]:
# Apply scanpy's figure defaults with a resolution of 80 dpi for every plot below.
sc.settings.set_figure_params(dpi=80)

# Variables

In [None]:
# Sample to annotate; the id is interpolated into the input file name.
sample_id = 'Hst45-HEA-0-FFPE-1'

# Input AnnData for this sample (presumably filtered raw counts, going by the file name).
path_adata = (
    '/home/kk837/rds/rds-teichlab-C9woKbOCf2Y/kk837/Foetal/anndata_objects/Xenium/'
    f'{sample_id}_5K_filtered_raw.h5ad'
)

# Directory that holds the CellTypist model (.pkl) files loaded by the prediction cells.
celltypist_model_dir = (
    '/rfs/project/rfs-iCNyzSAaucw/kk837/notebooks/Foetal/Xenium/5K/celltypist_models'
)

In [None]:
# Display the kernel's current working directory.
os.getcwd()

# Read in data

In [None]:
# Load the sample's AnnData object from path_adata and display its summary.
adata = sc.read_h5ad(path_adata)
adata

# Normalise the counts

In [None]:
# Drop genes detected in fewer than 3 cells; adata is modified in place.
sc.pp.filter_genes(adata, min_cells=3)

In [None]:
# Dimensions of adata after the gene filter.
adata.shape

In [None]:
# Scale every cell to a total of 1e4 counts, then log1p-transform.
# Both calls modify adata in place, so this cell is NOT idempotent: running it a
# second time would normalise already-normalised values. Re-load adata first.
# NOTE(review): CellTypist is documented to expect log1p data normalised to
# 10,000 counts per cell, which is what this produces - confirm against its docs.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Peek at the first five stored values of X after normalisation.
# `.data` assumes X is a scipy sparse matrix - TODO confirm for this input.
adata.X.data[:5]

# CellTypist prediction: coarse-grain

In [None]:
# coarse-grained: annotate every cell with the global -> coarse model
coarse_model_path = f'{celltypist_model_dir}/global2coarse.pkl'
predictions_b2c = celltypist.annotate(adata, model=coarse_model_path, majority_voting=False)

# add to anndata, then give the generic CellTypist columns level-specific names
# so that the later mid/fine predictions cannot collide with them
adata = predictions_b2c.to_adata(insert_labels=True, insert_conf=True)
coarse_column_names = {
    'predicted_labels': 'celltypist_coarse',
    'conf_score': 'conf_score_coarse',
}
adata.obs = adata.obs.rename(columns=coarse_column_names)
adata.obs.head()

In [None]:
# Spatial maps of the coarse labels and their confidence scores, stacked in one column.
sc.pl.spatial(adata,color=['celltypist_coarse','conf_score_coarse'],cmap='RdPu',spot_size=20,vmax='p99',ncols=1)

In [None]:
# celltypist confidence: distribution of the coarse-level scores (100 bins)
fig, ax = plt.subplots(figsize=(6, 2))
sns.histplot(adata.obs['conf_score_coarse'], kde=False, bins=100, ax=ax)
# ax.set_yscale('log')
ax.set_title('coarse_grained')
plt.show()

In [None]:
# check: per coarse label, how many cells fall below (True) or not below (False) the cut-off
conf_thresh = 0.5 # starting from relaxed threshold
coarse_below_thresh = adata.obs['conf_score_coarse'] < conf_thresh
pd.crosstab(adata.obs['celltypist_coarse'], coarse_below_thresh)

In [None]:
# finalised coarse-grained labels: keep the prediction as a plain string, but
# relabel every cell whose confidence is below the threshold as 'unassigned'
conf_thresh = 0.5
coarse_is_low_conf = adata.obs['conf_score_coarse'] < conf_thresh
low_quality_cells = adata.obs_names[coarse_is_low_conf]

coarse_labels = adata.obs['celltypist_coarse'].astype('str')
coarse_labels.loc[low_quality_cells] = 'unassigned'
adata.obs['celltypist_coarse_fin'] = coarse_labels

adata.obs['celltypist_coarse_fin'].value_counts()

In [None]:
# Spatial map of the thresholded coarse labels (low-confidence cells appear as 'unassigned').
sc.pl.spatial(adata,color=['celltypist_coarse_fin'],cmap='RdPu',spot_size=20,vmax='p99',ncols=1)

# CellTypist prediction: mid-grain

In [None]:
# Mid-grained annotation: every coarse class that survived the confidence filter
# is re-annotated with its own coarse -> mid model, and the per-cell labels and
# confidence scores are written back into adata.obs. Cells that are skipped keep NaN.
#
# The label column is created with object dtype rather than as a float NaN column:
# string labels are assigned into it below, and writing strings into a float64
# column relies on pandas' deprecated silent upcasting.
adata.obs['celltypist_coarse2midmod'] = np.full(adata.n_obs, np.nan, dtype=object)
adata.obs['conf_score_coarse2midmod'] = np.nan

for celltype in adata.obs['celltypist_coarse_fin'].unique():
    # 'Epicardium' is also 'Epicardium' in mid-grained, so there is nothing to refine;
    # 'unassigned' cells have no coarse class to pick a model from
    if celltype in ['unassigned', 'Epicardium']:
        continue
    print(celltype)
    # subset to the cells of this coarse class
    adata_sub = adata[adata.obs['celltypist_coarse_fin'] == celltype]
    # predict with the class-specific model
    predictions = celltypist.annotate(
        adata_sub,
        model=f'{celltypist_model_dir}/coarse2midmod_{celltype}.pkl',
        majority_voting=False,
    )
    # add to anndata: copy results back onto the matching cells of the full object
    adata_sub = predictions.to_adata(insert_labels=True, insert_conf=True)
    adata.obs.loc[adata_sub.obs_names, 'celltypist_coarse2midmod'] = (
        adata_sub.obs['predicted_labels'].astype(str)
    )
    adata.obs.loc[adata_sub.obs_names, 'conf_score_coarse2midmod'] = adata_sub.obs['conf_score']
    print('')

In [None]:
# Spatial maps of the mid-grained labels and their confidence scores, stacked in one column.
sc.pl.spatial(adata,color=['celltypist_coarse2midmod','conf_score_coarse2midmod'],
              cmap='RdPu',spot_size=20,vmax='p99',ncols=1)

In [None]:
# check: per mid-grained label, how many cells fall below (True) or not below (False) the cut-off
conf_thresh = 0.8
mid_below_thresh = adata.obs['conf_score_coarse2midmod'] < conf_thresh
pd.crosstab(adata.obs['celltypist_coarse2midmod'], mid_below_thresh)

In [None]:
# finalised mid-grained labels
conf_thresh = 0.8
mid_conf = adata.obs['conf_score_coarse2midmod']
# cells without a mid-grained score (NaN) fail both comparisons, so they land in
# neither group and keep the label they start with below
passed = adata.obs_names[mid_conf >= conf_thresh]
low_quality_cells = adata.obs_names[mid_conf < conf_thresh]

# start from the finalised coarse labels, using the mid-grained name for epicardium
mid_labels = adata.obs['celltypist_coarse_fin'].astype('str')
mid_labels = mid_labels.replace({'Epicardium': 'EpicardialCells'})
# confident mid-grained predictions replace the coarse label ...
mid_labels.loc[passed] = adata.obs.loc[passed, 'celltypist_coarse2midmod']
# ... and predictions under the threshold are discarded
mid_labels.loc[low_quality_cells] = 'unassigned'
adata.obs['celltypist_mid_fin'] = mid_labels

adata.obs['celltypist_mid_fin'].value_counts()

In [None]:
# Spatial map of the thresholded mid-grained labels.
sc.pl.spatial(adata,color=['celltypist_mid_fin'],
              cmap='RdPu',spot_size=20,vmax='p99',ncols=1)

# CellTypist prediction: fine-grain

In [None]:
# Fine-grained annotation: every finalised mid-grained class is re-annotated with
# its own mid -> fine model, and the per-cell labels and confidence scores are
# written back into adata.obs. Cells that are skipped keep NaN.
#
# The label column is created with object dtype rather than as a float NaN column:
# string labels are assigned into it below, and writing strings into a float64
# column relies on pandas' deprecated silent upcasting.
adata.obs['celltypist_midmod2fine'] = np.full(adata.n_obs, np.nan, dtype=object)
adata.obs['conf_score_midmod2fine'] = np.nan

for celltype in adata.obs['celltypist_mid_fin'].unique():
    # 'LymphaticEndothelialCells' is also 'LymphaticEndothelialCells' in fine-grained,
    # so there is nothing to refine; 'unassigned' cells have no class to pick a model from
    if celltype in ['unassigned', 'LymphaticEndothelialCells']:
        continue
    print(celltype)
    # subset to the cells of this mid-grained class
    adata_sub = adata[adata.obs['celltypist_mid_fin'] == celltype]
    # predict with the class-specific model
    predictions = celltypist.annotate(
        adata_sub,
        model=f'{celltypist_model_dir}/midmod2fine_{celltype}.pkl',
        majority_voting=False,
    )
    # add to anndata: copy results back onto the matching cells of the full object
    adata_sub = predictions.to_adata(insert_labels=True, insert_conf=True)
    adata.obs.loc[adata_sub.obs_names, 'celltypist_midmod2fine'] = (
        adata_sub.obs['predicted_labels'].astype(str)
    )
    adata.obs.loc[adata_sub.obs_names, 'conf_score_midmod2fine'] = adata_sub.obs['conf_score']
    print('')

In [None]:
# Spatial maps of the fine-grained labels and their confidence scores, stacked in one column.
sc.pl.spatial(adata,color=['celltypist_midmod2fine','conf_score_midmod2fine'],
              cmap='RdPu',spot_size=20,vmax='p99',ncols=1)

In [None]:
# Fine-grained map restricted (via `groups`) to the two pacemaker cell labels.
sc.pl.spatial(adata,color=['celltypist_midmod2fine'],
              groups=['SinoatrialNodePacemakerCells','AtrioventricularNodePacemakerCells'],
              cmap='RdPu',spot_size=20,vmax='p99',ncols=3)

In [None]:
# Fine-grained map restricted (via `groups`) to the two macrophage labels.
sc.pl.spatial(adata,color=['celltypist_midmod2fine'],
              groups=['MacrophagesCX3CR1pos','MacrophagesLYVE1pos'],
              cmap='RdPu',spot_size=20,vmax='p99',ncols=3)

# Save

In [None]:
# Write the annotated, log-normalised object to the same directory as the input,
# under the '_5K_filtered_lognorm' suffix.
adata.write(f'/home/kk837/rds/rds-teichlab-C9woKbOCf2Y/kk837/Foetal/anndata_objects/Xenium/{sample_id}_5K_filtered_lognorm.h5ad')

In [None]:
# Echo the output path; the string duplicates the one passed to adata.write, so
# keep the two in sync when editing either.
print(f'/home/kk837/rds/rds-teichlab-C9woKbOCf2Y/kk837/Foetal/anndata_objects/Xenium/{sample_id}_5K_filtered_lognorm.h5ad')

In [None]:
# Final summary of the annotated object.
adata