# merging and cleaning processed paediatric Visium data - post cell2location deconvolution

In [18]:
import numpy as np
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import re
import os
import scipy.stats
from numpy import asarray as ar
from collections import Counter
import scvi
import anndata as ad
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # to show output from all the lines in a cells
pd.set_option('display.max_column',None) # display all the columns in pandas
pd.options.display.max_rows = 100

from datetime import date
today = str(date.today())
sc.settings.verbosity = 1
sc.logging.print_version_and_date()
%load_ext autoreload
%autoreload 2

Running Scanpy 1.9.1, on 2023-08-02 13:56.
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
## Figure export settings
import matplotlib
from matplotlib import rcParams

# Font type 42 embeds TrueType fonts in exported PDFs.
rcParams.update({'pdf.fonttype': 42})

# scanpy figure defaults: 150 dpi on screen and on disk, 'RdPu' colour map, PDF output.
figure_settings = dict(
    dpi=150,
    color_map='RdPu',
    dpi_save=150,
    vector_friendly=True,
    format='pdf',
)
sc.settings.set_figure_params(**figure_settings)

In [20]:
# Show the current working directory. The relative output path used when
# saving the final object at the end of the notebook resolves against it.
import os
os.getcwd()

'/nfs/team205/ny1/ThymusSpatialAtlas/Figure2'

# Organize analysis object 
This is divided into 4 steps:
1) load the cell2location object "sp", which contains only the HVG genes 
2) load the merged raw Visium object "adata_vis" 
3) create a new object with all annotations and all raw genes
4) update the tissue tag annotations to generate a final annotated object 

In [None]:
# Read the cell2location output object from disk and display its summary.
c2l_results_path = '/nfs/team205/vk8/projects/thymus_atlas/results/thymus_atlas_v2_anno_v11_paed_sub_15K/cell2location_map/sp.h5ad'
adata_paed = sc.read(c2l_results_path)
adata_paed

In [None]:
# Copy cell2location estimates from .obsm / .uns into .obs so they can be plotted by name.
c2l_model = adata_paed.uns['mod']

# 5% quantile of cell abundance ('at least this amount is present'), one column per factor name.
adata_paed.obs[c2l_model['factor_names']] = adata_paed.obsm['q05_cell_abundance_w_sf']

# Row-wise sum of the posterior-mean 'w_sf' array.
adata_paed.obs['tot_cell_abundance'] = c2l_model['post_sample_means']['w_sf'].sum(1).flatten()

# 5% quantile of the 'detection_y_s' term.
adata_paed.obs['detection_sensit'] = c2l_model['post_sample_q05']['detection_y_s']

adata_paed

In [None]:
# Read the merged raw Visium object and keep every spot whose Age_group is not 'fetal'.
raw_visium_path = "/nfs/team205/ny1/ThymusSpatialAtlas/Figure2/data/Thymus_atlas_v2_Visium_raw_2023-08-02.h5ad"
adata_vis_all = sc.read(raw_visium_path)

non_fetal_mask = adata_vis_all.obs['Age_group'] != 'fetal'
adata_vis = adata_vis_all[non_fetal_mask].copy()

# Free the unfiltered object; only the subset is used from here on.
del adata_vis_all
adata_vis

In [12]:
# Build the final object: the full raw gene matrix from `adata_vis` combined
# with the annotations, model output and embeddings from `adata_paed`.
#
# AnnData pairs X with obs purely by row position, so the raw object is first
# subset and reordered to the spot names of the cell2location object. Without
# this, a difference in spot order between the two files would silently attach
# counts to the wrong spots.
missing_spots = adata_paed.obs_names.difference(adata_vis.obs_names)
if len(missing_spots) > 0:
    raise ValueError(
        f"{len(missing_spots)} spots of the cell2location object are absent from the raw Visium object, "
        f"e.g. {list(missing_spots[:5])}"
    )

adata_vis_aligned = adata_vis[adata_paed.obs_names]
adata_paed_full = ad.AnnData(
    X=adata_vis_aligned.X,
    obs=adata_paed.obs,
    var=adata_vis.var,
    uns=adata_paed.uns,
    obsm=adata_paed.obsm,
)

# The source objects are no longer needed; release their memory.
del adata_vis_aligned
del adata_vis
del adata_paed
adata_paed_full

AnnData object with n_obs × n_vars = 38245 × 36601
    obs: 'Sample', 'Sample_hr', 'SampleID', 'SlideID', 'Position', 'in_tissue', 'array_row', 'array_col', 'x', 'y', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'percent_mito', 'annotations_level_0', 'annotations_level_0_number', 'annotations_level_1', 'annotations_level_1_number', 'annotations_lobules_0', 'annotations_lobules_0_number', 'L2_dist_annotations_level_0_Artifacts', 'L2_dist_annotations_level_0_Cortex', 'L2_dist_annotations_level_0_Edge', 'L2_dist_annotations_level_0_Medulla', 'L2_dist_annotations_level_1_HS', 'L2_dist_annotations_level_1_PVS', 'L2_dist_annotations_level_1_fat', 'L2_dist_annotations_level_1_vessels', 'L2_dist_annotations_level_0_lymph', 'L2_dist_annotations_level_1_unassigned', 'cm', 'cc', 'cma_v2', 'manual_bin_cma_v2', 'manual_bin_cma_v2_int', 'Image_name', 'Image_jpg', 'section_thickness (um)', 'permebialisation(min)', 'RIN/DV200', 'Visium_type', 'Funding', 'Sequen

# embed new annotations 

In [13]:
# Columns from earlier annotation rounds that should not be carried into the final object.
annotations_to_remove = [
    'L2_dist_Annotation_lv_0_Artifacts',
    'L2_dist_Annotation_lv_0_Background',
    'L2_dist_Annotation_lv_0_Cortex',
    'L2_dist_Annotation_lv_0_Edge',
    'L2_dist_Annotation_lv_0_Medulla',
    'L2_dist_log10_Annotation_lv_1_HS',
    'Annotation_lv_1',
    'L2_dist_log10_Annotation_lv_1_Lymph',
    'L2_dist_log10_Annotation_lv_1_PVS',
    'L2_dist_log10_Annotation_lv_1_Unassigned',
    'Annotation_lobules',
    'L2_dist_Annotation_lv_0_Unassigned',
    'Annotation_lobules_0',
    'Annotation_lobules_1',
    'L2_dist_log10_Annotation_lv_1_Fat',
    'Leiden_0_5',
    'L2_dist_log10_Annotation_lv_1_Large vessel',
]

# errors='ignore' makes the drop a no-op for any listed column that is not present.
adata_paed_full.obs.drop(columns=annotations_to_remove, errors='ignore', inplace=True)

# Keep the previous axis and level-0 annotation, but under names that mark them as superseded.
legacy_column_names = {
    'Cortico_Medullar_Axis': 'cma_v1',
    'Annotation_lv_0': 'old_annotations_level_0',
}
adata_paed_full.obs.rename(columns=legacy_column_names, inplace=True)


adata_paed_full

AnnData object with n_obs × n_vars = 38245 × 36601
    obs: 'Sample', 'Sample_hr', 'SampleID', 'SlideID', 'Position', 'in_tissue', 'array_row', 'array_col', 'x', 'y', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'percent_mito', 'annotations_level_0', 'annotations_level_0_number', 'annotations_level_1', 'annotations_level_1_number', 'annotations_lobules_0', 'annotations_lobules_0_number', 'L2_dist_annotations_level_0_Artifacts', 'L2_dist_annotations_level_0_Cortex', 'L2_dist_annotations_level_0_Edge', 'L2_dist_annotations_level_0_Medulla', 'L2_dist_annotations_level_1_HS', 'L2_dist_annotations_level_1_PVS', 'L2_dist_annotations_level_1_fat', 'L2_dist_annotations_level_1_vessels', 'L2_dist_annotations_level_0_lymph', 'L2_dist_annotations_level_1_unassigned', 'cm', 'cc', 'cma_v2', 'manual_bin_cma_v2', 'manual_bin_cma_v2_int', 'Image_name', 'Image_jpg', 'section_thickness (um)', 'permebialisation(min)', 'RIN/DV200', 'Visium_type', 'Funding', 'Sequen

In [10]:
# assign nice colors 

In [14]:
# Fixed colour per annotation category; categories without an entry fall back to gray.
# NOTE(review): 'PVS'/'lymph' share red and 'HS'/'Medulla' share green; an obs column
# named 'L2_dist_annotations_level_1_fat' suggests a 'fat' category that has no entry
# here and would therefore be drawn in the same gray as 'unassigned' - confirm intended.
colors_dict = {'unassigned': 'gray', 'PVS': 'red', 'HS': 'green', 'vessels': 'blue', 'lymph':'red','Cortex': 'cyan', 'Medulla': 'green', 'Edge': 'brown', 'Artifacts': 'black'}


def _palette_for(categories, palette, default='gray'):
    """Return one colour per category, in category order.

    Parameters
    ----------
    categories : iterable of str
        Category labels, in the order the colours should be emitted.
    palette : dict
        Mapping from category label to colour.
    default : str
        Colour used for any category that has no entry in `palette`.

    Returns
    -------
    list of str
        Colours aligned with `categories`.
    """
    return [palette.get(category, default) for category in categories]


# Store the palettes under '<key>_colors' in .uns for both annotation levels.
# No variable named `colors` is created here, so the `matplotlib.colors` module
# imported at the top of the notebook stays usable.
for annotation_key in ('annotations_level_0', 'annotations_level_1'):
    annotation_categories = adata_paed_full.obs[annotation_key].cat.categories
    adata_paed_full.uns[f'{annotation_key}_colors'] = _palette_for(annotation_categories, colors_dict)



In [16]:
# Display the object summary once more before it is written to disk.
adata_paed_full

AnnData object with n_obs × n_vars = 38245 × 36601
    obs: 'Sample', 'Sample_hr', 'SampleID', 'SlideID', 'Position', 'in_tissue', 'array_row', 'array_col', 'x', 'y', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'percent_mito', 'annotations_level_0', 'annotations_level_0_number', 'annotations_level_1', 'annotations_level_1_number', 'annotations_lobules_0', 'annotations_lobules_0_number', 'L2_dist_annotations_level_0_Artifacts', 'L2_dist_annotations_level_0_Cortex', 'L2_dist_annotations_level_0_Edge', 'L2_dist_annotations_level_0_Medulla', 'L2_dist_annotations_level_1_HS', 'L2_dist_annotations_level_1_PVS', 'L2_dist_annotations_level_1_fat', 'L2_dist_annotations_level_1_vessels', 'L2_dist_annotations_level_0_lymph', 'L2_dist_annotations_level_1_unassigned', 'cm', 'cc', 'cma_v2', 'manual_bin_cma_v2', 'manual_bin_cma_v2_int', 'Image_name', 'Image_jpg', 'section_thickness (um)', 'permebialisation(min)', 'RIN/DV200', 'Visium_type', 'Funding', 'Sequen

In [17]:
# Save the merged, annotated object; the relative path resolves against the current working directory.
output_h5ad_path = 'adata_paed_full_v11.h5ad'
adata_paed_full.write_h5ad(output_h5ad_path)