In [1]:
import omicverse as ov
import scanpy as sc
import matplotlib.pyplot as plt
# Presumably applies omicverse's global plotting defaults — TODO confirm against the omicverse docs.
# Note: this call sits in the middle of the import list, so it runs before pandas/anndata/numpy are imported.
ov.ov_plot_set()
import pandas as pd
import anndata as ad
import numpy as np
from matplotlib import patheffects




   ____            _     _    __                  
  / __ \____ ___  (_)___| |  / /__  _____________ 
 / / / / __ `__ \/ / ___/ | / / _ \/ ___/ ___/ _ \ 
/ /_/ / / / / / / / /__ | |/ /  __/ /  (__  )  __/ 
\____/_/ /_/ /_/_/\___/ |___/\___/_/  /____/\___/                                              

Version: 1.6.3, Tutorials: https://omicverse.readthedocs.io/
All dependencies are satisfied.


In [2]:
# Seed NumPy's global random state; the np.random.permutation calls in the
# plotting cells below draw from it, so their result depends on cell execution order.
np.random.seed(42)

In [3]:
# Directory holding the per-species h5ad files read in the next cell.
# The value already ends with '/', and the f-strings that use it add another '/'.
path = "../2.SAM_reclustering/SAM/"

In [24]:
# Read one AnnData object per species from `path`; the four files share the
# same naming pattern and differ only in the species prefix.
Hsap, Mmus, Pvit, Pmar = (
    sc.read_h5ad(f'{path}/{species}_Ast.15.30.2500.DonorID.h5ad')
    for species in ('Hsap', 'Mmus', 'Pvit', 'Pmar')
)

In [9]:
# modify for plotting
# Collapse each dataset's own region labels into a shared set of coarse brain
# divisions, stored in obs['location'], so that all species can share one palette.
def _collapse_regions(obs, groups, column='location'):
    """Relabel ``obs[column]`` in place using coarse region groups.

    Parameters
    ----------
    obs : pandas.DataFrame
        Cell metadata table that already contains ``column``.
    groups : dict
        Maps each coarse label to the list of fine labels it absorbs. Groups are
        applied in insertion order; labels not listed in any group are left as is.
    column : str
        Name of the column to relabel (default ``'location'``).
    """
    for coarse_label, fine_labels in groups.items():
        # A single .loc assignment writes into `obs` itself. The chained form
        # obs[column][mask] = ... assigns through an intermediate Series, which
        # triggers SettingWithCopyWarning and has no effect under copy-on-write.
        obs.loc[obs[column].isin(fine_labels), column] = coarse_label


# Hsap: the coarse source label is the part of 'Tissue' before ' - '
Hsap.obs['location'] = [i.split(' - ')[0] for i in Hsap.obs['Tissue']]
_collapse_regions(Hsap.obs, {
    'Telencephalon': ['Head of hippocampus (HiH)',  'Amygdaloid complex (AMY)', 'Extended amygdala (EXA)',
                      'Body of hippocampus (HiB)', 'Basal nuclei (BN)', 'Tail of Hippocampus (HiT)','Claustrum', 'Paleocortex (PalCx)',
                      'Basal forebrain (BF)','Cerebral cortex (Cx)', 'Perirhinal cortex (area 35) (A35)'],
    'Diencephalon': ['Thalamus (THM)', 'Hypothalamus (HTH)', 'Epithalamus'],
    'Mesencephalon': ['Midbrain (M)','Midbrain (RN)'],
    'Rhombencephalon': ['Myelencephalon (medulla oblongata) (Mo)','Pons (Pn)', 'Cerebellum (CB)'],
})

# Mmus
Mmus.obs['location'] = Mmus.obs['Tissue'].astype('string')
_collapse_regions(Mmus.obs, {
    'Telencephalon': ['StriatDor', 'StriatVent', 'Amygd', 'HC', 'OB', 'CA1',
                      'Ctx1','Ctx3','Ctx2','Ctx1.5','DentGyr','SScortex'],
    'Diencephalon': ['Thal', 'Hypoth'],
    'Mesencephalon': ['MBd', 'MBv', ],
    'Rhombencephalon': ['Medulla','CB'],
})

# Pvit
Pvit.obs['location'] = Pvit.obs['Region'].astype('string')
_collapse_regions(Pvit.obs, {
    'Telencephalon': ['Telencephalon', 'amDVR'],
    'Diencephalon': ['Thalamus+Hypothalamus'],
    'Di-/Mesencephalon': ['Thalamus+Hypothalamus+Midbrain',
                          'Optic Tectum + Midbrain', 'Optic Tectum'],
    'Rhombencephalon': ['Cerebellum', 'posterior brain', 'Brainstem'],
})

# Pmar
Pmar.obs['location'] = Pmar.obs['Putative_location'].astype('string')
_collapse_regions(Pmar.obs, {
    'Telencephalon': ['Telencephalon'],
    'Diencephalon': ['Diencephalon', 'Diencephalon; Sub-Commissural Organ (SCO)',
                     'Diencephalon; Pineal/Parapineal organs'],
    'Di-/Mesencephalon': ['Diencephalon/Mesencephalon', 'Mesencephalon'],
    'Rhombencephalon': ['Mesencephalon/Rhombencephalon', 'Rhombencephalon'],
    'WB and others': ['Prosencephalon', 'Whole brain',
                      'Whole brain; leptomeninges/choroid plexuses',
                      'Cranial nerve ganglia'],
})


In [10]:
# Per-species UMAPs coloured by coarse brain division; one PDF is saved per species.
color_dict = {'Telencephalon':'#DE582B', 'Diencephalon':'#A4E048', 'Di-/Mesencephalon':'#738061',
              'Mesencephalon':'#808080','Rhombencephalon':'#293890', 'WB and others': '#000000'}

# One (AnnData, save name) pair per figure: the plotting arguments are identical
# for every species, so a single loop replaces four copy-pasted calls.
for adata, save_name in [
    (Mmus, ".Mmus_AST.pdf"),
    (Hsap, ".Hsap_AST.pdf"),
    (Pvit, ".Pvit_AST.pdf"),
    (Pmar, ".Pmar_AST.pdf"),
]:
    ov.pl.embedding(
        adata, basis = 'X_umap', color = ['location'], show = False,
        palette=color_dict, frameon='small', save=save_name, size=2)



<AxesSubplot: title={'center': 'location'}, xlabel='X_umap1', ylabel='X_umap2'>

In [11]:
# Read the UMAP coordinate table and the metadata table from CSV (first column
# becomes the index in both).
meta_path = "."
umap = pd.read_csv(f'{meta_path}/Vertebrate_AST.UMAP.csv', index_col=0)
meta = pd.read_csv(f'{meta_path}/Vertebrate_AST.meta.csv', index_col=0)

# Overwrite each object's 'X_umap' with the rows of `umap` selected by its own
# cell index; from here on 'X_umap' no longer holds the values loaded from the h5ad files.
for adata in (Hsap, Mmus, Pvit, Pmar):
    adata.obsm['X_umap'] = umap.loc[adata.obs.index].values

In [12]:
# Stack the four species along the cell axis (axis=0) into one AnnData object;
# join='outer' asks anndata to keep the union of the other axis rather than the intersection.
vertebrate = ad.concat([Hsap, Mmus, Pvit, Pmar], join='outer', axis=0)

In [13]:
# Make 'Species' an ordered categorical with a fixed category order.
# Any value outside the four listed categories would become NaN.
vertebrate.obs['Species'] =  pd.Categorical(vertebrate.obs['Species'], categories=['Hsap','Mmus','Pvit','Pmar'], ordered=True)

In [14]:
# Cells per species before down-sampling (counts shown in the output below).
vertebrate.obs['Species'].value_counts()

Pvit    31316
Mmus    19015
Hsap    18895
Pmar     6389
Name: Species, dtype: int64

In [15]:
# subset some cells for balancing number of cells in different species, just for better visualisation
# Draw up to N_PER_SPECIES cells from EACH species. The previous version negated
# the species mask (~isin), so every draw came from the other three species and
# the result was not balanced per species.
N_PER_SPECIES = 6000
sampled_ids = []
for species in ['Hsap', 'Mmus', 'Pvit', 'Pmar']:
    species_obs = vertebrate.obs[vertebrate.obs['Species'] == species]
    # min() keeps the draw valid for a species with fewer than N_PER_SPECIES cells
    n_draw = min(N_PER_SPECIES, len(species_obs))
    sampled_ids.extend(species_obs.sample(n_draw, random_state=42).index)
vertebrate = vertebrate[vertebrate.obs.index.isin(sampled_ids)].copy()

In [16]:
# Joint embedding coloured by species, saved as a PDF.
color_dict = {'Hsap':'#989A9C', 'Mmus':'#F7D08D','Pvit':'#BF83A5','Pmar':'#8684B0'}
fig, ax = plt.subplots(figsize=(4, 4))

# Plot the cells in a random order (drawn from NumPy's global random state);
# presumably so that no single species is systematically drawn on top.
random_indices = np.random.permutation(vertebrate.n_obs)
ov.pl.embedding(
    vertebrate[random_indices, :],
    basis="X_umap", color=['Species'], palette=color_dict,
    title='', frameon='small', size=2,
    ax=ax, show=False,
    save= ".vertebrate_AST.species.pdf",
)



<AxesSubplot: xlabel='X_umap1', ylabel='X_umap2'>

In [17]:
# Joint embedding coloured by coarse brain division ('location'), saved as a PDF.
color_dict = {'Telencephalon':'#DE582B', 'Diencephalon':'#A4E048', 'Di-/Mesencephalon':'#738061',
              'Mesencephalon':'#808080','Rhombencephalon':'#293890', 'WB and others': '#000000'}
fig, ax = plt.subplots(figsize=(4, 4))

# Plot the cells in a fresh random order (drawn from NumPy's global random state).
random_indices = np.random.permutation(vertebrate.n_obs)
ov.pl.embedding(
    vertebrate[random_indices, :],
    basis="X_umap", color=['location'], palette=color_dict,
    title='', frameon='small', size=2,
    ax=ax, show=False,
    save= ".vertebrate_AST.region.pdf",
)



<AxesSubplot: xlabel='X_umap1', ylabel='X_umap2'>

In [56]:
# Mmus embedding coloured by the original 'Tissue' labels (default palette), saved as a PDF.
fig, ax = plt.subplots(figsize=(4, 4))
ov.pl.embedding(
    Mmus,
    basis='X_umap',
    color=['Tissue'],
    title='',
    frameon='small',
    size=2,
    ax=ax,
    show=False,
    save= ".Mmus_tissue.pdf",
)



<AxesSubplot: xlabel='X_umap1', ylabel='X_umap2'>

In [47]:
# Count cells per 'Description' label in Mmus and keep the labels that have
# more than 300 cells, in descending-count order.
tmp = Mmus.obs['Description'].value_counts()
retained = [label for label, n_cells in tmp.items() if n_cells > 300]

['Non-telencephalon astrocytes, protoplasmic',
 'Telencephalon astrocytes, protoplasmic',
 'Telencephalon astrocytes, fibrous',
 'Non-telencephalon astrocytes, fibrous',
 'Olfactory astrocytes',
 'Bergmann glia',
 'Dorsal midbrain Myoc-expressing astrocyte-like',
 'Subventricular zone radial glia-like cells',
 'Dentate gyrus radial glia-like cells']

In [49]:
# Load another Mmus AnnData file ('Mmus.wb.iter_cluster_annotated.h5ad').
# NOTE(review): the deep relative path only resolves when the notebook runs from its original directory.
ops = sc.read_h5ad('../../../../01.data/02.atlas_final/2.samap/4.final/Mmus.wb.iter_cluster_annotated.h5ad')

In [50]:
# Keep only the cells of `ops` whose 'Description' is one of the `retained` labels;
# .copy() turns the selection into an independent AnnData object.
Mmus_ = ops[ops.obs['Description'].isin(retained)].copy()

In [51]:
# Write the subset to 'Mmus.test.h5ad' in the current working directory, gzip-compressed.
Mmus_.write_h5ad('Mmus.test.h5ad',compression='gzip')

In [52]:
# Inspection only: display the .raw attribute of the subset (a Raw object, per the output below).
Mmus_.raw

<anndata._core.raw.Raw at 0x7fc80a9a2c50>

In [54]:
# Inspection only: display the matrix stored in .raw (the output below reports its shape, dtype and sparse format).
Mmus_.raw.X

<19002x18787 sparse matrix of type '<class 'numpy.float32'>'
	with 15040379 stored elements in Compressed Sparse Column format>

In [53]:
# Inspection only: display the AnnData summary of the subset (dimensions and available fields).
Mmus_

AnnData object with n_obs × n_vars = 19002 × 18787
    obs: 'Age', 'AnalysisPool', 'AnalysisProject', 'Class', 'ClusterName', 'Clusters', 'Description', 'DonorID', 'MitoRiboRatio', 'Neurotransmitter', 'TaxonomyRank1', 'TaxonomyRank2', 'TaxonomyRank3', 'TaxonomyRank4', 'TaxonomySymbol', 'Tissue', 'cell_type', 'Species', 'iter.clustering', 'Refined family', 'Refined subtype'
    var: 'Accession', '_LogCV', '_LogMean', '_Selected', '_Total', '_Valid', 'mask_genes', 'means', 'variances', 'weights', 'spatial_dispersions', 'mean_correction'
    uns: 'dimred_indices', 'path_to_file', 'preprocess_args', 'ranked_genes', 'run_args'
    obsm: 'PCA', 'X_pca', 'X_processed', 'X_test', 'X_umap', 'tSNE'
    varm: 'PCs', 'PCs_SAMap'
    layers: 'X_disp'
    obsp: 'connectivities', 'distances', 'nnm'