## Import modules

In [1]:
import numpy as np
import numpy.random as random
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

In [2]:
# Report the session's package/version information (via session_info) so the
# environment used for this run is recorded alongside the results.
import session_info
session_info.show()

In [3]:
# Scanpy verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Default figure parameters for scanpy plots.
sc.settings.set_figure_params(
    dpi=200,
    facecolor='white',
    color_map='RdPu',
)

## Read in data

In [4]:
# Load the full adult RNA AnnData object from its (absolute) location on disk.
ADATA_PATH = '/nfs/team205/heart/anndata_objects/8regions/RNA_adult_full_scArches.h5ad'
adata = sc.read_h5ad(ADATA_PATH)
adata

AnnData object with n_obs × n_vars = 704296 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence', 'cell_state', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'n_counts', '_scvi_batch', '_scvi_labels', 'clus20', 'doublet_cls', 'original_or_new', 'batch', 'scANVI_predictions'
    var: 'gene_name-new', 'gene_name_scRNA-0-original', 'gene_name_snRNA-1-original', 'gene_name_multiome-2-original'
    obsm: 'X_scArches'

In [5]:
# Number of cells per cell type (rows) and region (columns) in the full object.
pd.crosstab(index=adata.obs['cell_type'], columns=adata.obs['region'])

region,AVN,AX,LA,LV,RA,RV,SAN,SP
cell_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adipocyte,333,875,1649,203,749,552,1839,147
Atrial Cardiomyocyte,850,5,19509,24,15484,15,9857,62
Endothelial cell,4725,28683,14541,25686,11815,17882,8006,20167
Fibroblast,14674,14676,11672,18704,15026,18706,30176,14421
Lymphatic Endothelial cell,165,238,97,119,33,257,243,143
Lymphoid,1936,3367,3596,3061,3485,2515,3780,3182
Mast cell,142,277,162,450,57,193,270,302
Mesothelial cell,43,8,548,23,293,13,120,9
Mural cell,2809,19853,15316,26942,5160,15766,3711,15036
Myeloid,8174,5449,6124,6749,4108,5713,9637,5472


## Prepare SAN/AVN data and save 

Subsetting pattern:
* SAN: atrial cardiomyocytes (aCM), original+new, all donors
* AVN: atrial and ventricular cardiomyocytes (aCM+vCM), original+new, all donors

### SAN

In [6]:
# Select the cells that are both in the SAN region and labelled as atrial cardiomyocytes.
san_region_mask = adata.obs['region'] == 'SAN'
san_celltype_mask = adata.obs['cell_type'] == 'Atrial Cardiomyocyte'
san_aCM = adata[san_region_mask & san_celltype_mask]
# Tabulate the subset to confirm which cell types / regions it contains.
pd.crosstab(index=san_aCM.obs['cell_type'], columns=san_aCM.obs['region'])

region,SAN
cell_type,Unnamed: 1_level_1
Atrial Cardiomyocyte,9857


In [7]:
# Save the SAN atrial-cardiomyocyte subset as an .h5ad file, then display its summary.
# The destination is a fixed path, so it is a plain string literal (nothing to interpolate).
san_aCM_path = '/home/jovyan/mount/gdrive/Colab/data/anndata/RNA_adult_full_raw_SAN-aCM.h5ad'
san_aCM.write(san_aCM_path)
san_aCM

View of AnnData object with n_obs × n_vars = 9857 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence', 'cell_state', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'n_counts', '_scvi_batch', '_scvi_labels', 'clus20', 'doublet_cls', 'original_or_new', 'batch', 'scANVI_predictions'
    var: 'gene_name-new', 'gene_name_scRNA-0-original', 'gene_name_snRNA-1-original', 'gene_name_multiome-2-original'
    obsm: 'X_scArches'

In [8]:
# Show the first ten stored values of X (assumes X is a sparse matrix exposing `.data`).
# Presumably a check that the saved matrix holds raw counts, given "raw" in the
# output file name — TODO confirm.
san_aCM.X.data[:10]

array([ 1.,  1.,  1.,  1.,  2., 11.,  1.,  4.,  1.,  1.], dtype=float32)

### AVN

In [10]:
# Select AVN cells labelled as either atrial or ventricular cardiomyocytes.
avn_cell_types = ['Atrial Cardiomyocyte', 'Ventricular Cardiomyocyte']
avn_region_mask = adata.obs['region'] == 'AVN'
avn_celltype_mask = adata.obs['cell_type'].isin(avn_cell_types)
avn_avCM = adata[avn_region_mask & avn_celltype_mask]
# Tabulate the subset to confirm which cell types / regions it contains.
pd.crosstab(index=avn_avCM.obs['cell_type'], columns=avn_avCM.obs['region'])

region,AVN
cell_type,Unnamed: 1_level_1
Atrial Cardiomyocyte,850
Ventricular Cardiomyocyte,14768


In [11]:
# Save the AVN atrial + ventricular cardiomyocyte subset as an .h5ad file, then display its summary.
# The destination is a fixed path, so it is a plain string literal (nothing to interpolate).
avn_avCM_path = '/home/jovyan/mount/gdrive/Colab/data/anndata/RNA_adult_full_raw_AVN-avCM.h5ad'
avn_avCM.write(avn_avCM_path)
avn_avCM

View of AnnData object with n_obs × n_vars = 15618 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence', 'cell_state', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'n_counts', '_scvi_batch', '_scvi_labels', 'clus20', 'doublet_cls', 'original_or_new', 'batch', 'scANVI_predictions'
    var: 'gene_name-new', 'gene_name_scRNA-0-original', 'gene_name_snRNA-1-original', 'gene_name_multiome-2-original'
    obsm: 'X_scArches'

In [12]:
# Show the first ten stored values of X (assumes X is a sparse matrix exposing `.data`).
# Presumably a check that the saved matrix holds raw counts, given "raw" in the
# output file name — TODO confirm.
avn_avCM.X.data[:10]

array([10., 20., 15., 23.,  4., 34., 30., 45.,  5., 38.], dtype=float32)