## Import modules

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import numpy.random as random
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

In [2]:
# Show the versions of the packages loaded in this session so the run
# environment is recorded alongside the notebook outputs.
import session_info
session_info.show()

In [3]:
sc.settings.verbosity = 3            # verbosity: errors (0), warnings (1), info (2), hints (3)
# Global figure defaults: 200 dpi, white background, 'RdPu' colormap.
sc.settings.set_figure_params(dpi=200, facecolor='white', color_map = 'RdPu',)

## Function

## Read in data

In [4]:
# Root directory for the AnnData (.h5ad) files: the input object is read from
# here and the per-compartment subsets are written under `scVI/subsets/`.
# NOTE(review): absolute, site-specific path -- adjust when running elsewhere.
adata_dir = '/nfs/team205/heart/anndata_objects/8regions'

The `batch_key` column in `adata.obs` combines: Donor + Nuclei_or_Cells + Kit_10X

In [5]:
# Load the AnnData object from `adata_dir`; the bare `adata` on the last line
# displays its summary (dimensions and annotation fields) as the cell output.
adata = sc.read_h5ad(f'{adata_dir}/RNA_adult-8reg_raw_rmdblcls_celltype-annotated.h5ad')
adata

AnnData object with n_obs × n_vars = 629041 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence'
    var: 'gene_name_scRNA-0', 'gene_name_snRNA-1', 'gene_name_multiome-2'
    uns: 'cell_or_nuclei_colors', 'cell_state_HCAv1_colors', 'cell_state_scNym_colors', 'cell_type_colors', 'donor_colors', 'kit_10x_colors', 'leiden_scVI_colors', 'region_colors'
    obsm: 'X_scVI', 'X_umap', '_scvi_extra_continuous', 'latent_gene_encoding'
    obsp: 'connectivities', 'distances'

In [6]:
# Peek at the first 10 stored values of X (presumably a sparse matrix, hence
# `.data`) to check that they look like raw, integer-valued counts.
adata.X.data[:10]

array([1., 1., 1., 1., 2., 1., 1., 1., 5., 1.], dtype=float32)

In [7]:
# Name of one of the `adata.var` columns listed above; presumably the column
# used for gene names later on (not referenced in the cells visible here).
genename_column='gene_name_multiome-2'

## Subset compartment

In [8]:
# List the `cell_type` categories; these are the labels the compartment
# definitions below select on.
adata.obs['cell_type'].cat.categories

Index(['Ventricular Cardiomyocyte', 'Atrial Cardiomyocyte', 'Fibroblast',
       'Endothelial cell', 'Lymphatic Endothelial cell', 'Mural cell',
       'Mesothelial cell', 'Neuronal cell', 'Adipocyte', 'Myeloid', 'Lymphoid',
       'Mast cell'],
      dtype='object')

In [10]:
# Compartment definitions: short label -> list of `cell_type` categories that
# belong to it. The label is used in the output file name of each subset, and
# the insertion order determines the order in which subsets are processed.
compartments = dict(
    aCM=['Atrial Cardiomyocyte'],
    vCM=['Ventricular Cardiomyocyte'],
    EC=['Endothelial cell'],
    LEC=['Lymphatic Endothelial cell'],
    Mural=['Mural cell'],
    Meso=['Mesothelial cell'],
    FB=['Fibroblast'],
    Adip=['Adipocyte'],
    NC=['Neuronal cell'],
    Myelo=['Myeloid'],
    Lymph=['Lymphoid'],
    Mast=['Mast cell'],
)

In [11]:
# Show the compartment labels in the order they will be iterated.
compartments.keys()

dict_keys(['aCM', 'vCM', 'EC', 'LEC', 'Mural', 'Meso', 'FB', 'Adip', 'NC', 'Myelo', 'Lymph', 'Mast'])

## Pre-process for scVI

In [12]:
# Reset the figure defaults with a lower resolution (dpi=80) from here on.
sc.settings.set_figure_params(dpi=80, facecolor='white', color_map = 'RdPu',)

In [13]:
# Write one gene-filtered AnnData file per compartment.
for sub, cell_types in compartments.items():

    # Select the cells of this compartment and take an explicit copy:
    # `sc.pp.filter_genes` modifies the object in place, and doing that on a
    # view triggers an implicit copy ("Trying to set attribute `.var` of view,
    # copying."). Copying up front makes the intent explicit and guarantees
    # the full `adata` is never touched.
    adata_sub = adata[adata.obs['cell_type'].isin(cell_types)].copy()

    # filter genes: drop genes detected in fewer than 3 cells of this subset
    sc.pp.filter_genes(adata_sub, min_cells=3)

    # save (the `scVI/subsets` directory under `adata_dir` must already exist)
    adata_sub.write(f'{adata_dir}/scVI/subsets/{sub}_pp_allgene.h5ad')

    # release the subset before the next one is created
    del adata_sub

filtered out 4646 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 3305 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 4686 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 15263 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 4979 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 14996 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 3671 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 7077 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view, copying.
filtered out 9050 genes that are detected in less than 3 cells
Trying to set attribute `.var` of view,