# Importing packages

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
from scipy.sparse import csr_matrix
# Report the installed anndata and scanpy versions, one per line
print(ad.__version__, sc.__version__, sep="\n")

0.10.5.post1
1.9.8


# Importing data

In [2]:
# Load the raw vascular dataset from an H5AD file into an AnnData object
vascular = ad.read_h5ad("../Data/hca_heart_vascular_raw.h5ad")

In [3]:
# Show the AnnData summary: dimensions plus the obs/var/uns/obsm keys
vascular

AnnData object with n_obs × n_vars = 195395 × 33538
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used'
    var: 'gene_ids-Harvard-Nuclei', 'feature_types-Harvard-Nuclei', 'gene_ids-Sanger-Nuclei', 'feature_types-Sanger-Nuclei', 'gene_ids-Sanger-Cells', 'feature_types-Sanger-Cells', 'gene_ids-Sanger-CD45', 'feature_types-Sanger-CD45'
    uns: 'cell_type_colors'
    obsm: 'X_pca', 'X_umap'

In [4]:
# Inspect the per-cell annotation table (rows are indexed by cell barcode)
vascular.obs

Unnamed: 0,NRP,age_group,cell_source,cell_type,donor,gender,n_counts,n_genes,percent_mito,percent_ribo,region,sample,scrublet_score,source,type,version,cell_states,Used
AAGACCTGTACTTGAC-1-HCAHeart7606896,No,50-55,Sanger-Cells,Smooth_muscle_cells,D1,Female,5236.0,1714,0.055959,0.050420,AX,HCAHeart7606896,0.164733,Cells,DBD,V2,SMC2_art,Yes
ACAGCTACACAAGACG-1-HCAHeart7606896,No,50-55,Sanger-Cells,Pericytes,D1,Female,9139.0,3043,0.049568,0.065215,AX,HCAHeart7606896,0.185751,Cells,DBD,V2,PC3_str,Yes
ACATCAGCATGACATC-1-HCAHeart7606896,No,50-55,Sanger-Cells,Pericytes,D1,Female,901.0,459,0.130966,0.048835,AX,HCAHeart7606896,0.102990,Cells,DBD,V2,PC1_vent,Yes
ACCCACTTCTGTCCGT-1-HCAHeart7606896,No,50-55,Sanger-Cells,Smooth_muscle_cells,D1,Female,3419.0,1220,0.090962,0.041533,AX,HCAHeart7606896,0.164733,Cells,DBD,V2,SMC2_art,Yes
ACCTTTATCAATACCG-1-HCAHeart7606896,No,50-55,Sanger-Cells,Pericytes,D1,Female,5497.0,2177,0.075860,0.047480,AX,HCAHeart7606896,0.155556,Cells,DBD,V2,PC3_str,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGCGTCTCGAAGGAC-1-H0037_septum,No,55-60,Harvard-Nuclei,Endothelial,H4,Male,723.0,567,0.017981,0.000000,SP,H0037_septum,0.226190,Nuclei,DBD,V3,EC2_cap,Yes
TTGCTGCTCTTTGCGC-1-H0037_septum,No,55-60,Harvard-Nuclei,Endothelial,H4,Male,515.0,389,0.015534,0.000000,SP,H0037_septum,0.065657,Nuclei,DBD,V3,EC3_cap,Yes
TTGGTTTTCTTAGCAG-1-H0037_septum,No,55-60,Harvard-Nuclei,Endothelial,H4,Male,820.0,594,0.006098,0.000000,SP,H0037_septum,0.065657,Nuclei,DBD,V3,EC2_cap,Yes
TTTGTTGAGGTAGATT-1-H0037_septum,No,55-60,Harvard-Nuclei,Endothelial,H4,Male,621.0,473,0.003221,0.000000,SP,H0037_septum,0.057715,Nuclei,DBD,V3,EC1_cap,Yes


In [5]:
# Cell counts per sample (rows) broken down by cell type (columns)
pd.crosstab(
    index=vascular.obs["sample"],
    columns=vascular.obs["cell_type"],
)

cell_type,Endothelial,Mesothelial,Pericytes,Smooth_muscle_cells
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
H0015_LA_new,253,0,495,89
H0015_LV,523,0,1889,356
H0015_RA,276,0,152,137
H0015_RV,265,0,1267,163
H0015_apex,739,0,1508,263
...,...,...,...,...
HCAHeart8287124,9,0,45,1
HCAHeart8287125,26,0,7,7
HCAHeart8287126,327,0,552,37
HCAHeart8287127,392,0,357,49


In [6]:
# Keep only the cells annotated as pericytes (all genes retained)
pericytes = vascular[vascular.obs["cell_type"] == "Pericytes", :]

In [7]:
# Display the summary of the pericyte-only subset (a view onto `vascular`)
pericytes

View of AnnData object with n_obs × n_vars = 77856 × 33538
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used'
    var: 'gene_ids-Harvard-Nuclei', 'feature_types-Harvard-Nuclei', 'gene_ids-Sanger-Nuclei', 'feature_types-Sanger-Nuclei', 'gene_ids-Sanger-Cells', 'feature_types-Sanger-Cells', 'gene_ids-Sanger-CD45', 'feature_types-Sanger-CD45'
    uns: 'cell_type_colors'
    obsm: 'X_pca', 'X_umap'

In [8]:
# Verify the subsetting: per-sample counts of pericyte (True) vs. other (False) cells
crosstab = pd.crosstab(
    index=vascular.obs["sample"],
    columns=vascular.obs["cell_type"] == "Pericytes",
)
crosstab

cell_type,False,True
sample,Unnamed: 1_level_1,Unnamed: 2_level_1
H0015_LA_new,342,495
H0015_LV,879,1889
H0015_RA,413,152
H0015_RV,428,1267
H0015_apex,1002,1508
...,...,...
HCAHeart8287124,10,45
HCAHeart8287125,33,7
HCAHeart8287126,364,552
HCAHeart8287127,441,357


In [9]:
# Total cells per column across all samples; the True total is the pericyte count
column_sum = crosstab.sum(axis=0)
column_sum

cell_type
False    117539
True      77856
dtype: int64

## Data categorization

In [10]:
# Sample-wise categorization: collect the distinct sample IDs present in the pericyte subset
sample_names = pericytes.obs['sample'].unique()
sample_names

['HCAHeart7606896', 'HCAHeart7656534', 'HCAHeart7656535', 'HCAHeart7656536', 'HCAHeart7656537', ..., 'H0037_RA_corr', 'H0037_LV', 'H0037_LA_corr', 'H0037_RV', 'H0037_septum']
Length: 145
Categories (145, object): ['H0015_LA_new', 'H0015_LV', 'H0015_RA', 'H0015_RV', ..., 'HCAHeart8287125', 'HCAHeart8287126', 'HCAHeart8287127', 'HCAHeart8287128']

In [11]:
# Export the pericytes of every sample to its own H5AD file.
output_dir = Path("../sample_h5ad")
# Make sure the target folder exists so the first write does not fail
# on a fresh checkout where it has not been created yet.
output_dir.mkdir(parents=True, exist_ok=True)

# Iterate over each unique sample name
for sample_name in sample_names:

    # Subset the pericytes to the cells belonging to this sample
    pericytes_sample_data = pericytes[pericytes.obs['sample'] == sample_name]

    # Save the subset of each sample as a separate H5AD file
    pericytes_sample_data.write_h5ad(output_dir / f"pericytes_{sample_name}.h5ad")

In [12]:
# Sanity check: read one of the exported per-sample files back from disk
HCAHeart7656534 = ad.read_h5ad("../sample_h5ad/pericytes_HCAHeart7656534.h5ad")

In [13]:
# Inspect the reloaded annotations; rows should be pericytes from sample HCAHeart7656534 only
HCAHeart7656534.obs

Unnamed: 0,NRP,age_group,cell_source,cell_type,donor,gender,n_counts,n_genes,percent_mito,percent_ribo,region,sample,scrublet_score,source,type,version,cell_states,Used
AAAGCAAAGTCCAGGA-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,5840.0,1963,0.122432,0.065068,LV,HCAHeart7656534,0.081779,Cells,DBD,V2,PC1_vent,Yes
AAAGTAGGTTTGTTGG-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,9205.0,2903,0.061706,0.075068,LV,HCAHeart7656534,0.089530,Cells,DBD,V2,PC3_str,Yes
AAATGCCGTATAATGG-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,10415.0,2866,0.073356,0.090831,LV,HCAHeart7656534,0.119266,Cells,DBD,V2,PC2_atria,Yes
AAGACCTGTATCGCAT-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,1859.0,729,0.144701,0.011296,LV,HCAHeart7656534,0.062885,Cells,DBD,V2,PC4_CMC-like,Yes
AAGCCGCTCTTCCTTC-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,6145.0,2132,0.082506,0.055980,LV,HCAHeart7656534,0.068564,Cells,DBD,V2,PC1_vent,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGCCGTAGCTCCCAG-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,4818.0,1727,0.077418,0.084267,LV,HCAHeart7656534,0.113475,Cells,DBD,V2,PC1_vent,Yes
TTGCGTCGTCTCGTTC-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,5398.0,1926,0.102260,0.086328,LV,HCAHeart7656534,0.147122,Cells,DBD,V2,PC1_vent,Yes
TTTACTGTCTCGTATT-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,7505.0,2564,0.059161,0.067422,LV,HCAHeart7656534,0.155556,Cells,DBD,V2,PC3_str,Yes
TTTACTGTCTTGGGTA-1-HCAHeart7656534,No,55-60,Sanger-Cells,Pericytes,D3,Male,12501.0,3448,0.064315,0.071834,LV,HCAHeart7656534,0.164733,Cells,DBD,V2,PC3_str,Yes
