# Importing packages

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
from scipy.sparse import csr_matrix

# Scanpy log levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
# Print the versions of the core packages used for this run.
sc.logging.print_header()
# Figure defaults: 80 dpi on a white background.
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
)

  from .autonotebook import tqdm as notebook_tqdm


scanpy==1.9.8 anndata==0.10.5.post1 umap==0.5.5 numpy==1.26.4 scipy==1.12.0 pandas==2.2.1 scikit-learn==1.4.1.post1 statsmodels==0.14.1 igraph==0.10.8 louvain==0.8.1 pynndescent==0.5.11


# Importing data

In [2]:
# Load the raw heart vascular dataset from the project's Data folder
# (path is relative to the notebook's working directory).
vascular = ad.read_h5ad("../Data/hca_heart_vascular_raw.h5ad")
# Display the AnnData summary (dimensions and available annotations).
vascular

AnnData object with n_obs × n_vars = 195395 × 33538
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used'
    var: 'gene_ids-Harvard-Nuclei', 'feature_types-Harvard-Nuclei', 'gene_ids-Sanger-Nuclei', 'feature_types-Sanger-Nuclei', 'gene_ids-Sanger-Cells', 'feature_types-Sanger-Cells', 'gene_ids-Sanger-CD45', 'feature_types-Sanger-CD45'
    uns: 'cell_type_colors'
    obsm: 'X_pca', 'X_umap'

### Looking at overall counts based on 'cell_type'

In [3]:
# Cell counts per sample (rows) broken down by annotated cell type (columns).
pd.crosstab(
    index=vascular.obs['sample'],
    columns=vascular.obs['cell_type'],
)

cell_type,Endothelial,Mesothelial,Pericytes,Smooth_muscle_cells
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
H0015_LA_new,253,0,495,89
H0015_LV,523,0,1889,356
H0015_RA,276,0,152,137
H0015_RV,265,0,1267,163
H0015_apex,739,0,1508,263
...,...,...,...,...
HCAHeart8287124,9,0,45,1
HCAHeart8287125,26,0,7,7
HCAHeart8287126,327,0,552,37
HCAHeart8287127,392,0,357,49


### Selecting pericytes only

In [4]:
# Boolean mask over cells: True where the annotated cell type is "Pericytes".
is_pericyte = vascular.obs['cell_type'] == "Pericytes"
# Row-subset the dataset with the mask; the result is a view of `vascular`.
pericytes = vascular[is_pericyte]
pericytes

View of AnnData object with n_obs × n_vars = 77856 × 33538
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used'
    var: 'gene_ids-Harvard-Nuclei', 'feature_types-Harvard-Nuclei', 'gene_ids-Sanger-Nuclei', 'feature_types-Sanger-Nuclei', 'gene_ids-Sanger-Cells', 'feature_types-Sanger-Cells', 'gene_ids-Sanger-CD45', 'feature_types-Sanger-CD45'
    uns: 'cell_type_colors'
    obsm: 'X_pca', 'X_umap'

### Checking the subsetting

In [5]:
# Per-sample counts of pericytes (True) versus all other cell types (False),
# computed on the full dataset so it can be compared with the subset.
crosstab = pd.crosstab(
    index=vascular.obs['sample'],
    columns=vascular.obs['cell_type'] == 'Pericytes',
)
crosstab

cell_type,False,True
sample,Unnamed: 1_level_1,Unnamed: 2_level_1
H0015_LA_new,342,495
H0015_LV,879,1889
H0015_RA,413,152
H0015_RV,428,1267
H0015_apex,1002,1508
...,...,...
HCAHeart8287124,10,45
HCAHeart8287125,33,7
HCAHeart8287126,364,552
HCAHeart8287127,441,357


### Checking the total cell counts (pericytes vs. other cell types)

In [6]:
# Total number of cells per crosstab column: the `True` column counts
# pericytes, the `False` column counts every other cell type.
column_sum = crosstab.sum()

# Enforce the check instead of eyeballing it: the pericyte total must equal the
# number of cells in the `pericytes` subset, otherwise the subsetting is wrong.
assert column_sum.loc[True] == pericytes.n_obs, (
    f"crosstab counts {column_sum.loc[True]} pericytes, "
    f"but the subset holds {pericytes.n_obs} cells"
)
column_sum

cell_type
False    117539
True      77856
dtype: int64

## Data categorization

### Sample-wise categorization

In [7]:
# Sample label of every pericyte cell.
pericyte_samples = pericytes.obs['sample']
# Distinct sample labels, in order of first appearance.
sample_names = pericyte_samples.unique()
sample_names

['HCAHeart7606896', 'HCAHeart7656534', 'HCAHeart7656535', 'HCAHeart7656536', 'HCAHeart7656537', ..., 'H0037_RA_corr', 'H0037_LV', 'H0037_LA_corr', 'H0037_RV', 'H0037_septum']
Length: 145
Categories (145, object): ['H0015_LA_new', 'H0015_LV', 'H0015_RA', 'H0015_RV', ..., 'HCAHeart8287125', 'HCAHeart8287126', 'HCAHeart8287127', 'HCAHeart8287128']

### Making a loop for h5ad file generation

In [8]:
# Directory that receives one pericyte H5AD file per sample. It is given
# relative to the notebook (like the "../Data" input path) instead of as an
# absolute path on an external volume, so the notebook also runs elsewhere.
# NOTE(review): assumes the notebook sits one level below the project root, so
# this resolves to the same folder the previous absolute path pointed at — confirm.
pc_sample_dir = Path("../PC-Sample_files")
# Create the folder on a fresh checkout; a no-op when it already exists.
pc_sample_dir.mkdir(parents=True, exist_ok=True)

# Iterate over each unique sample name
for sample_name in sample_names:

    # Subsetting data based on the sample
    pericytes_sample_data = pericytes[pericytes.obs['sample'] == sample_name]

    # Saving subset of each sample as separate H5AD file
    pericytes_sample_data.write_h5ad(pc_sample_dir / f"PC_{sample_name}.h5ad")

### Checking anndata files

In [40]:
# Read back one of the exported per-sample files to confirm the export worked.
H0015_apex = ad.read_h5ad("../PC-Sample_files/PC_H0015_apex.h5ad")
# Display the AnnData summary; n_obs should match this sample's pericyte count
# in the crosstab above.
H0015_apex

AnnData object with n_obs × n_vars = 1508 × 33538
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used'
    var: 'gene_ids-Harvard-Nuclei', 'feature_types-Harvard-Nuclei', 'gene_ids-Sanger-Nuclei', 'feature_types-Sanger-Nuclei', 'gene_ids-Sanger-Cells', 'feature_types-Sanger-Cells', 'gene_ids-Sanger-CD45', 'feature_types-Sanger-CD45'
    uns: 'cell_type_colors'
    obsm: 'X_pca', 'X_umap'