In [1]:
import warnings 
warnings.simplefilter('ignore')

import scanpy as sc
import scparadise
import muon as mu
import pandas as pd
import os

In [3]:
# Load the annotated MuData object; the file name marks it as unintegrated.
# NOTE(review): whether the data is already normalized is not visible here - confirm.
mdata = mu.read_h5mu('Heart_3p_CITE/mdata_unintegrated_annotated.h5mu')

In [3]:
# Drop cells annotated with the listed celltype_l2 labels, then keep only the
# RNA modality of the MuData object as a standalone AnnData (adata).
# One boolean mask replaces five successive subsetting steps (views of views);
# cells with a missing celltype_l2 label are kept, as with the chained `!=` tests.
EXCLUDED_CELLTYPES_L2 = [
    'classical DC1',
    'Neutrophil',
    'plasmacytoid DC',
    'ILC',
    'proliferating T',
]
keep_mask = ~mdata.obs['celltype_l2'].isin(EXCLUDED_CELLTYPES_L2)
# .copy() detaches the RNA AnnData from the filtered view before mdata is deleted
adata = mdata[keep_mask].mod['rna'].copy()
del mdata

In [4]:
# Keep only the variables (genes) listed in the 'genes' column of the marker-gene CSV
genes = pd.read_csv('Heart_3p_CITE/genes_for_AI.csv')
marker_genes = genes['genes']
adata = adata[:, marker_genes].copy()

In [5]:
# Sample IDs to keep in the reference (training) dataset.
# They are matched against adata.obs['orig.ident'] when adata_train is built.
lst_reference = [12, 13, 17, 27, 28, 29, 30, 32, 39, 42]

In [6]:
# Build adata_train from the cells whose 'orig.ident' is one of the reference samples.
# NOTE(review): lst_reference holds 10 sample IDs, whereas this step was previously
# described as "8 samples of 8 donors" (dataset: 8 donors, 24 samples) - confirm the count.
is_reference_cell = adata.obs['orig.ident'].isin(lst_reference)
adata_train = adata[is_reference_cell].copy()

In [7]:
# Balance the training dataset with scnoah.balance.
# Only the celltype_l1 and celltype_l2 annotation columns are passed (no l3 level);
# 'orig.ident' identifies the sample of each cell.
balance_kwargs = {
    'sample': 'orig.ident',
    'celltype_l1': 'celltype_l1',
    'celltype_l2': 'celltype_l2',
}
adata_balanced = scparadise.scnoah.balance(adata_train, **balance_kwargs)

Successfully undersampled cell types: Capillary endothelial, Macrophage, Arterial endothelial, SMC, Pericyte, APOD+ fibroblast, CD55+ fibroblast

Successfully oversampled cell types: Venous endothelial, POSTN+FAP+ fibroblast, classical DC2, Myofibroblast, CD4+ T, CD8+ T, CD16+ Mono, ATF5+ fibroblast, NK CD56dim, Muscle, Neuron, Mast, Lymphatic endothelial, proliferating myeloid, NK CD56bright


In [8]:
# Fit the scAdam model on the balanced dataset, tracking balanced accuracy and
# accuracy during training. The model is saved under the name
# 'model_Heart_scAdam_default' with an empty `path` argument.
train_kwargs = {
    'path': '',
    'model_name': 'model_Heart_scAdam_default',
    'celltype_l1': 'celltype_l1',
    'celltype_l2': 'celltype_l2',
    'eval_metric': ['balanced_accuracy', 'accuracy'],
}
scparadise.scadam.train(adata_balanced, **train_kwargs)

Successfully saved genes names for training model

Successfully saved dictionary of dataset annotations

Train dataset contains: 57835 cells, it is 90.0 % of input dataset
Test dataset contains: 6427 cells, it is 10.0 % of input dataset

Accelerator: cuda
Start training
epoch 0  | loss: 2.09136 | train_balanced_accuracy: 0.47135 | train_accuracy: 0.57    | valid_balanced_accuracy: 0.46828 | valid_accuracy: 0.56675 |  0:00:03s
epoch 1  | loss: 0.98051 | train_balanced_accuracy: 0.76756 | train_accuracy: 0.78597 | valid_balanced_accuracy: 0.76807 | valid_accuracy: 0.78684 |  0:00:05s
epoch 2  | loss: 0.68525 | train_balanced_accuracy: 0.83105 | train_accuracy: 0.84126 | valid_balanced_accuracy: 0.82627 | valid_accuracy: 0.83709 |  0:00:08s
epoch 3  | loss: 0.58466 | train_balanced_accuracy: 0.86852 | train_accuracy: 0.87379 | valid_balanced_accuracy: 0.86547 | valid_accuracy: 0.87062 |  0:00:11s
epoch 4  | loss: 0.53039 | train_balanced_accuracy: 0.89579 | train_accuracy: 0.8995  | valid

In [9]:
# Test groups; each entry is also used as the name of that group's report folder.
# lst_test_1: two sample IDs joined by '_' (both samples are evaluated together).
# lst_test_2: a single sample ID per entry, stored as a string.
lst_test_1 = ['1_6', '2_7', '4_5', '8_9']
lst_test_2 = ['41', '34', '15', '33']

In [10]:
REPORTS_ROOT = 'Heart_3p_CITE/reports_model_Heart_scAdam_default'
MODEL_PATH = 'model_Heart_scAdam_default'


def _sample_ids(folder):
    """Return the integer sample IDs encoded in a test-group name.

    '1_6' -> [1, 6] and '41' -> [41]. Splitting on '_' (instead of reading
    characters 0 and 2) also handles multi-digit IDs such as '12_34'.
    """
    return [int(part) for part in folder.split('_')]


def _evaluate_test_group(adata, folder):
    """Predict annotations for one test group and save its classification reports.

    Parameters
    ----------
    adata
        AnnData object containing all samples; cells are selected by
        ``obs['orig.ident']``.
    folder
        Test-group name (e.g. '1_6' or '41'); also the report sub-folder name.

    Returns
    -------
    The test subset returned by ``scparadise.scadam.predict``.

    Raises
    ------
    ValueError
        If no cell in ``adata`` belongs to the requested samples.
    """
    # Forward slashes keep the save path identical across operating systems
    report_dir = os.path.join(REPORTS_ROOT, folder).replace("\\", "/")
    # exist_ok=True lets the cell be re-run without failing on existing folders
    os.makedirs(report_dir, exist_ok=True)

    sample_ids = _sample_ids(folder)
    adata_test = adata[adata.obs['orig.ident'].isin(sample_ids)].copy()
    if adata_test.n_obs == 0:
        raise ValueError(
            f"No cells found for samples {sample_ids} in obs['orig.ident']"
        )

    # Predict annotation levels using the pretrained scadam model
    adata_test = scparadise.scadam.predict(adata_test, path_model=MODEL_PATH)

    # Create and save one classification report per annotation level
    for level in ('celltype_l1', 'celltype_l2'):
        scparadise.scnoah.report_classif_full(
            adata_test,
            celltype=level,
            pred_celltype=f'pred_{level}',
            report_name=f'report_test_model_scAdam_default_{level}.csv',
            save_path=report_dir,
            save_report=True,
        )
    return adata_test


# Paired-sample groups first, then single-sample groups (same order as before)
for folder in lst_test_1 + lst_test_2:
    adata_test = _evaluate_test_group(adata, folder)

Successfully loaded list of genes used for training model

Successfully loaded dictionary of dataset annotations

Successfully loaded model

Successfully added predicted celltype_l1 and cell type probabilities
Successfully added predicted celltype_l2 and cell type probabilities
Successfully saved report

Successfully saved report

Successfully loaded list of genes used for training model

Successfully loaded dictionary of dataset annotations

Successfully loaded model

Successfully added predicted celltype_l1 and cell type probabilities
Successfully added predicted celltype_l2 and cell type probabilities
Successfully saved report

Successfully saved report

Successfully loaded list of genes used for training model

Successfully loaded dictionary of dataset annotations

Successfully loaded model

Successfully added predicted celltype_l1 and cell type probabilities
Successfully added predicted celltype_l2 and cell type probabilities
Successfully saved report

Successfully saved report

S

In [14]:
# Show session information so the software environment of this run is recorded
# alongside the results (output of session_info.show()).
import session_info
session_info.show()