In [1]:
# Silence every Python warning for the rest of the session so that
# warning text does not end up in the cell outputs
import warnings 
warnings.simplefilter('ignore')

import celltypist
import scparadise
import scanpy as sc
import pandas as pd
import os

In [2]:
# Load the unintegrated AnnData object from disk
# (normalization and log-transformation are applied in a later cell)
adata = sc.read_h5ad('Mouse_aging_brain/adata_unintegrated.h5ad')

In [3]:
# Restrict the AnnData object to the marker genes listed in the
# 'genes' column of the gene table
genes = pd.read_csv('Mouse_aging_brain/genes_for_AI.csv')
adata = adata[:, genes['genes']].copy()

In [4]:
# Normalize expression to 10000 counts per cell and log1p-transform it
# (operating condition of the celltypist tool)
# NOTE(review): both calls appear to modify `adata` in place (their return
# values are not assigned), so re-running this cell without reloading the data
# would apply the transformation twice - confirm before re-executing.
# NOTE(review): this runs after the gene subset above, so per-cell totals are
# computed over the selected genes only - confirm this is intended.
sc.pp.normalize_total(adata, target_sum = 10000)
sc.pp.log1p(adata)

In [5]:
# Build the training dataset from the samples reserved for model fitting
lst_train = ['old1', 'oldex1', 'oldex2', 'young2']
is_train = adata.obs['orig.ident'].isin(lst_train)
adata_train = adata[is_train].copy()

In [6]:
# Train and save model
model_path = 'Mouse_aging_brain/celltypist/models/Celltype.pkl'
# Create the output folder up front: a missing directory would otherwise only
# surface as an error at the save step, after training has already finished
os.makedirs(os.path.dirname(model_path), exist_ok = True)
# Training step (feature selection is disabled, so the genes already selected
# in adata_train are used as they are)
model = celltypist.train(adata_train, labels = 'Celltype', feature_selection = False, n_jobs=12)
# Save the model
model.write(model_path)

🍳 Preparing data before training
🔬 Input data has 27038 cells and 668 genes
⚖️ Scaling input data
🏋️ Training data using logistic regression
✅ Model training done!


In [7]:
# Samples (values of adata.obs['orig.ident']) used as test datasets;
# each one is evaluated separately and gets its own report folder
lst_test = ['young4', 'old2', 'old4', 'young1', 'oldex4']

In [9]:
# Annotate every test sample with the trained model and save one
# classification report per sample
model_path = 'Mouse_aging_brain/celltypist/models/Celltype.pkl'
for folder in lst_test:
    report_dir = os.path.join('Mouse_aging_brain/celltypist/reports', folder)
    # exist_ok=True keeps this cell re-runnable: without it the second run
    # stops with FileExistsError because the folder was created by the first
    os.makedirs(report_dir, exist_ok = True)
    adata_test = adata[adata.obs['orig.ident'].isin([folder])].copy()
    # Prediction step using the pretrained model
    predictions = celltypist.annotate(adata_test, model = model_path, majority_voting = True)
    adata_test = predictions.to_adata()
    # Compare the reference labels with the majority-voting predictions
    # and write the report into the sample's folder
    scparadise.scnoah.report_classif_full(adata_test, 
                                          celltype = 'Celltype', 
                                          pred_celltype = 'majority_voting', 
                                          report_name = 'report_celltypist_celltype_l1.csv',
                                          save_path = report_dir,
                                          save_report = True)

🔬 Input data has 15019 cells and 668 genes
🔗 Matching reference genes in the model
🧬 668 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, will construct one before the over-clustering
⛓️ Over-clustering input data with resolution set to 10
🗳️ Majority voting the predictions
✅ Majority voting done!
🔬 Input data has 5932 cells and 668 genes
🔗 Matching reference genes in the model
🧬 668 features used for prediction
⚖️ Scaling input data


Successfully saved report



🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, will construct one before the over-clustering
⛓️ Over-clustering input data with resolution set to 10
🗳️ Majority voting the predictions
✅ Majority voting done!
🔬 Input data has 5497 cells and 668 genes
🔗 Matching reference genes in the model


Successfully saved report



🧬 668 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, will construct one before the over-clustering
⛓️ Over-clustering input data with resolution set to 10
🗳️ Majority voting the predictions
✅ Majority voting done!


Successfully saved report



🔬 Input data has 7393 cells and 668 genes
🔗 Matching reference genes in the model
🧬 668 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, will construct one before the over-clustering
⛓️ Over-clustering input data with resolution set to 10
🗳️ Majority voting the predictions
✅ Majority voting done!


Successfully saved report



🔬 Input data has 11664 cells and 668 genes
🔗 Matching reference genes in the model
🧬 668 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Can not detect a neighborhood graph, will construct one before the over-clustering
⛓️ Over-clustering input data with resolution set to 10
🗳️ Majority voting the predictions
✅ Majority voting done!


Successfully saved report



In [10]:
# Display the session information (package versions) for reproducibility
import session_info
session_info.show()