In [1]:
import warnings 
warnings.simplefilter('ignore')

import scanpy as sc
import symphonypy as sp
import os
import scparadise
import pandas as pd

In [3]:
# Switch to the directory that holds the article scripts, so that the relative
# 'Mouse_aging_brain/...' paths used in the following cells resolve.
# The location can be overridden through the SCPARADISE_SCRIPTS_DIR environment
# variable; when it is unset the original hard-coded path is used.
os.chdir(os.environ.get('SCPARADISE_SCRIPTS_DIR',
                        '/mnt/c/Users/vadim/scRNA/scParadise/scripts_article'))

In [4]:
# Read the integrated dataset (path is relative to the working directory).
# NOTE(review): the original comment called this data "normalized", but the
# next cell stores X as 'counts' and normalizes it again - confirm what X holds.
adata_file = 'Mouse_aging_brain/adata.h5ad'
adata = sc.read_h5ad(adata_file)

In [5]:
# Split the data by sample ('orig.ident'): three samples form the training
# reference, six further samples each become their own test set.
sample_ids = adata.obs['orig.ident']
adata_train = adata[sample_ids.isin(['young2', 'old1', 'oldex1'])].copy()
(adata_test_1, adata_test_2, adata_test_3,
 adata_test_4, adata_test_5, adata_test_6) = (
    adata[sample_ids.isin([sample])].copy()
    for sample in ['old2', 'old4', 'oldex2', 'oldex4', 'young1', 'young4']
)

# For every subset: keep the current X in layers['counts'], then normalize,
# log-transform and store the result as .raw
for subset in (adata_train,
               adata_test_1, adata_test_2, adata_test_3,
               adata_test_4, adata_test_5, adata_test_6):
    subset.layers['counts'] = subset.X.copy()
    sc.pp.normalize_total(subset, target_sum=None)
    sc.pp.log1p(subset)
    subset.raw = subset

# Restrict the training data to the genes listed in the 'genes' column of the CSV
genes = pd.read_csv('Mouse_aging_brain/genes_for_AI.csv')
adata_train = adata_train[:, genes['genes']].copy()

In [6]:
# Harmony integration of the training samples.
# Flag every gene kept in adata_train as highly variable so PCA uses all of them.
adata_train.var['highly_variable'] = True
sc.pp.scale(adata_train, max_value=10)
sc.pp.pca(adata_train, use_highly_variable=True)
# Batch key is the sample of origin; Harmony is capped at 20 iterations
harmony_options = dict(key="orig.ident", verbose=True, max_iter_harmony=20)
sp.pp.harmony_integrate(adata_train, **harmony_options)

2025-05-15 15:28:12,721 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...


Harmony integration with harmonypy is preforming.


2025-05-15 15:28:18,475 - harmonypy - INFO - sklearn.KMeans initialization complete.
2025-05-15 15:28:18,556 - harmonypy - INFO - Iteration 1 of 20
2025-05-15 15:28:22,476 - harmonypy - INFO - Iteration 2 of 20
2025-05-15 15:28:27,125 - harmonypy - INFO - Iteration 3 of 20
2025-05-15 15:28:28,306 - harmonypy - INFO - Converged after 3 iterations


In [8]:
# Test samples keyed by sample name; the name doubles as the report sub-folder.
# Building both lists from one mapping keeps names and objects aligned.
test_sets = {
    'old2': adata_test_1,
    'old4': adata_test_2,
    'oldex2': adata_test_3,
    'oldex4': adata_test_4,
    'young1': adata_test_5,
    'young4': adata_test_6,
}
lst_test_folders = list(test_sets)
lst_test_adatas = list(test_sets.values())
# Reference annotation columns and the matching columns that receive predictions
lst_annotations = ['Celltype']
lst_predictions = ['pred_' + annotation for annotation in lst_annotations]

In [9]:
# For every test sample: map it onto the Harmony reference, transfer labels
# with kNN and save a classification report into the sample's own folder.
reports_root = 'Mouse_aging_brain/symphonypy_test/reports'
# The report file name does not depend on the annotation level; with more than
# one entry in lst_annotations later reports would overwrite earlier ones.
file_save = 'report_test_symphonypy_' + 'celltype_l1' + '.csv'
for adata_test, folder in zip(lst_test_adatas, lst_test_folders):
    # Folder that receives this sample's report; exist_ok=True lets the cell be
    # re-run without raising FileExistsError
    report_dir = os.path.join(reports_root, folder)
    os.makedirs(report_dir, exist_ok=True)
    # Mapping Harmony coordinates
    sp.tl.map_embedding(adata_query = adata_test,
                        adata_ref = adata_train)
    # Cell types prediction
    sp.tl.transfer_labels_kNN(
        adata_query = adata_test,
        adata_ref = adata_train,
        ref_labels = lst_annotations,
        query_labels = lst_predictions
    )
    # Create and save classification report of selected annotation level
    for annotation, prediction in zip(lst_annotations, lst_predictions):
        scparadise.scnoah.report_classif_full(adata_test,
                                              celltype = annotation,
                                              pred_celltype = prediction,
                                              report_name = file_save,
                                              save_path = report_dir,
                                              save_report=True)

Successfully saved report

Successfully saved report

Successfully saved report

Successfully saved report

Successfully saved report

Successfully saved report



In [9]:
pip list

Package                   Version
------------------------- --------------
absl-py                   2.1.0
adjustText                1.3.0
aiobotocore               2.5.4
aiohappyeyeballs          2.4.2
aiohttp                   3.8.4
aioitertools              0.12.0
aiosignal                 1.3.1
airr                      1.5.1
alembic                   1.13.3
anndata                   0.9.1
annoy                     1.17.3
anyio                     3.6.2
appdirs                   1.4.4
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
array_api_compat          1.8
arrow                     1.2.3
asciitree                 0.3.3
asttokens                 2.4.1
async-lru                 2.0.4
async-timeout             4.0.2
attrs                     23.1.0
awkward                   2.7.1
awkward_cpp               42
babel                     2.16.0
bamnostic                 1.1.10
bbknn                     1.6.0
bcrypt                    4.2.1
beautifulsoup4            