In [1]:
import warnings 
warnings.simplefilter('ignore')

import scanpy as sc
import os
import muon as mu
import scparadise
import pandas as pd
import TOSICA
import numpy as np

In [2]:
# Read the annotated MuData object from disk
# NOTE(review): the file name says "unintegrated" - confirm this is the intended input
h5mu_file = 'Heart_3p_CITE/mdata_unintegrated_annotated.h5mu'
mdata = mu.read_h5mu(h5mu_file)

In [3]:
# Drop the cell types excluded from this benchmark with a single boolean mask
# (one MuData view instead of five chained ones)
excluded_celltypes = ['classical DC1',
                      'Neutrophil',
                      'plasmacytoid DC',
                      'ILC',
                      'proliferating T']
keep_cells = ~mdata.obs['celltype_l2'].isin(excluded_celltypes)
# Select RNA modality from the filtered MuData object (mdata)
adata = mdata[keep_cells].mod['rna'].copy()
# Subset anndata object to the marker genes listed in the 'genes' column of the CSV
genes = pd.read_csv('Heart_3p_CITE/genes_for_AI.csv')
adata = adata[:, genes['genes']].copy()
# The MuData object is no longer needed; release it
del mdata

In [4]:
# Sample IDs that make up the reference (training) dataset
lst_reference = [
    12, 13, 17, 27, 28,
    29, 30, 32, 39, 42,
]

In [5]:
# Build the training AnnData from the cells whose 'orig.ident' is a reference sample
is_reference_cell = adata.obs['orig.ident'].isin(lst_reference)
adata_train = adata[is_reference_cell].copy()

In [6]:
# Test sets that combine two samples; folder names are '<id>_<id>'
lst_test_1 = [
    '1_6',
    '2_7',
    '4_5',
    '8_9',
]
# Test sets that consist of a single sample; folder names are the sample ID
lst_test_2 = [
    '41',
    '34',
    '15',
    '33',
]
# Annotation levels (columns of adata.obs) to train and evaluate on
lst_annotations = [
    'celltype_l1',
    'celltype_l2',
]

In [7]:
# Create one report folder per test set.
# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders exist from a previous run.
for folder in lst_test_1 + lst_test_2:
    os.makedirs(os.path.join('Heart_3p_CITE/TOSICA/reports', folder), exist_ok=True)

In [8]:
# For every annotation level: train a TOSICA model on the reference samples,
# then predict each test set and save a classification report into its folder.
# NOTE(review): training runs 3 epochs but prediction loads 'model-0.pth' -
# presumably the checkpoint written after the first epoch; confirm this is the
# checkpoint that should be evaluated.
for i in lst_annotations:
    TOSICA.train(adata_train,
                 gmt_path = 'human_gobp',
                 label_name = i,
                 epochs = 3,
                 project = 'Heart_3p_CITE')
    # The report file name depends only on the annotation level
    file_save = 'report_test_TOSICA_' + i + '.csv'
    # Paired folders ('1_6') name two samples, single folders ('41') name one;
    # splitting on '_' handles both and does not assume single-digit IDs
    for folder in lst_test_1 + lst_test_2:
        sample_ids = [int(sample) for sample in folder.split('_')]
        # Create adata_test
        adata_test = adata[adata.obs['orig.ident'].isin(sample_ids)].copy()
        adata_test = TOSICA.pre(adata_test,
                                model_weight_path = 'Heart_3p_CITE/model-0.pth',
                                project = 'Heart_3p_CITE')
        # Compare the true labels (column i) with TOSICA's 'Prediction' column
        scparadise.scnoah.report_classif_full(adata_test,
                                              celltype = i,
                                              pred_celltype = 'Prediction',
                                              report_name = file_save,
                                              save_path = os.path.join('Heart_3p_CITE/TOSICA/reports/', folder),
                                              save_report=True)

cuda:0
Mask loaded!
Model builded!


[train epoch 0] loss: 0.393, acc: 0.848: 100%|███████████████████████████████████| 18519/18519 [02:17<00:00, 134.25it/s]
[valid epoch 0] loss: 0.040, acc: 0.989: 100%|█████████████████████████████████████| 7936/7936 [00:32<00:00, 247.24it/s]
[train epoch 1] loss: 0.038, acc: 0.990: 100%|███████████████████████████████████| 18519/18519 [02:15<00:00, 136.35it/s]
[valid epoch 1] loss: 0.023, acc: 0.994: 100%|█████████████████████████████████████| 7936/7936 [00:32<00:00, 240.65it/s]
[train epoch 2] loss: 0.024, acc: 0.994: 100%|███████████████████████████████████| 18519/18519 [02:14<00:00, 137.63it/s]
[valid epoch 2] loss: 0.016, acc: 0.996: 100%|█████████████████████████████████████| 7936/7936 [00:32<00:00, 244.78it/s]


Training finished!
cuda:0
0
10000
15191
Successfully saved report

cuda:0
0
10000
13765
Successfully saved report

cuda:0
0
9377
Successfully saved report

cuda:0
0
10000
12603
Successfully saved report

cuda:0
0
6115
Successfully saved report

cuda:0
0
10000
10142
Successfully saved report

cuda:0
0
6410
Successfully saved report

cuda:0
0
6470
Successfully saved report

cuda:0
Mask loaded!
Model builded!


[train epoch 0] loss: 1.046, acc: 0.643: 100%|███████████████████████████████████| 32314/32314 [03:58<00:00, 135.73it/s]
[valid epoch 0] loss: 0.349, acc: 0.880: 100%|███████████████████████████████████| 13849/13849 [00:56<00:00, 244.17it/s]
[train epoch 1] loss: 0.308, acc: 0.894: 100%|███████████████████████████████████| 32314/32314 [03:59<00:00, 134.96it/s]
[valid epoch 1] loss: 0.214, acc: 0.927: 100%|███████████████████████████████████| 13849/13849 [00:56<00:00, 243.14it/s]
[train epoch 2] loss: 0.230, acc: 0.922: 100%|███████████████████████████████████| 32314/32314 [03:58<00:00, 135.75it/s]
[valid epoch 2] loss: 0.174, acc: 0.939: 100%|███████████████████████████████████| 13849/13849 [00:55<00:00, 247.49it/s]


Training finished!
cuda:0
0
10000
15191
Successfully saved report

cuda:0
0
10000
13765
Successfully saved report

cuda:0
0
9377
Successfully saved report

cuda:0
0
10000
12603
Successfully saved report

cuda:0
0
6115
Successfully saved report

cuda:0
0
10000
10142
Successfully saved report

cuda:0
0
6410
Successfully saved report

cuda:0
0
6470
Successfully saved report



In [11]:
pip list

Package                   Version
------------------------- --------------
absl-py                   2.1.0
aiohappyeyeballs          2.3.4
aiohttp                   3.10.1
aiosignal                 1.3.1
alembic                   1.13.2
anndata                   0.10.8
anyio                     4.4.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
array_api_compat          1.8
arrow                     1.3.0
asttokens                 2.4.1
async-lru                 2.0.4
async-timeout             4.0.3
attrs                     24.2.0
Babel                     2.15.0
beautifulsoup4            4.12.3
bleach                    6.1.0
cached-property           1.5.2
cell-gears                0.0.2
certifi                   2024.7.4
cffi                      1.17.0
charset-normalizer        3.3.2
chex                      0.1.86
click                     8.1.7
cloudpickle               3.1.1
colorlog                  6.8.2
comm                      0.2.2
contextlib2       