In [1]:
import sys
import os 
import torch

from torch import nn
from torch.utils.data import DataLoader
import numpy as np
sys.path.append("/home/local/kyeonghunjeong_920205/nipa_bu/COVID19/3.analysis/9.MIL/scAMIL_cell/scMILD")
import argparse
import json
import random
from src.utils import *
from src.dataset import *
from src.model import *
from datetime import datetime
from tqdm import tqdm
from termcolor import colored
import scanpy as sc
from scipy import sparse
import modin.pandas as pd
import ray
ray.init()
from sklearn.metrics import roc_auc_score, accuracy_score, precision_recall_curve, auc
from sklearn.model_selection import train_test_split


2024-06-13 11:19:27,156	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


In [2]:
# Dataset-specific sub-directory and the locations derived from it.
dir_path = 'UC/'
base_path = '../../data/' + dir_path
# dir_path already ends with '/', so this value contains a double slash.
target_dir = base_path + '/AE/'


In [3]:
# Load the fibroblast AnnData object and report its shape before filtering.
adata = sc.read_h5ad(f'{base_path}/Fib.h5ad')
print(adata.shape)
# Keep only cells whose 'Health' value is 'Healthy' or 'Inflamed'.
# Subsetting an AnnData returns a view; `.copy()` materialises it so the
# column assignments below write to a real object instead of triggering an
# implicit view-to-copy conversion.
adata = adata[adata.obs['Health'].isin(('Healthy','Inflamed'))].copy()
# Numeric disease label: Healthy -> 0, Inflamed -> 1.
mapping = {'Healthy': 0, 'Inflamed': 1}
adata.obs['disease_numeric'] = adata.obs['Health'].map(mapping)
# Integer code per distinct 'Subject' value; the uniques array is discarded.
adata.obs['sample_id_numeric'], _ = pd.factorize(adata.obs['Subject'])
print(adata.shape)

(31872, 2000)
(18725, 2000)




In [5]:
# Index of the GPU to use when CUDA is available; otherwise run on the CPU.
# NOTE(review): the index is hard-coded — confirm the host actually exposes this device.
device_num = 6
if torch.cuda.is_available():
    device = torch.device(f'cuda:{device_num}')
else:
    device = torch.device('cpu')
print("INFO: Using device: {}".format(device))


INFO: Using device: cuda:6


In [3]:

def save_cell_scores(saved_model_path, exp, test_dataset, label_encoder, device, suffix=None):
    """Score every test instance with the saved teacher model and write a CSV.

    The encoder and teacher checkpoints of experiment ``exp`` are loaded from
    ``saved_model_path``. All instances of the test dataset are encoded, the
    first ``model_teacher.input_dims`` feature columns are passed through
    ``model_teacher.attention_module`` and one row per instance is written to
    ``cell_score_{exp}.csv`` (or ``cell_score_{exp}_{suffix}.csv`` when
    ``suffix`` is given) in the current working directory.

    Args:
        saved_model_path: Directory containing ``model_teacher_exp{exp}.pt``
            and ``model_encoder_exp{exp}.pt``.
        exp: Experiment identifier used in checkpoint and output file names.
        test_dataset: Dataset handed to ``update_instance_labels_with_bag_labels``;
            the returned object must expose ``data``, ``instance_labels`` and
            ``bag_labels`` tensors.
        label_encoder: Object whose ``inverse_transform`` maps the instance
            labels to the values stored in the ``cell_type`` column.
        device: Device the checkpoints and the input data are moved to.
        suffix: Optional tag appended to the output file name.

    Returns:
        int: Always 0.
    """
    instance_test_dataset = update_instance_labels_with_bag_labels(test_dataset, device)

    # NOTE(review): torch.load unpickles complete model objects here, so only
    # load checkpoints from trusted sources. Recent PyTorch releases default to
    # weights_only=True, which rejects full-module pickles — confirm against
    # the installed version.
    model_teacher = torch.load(f'{saved_model_path}/model_teacher_exp{exp}.pt', map_location=device)
    model_encoder = torch.load(f'{saved_model_path}/model_encoder_exp{exp}.pt', map_location=device)

    model_encoder.eval()
    model_teacher.eval()
    with torch.no_grad():
        inputs = instance_test_dataset.data.clone().detach().float().to(device)
        # Only the leading `input_dims` columns of the encoder output are scored.
        features = model_encoder(inputs)[:, :model_teacher.input_dims]
        cell_score_teacher = model_teacher.attention_module(features).squeeze(0)

    features_np = features.cpu().numpy()
    cell_score_teacher_np = cell_score_teacher.cpu().numpy()
    instance_labels_np = instance_test_dataset.instance_labels.cpu().detach().numpy()

    df = pd.DataFrame(features_np, columns=[f'feature_{i}' for i in range(features_np.shape[1])])
    df['cell_type'] = label_encoder.inverse_transform(instance_labels_np)
    df['cell_score'] = cell_score_teacher_np
    df['bag_labels'] = instance_test_dataset.bag_labels.cpu().detach().numpy()
    df['instance_labels'] = instance_labels_np

    # Min-max scale the scores to [0, 1]. When every score is identical (or the
    # dataset is empty) the range is zero; write zeros instead of dividing 0/0.
    scores = df['cell_score'].values
    score_range = (scores.max() - scores.min()) if scores.size else 0
    if scores.size == 0 or score_range == 0:
        df['cell_score_minmax'] = np.zeros_like(scores)
    else:
        df['cell_score_minmax'] = (scores - scores.min()) / score_range

    file_name = f'cell_score_{exp}.csv' if suffix is None else f'cell_score_{exp}_{suffix}.csv'
    df.to_csv(file_name, index=False)

    return 0

def save_test_data(exp, sample_labels, adata):
    """Write the held-out test samples of split ``exp`` and their cell metadata.

    ``sample_labels`` is split with ``train_test_split`` using ``exp`` as the
    random seed and stratifying on ``disease_numeric``. The test portion is
    written to ``test_set_barcodes_{exp}.csv`` and the ``obs`` table of all
    cells in ``adata`` whose ``sample_id_numeric`` belongs to a test sample is
    written to ``obs_{exp}.csv``. Both files go to the current working directory.

    Args:
        exp: Experiment identifier; used as ``random_state`` and in file names.
        sample_labels: Table with ``disease_numeric`` and ``sample_id_numeric``
            columns.
        adata: AnnData-like object whose ``obs`` has a ``sample_id_numeric`` column.
    """
    # Fractions listed as [0.5, 0.25, 0.25]; only the last entry is used here as
    # the test fraction. The other portion of the split is not needed for the
    # export, and with an integer seed the test split does not depend on it.
    split_ratio = [0.5, 0.25, 0.25]
    _, test_set = train_test_split(
        sample_labels,
        test_size=split_ratio[2],
        random_state=exp,
        stratify=sample_labels['disease_numeric'],
    )
    test_set.to_csv(f"test_set_barcodes_{exp}.csv")

    in_test = adata.obs['sample_id_numeric'].isin(test_set['sample_id_numeric'])
    test_data = adata[in_test]
    test_data.obs.to_csv(f"obs_{exp}.csv")


In [7]:
# Result directories whose checkpoints are scored below.
# NOTE(review): absolute, user-specific paths — consider deriving them from a
# configurable results root.
saved_model_paths = [
        #'/home/local/kyeonghunjeong_920205/nipa_bu/COVID19/3.analysis/9.MIL/scAMIL_cell/scMILD/results/UC_hyper2_model_ae_ed128_md16_lr0.0001_100_0.1_1_15__0613',
       # '/home/local/kyeonghunjeong_920205/nipa_bu/COVID19/3.analysis/9.MIL/scAMIL_cell/scMILD/results/UC_hyper2_baseline_model_ae_ed128_md16_lr0.0001_100_0.1_1_15__0613',
       '/home/local/kyeonghunjeong_920205/nipa_bu/COVID19/3.analysis/9.MIL/scAMIL_cell/scMILD/results/UC_hyper2_not_op_model_ae_ed128_md16_lr0.0001_100_0.1_1_15__0613'
    ]

for saved_model_path in saved_model_paths:
    # The suffix depends only on the directory name, so compute it once per
    # directory. Distinct suffixes keep the CSVs of different model variants
    # from overwriting each other in the working directory.
    if 'not_op' in saved_model_path:
        suffix = 'not_op'
    elif 'baseline' in saved_model_path:
        suffix = 'baseline'
    else:
        suffix = None

    for exp in range(1, 9):
        print(f'Experiment {exp}')
        _, _, test_dataset, label_encoder = load_dataset_and_preprocessors(base_path, exp, device)
        save_cell_scores(saved_model_path, exp, test_dataset, label_encoder, device, suffix)
        # Release cached GPU memory before the next experiment is loaded.
        torch.cuda.empty_cache()

Experiment 1




Experiment 2




Experiment 3




Experiment 4




Experiment 5




Experiment 6




Experiment 7




Experiment 8




*** SIGTERM received at time=1718279921 on cpu 54 ***
PC: @     0x7fe48bfbc68e  (unknown)  epoll_wait
    @     0x7fe48c1fd420  (unknown)  (unknown)
[2024-06-13 11:58:41,112 E 875141 875141] logging.cc:361: *** SIGTERM received at time=1718279921 on cpu 54 ***
[2024-06-13 11:58:41,112 E 875141 875141] logging.cc:361: PC: @     0x7fe48bfbc68e  (unknown)  epoll_wait
[2024-06-13 11:58:41,112 E 875141 875141] logging.cc:361:     @     0x7fe48c1fd420  (unknown)  (unknown)


: 

In [None]:
# Export the filtered cell metadata and the AnnData object into the model directory.
# NOTE(review): `saved_model_path` is the variable left over from the scoring
# loop, so this cell only works after that loop has run — confirm the intended
# output directory.
adata.obs.to_csv(f"{saved_model_path}/meta.csv")
adata.write(filename=f"{saved_model_path}/anndata_proc.h5ad")
# One row per distinct (disease_numeric, sample_id_numeric) combination.
sample_labels = adata.obs.loc[:, ['disease_numeric', 'sample_id_numeric']].drop_duplicates()