In [1]:
import os
import torch
import pandas as pd
import scanpy as sc

In [2]:
import MVAADT

In [3]:
"""
Sets the device to GPU if available, otherwise defaults to CPU.
Also sets the environment variable 'R_HOME' to the specified path.

- `device`: A torch.device object set to 'cuda:1' if a GPU is available, otherwise 'cpu'.
- `os.environ['R_HOME']`: Sets the R_HOME environment variable to the specified path for R installation.
"""
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
os.environ['R_HOME'] = '/home/zxx/miniforge3/envs/MDI/lib/R'

In [4]:
# 导入必要的库
import os
import torch
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

# Load both modalities from the dataset folder
file_fold = '/home/zxx/MVAADT/data/Dataset1_Lymph_Node1/' # replace with your own file path

adata_omics1, adata_omics2 = (
    sc.read_h5ad(file_fold + h5ad_name)
    for h5ad_name in ('adata_RNA.h5ad', 'adata_ADT.h5ad')
)

# Make duplicated variable names unique (in place) for each modality
adata_omics1.var_names_make_unique()
adata_omics2.var_names_make_unique()

# Report the shape of each modality, one per line
print(adata_omics1.shape, adata_omics2.shape, sep='\n')


(3484, 18085)
(3484, 31)


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [5]:
from MVAADT.preprocess import fix_seed

# Data-type tag passed to graph construction and training in later cells
data_type = '10x'

# Seed the run through the package helper so results are repeatable
random_seed = 2050
fix_seed(random_seed)

In [6]:
from MVAADT.preprocess import clr_normalize_each_cell, pca


# RNA
# Drop genes detected in fewer than 10 cells.
sc.pp.filter_genes(adata_omics1, min_cells=10)
# Flag the top 3000 highly variable genes. This runs before normalisation
# because the "seurat_v3" flavor expects raw counts (per the scanpy docs).
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
sc.pp.scale(adata_omics1)

# Restrict to the flagged genes and store the package's PCA output as the RNA
# feature matrix. n_comps is the protein feature count minus one, the same
# expression used for the protein PCA below.
adata_omics1_high =  adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = pca(adata_omics1_high, n_comps=adata_omics2.n_vars-1)

# Protein
# NOTE(review): clr_normalize_each_cell is a package helper; presumably a
# per-cell centred log-ratio transform -- confirm in MVAADT.preprocess.
adata_omics2 = clr_normalize_each_cell(adata_omics2)
sc.pp.scale(adata_omics2)
adata_omics2.obsm['feat'] = pca(adata_omics2, n_comps=adata_omics2.n_vars-1)

# Show the data dimensions
print(adata_omics1.shape)
print(adata_omics2.shape)

(3484, 17954)
(3484, 31)


In [7]:
from MVAADT.preprocess import construct_neighbor_graph

# Build the graph inputs from both modalities; the result is handed to the
# trainer in the next cell.
data = construct_neighbor_graph(
    adata_omics1,
    adata_omics2,
    datatype=data_type,
)

cell_position_omics1: (3484, 2)
adata_omics1.uns['adj_spatial']:           x     y  value
0         0   193    1.0
1         0  3051    1.0
2         0  1490    1.0
3         1   781    1.0
4         1  3088    1.0
...     ...   ...    ...
10447  3482  1802    1.0
10448  3482    38    1.0
10449  3483  2974    1.0
10450  3483  1935    1.0
10451  3483  2264    1.0

[10452 rows x 3 columns]


In [8]:
from MVAADT.MVAADT import Train_MVAADT

# Instantiate the trainer on the prepared inputs and the selected device
model = Train_MVAADT(
    data,
    datatype=data_type,
    device=device,
)

# Fit the model; the returned mapping is indexed by embedding name below
output = model.train()

  return torch.sparse.FloatTensor(indices, values, shape)
  self.alpha = F.softmax(torch.squeeze(self.vu) + 1e-6)
100%|██████████| 800/800 [00:16<00:00, 47.72it/s]


Model training finished!



In [9]:
# Work on a copy of the RNA object and attach a copy of each learned embedding
adata = adata_omics1.copy()
for emb_key in ('emb_latent_omics1', 'emb_latent_omics2', 'GAN_Align'):
    adata.obsm[emb_key] = output[emb_key].copy()

In [10]:
from MVAADT.utils import clustering

# Clustering backend: one of mclust, leiden, and louvain
tool = 'mclust'
clustering(
    adata,
    key='GAN_Align',
    add_key='GAN_Align',
    n_clusters=6,
    method=tool,
    use_pca=False,
)

R[write to console]:                    __           __ 
   ____ ___  _____/ /_  _______/ /_
  / __ `__ \/ ___/ / / / / ___/ __/
 / / / / / / /__/ / /_/ (__  ) /_  
/_/ /_/ /_/\___/_/\__,_/____/\__/   version 6.1.1
Type 'citation("mclust")' for citing this R package in publications.



fitting ...


In [None]:
import pandas as pd
from sklearn.metrics import (
    adjusted_rand_score, normalized_mutual_info_score, adjusted_mutual_info_score,
    v_measure_score, mutual_info_score, homogeneity_score
)

def load_annotation(file_path):
    """Read the annotation CSV at ``file_path`` and return it as a DataFrame."""
    annotation_table = pd.read_csv(file_path)
    return annotation_table

def convert_to_categorical(annotation, column_name):
    """Convert one annotation column into an ordered categorical.

    Args:
        annotation: DataFrame holding the annotation table.
        column_name: Name of the column to convert.

    Returns:
        A ``pd.Categorical`` over the column's values. Its categories are the
        distinct non-missing labels in order of first appearance, with
        ``ordered=True``. Missing entries remain missing (code -1).
    """
    labels = annotation[column_name]
    # Series.unique() keeps NaN/None, and pd.Categorical raises ValueError on
    # null categories, so drop missing values before deriving the categories.
    categories = labels.dropna().unique()
    return pd.Categorical(labels, categories=categories, ordered=True)

def calculate_clustering_metrics(true_labels, predicted_labels):
    """Score a predicted clustering against reference labels.

    Returns a dict keyed by metric name ('ARI', 'NMI', 'AMI', 'v_measure',
    'mutual_info', 'homogeneity'), in that order, each holding the value of
    the corresponding scorer called as ``scorer(true_labels, predicted_labels)``.
    """
    scorers = (
        ('ARI', adjusted_rand_score),
        ('NMI', normalized_mutual_info_score),
        ('AMI', adjusted_mutual_info_score),
        ('v_measure', v_measure_score),
        ('mutual_info', mutual_info_score),
        ('homogeneity', homogeneity_score),
    )
    return {
        metric_name: scorer(true_labels, predicted_labels)
        for metric_name, scorer in scorers
    }

def print_metrics(metrics):
    """Print every entry of ``metrics`` on its own line as ``name: value``."""
    for name in metrics:
        value = metrics[name]
        print(f'{name}: {value}')

# Main program
file_fold = '/home/zxx/MVAADT/data/Dataset1_Lymph_Node1/'  # replace with the actual file path
annotation_path = file_fold + 'annotation.csv'
annotation = load_annotation(annotation_path)

# Attach the manual annotation as the reference labels, then score the
# 'GAN_Align' cluster assignment against it.
adata.obs['celltype'] = convert_to_categorical(annotation, 'manual-anno')
metrics = calculate_clustering_metrics(
    true_labels=adata.obs['celltype'],
    predicted_labels=adata.obs['GAN_Align'],
)
print_metrics(metrics)

In [17]:
import pandas as pd

file_path = 'xxx' #replace with your own file path
# Read the evaluation table with its first column as the row index, so the
# label below addresses a named row instead of being appended to a RangeIndex.
# NOTE(review): assumes the first CSV column holds the row/method names --
# confirm against the actual evaluation file.
evaluation = pd.read_csv(file_path, index_col=0)
# Write this run's scores into the labelled row (the row is created if absent).
metrics_to_update = ['ARI', 'NMI', 'AMI', 'v_measure', 'mutual_info', 'homogeneity']
row_label = 'RNA with Translation(+D1)'
for metric in metrics_to_update:
    evaluation.loc[row_label, metric] = metrics[metric]
# Save with the index: writing index=False would discard the row labels and
# leave the updated scores in an unlabelled row.
evaluation.to_csv(file_path)