In [1]:
import sys

# Report which interpreter and module search path this kernel is using.
for kernel_info in (sys.executable, sys.path):
    print(kernel_info)

/opt/anaconda3/envs/pytorch/bin/python
['/opt/anaconda3/envs/pytorch/lib/python39.zip', '/opt/anaconda3/envs/pytorch/lib/python3.9', '/opt/anaconda3/envs/pytorch/lib/python3.9/lib-dynload', '', '/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages', '/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/setuptools/_vendor']


In [2]:
import scanpy as sc

In [3]:
path = './Camp.h5'
data = sc.read_h5ad(path)

# Expression matrix and the first column of `obs` (used as the labels below).
X = X_all = data.X
Y = y_all = data.obs.values[:, 0]

# Print basic information about the loaded data
print(
    f"Data shape: {data.shape}",
    f"Data type: {type(data.X)}",
    f"Obs head:\n{data.obs.head()}",
    f"Var head:\n{data.var.head()}",
    sep="\n",
)

Data shape: (777, 16270)
Data type: <class 'numpy.ndarray'>
Obs head:
         Group
cell_id       
A10_de       0
A11_de       0
A2_de        0
A3_de        0
A5_de        0
Var head:
Empty DataFrame
Columns: []
Index: [ESRG, OR4F5, OR4F29, OR4F16, SAMD11]


In [4]:
import sys

# Make the parent directory importable.
# NOTE(review): presumably this is where the local `train` / `models` modules
# imported below live — confirm. ".." is resolved relative to the kernel's
# current working directory.
# Guarded so that re-running this cell does not keep appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import time
import pandas as pd
from tqdm import tqdm
import random
import scanpy as sc
import matplotlib.pyplot as plt

# PyG相关导入
from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GraphConv

# 导入自定义模块
import train
import models

# Fix the random seeds of every RNG used here (Python, NumPy, PyTorch CPU and CUDA)
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(42)

# Report the PyTorch and PyG versions in use
print(f"PyTorch Version: {torch.__version__}")
try:
    import torch_geometric
except ImportError:
    # Only reached when the package cannot be imported at all.
    print("PyG is not installed.")
else:
    print(f"PyG Version: {torch_geometric.__version__}")

# Device selection helper
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# Load the dataset
path = './Camp.h5'
data = sc.read_h5ad(path)

X_all = data.X  # sparse matrix or ndarray
y_all = data.obs.values[:, 0]  # assumes the first obs column holds the class labels
Y = np.array(y_all)
# Densify only when the matrix exposes `todense`; otherwise copy it as-is.
if hasattr(X_all, 'todense'):
    X = np.array(X_all.todense())
else:
    X = np.array(X_all)

####################################
# Downstream analysis parameters
####################################
category = "real_data"

# Training loop and preprocessing sizes
epochs, batch_size = 10, 128
pca_size, nb_genes = 50, 3000

# Model configuration
model_name = "GraphConv"
normalize_weights = "log_per_cell"
same_edge_values, edge_norm = False, True
hidden_relu = hidden_bn = False
n_layers, hidden_dim = 1, 200
hidden = [300]
activation = F.relu

# Filter highly variable genes and cells:
# keep the selected cells (rows) and genes (columns), and the matching labels.
genes_idx, cells_idx = train.filter_data(X, highly_genes=nb_genes)
X, Y = X[cells_idx][:, genes_idx], Y[cells_idx]

# Build the graph inputs for PyG
t0 = time.time()
edge_index, node_feats, labels = train.make_graph_pyg(
    X, Y, dense_dim=pca_size, normalize_weights=normalize_weights
)

# Wrap node features, connectivity and labels in a PyG Data object
x_tensor = torch.tensor(node_feats, dtype=torch.float)
edge_index_tensor = torch.tensor(edge_index, dtype=torch.long)
y_tensor = torch.tensor(labels, dtype=torch.long)
graph_data = Data(x=x_tensor, edge_index=edge_index_tensor, y=y_tensor)

# Training node ids: every node whose label is not -1
labelled_mask = graph_data.y != -1
train_ids = labelled_mask.nonzero(as_tuple=False).view(-1)

# Mini-batch sampling with PyG's NeighborLoader
loader_options = dict(
    input_nodes=train_ids,
    num_neighbors=[-1],  # all neighbours
    batch_size=batch_size,
    shuffle=True,
)
sampler_loader = NeighborLoader(graph_data, **loader_options)

print(f"INPUT: {model_name} {hidden_dim}, {hidden}, {same_edge_values}, {edge_norm}")
t1 = time.time()

# Run training several times to assess stability across seeds.
# Per-run scores are collected in a list and turned into a DataFrame once,
# instead of growing a DataFrame with pd.concat on every iteration.
# NOTE(review): the recorded losses are essentially flat over the 10 epochs
# with lr=1e-5 — confirm that this learning rate is intended.
device = get_device()  # loop-invariant: resolve the target device once
n_label_values = len(np.unique(Y))  # loop-invariant: number of distinct labels in Y
run_scores = []

for run in range(3):
    t_start = time.time()

    # Re-seed every RNG with the run index so each run is reproducible
    torch.manual_seed(run)
    torch.cuda.manual_seed_all(run)
    np.random.seed(run)
    random.seed(run)

    # Build a fresh model for this run
    model = models.GCNAE(
        in_feats=pca_size,
        n_hidden=hidden_dim,
        n_layers=n_layers,
        activation=activation,
        dropout=0.1,
        hidden=hidden,
        hidden_relu=hidden_relu,
        hidden_bn=hidden_bn,
    ).to(device)

    # Show the architecture only once
    if run == 0:
        print(">", model)

    optim = torch.optim.Adam(model.parameters(), lr=1e-5)

    # Train the model; `scores` becomes one row of the results table
    # (presumably a mapping of metric name -> value — confirm against train.train)
    scores = train.train(
        model,
        optim,
        epochs,
        sampler_loader,
        n_label_values,
        plot=False
    )

    # Collect this run's scores
    run_scores.append(scores)

# One row per run, built in a single pass
results = pd.DataFrame(run_scores)

# Print the per-run results and their mean
print("Results:\n", results)
print("Mean:\n", results.mean())

Python executable: /opt/anaconda3/envs/pytorch/bin/python
Python path: ['/opt/anaconda3/envs/pytorch/lib/python39.zip', '/opt/anaconda3/envs/pytorch/lib/python3.9', '/opt/anaconda3/envs/pytorch/lib/python3.9/lib-dynload', '', '/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages', '/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/setuptools/_vendor', '..', '/var/folders/3k/wtktqhhs2szgg7m8yyz29zgr0000gn/T/tmpfjqu3nkj', '/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages']
Scanpy imported successfully!
PyTorch Version: 2.5.1
PyG Version: 2.6.1




INPUT: GraphConv 200, [300], False, True
> GCNAE(
  (layers): ModuleList(
    (0): GraphConv(50, 200)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorms): ModuleList()
  (decoder): InnerProductDecoder()
)
Epoch 1/10, Loss: 2.2848
Epoch 2/10, Loss: 2.2839
Epoch 3/10, Loss: 2.2869
Epoch 4/10, Loss: 2.2853
Epoch 5/10, Loss: 2.2862
Epoch 6/10, Loss: 2.2844
Epoch 7/10, Loss: 2.2858
Epoch 8/10, Loss: 2.2840
Epoch 9/10, Loss: 2.2842
Epoch 10/10, Loss: 2.2801
Epoch 1/10, Loss: 2.4227
Epoch 2/10, Loss: 2.4213
Epoch 3/10, Loss: 2.4141
Epoch 4/10, Loss: 2.4198
Epoch 5/10, Loss: 2.4210
Epoch 6/10, Loss: 2.4265
Epoch 7/10, Loss: 2.4229
Epoch 8/10, Loss: 2.4210
Epoch 9/10, Loss: 2.4211
Epoch 10/10, Loss: 2.4194
Epoch 1/10, Loss: 2.2879
Epoch 2/10, Loss: 2.2949
Epoch 3/10, Loss: 2.2880
Epoch 4/10, Loss: 2.2875
Epoch 5/10, Loss: 2.2902
Epoch 6/10, Loss: 2.2874
Epoch 7/10, Loss: 2.2893
Epoch 8/10, Loss: 2.2872
Epoch 9/10, Loss: 2.2888
Epoch 10/10, Loss: 2.2861
Results:
    kmeans_ari  kmeans_