# 代码学习

### 标准化
标准化（normalization）的目的：

- 消除测序深度差异
- 使不同细胞间基因表达可比较
- 减少技术噪音影响

In [1]:
import numpy as np
import scanpy as sc

# Build a tiny example matrix (2 cells x 3 genes), explicitly stored as float64.
adata = sc.AnnData(X=np.array([
    [10.0, 20.0, 30.0],    # cell 0: 60 counts in total
    [50.0, 100.0, 150.0],  # cell 1: 300 counts in total
], dtype=np.float64))

# Normalize every cell to the same total count.
# `sc.pp.normalize_per_cell` is deprecated in scanpy; `sc.pp.normalize_total`
# is its replacement. With the default `target_sum=None` each cell is scaled
# to the median of the per-cell totals (180 for this example), so both rows
# end up identical.
sc.pp.normalize_total(adata)

print(adata.X)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


[[30. 60. 90.]
 [30. 60. 90.]]


### 细胞数据分析

In [5]:
import numpy as np
import pandas as pd
import scanpy as sc

# Build a small random AnnData object (100 cells x 20 genes).
# numpy is imported in this cell so it does not depend on an earlier cell
# having been run, and a seeded generator keeps re-runs reproducible.
rng = np.random.default_rng(0)
adata = sc.AnnData(X=rng.random((100, 20)))

# Add a DCA_split column that alternates 'train' / 'test'.
# np.resize repeats the two-label pattern up to n_obs, so the labels always
# match the number of cells instead of relying on a hard-coded repeat count.
# The column is created directly as a categorical.
split_labels = np.resize(['train', 'test'], adata.n_obs)
adata.obs['DCA_split'] = pd.Categorical(split_labels)

# Check the column dtype (expected: category).
print(adata.obs['DCA_split'].dtype)

# Report the size of the dataset.
print('Preprocessed {} genes and {} cells'.format(adata.n_vars, adata.n_obs))

category
Preprocessed 20 genes and 100 cells


### Tutorial

In [3]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
from scipy.sparse import csr_matrix

In [4]:
# Simulate Poisson-distributed counts for 100 observations x 20000 variables,
# store them as a float32 CSR sparse matrix and wrap them in an AnnData object.
counts = csr_matrix(
    np.random.poisson(lam=1, size=(100, 20000)),
    dtype=np.float32,
)
adata = ad.AnnData(X=counts)
adata

AnnData object with n_obs × n_vars = 100 × 20000

In [5]:
# Access the data matrix stored on the AnnData object.
adata.X

<100x20000 sparse matrix of type '<class 'numpy.float32'>'
	with 1263977 stored elements in Compressed Sparse Row format>

In [6]:
# Give every observation and variable a readable label: Cell_<i> / Gene_<i>.
adata.obs_names = [f"Cell_{idx}" for idx in range(adata.n_obs)]
adata.var_names = [f"Gene_{idx}" for idx in range(adata.n_vars)]
# Preview the first ten observation names.
print(adata.obs_names[:10])

Index(['Cell_0', 'Cell_1', 'Cell_2', 'Cell_3', 'Cell_4', 'Cell_5', 'Cell_6',
       'Cell_7', 'Cell_8', 'Cell_9'],
      dtype='object')


In [12]:
# Draw one random cell-type label per observation.
ct = np.random.choice(["B", "T", "Monocyte"], size=adata.n_obs)
# Store the labels as a categorical column (categoricals are preferred for efficiency).
adata.obs["cell_type"] = pd.Categorical(ct)
adata.obs

Unnamed: 0,cell_type
Cell_0,Monocyte
Cell_1,T
Cell_2,B
Cell_3,T
Cell_4,Monocyte
...,...
Cell_95,T
Cell_96,B
Cell_97,T
Cell_98,Monocyte


In [13]:
# Display the AnnData summary again after adding the cell_type annotation.
adata

AnnData object with n_obs × n_vars = 100 × 20000
    obs: 'cell_type'

In [14]:
# Keep only the observations labelled "B" (all variables retained);
# the recorded output shows the result is a view of adata.
bdata = adata[adata.obs["cell_type"] == "B", :]
bdata

View of AnnData object with n_obs × n_vars = 27 × 20000
    obs: 'cell_type'

In [20]:
# Slice the first five observations.
adata[:5]

View of AnnData object with n_obs × n_vars = 5 × 20000
    obs: 'cell_type'