# 2. 网络分析

# 0. Import


In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns


In [2]:
# CellOracle is imported in its own cell; the version is displayed so the
# environment used for this analysis is recorded in the notebook output.
import celloracle as co
co.__version__

INFO:matplotlib.font_manager:Failed to extract font properties from /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2)


'0.12.0'

## 1. 读取

1. 原始adata的raw count数据

In [4]:
# Name of the `adata.obs` column used as the clustering unit in later cells.
cluster_key = "celltype"

# Load the preprocessed AnnData object (no down-sampling is applied for now)
# and display its summary as the cell output.
adata = sc.read("./data/adata_processed.h5ad")
adata

AnnData object with n_obs × n_vars = 9815 × 2000
    obs: 'sample', 'stage', 'sequencing.batch', 'theiler', 'celltype', 'pseudotime', 'n_counts_all', 'n_counts'
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'MURK_gene', 'Δm', 'scaled Δm', 'n_counts', 'mean', 'std'
    uns: 'celltype_colors', 'log1p', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'raw_count', 'spliced', 'unspliced'
    obsp: 'connectivities', 'distances'

2. GRN数据

In [5]:
# Load the mouse scATAC-atlas base GRN provided by CellOracle and preview it.
# The preview shows one row per peak with `peak_id`, `gene_short_name` and
# one 0/1 column per transcription factor.
base_GRN = co.data.load_mouse_scATAC_atlas_base_GRN()
base_GRN.head()

Unnamed: 0,peak_id,gene_short_name,9430076c15rik,Ac002126.6,Ac012531.1,Ac226150.2,Afp,Ahr,Ahrr,Aire,...,Znf784,Znf8,Znf816,Znf85,Zscan10,Zscan16,Zscan22,Zscan26,Zscan31,Zscan4
0,chr10_100050979_100052296,4930430F08Rik,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,chr10_101006922_101007748,SNORA17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,chr10_101144061_101145000,Mgat4c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,chr10_10148873_10149183,9130014G24Rik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,chr10_10149425_10149815,9130014G24Rik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 2. 构造Oracle对象

In [6]:
# Create an empty Oracle object; expression data and the base GRN are
# attached to it in the following cells.
oracle = co.Oracle()

1. 导入表达数据

In [7]:
# The importer below is the raw-count variant, so `.X` is replaced with a copy
# of the un-normalised count matrix stored in the "raw_count" layer.
# NOTE: this overwrites `adata.X` in place.
adata.X = adata.layers["raw_count"].copy()

# Register the expression data, the cluster annotation and the UMAP embedding
# with the Oracle object.
oracle.import_anndata_as_raw_count(
    adata=adata,
    cluster_column_name=cluster_key,
    embedding_name="X_umap",
)



2. 导入GRN数据

In [8]:
# Attach the base GRN (TF information matrix) to the Oracle object.
oracle.import_TF_data(TF_info_matrix=base_GRN)
# Skip this cell if the GRN is inserted manually instead.

## 3. KNN插补

1. pca

In [9]:
oracle.perform_PCA()

# Select important PCs.
# The cumulative explained variance of the first 100 components is plotted so
# the automatically chosen cut-off (vertical line) can be checked by eye.
explained_ratio = oracle.pca.explained_variance_ratio_
cumulative_ratio = np.cumsum(explained_ratio)
plt.plot(cumulative_ratio[:100])

# How the cut-off is chosen:
#   * np.diff(cumulative_ratio) is the gain in explained variance contributed
#     by each additional component (equal to explained_ratio[1:]).
#   * Comparing that gain with 0.002 gives a boolean mask.
#   * np.diff on a boolean mask marks the positions where the mask flips.
# The first flip is taken as the elbow. Because explained variance normally
# decreases with the component index, this is typically the point where one
# more PC stops adding more than 0.2 % of the variance.
gain_is_large = np.diff(cumulative_ratio) > 0.002
elbow_candidates = np.where(np.diff(gain_is_large))[0]
if elbow_candidates.size > 0:
    n_comps = elbow_candidates[0]
else:
    # The threshold is never crossed; indexing [0] here would raise
    # IndexError. Fall back to all computed PCs (the cap below still applies).
    n_comps = len(explained_ratio)

plt.axvline(n_comps, c="k")
plt.show()
print(n_comps)

# Never use more than 50 components for the KNN imputation step.
n_comps = min(n_comps, 50)

17


2. KNN imputation

In [10]:
# Number of cells currently held by the Oracle object.
n_cell = oracle.adata.shape[0]
print(f"cell number is :{n_cell}")

# Neighbourhood size is set to 2.5 % of the cells (truncated to an integer).
k = int(0.025*n_cell)
print(f"Auto-selected k is :{k}")

# Balanced KNN imputation on the selected PCs; the two `b_*` search limits
# are scaled from k (8x and 4x respectively).
oracle.knn_imputation(
    n_pca_dims=n_comps,
    k=k,
    balanced=True,
    b_sight=k*8,
    b_maxl=k*4,
    n_jobs=4,
)

cell number is :9815
Auto-selected k is :245


## 4. 保存和导入

In [11]:
# Persist the Oracle object (after PCA and KNN imputation) to disk.
# NOTE(review): this file is written to the working directory, whereas the
# links file later in the notebook goes under ./data/ — confirm this is intended.
oracle.to_hdf5("./adata.celloracle.oracle")

In [12]:
# Reload the saved Oracle object, so the analysis can be resumed from this
# point without repeating the preprocessing cells above.
oracle = co.load_hdf5("./adata.celloracle.oracle")

## 5. GRN计算

In [13]:
# Sanity check: show the UMAP stored in the Oracle's AnnData, coloured by the
# cluster annotation that is used as the GRN unit below.
sc.pl.umap(oracle.adata, color=cluster_key)

1. 获得GRNs

In [14]:
# Infer one GRN per cluster of `cluster_key`; the result is a Links object.
# `alpha` is passed through to CellOracle's regression step
# (presumably the regularisation strength — confirm in the CellOracle docs).
links = oracle.get_links(
    cluster_name_for_GRN_unit=cluster_key,
    alpha=10,
    verbose_level=10,
)

  0%|          | 0/5 [00:00<?, ?it/s]

Inferring GRN for Blood progenitors 1...


  0%|          | 0/1805 [00:00<?, ?it/s]

Inferring GRN for Blood progenitors 2...


  0%|          | 0/1805 [00:00<?, ?it/s]

Inferring GRN for Erythroid1...


  0%|          | 0/1805 [00:00<?, ?it/s]

Inferring GRN for Erythroid2...


  0%|          | 0/1805 [00:00<?, ?it/s]

Inferring GRN for Erythroid3...


  0%|          | 0/1805 [00:00<?, ?it/s]

2. 导出GRNs

3. 改变GRN顺序

## 6. 网络处理

1. 过滤边

In [15]:
# Filter the inferred edges in place using a p-value cut-off of 0.001, the
# "coef_abs" weight column and a limit of 2000.
# NOTE(review): per the CellOracle docs this should keep the 2000 strongest
# edges per cluster after the p-value filter — confirm.
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)

2. 度分布

In [16]:
# Plot the degree distribution of every cluster's network.
links.plot_degree_distributions(
    plot_model=True,
    # save=f"{save_folder}/degree_distribution/",  # optional output folder (disabled)
)

Blood progenitors 1
Blood progenitors 2
Erythroid1
Erythroid2
Erythroid3


3. 计算网络得分

In [17]:
# Compute the network scores for each cluster's network, then preview the
# merged table. The preview has one row per gene with degree, betweenness and
# eigenvector centrality columns plus a `cluster` column.
links.get_network_score()
links.merged_score.head()

Unnamed: 0,degree_all,degree_centrality_all,degree_in,degree_centrality_in,degree_out,degree_centrality_out,betweenness_centrality,eigenvector_centrality,cluster
Hdac2,128,0.215852,3,0.005059,125,0.210793,531.0,1.0,Blood progenitors 1
Hbb-bh1,2,0.003373,2,0.003373,0,0.0,0.0,0.596112,Blood progenitors 1
Tead2,76,0.128162,7,0.011804,69,0.116358,2389.0,0.599648,Blood progenitors 1
Rasip1,2,0.003373,2,0.003373,0,0.0,0.0,0.14135,Blood progenitors 1
Eno1,104,0.175379,10,0.016863,94,0.158516,5614.0,0.731711,Blood progenitors 1


4. 保存

In [19]:
# Persist the filtered and scored Links object to disk.
links.to_hdf5(file_path="./data/links.celloracle.links")

In [21]:
# Reload the saved Links object, so downstream analysis can start from here.
links = co.load_hdf5(file_path="./data/links.celloracle.links")