1. 导入相关依赖库

In [None]:
import gseapy
from gseapy.plot import barplot, dotplot
import pandas as pd

2. 从文件读取基因列表

In [None]:
# Read one gene symbol per line from the example file.
# - encoding='utf-8-sig' reads plain ASCII/UTF-8 and drops a leading BOM if present,
#   so the first symbol is not corrupted by files saved from Windows editors.
# - strip() removes all surrounding whitespace (including '\r' from CRLF files),
#   and blank lines are skipped so no empty "gene" ends up in the query list.
with open('./ExampleGenelist.txt', 'r', encoding='utf-8-sig') as fileObject:
    geneList = [line.strip() for line in fileObject if line.strip()]
geneList

3. 查询适合的数据库

In [None]:
# Materialize the library names reported by gseapy.get_library_name() as a list,
# then display it so a suitable library can be picked for the enrichment steps.
databases = list(gseapy.get_library_name())
databases

4. 进行Gene Ontology Enrichment并输出结果

In [None]:
# Gene Ontology libraries to query, mapped to the short labels used in the
# 'Gene_set' column (the same mapping drives both the queries and the relabeling).
mapping = {
    'GO_Biological_Process_2023': 'BP',
    'GO_Cellular_Component_2023': 'CC',
    'GO_Molecular_Function_2023': 'MF'
}
# Query each library and collect the per-library result tables; they are
# concatenated once at the end instead of growing a DataFrame inside the loop
# (avoids concatenating with an empty frame and repeated copying).
go_tables = []
for db in mapping:
    enr = gseapy.enrichr(geneList, db, organism='human')
    go_tables.append(enr.res2d)
# ignore_index=True gives the combined table a unique 0..n-1 index rather than
# repeating each library's own row labels.
results = pd.concat(go_tables, ignore_index=True)
# Shorten the Gene_set labels (library name -> BP / CC / MF).
results['Gene_set'] = results['Gene_set'].replace(mapping)
# Number of genes per term: 'Genes' is treated as a ';'-separated string,
# so the count is the number of separators plus one.
results['Number of Genes'] = results['Genes'].str.count(';') + 1
# Remove the trailing " (GO:xxxxxxx)" accession from each term name. A pattern is
# used instead of a fixed-width slice so the space before the parenthesis is
# removed too, and terms without an accession are left untouched.
results['Term'] = results['Term'].str.replace(r'\s*\(GO:\d+\)\s*$', '', regex=True)
results

5. 作图，以默认的横向柱状图（横轴为 -log10(Adjusted P-value)）为例

In [None]:
# Bar chart of the GO enrichment table, grouped and colored by Gene_set
# (one color per group), limited via top_term=10.
# NOTE(review): cutoff=1 presumably disables significance filtering so that all
# terms are eligible for plotting — confirm against the gseapy barplot docs.
barplot(
    results,
    group='Gene_set',
    top_term=10,
    cutoff=1,
    color=['red', 'green', 'blue'],
    figsize=(4, 12),
    title="Top 10 Gene Ontology Enrichment Terms",
)

6. 进行KEGG Enrichment并输出结果

In [None]:
# 使用适合的KEGG基因库
enr = gseapy.enrichr(geneList, 'KEGG_2021_Human')
# 格式化Gene_set
mapping = {"KEGG_2021_Human": "KEGG"}
results = enr.res2d
results['Gene_set'] = results['Gene_set'].replace(mapping)
results

7. 作图，以气泡图为例

In [None]:
# Dot (bubble) plot of the KEGG enrichment table, using 'Combined Score'
# as the plotted column.
dotplot(
    enr.res2d,
    title='KEGG',
    column='Combined Score',
    size=60,
    figsize=(3, 5),
)