## Notebook for running Gene Set Enrichment Analysis (GSEA) using gseapy

[gseapy docs](https://gseapy.readthedocs.io/en/latest/index.html)

In [1]:
# Shell escape: print the current date/time so the saved output records when this was run.
!date

Fri Aug  6 08:00:00 UTC 2021


#### import libraries and set notebook variables

In [2]:
import gseapy

In [3]:
# Query gene symbols: just the closest gene to IPDGC meta5 GWAS index variants.
# Kept one symbol per line so additions/removals show up as single-line diffs.
gene_list = [
    'EIF3KP1',
    'KLHL7-DT',
    'GDPD3',
    'ITGAL',
    'LENEP',
    'TRIM72',
    'MAPT',
    'ZSCAN9',
    'UBAP1',
    'PHF24',
]

In [4]:
# Ask gseapy for the library names available for the chosen database.
# Available databases: 'Human', 'Mouse', 'Yeast', 'Fly', 'Fish', 'Worm'
gene_set_names = gseapy.get_library_name(database='Human')
# Uncomment to list the names; presumably these are the values accepted by
# `gene_sets=` in the enrichr calls in later cells (confirm against the gseapy docs).
# print(gene_set_names)

In [5]:
# Run an Enrichr query for `gene_list` against the GO Biological Process library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'GO_Biological_Process_2021'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes




In [6]:
# Run an Enrichr query for `gene_list` against the GO Cellular Component library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'GO_Cellular_Component_2021'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes




In [7]:
# Run an Enrichr query for `gene_list` against the WikiPathways (2019, human) library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'WikiPathways_2019_Human'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes




In [8]:
# Run an Enrichr query for `gene_list` against the OMIM Disease library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'OMIM_Disease'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes




#### We can probably also use this to identify cell clusters
For example, by querying cluster marker genes against the CellMarker_Augmented_2021 or Allen_Brain_Atlas_10x_scRNA_2021 libraries.

In [9]:
# Astrocyte gene marker list (currently disabled).
# Uncommenting the assignment below replaces `gene_list`, so the following cells
# would query these markers instead of the gene list defined earlier.
# gene_list = ['SLC1A3', 'GFAP', 'APOE', 'SLC1A2', 'SLC14A1', 'CPE', 'CLU', 'ALDOC', 'FAM19A1', 'AQP4', 'GJB6']

In [10]:
# Run an Enrichr query for `gene_list` against the CellMarker Augmented library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'CellMarker_Augmented_2021'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes




In [11]:
# Run an Enrichr query for `gene_list` against the Allen Brain Atlas 10x scRNA library,
# show the size and first rows of the result table, and plot the top terms.
gene_set = 'Allen_Brain_Atlas_10x_scRNA_2021'
# Single adjusted p-value threshold used for both the enrichr call and the inline
# bar plot. gseapy.barplot applies its own `cutoff` (0.05 by default per the gseapy
# docs), so without passing it explicitly the plot would ignore the 0.5 used here.
plot_cutoff = 0.5
enr_res = gseapy.enrichr(gene_list=gene_list,
                         organism='Human',
                         gene_sets=gene_set,
                         description='pathway',
                         cutoff=plot_cutoff)
print(enr_res.results.shape)
display(enr_res.results.head())
# Only plot when at least one term passes the threshold; with nothing to draw,
# report that explicitly instead of calling barplot on an empty selection.
if (enr_res.res2d['Adjusted P-value'] <= plot_cutoff).any():
    gseapy.barplot(enr_res.res2d, title=gene_set, cutoff=plot_cutoff)
else:
    print('No terms to plot for', gene_set, 'at adjusted p-value <=', plot_cutoff)



(0, 10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes


