In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind # importando uma biblioteca estatistica do scipy
from multiprocessing import Pool
import warnings
import gseapy as gp
from gseapy.plot import barplot, dotplot
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
sc.settings.verbosity = 3  # scanpy logging verbosity level
sc.logging.print_header()  # emit scanpy's logging header

In [None]:
#names_banco_de_dados = gp.get_library_name()
#print(names_banco_de_dados)

In [None]:
# Load the differential-expression table; the first CSV column becomes the index.
# NOTE(review): this reads the "- Copia" copy of the file, while the GSEA section
# further down reads 'DEGS_feminino_WHO_6-8_vs_Ctrl.csv' -- confirm both hold the same data.
df = pd.read_csv(
    'DEGS_feminino_WHO_6-8_vs_Ctrl - Copia.csv',
    index_col=0,
    sep=',',
)
df

In [None]:
#del df['Unnamed: 0']

In [None]:
# Select the up-regulated genes: log fold change above 1.5 and adjusted p-value below 0.05.
is_up = (df['pvals_adj'] < 0.05) & (df['logfoldchanges'] > 1.5)
DEGs_up = df.loc[is_up].index.tolist()

In [None]:
# Wrap the up-regulated gene list in a one-column DataFrame.
genes_up = pd.DataFrame(DEGs_up, columns=['Up_regulated'])
genes_up

In [None]:
# Export the up-regulated genes to an Excel table for later use.
genes_up.to_excel("GENES_UP_6-8_vs_ctrl.xlsx")  # rename the file to match the comparison before saving

In [None]:
# Select the down-regulated genes: log fold change below -1.5 and adjusted p-value below 0.05.
is_down = (df['pvals_adj'] < 0.05) & (df['logfoldchanges'] < -1.5)
DEGs_down = df.loc[is_down].index.tolist()

In [None]:
# Wrap the down-regulated gene list in a one-column DataFrame.
genes_down = pd.DataFrame(DEGs_down, columns=['Down_regulated'])
genes_down

In [None]:
# Export the down-regulated genes to an Excel table for later use.
genes_down.to_excel("GENES_Down_6-8_vs_ctrl.xlsx")  # rename the file to match the comparison before saving

# enriquecimento usando o GSEAPY

 # enriquecimento dos genes UP regulados

In [None]:
# Enrichr analysis of the up-regulated genes against GO Biological Process 2021.
enr_GOBP_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets=['GO_Biological_Process_2021'],
    organism='Human',
    outdir='test/enr_DEGs_GOBP_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the GO BP enrichment table (up-regulated genes), written to the PDF named in `ofname`.
barplot(
    enr_GOBP_up.res2d,
    title='GO BP pacientes feminino \n WHO_6-8 vs Ctrl (up)',
    ofname='BIO_procs_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='r',
)


In [None]:
# Enrichr analysis of the up-regulated genes against GO Molecular Function 2021.
enr_GOMF_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets=['GO_Molecular_Function_2021'],
    organism='Human',
    outdir='test/enr_DEGs_GOMF_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the GO MF enrichment table (up-regulated genes), written to the PDF named in `ofname`.
barplot(
    enr_GOMF_up.res2d,
    title='GO MF pacientes feminino \n WHO_6-8 vs Ctrl (up)',
    ofname='Mol_func_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='r',
)


In [None]:
# Enrichr analysis of the up-regulated genes against Reactome 2016.
enr_Reactome_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets=['Reactome_2016'],
    organism='Human',
    outdir='test/enr_DEGs_Reactome_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the Reactome enrichment table (up-regulated genes), written to the PDF named in `ofname`.
barplot(
    enr_Reactome_up.res2d,
    title='REACTOME pacientes feminino \n WHO_6-8 vs Ctrl (up)',
    ofname='REACTOME_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='r',
)


In [None]:
# Enrichr analysis of the up-regulated genes against MSigDB Hallmark 2020.
enr_hallmark_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets=['MSigDB_Hallmark_2020'],
    organism='Human',
    outdir='test/enr_DEGs_MSigDB_Hallmark_2020_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the Hallmark enrichment table (up-regulated genes); this one requests 20 top terms.
barplot(
    enr_hallmark_up.res2d,
    title='MSigDB Hallmark pacientes feminino \n WHO_6-8 vs Ctrl (up)',
    ofname='MSigDB_Hallmark_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=20,
    cutoff=0.05,
    color='r',
)

In [None]:
# Enrichr analysis of the up-regulated genes against KEGG 2021 Human.
enr_KEEG_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets=['KEGG_2021_Human'],
    organism='Human',
    outdir='test/enr_DEGs_KEGG_2021_Human_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the KEGG enrichment table (up-regulated genes), written to the PDF named in `ofname`.
# The figure title spells the database name "KEGG" (it was misspelled "KEEG"), matching
# the title used by the down-regulated KEGG plot. The variable and file names are kept as-is
# so other cells and downstream consumers keep working.
barplot(enr_KEEG_up.res2d,
        title='KEGG pacientes feminino \n WHO_6-8 vs Ctrl (up)',
        color='r',
        cutoff=0.05,
        figsize=(9, 7.5),
        top_term=10,
        ofname='keeg_F_WHO_6-8_vs_Ctrl.pdf')


In [None]:
# Enrichr analysis of the up-regulated genes against the C2 MSigDB canonical-pathways
# gene sets, passed as 'c2_cp.gmt' (presumably a local GMT file -- TODO confirm it ships with the notebook).
enr_C2_cp_up = gp.enrichr(
    gene_list=DEGs_up,
    gene_sets='c2_cp.gmt',
    organism='Human',
    outdir='test/enr_C2_cp_up',
    description='DEGs_up',
    cutoff=0.5,
)

In [None]:
# Bar chart of the C2 canonical-pathways enrichment table (up-regulated genes).
barplot(
    enr_C2_cp_up.res2d,
    title='C2_MSigDB pacientes feminino \n WHO_6-8 vs Ctrl (up)',
    ofname='C2_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='r',
)

 # enriquecimento de vias com genes DOWN

In [None]:
# Enrichr analysis of the down-regulated genes against GO Biological Process 2021.
enr_GOBP_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets=['GO_Biological_Process_2021'],
    organism='Human',
    outdir='test/enr_DEGs_GOBP_down',
    description='DEGs_down',
    cutoff=0.5,
)

In [None]:
# Bar chart of the GO BP enrichment table (down-regulated genes), written to the PDF named in `ofname`.
barplot(
    enr_GOBP_down.res2d,
    title='GO BP pacientes feminino \n WHO_6-8 vs Ctrl (down)',
    ofname='GOBP_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='b',
)


In [None]:
# Enrichr analysis of the down-regulated genes against GO Molecular Function 2021.
enr_GOMF_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets=['GO_Molecular_Function_2021'],
    organism='Human',
    outdir='test/enr_DEGs_GOMF_down',
    description='DEGs_down',
    cutoff=0.5,
)

In [None]:
# Bar chart of the GO MF enrichment table (down-regulated genes), written to the PDF named in `ofname`.
barplot(
    enr_GOMF_down.res2d,
    title='GO MF pacientes feminino \n WHO_6-8 vs Ctrl (down)',
    ofname='GOMF_F_WHO_6-8_vs_Ctrl.pdf',
    figsize=(9, 7.5),
    top_term=10,
    cutoff=0.05,
    color='b',
)


In [None]:
# Enrichr analysis of the down-regulated genes against Reactome 2016.
enr_Reactome_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets=['Reactome_2016'],
    organism='Human',
    outdir='test/enr_DEGs_Reactome_down',
    description='DEGs_down',
    cutoff=0.5,
)

In [None]:
# Bar chart of the Reactome enrichment table (down-regulated genes).
# The output name carries a "_down" suffix: the up-regulated Reactome plot is saved as
# 'REACTOME_F_WHO_6-8_vs_Ctrl.pdf', which differed from the previous name used here only
# by letter case, so on a case-insensitive filesystem this plot replaced the up-regulated one.
barplot(enr_Reactome_down.res2d,
        title='Reactome pacientes feminino \n WHO_6-8 vs Ctrl (down)',
        color='b',
        cutoff=0.05,
        figsize=(9, 7.5),
        top_term=10,
        ofname='Reactome_F_WHO_6-8_vs_Ctrl_down.pdf')


In [None]:
# Enrichr analysis of the down-regulated genes against MSigDB Hallmark 2020.
enr_hallmark_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets=['MSigDB_Hallmark_2020'],
    organism='Human',
    outdir='test/enr_DEGs_hallmar_down',
    description='DEGs_down',
    cutoff=0.5,
)

In [None]:
# Bar chart of the Hallmark enrichment table (down-regulated genes).
# The output name carries a "_down" suffix: the up-regulated Hallmark plot is saved as
# 'MSigDB_Hallmark_F_WHO_6-8_vs_Ctrl.pdf', and reusing that exact name here overwrote it.
# Uses the `barplot` imported from gseapy.plot, like the other plotting cells.
# NOTE(review): cutoff is 0.5 here while most other plots use 0.05 -- confirm this is intentional.
barplot(enr_hallmark_down.res2d,
        title='MSigDB_Hallmark pacientes feminino \n WHO_6-8 vs Ctrl (down)',
        color='b',
        cutoff=0.5,
        figsize=(9, 7.5),
        top_term=10,
        ofname='MSigDB_Hallmark_F_WHO_6-8_vs_Ctrl_down.pdf')

In [None]:
# Enrichr analysis of the down-regulated genes against KEGG 2021 Human.
# NOTE(review): cutoff is 0.999996 here versus 0.5 in the sibling cells -- confirm this is intentional.
enr_KEEG_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets=['KEGG_2021_Human'],
    organism='Human',
    outdir='test/enr_DEGs_KEGG_2021_Human_down',
    description='DEGs_down',
    cutoff=0.999996,
)

In [None]:
# Bar chart of the KEGG enrichment table (down-regulated genes).
# The output name carries a "_down" suffix: the up-regulated KEGG plot is saved as
# 'keeg_F_WHO_6-8_vs_Ctrl.pdf', and reusing that exact name here overwrote it.
# NOTE(review): cutoff is 0.999996 here while most other plots use 0.05 -- confirm this is intentional.
barplot(enr_KEEG_down.res2d,
        title='KEGG pacientes feminino \n WHO_6-8 vs Ctrl (down)',
        color='b',
        figsize=(9, 7.5),
        top_term=10,
        cutoff=0.999996,
        ofname='keeg_F_WHO_6-8_vs_Ctrl_down.pdf')


In [None]:
# Enrichr analysis of the down-regulated genes against the C2 MSigDB canonical-pathways
# gene sets, passed as 'c2_cp.gmt' (presumably a local GMT file -- TODO confirm it ships with the notebook).
enr_C2_cp_down = gp.enrichr(
    gene_list=DEGs_down,
    gene_sets='c2_cp.gmt',
    organism='Human',
    outdir='test/enr_C2_cp_down',
    description='DEGs_down',
    cutoff=0.5,
)

In [None]:
# Bar chart of the C2 canonical-pathways enrichment table (down-regulated genes).
# The output name carries a "_down" suffix: the up-regulated C2 plot is saved as
# 'C2_F_WHO_6-8_vs_Ctrl.pdf', and reusing that exact name here overwrote it.
barplot(enr_C2_cp_down.res2d,
        title='C2_MSigDB pacientes feminino \n WHO_6-8 vs Ctrl (down)',
        color='b',
        cutoff=0.05,
        figsize=(9, 7.5),
        top_term=10,
        ofname='C2_F_WHO_6-8_vs_Ctrl_down.pdf')


In [None]:
# Preview the first 10 rows of the Hallmark enrichment results for the down-regulated genes.
enr_hallmark_down.results.head(10)


# enriquecimento com a lista total de genes ranqueada (GSEA)

In [None]:
# Load the full differential-expression table that feeds the ranked (GSEA) analysis.
gene_rank = pd.read_csv(
    'DEGS_feminino_WHO_6-8_vs_Ctrl.csv',
    sep=',',
)

In [None]:
gene_rank  # display the table as loaded

In [None]:
# Remove the columns that are not part of the ranking input.
# `drop(..., errors="ignore")` replaces the in-place `del` statements so the cell can be
# re-run safely: a second execution of `del` raised KeyError because the columns were
# already gone.
# NOTE(review): the columns left over are presumably the gene name and the ranking score -- confirm.
gene_rank = gene_rank.drop(
    columns=['logfoldchanges', 'pvals', 'pvals_adj', 'Unnamed: 0'],
    errors='ignore',
)


In [None]:
gene_rank  # display the table again at this point of the notebook

In [None]:
#criando uma lista ranqueada 
#gene_rank.sort_values(by=['logfoldchanges'], inplace=True, ascending=False)

In [None]:
#gene_rank

In [None]:
# Available databases: 'Human', 'Mouse', 'Yeast', 'Fly', 'Fish', 'Worm'
# Get the gene-set library names for organism='Human' and print them.
gene_set_names = gp.get_library_name(organism='Human')
print(gene_set_names)

In [None]:
# Pre-ranked GSEA of the full gene table against the MSigDB Hallmark 2020 collection.
# A fixed seed is passed so the permutation-based statistics are reproducible between
# runs; without it, the reported values can change every time the cell is executed.
res = gp.prerank(rnk=gene_rank, gene_sets='MSigDB_Hallmark_2020', seed=42)


In [None]:
# Keep a handle on the prerank result table and display it.
resultado= res.res2d
resultado

In [None]:
# Save the prerank result table to Excel.
resultado.to_excel("res_enrich_GSEApy_6-8_vs_ctrl.xlsx")  # rename the file to match the comparison before saving

In [None]:
# Preview the first 10 rows of the prerank result table.
res.res2d.head(10)

In [None]:
import gseapy
from gseapy.plot import gseaplot

In [None]:
# Take the index of the result table as the list of terms and show the first 51 entries.
# NOTE(review): assumes the table is indexed by term name -- confirm for the installed gseapy version.
terms = res.res2d.index
terms[:51]

In [None]:
# Enrichment plot for the term sitting at position 48 of `terms`.
# NOTE(review): the position is hard-coded; it raises IndexError when fewer than 49 terms
# are present and may point at a different term if the result order changes -- consider
# selecting the term by name instead.
selected_term = terms[48]
gp.gseaplot(rank_metric=res.ranking, term=selected_term, **res.results[selected_term])
# Variant that saves the plot for position 49 to a PDF:
# gp.gseaplot(rank_metric=res.ranking, term=terms[49], **res.results[terms[49]], ofname='Androgen_Response_GSEA_plot.pdf')