In [None]:
import pandas as pd
import gseapy as gp
from numpy import log10
from gseapy.plot import dotplot

In [None]:
# Load the tab-separated differential-expression table into a DataFrame.
# (File name suggests DESeq output restricted to protein-coding genes -- confirm.)
deseq_table_path = '02_deseq_protein_coding.tsv'
df = pd.read_csv(deseq_table_path, sep='\t')

In [None]:
# Preview the first rows using the notebook's rich table rendering instead of
# printing the whole frame as plain text.
df.head()

In [None]:
# Shift every adjusted p-value by a negligible offset so that exact zeros
# do not become infinite when log10 is taken in the scoring step.
PADJ_OFFSET = 10**-100
df['padj'] = df['padj'] + PADJ_OFFSET

In [None]:
# Ranking magnitude: -log10 of the (offset) adjusted p-value, so smaller
# p-values give larger scores.
df['score'] = -log10(df['padj'])

In [None]:
# Helper that turns a fold change into a direction multiplier (+1 / -1)
def sign_of_fold(fold):
    """Return -1 for a strictly negative fold change and 1 otherwise.

    Zero, positive values, and anything for which ``fold < 0`` is false
    (for example NaN) all map to 1.
    """
    return -1 if fold < 0 else 1

In [None]:
# Give the ranking score the direction of the fold change.
# The magnitude is taken with abs() before signing so this cell is safe to
# re-run: multiplying an already-signed score by the sign a second time would
# silently turn every score positive again. On a first run the result is the
# same as a plain multiplication, assuming the incoming score is non-negative
# (i.e. padj <= 1).
df['foldsign'] = df['log2FoldChange'].apply(sign_of_fold)
df['score'] = df['score'].abs() * df['foldsign']

In [None]:
# Normalise gene identifiers to upper case
# (presumably to match the casing used by the gene-set library -- confirm).
upper_gene_ids = df['gene_id'].str.upper()
df['gene_id'] = upper_gene_ids

In [None]:
# Plain Python list of the gene identifiers, consumed by the Enrichr cell.
# NOTE(review): this contains every gene in df, not a significance-filtered
# subset. Over-representation tools such as Enrichr normally expect only the
# genes of interest -- confirm this is intended.
gene_list = df['gene_id'].tolist()

In [None]:
# Preranked GSEA on the signed score.
# The ranking frame has two columns: gene identifier, then ranking metric.
ranking = df[['gene_id', 'score']]
prerank_res = gp.prerank(
    rnk=ranking,
    gene_sets='KEGG_2019_Mouse',    # gene-set library to test against
    processes=4,                    # parallel worker count
    permutation_num=10000,          # permutations used for significance testing
    outdir='GSEA_prerank_results',  # where gseapy writes its results
    graph_num=50,
    min_size=100,                   # skip gene sets with fewer genes than this
    # max_size is not passed, so the library default applies (500 was considered)
    seed=42,                        # fixed seed for reproducible permutations
)

In [None]:
# Enrichr analysis of gene_list against the same KEGG library.
enrichr_res = gp.enrichr(
    gene_list=gene_list,
    gene_sets=['KEGG_2019_Mouse'],        # gene-set library to query
    organism='mouse',
    description='GSEA Enrichr Analysis',  # label passed through to gseapy
    outdir='GSEA__enrichr_results',       # where gseapy writes its results
    # Per the gseapy docs, cutoff is the adjusted p-value threshold for terms
    # shown in the generated figure; it does not filter the saved table.
    cutoff=0.5,
)

In [None]:
# Dot plot of the Enrichr result table, saved as a PDF inside the Enrichr
# output directory (which must already exist when this cell runs).
enrichr_plot_path = 'GSEA__enrichr_results/ScatterHeat_enrichR.pdf'
dotplot(
    enrichr_res.res2d,
    title='KEGG_2019_Mouse',
    cmap='viridis_r',
    ofname=enrichr_plot_path,
)