#### Required libraries

In [10]:
from gprofiler.gprofiler import GProfiler

#### Using a dummy set of proteins

In [12]:
# Example query set: the identifiers that are passed to the enrichment call below.
p_seq = [
    "CDC15",
    "SLK19",
    "CLB2",
    "NET1",
    "CDC14",
    "SIC1",
    "BUB2",
    "DBF2",
    "MOB1",
    "SPO12",
    "TEM1",
]

#### Defining enrichment analysis function

In [13]:
def enrichment_analysis(protein_list, organism="scerevisiae", sign_level=0.05, sources=None):
    """Run a g:Profiler enrichment query and return the significant terms.

    Parameters
    ----------
    protein_list : list of str
        Identifiers used as the query.
    organism : str, optional
        g:Profiler organism identifier (default ``"scerevisiae"``).
    sign_level : float, optional
        Adjusted p-value cut-off (default 0.05). It is forwarded to
        g:Profiler as ``user_threshold`` and also applied locally with a
        strict ``<`` comparison.
    sources : list of str, optional
        Annotation sources to query. Defaults to GO (BP, MF, CC), KEGG and
        Reactome.

    Returns
    -------
    pandas.DataFrame
        Rows of the g:Profiler result whose ``p_value`` is below
        ``sign_level``. If the service returns no rows, that empty frame is
        returned unchanged.
    """
    if sources is None:
        # Include GO (BP, MF, CC), KEGG, and Reactome
        sources = ['GO:BP', 'GO:MF', 'GO:CC', 'KEGG', 'REAC']

    # Initialize GProfiler object; results come back as a DataFrame
    gp = GProfiler(return_dataframe=True)

    results = gp.profile(
        organism=organism,
        query=protein_list,
        sources=list(sources),
        # Without this the service applies its own default threshold, so a
        # sign_level looser than that default would silently have no effect.
        user_threshold=sign_level,
        significance_threshold_method='fdr',  # FDR for multiple testing correction
    )

    # An empty response may come back without a 'p_value' column; indexing it
    # would raise KeyError, so hand the empty frame back as-is.
    if results.empty or 'p_value' not in results.columns:
        return results

    # Keep only rows strictly below the requested significance level
    return results[results['p_value'] < sign_level]

#### Testing the enrichment analysis

In [19]:
# Run the enrichment on the example query and report the hits per annotation source.
enrichment_results = enrichment_analysis(p_seq)

if enrichment_results.empty:
    print("No significant enrichment terms found for the provided protein set.")
else:
    # Fixed reporting order, one section per annotation source
    sources = ['GO:BP', 'GO:MF', 'GO:CC', 'KEGG', 'REAC']
    display_columns = ['name', 'p_value', 'description']
    print("Significant enrichment terms found:")
    for source in sources:
        source_mask = enrichment_results['source'] == source
        source_results = enrichment_results[source_mask]
        if source_results.empty:
            # Nothing significant from this source; skip its section
            continue
        print(f"\nResults from {source}:")
        print(source_results[display_columns])


Significant enrichment terms found:

Results from GO:BP:
                                                  name       p_value  \
0                                    exit from mitosis  5.569109e-16   
1                  mitotic cell cycle phase transition  8.912514e-16   
2                          cell cycle phase transition  8.171816e-15   
3                      regulation of exit from mitosis  1.361110e-14   
4                             mitotic nuclear division  3.815448e-14   
..                                                 ...           ...   
279         protein localization to cell division site  4.172463e-02   
280  negative regulation of cellular component orga...  4.319332e-02   
282       positive regulation of developmental process  4.722677e-02   
284           regulation of cellular catabolic process  4.902173e-02   
285          positive regulation of organelle assembly  4.973730e-02   

                                           description  
0    "The cell cycle 