# Cis effects enrichment - g:Profiler

This notebook uses the [g:Profiler tool](https://biit.cs.ut.ee/gprofiler/gost) to look for pathways that are enriched among the proteins identified in multiple cancers in the cis effects analysis.

## Setup

In [1]:
import pandas as pd
import numpy as np
import gprofiler
import cptac.utils as ut
import IPython.display

In [2]:
def run_gprofiler(input_file, cancer_type=None, sources=None):
    """Run a g:Profiler enrichment query on the proteins listed in a summary table.

    Parameters
    ----------
    input_file : str or path-like
        Tab-separated file readable by ``pd.read_csv``. It must have a
        "protein" column and, when ``cancer_type`` is given, a "cancers"
        column.
    cancer_type : str, optional
        When given, only rows whose "cancers" value contains this pattern are
        used. Matching is done with ``Series.str.contains``, so the value is
        interpreted as a regular expression. Rows with a missing "cancers"
        value never match. When None, every row is used.
    sources : list of str, optional
        Data sources passed to ``GProfiler.profile``. When None, the default
        set below (GO:BP, KEGG, REAC, WP, TF, MIRNA, HPA, CORUM, HP) is used.

    Returns
    -------
    The value returned by ``GProfiler.profile``; the client is created with
    ``return_dataframe=True``.
    """
    input_df = pd.read_csv(input_file, sep="\t")

    if cancer_type is not None:
        # na=False: a row with no "cancers" entry would otherwise put NaN in
        # the mask, and boolean indexing with NaN raises a ValueError.
        in_cancer = input_df["cancers"].str.contains(cancer_type, na=False)
        input_df = input_df[in_cancer]

    # Rows without a protein identifier cannot be queried; drop them instead
    # of sending float NaN values to the service.
    protein_list = input_df["protein"].dropna().tolist()

    if sources is None:
        sources = ["GO:BP", "KEGG", "REAC", "WP", "TF", "MIRNA", "HPA", "CORUM", "HP"]

    gp = gprofiler.GProfiler(return_dataframe=True)

    results = gp.profile(
        organism="hsapiens",
        query=protein_list,
        ordered=False,
        sources=sources,
    )

    return results

### 7p trans effects

In [3]:
# Enrichment for the proteins in the 7p trans summary table whose "cancers"
# entry matches "luad".
luad_7p_trans_results = run_gprofiler(
    input_file="pancancer_summary_7p_trans.tsv",
    cancer_type="luad",
)
luad_7p_trans_results

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents
0,GO:BP,GO:0019835,cytolysis,1.656833e-07,True,"""The rupture of cell membranes and the loss of...",28,245,9,17916,0.036735,0.321429,query_1,[GO:0009987]
1,GO:BP,GO:0016192,vesicle-mediated transport,4.942335e-06,True,"""A cellular transport process in which transpo...",2149,245,63,17916,0.257143,0.029316,query_1,"[GO:0006810, GO:0009987]"
2,KEGG,KEGG:04610,Complement and coagulation cascades,1.766383e-05,True,Complement and coagulation cascades,85,134,11,7747,0.08209,0.129412,query_1,[KEGG:00000]
3,REAC,REAC:R-HSA-166665,Terminal pathway of complement,3.162415e-05,True,Terminal pathway of complement,8,183,5,10588,0.027322,0.625,query_1,[REAC:R-HSA-166658]
4,GO:BP,GO:0006957,"complement activation, alternative pathway",3.511521e-05,True,"""Any process involved in the activation of any...",14,245,6,17916,0.02449,0.428571,query_1,"[GO:0006956, GO:0045087]"
5,GO:BP,GO:0006950,response to stress,0.0001599353,True,"""Any process that results in a change in state...",4112,245,93,17916,0.379592,0.022617,query_1,[GO:0050896]
6,GO:BP,GO:0006955,immune response,0.0002973098,True,"""Any immune system process that functions in t...",2228,245,60,17916,0.244898,0.02693,query_1,"[GO:0002376, GO:0050896]"
7,GO:BP,GO:0051649,establishment of localization in cell,0.0004087076,True,"""Any process, occuring in a cell, that localiz...",2810,245,70,17916,0.285714,0.024911,query_1,"[GO:0051234, GO:0051641]"
8,GO:BP,GO:0051125,regulation of actin nucleation,0.0005394889,True,"""Any process that modulates the frequency, rat...",33,245,7,17916,0.028571,0.212121,query_1,"[GO:0030833, GO:0045010]"
9,GO:BP,GO:0002376,immune system process,0.0006867659,True,"""Any process involved in the development or fu...",3199,245,76,17916,0.310204,0.023757,query_1,[GO:0008150]


In [4]:
# Keep only the rows whose "source" is REAC.
luad_7p_trans_results.loc[luad_7p_trans_results["source"].eq("REAC")]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents
3,REAC,REAC:R-HSA-166665,Terminal pathway of complement,3.2e-05,True,Terminal pathway of complement,8,183,5,10588,0.027322,0.625,query_1,[REAC:R-HSA-166658]
25,REAC,REAC:R-HSA-9006335,Signaling by Erythropoietin,0.015387,True,Signaling by Erythropoietin,23,183,5,10588,0.027322,0.217391,query_1,[REAC:R-HSA-162582]
31,REAC,REAC:R-HSA-168256,Immune System,0.033343,True,Immune System,2146,183,59,10588,0.322404,0.027493,query_1,[REAC:0000000]
33,REAC,REAC:R-HSA-168249,Innate Immune System,0.040254,True,Innate Immune System,1092,183,36,10588,0.196721,0.032967,query_1,[REAC:R-HSA-168256]
