# Cis effects enrichment - g:Profiler

This notebook will use the [g:Profiler tool](https://biit.cs.ut.ee/gprofiler/gost) to look for pathways enriched for proteins that came up across multiple cancers in the cis effects analysis.

## Setup

In [1]:
import pandas as pd
import numpy as np
import gprofiler
import cptac.utils as ut
import IPython.display

In [2]:
def run_gprofiler(input_file):
    """Run a g:Profiler enrichment query on the proteins listed in a TSV file.

    Parameters
    ----------
    input_file
        Path (or anything `pd.read_csv` accepts) to a tab-separated table
        that has a "protein" column.

    Returns
    -------
    The value returned by `GProfiler.profile`; the client is created with
    `return_dataframe=True`, so a DataFrame is requested.

    Notes
    -----
    The query is sent for organism "hsapiens" as an unordered list, and no
    `sources` argument is passed, so the library's default source selection
    applies. This call presumably needs network access to the g:Profiler
    service -- confirm before running offline.
    """
    # Every value in the "protein" column becomes one query entry, in file order.
    proteins = pd.read_csv(input_file, sep="\t")["protein"].tolist()

    client = gprofiler.GProfiler(return_dataframe=True)
    return client.profile(
        query=proteins,
        organism="hsapiens",
        ordered=False,
    )

## Enrichment using all data sources

### 8p cis effects

In [3]:
# Query g:Profiler with the "protein" column of the 8p summary file; the bare
# call is the cell's last expression, so the returned table is displayed.
run_gprofiler("pancancer_summary_8p.tsv")

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents
0,GO:CC,GO:0005829,cytosol,2.1e-05,True,"""The part of the cytoplasm that does not conta...",5161,48,31,18856,0.645833,0.006007,query_1,"[GO:0005737, GO:0110165]"
1,GO:CC,GO:0005737,cytoplasm,0.000782,True,"""All of the contents of a cell excluding the p...",11653,48,44,18856,0.916667,0.003776,query_1,"[GO:0005622, GO:0110165]"
2,GO:CC,GO:0005622,intracellular,0.001216,True,"""The living contents of a cell; the matter con...",14605,48,48,18856,1.0,0.003287,query_1,[GO:0005575]
3,REAC,REAC:R-HSA-8853336,Signaling by plasma membrane FGFR1 fusions,0.010015,True,Signaling by plasma membrane FGFR1 fusions,3,36,2,10588,0.055556,0.666667,query_1,[REAC:R-HSA-1839124]
4,GO:BP,GO:0031468,nuclear envelope reassembly,0.030525,True,"""The reformation of the nuclear envelope follo...",18,47,3,17916,0.06383,0.166667,query_1,[GO:0006998]
5,CORUM,CORUM:5211,RAF1-PPP2-PIN1 complex,0.049957,True,RAF1-PPP2-PIN1 complex,5,13,2,3627,0.153846,0.4,query_1,[CORUM:0000000]
6,CORUM,CORUM:5234,IKBKB-CDC37-KIAA1967-HSP90AB1-HSP90AA1 complex,0.049957,True,IKBKB-CDC37-KIAA1967-HSP90AB1-HSP90AA1 complex,5,13,2,3627,0.153846,0.4,query_1,[CORUM:0000000]


### 8q cis effects

In [4]:
# Query g:Profiler with the "protein" column of the 8q summary file; the bare
# call is the cell's last expression, so the returned table is displayed.
run_gprofiler("pancancer_summary_8q.tsv")

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents
0,TF,TF:M00716_1,Factor: ZF5; motif: GSGCGCGR; match class: 1,0.000002,True,Factor: ZF5; motif: GSGCGCGR; match class: 1,14195,104,99,19940,0.951923,0.006974,query_1,[TF:M00716_0]
1,TF,TF:M11529_0,Factor: E2F-2; motif: GCGCGCGCNCS; match class: 0,0.000005,True,Factor: E2F-2; motif: GCGCGCGCNCS; match class: 0,16398,104,104,19940,1.000000,0.006342,query_1,[TF:M11529]
2,TF,TF:M11529,Factor: E2F-2; motif: GCGCGCGCNCS,0.000005,True,Factor: E2F-2; motif: GCGCGCGCNCS,16398,104,104,19940,1.000000,0.006342,query_1,[TF:M00000]
3,TF,TF:M11531_1,Factor: E2F-2; motif: GCGCGCGCGYW; match class: 1,0.000007,True,Factor: E2F-2; motif: GCGCGCGCGYW; match class: 1,12404,104,92,19940,0.884615,0.007417,query_1,[TF:M11531_0]
4,GO:CC,GO:0005622,intracellular,0.000013,True,"""The living contents of a cell; the matter con...",14605,99,96,18856,0.969697,0.006573,query_1,[GO:0005575]
5,GO:MF,GO:0003723,RNA binding,0.000015,True,"""Interacting selectively and non-covalently wi...",1922,101,31,18134,0.306931,0.016129,query_1,[GO:0003676]
6,TF,TF:M04869_0,Factor: Egr-1; motif: GCGCATGCG; match class: 0,0.000015,True,Factor: Egr-1; motif: GCGCATGCG; match class: 0,11303,104,87,19940,0.836538,0.007697,query_1,[TF:M04869]
7,TF,TF:M04869,Factor: Egr-1; motif: GCGCATGCG,0.000015,True,Factor: Egr-1; motif: GCGCATGCG,11303,104,87,19940,0.836538,0.007697,query_1,[TF:M00000]
8,TF,TF:M11531_0,Factor: E2F-2; motif: GCGCGCGCGYW; match class: 0,0.000016,True,Factor: E2F-2; motif: GCGCGCGCGYW; match class: 0,13350,104,95,19940,0.913462,0.007116,query_1,[TF:M11531]
9,TF,TF:M11531,Factor: E2F-2; motif: GCGCGCGCGYW,0.000016,True,Factor: E2F-2; motif: GCGCGCGCGYW,13350,104,95,19940,0.913462,0.007116,query_1,[TF:M00000]
