# Use Case 5: Gene set enrichment analysis

<b>Import the standard data analysis libraries, as well as gseapy, which will allow us to perform a gene set enrichment analysis</b>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gseapy as gp

<b>Import the CPTAC data</b>

In [2]:
import CPTAC

Loading Clinical Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Data...

 ******PLEASE READ******


<b>Retrieve the clinical and proteomics dataframes</b>

In [3]:
# Pull the clinical and proteomics dataframes from the CPTAC package imported above
clinical = CPTAC.get_clinical()
proteomics = CPTAC.get_proteomics()

<b>For this example we will be separating the protein abundance based on the clinical MSI status. Our first step is to combine the MSI information into the proteomics dataframe utilizing the <code>CPTAC.compare_clinical()</code> function</b>

In [4]:
# Combine the clinical 'MSI' information with the proteomics data; the result
# exposes an 'MSI' column (read as msiProt['MSI'] in the next step).
msiProt = CPTAC.compare_clinical(clinical, proteomics, 'MSI')

<b>Separate the proteomics into two groups based on whether MSI is MSI-H or other </b>

In [17]:
# Boolean row mask: True where the 'MSI' label is exactly "MSI-H";
# `other` is its element-wise complement.
high = msiProt['MSI'].eq("MSI-H")
other = ~high

# Row subsets of the combined frame, one per partition
highMSI = msiProt.loc[high]
otherMSI = msiProt.loc[other]

<b>Find which genes are up-regulated in each partition (as a first pass: genes whose mean abundance within the partition is above zero)</b>

In [35]:
# First-pass definition of "up-regulated": a gene counts as up-regulated in a
# partition when its column mean over that partition's rows is above zero.
# NOTE(review): each partition is scored on its own; MSI-H is never compared
# against the other samples here -- confirm this is the intended criterion.
#
# numeric_only=True keeps the string-valued 'MSI' label column out of the mean.
# Without it, pandas >= 2.0 raises TypeError on that column, and older versions
# only worked by silently dropping it.
highMean = highMSI.mean(numeric_only=True)
otherMean = otherMSI.mean(numeric_only=True)

# Keep only the genes whose partition mean is positive
highUp = highMean[highMean > 0]
otherUp = otherMean[otherMean > 0]

<b>Then use the genes that are up-regulated in these partitions to do a gene set enrichment analysis</b>

In [36]:
# Gene names flagged as up-regulated in each partition, as plain Python lists.
# These are placeholders until the up-regulated gene selection is finalized.
high_list = highUp.index.tolist()
other_list = otherUp.index.tolist()

# Run gseapy's enrichr on the MSI-H gene list against the KEGG_2016 gene sets
enr = gp.enrichr(
    gene_list=high_list,
    description='MSI partitions',
    gene_sets='KEGG_2016',
    outdir='test/enrichr_kegg',
    cutoff=.5,
)

# Display the results table
enr.res2d

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Z-score,Combined Score,Genes,Gene_set
0,Ribosome_Homo sapiens_hsa03010,119/137,2.338334e-53,6.804553e-51,4.309688e-17,1.254119e-14,-1.746138,211.610229,RPL4;RPL5;RPL30;RPL3;RPL32;RPL31;RPL34;RPL8;RP...,KEGG_2016
1,Metabolic pathways_Homo sapiens_hsa01100,497/1239,5.833389e-35,8.487582e-33,6.507530e-10,9.468456e-08,-1.986755,156.609703,PI4K2B;CDA;GLDC;UXS1;XYLT2;ENO1;EPRS;ENO3;GCSH...,KEGG_2016
2,Spliceosome_Homo sapiens_hsa03040,90/134,3.839576e-25,3.724389e-23,4.445053e-09,4.311702e-07,-1.747320,98.233035,ISY1;RBM25;DDX46;EIF4A3;HNRNPU;PRPF19;USP39;PQ...,KEGG_2016
3,Proteasome_Homo sapiens_hsa03050,39/44,5.547560e-19,4.035850e-17,1.208454e-06,7.033200e-05,-1.584085,66.588233,PSMD12;PSMD11;PSMD14;PSMD13;POMP;PSMA7;PSMB10;...,KEGG_2016
4,Carbon metabolism_Homo sapiens_hsa01200,71/113,1.041301e-17,6.060369e-16,1.170344e-06,7.033200e-05,-1.608748,62.907624,GPI;GLDC;ADPGK;ENO1;ENO3;HK2;HK1;HK3;MCEE;IDH3...,KEGG_2016
5,Lysosome_Homo sapiens_hsa04142,73/123,3.274764e-16,1.588260e-14,3.852628e-06,1.868525e-04,-1.649005,58.795478,HEXB;CLTC;CTSZ;HEXA;AP4E1;CLTA;CTSV;TCIRG1;NAG...,KEGG_2016
6,RNA transport_Homo sapiens_hsa03013,87/172,2.256998e-13,9.382662e-12,5.150902e-05,1.745205e-03,-1.721630,50.133114,EIF4A1;POP7;NUP107;POP1;GEMIN2;RPP30;POP4;EIF4...,KEGG_2016
7,Pyrimidine metabolism_Homo sapiens_hsa00240,61/105,3.352453e-13,1.219455e-11,3.769758e-05,1.567142e-03,-1.598319,45.909974,CDA;PNP;TK1;UPP1;POLE;ENTPD3;ENTPD4;ENTPD5;CAD...,KEGG_2016
8,Peroxisome_Homo sapiens_hsa04146,51/83,1.421859e-12,4.597344e-11,5.583464e-05,1.745205e-03,-1.475792,40.258202,ABCD4;PECR;ABCD3;ECI2;SCP2;NUDT19;MLYCD;ACAA1;...,KEGG_2016
9,Biosynthesis of amino acids_Homo sapiens_hsa01230,47/74,1.893758e-12,5.510836e-11,5.997269e-05,1.745205e-03,-1.481749,39.996045,PRPS2;SHMT2;SHMT1;ENO1;MAT2B;ENO3;MAT2A;PGK1;I...,KEGG_2016
