In [1]:
import pandas as pd
import plot_utils 
import numpy as np
import math

  import pandas.util.testing as tm


In [2]:
# Read the comparison table; the first CSV column is used as the row index.
proteomics_csv = '~/WhenMutationsDontMatter/PIK3CA/csv_files/proteomics.csv'
trans = pd.read_csv(proteomics_csv, index_col=0)
trans.head(n=5)

Unnamed: 0,Difference_In_Median_Brca,P_Value_Brca,Difference_In_Median_Endo,P_Value_Endo,Difference_In_Median,P_Value
A2ML1,-1.2612,0.165194,1.10585,0.037044,,
AADAT,-1.332,0.027477,-0.18535,0.690613,,
AAGAB,-0.09455,0.909622,0.2435,0.139632,0.139,0.039189
AASDHPPT,0.51445,0.00362,-0.1059,0.428884,-0.0628,0.973515
AATF,-0.16905,0.042271,-0.1817,0.915482,0.025,0.568821


In [3]:
# Preview the rows whose index label contains the substring "MRP".
is_mrp_row = trans.index.str.contains("MRP")
trans.loc[is_mrp_row]

Unnamed: 0,Difference_In_Median_Brca,P_Value_Brca,Difference_In_Median_Endo,P_Value_Endo,Difference_In_Median,P_Value
MRPL1,-0.80680,0.001484,0.46890,0.016136,-0.02600,0.609241
MRPL10,-0.66070,0.009033,0.28640,0.028447,-0.05890,0.743664
MRPL11,-0.51630,0.007482,0.13730,0.196728,0.02100,0.469380
MRPL12,-1.10110,0.000742,0.22960,0.327302,-0.05765,0.493599
MRPL13,-0.73475,0.000691,-0.07585,0.879091,-0.04060,0.424842
...,...,...,...,...,...,...
MRPS36,-0.58130,0.029715,0.04435,0.530703,-0.09960,0.772906
MRPS5,-0.54360,0.021536,0.13950,0.033181,0.05120,0.420665
MRPS6,-1.07630,0.000188,0.14430,0.072316,-0.06050,0.781283
MRPS7,-0.72595,0.001004,0.23400,0.059056,0.11500,0.232498


In [4]:
def significant(row, alpha=.05):
    """Mask the non-significant comparisons in one row of the results table.

    The row is expected to hold three (difference, p-value) column pairs:
    ``Difference_In_Median_Brca``/``P_Value_Brca``,
    ``Difference_In_Median_Endo``/``P_Value_Endo`` and the unsuffixed
    ``Difference_In_Median``/``P_Value``.

    For each pair, when the p-value is missing or greater than ``alpha``,
    both the p-value and its difference are set to NaN. A p-value exactly
    equal to ``alpha`` is kept.

    Parameters
    ----------
    row : pandas.Series
        One row of the table (as passed by ``DataFrame.apply(..., axis=1)``).
    alpha : float, default 0.05
        Significance cutoff.

    Returns
    -------
    pandas.Series
        A masked copy of ``row``; the input itself is left unchanged.

    Raises
    ------
    KeyError
        If any of the six expected columns is missing from ``row``.
    """
    # pandas does not support functions that mutate the object handed to
    # apply(), so work on a copy instead of writing into the caller's row.
    masked = row.copy()

    for suffix in ('_Brca', '_Endo', ''):
        p_col = 'P_Value' + suffix
        diff_col = 'Difference_In_Median' + suffix
        p_value = masked[p_col]

        # A difference without a p-value cannot be called significant, so a
        # missing p-value is masked the same way for every column pair.
        if pd.isna(p_value) or p_value > alpha:
            masked[p_col] = np.nan
            masked[diff_col] = np.nan

    return masked
    
        

In [5]:
# Run the significance mask over every row of the table.
sig = trans.apply(func=significant, axis="columns")
sig.head(n=5)

Unnamed: 0,Difference_In_Median_Brca,P_Value_Brca,Difference_In_Median_Endo,P_Value_Endo,Difference_In_Median,P_Value
A2ML1,,,1.10585,0.037044,,
AADAT,-1.332,0.027477,,,,
AAGAB,,,,,0.139,0.039189
AASDHPPT,0.51445,0.00362,,,,
AATF,-0.16905,0.042271,,,,


In [6]:
# Keep only the masked rows whose index label contains the substring "MRP".
mrp_mask = sig.index.str.contains("MRP")
MRP = sig.loc[mrp_mask]
MRP.head(n=5)

Unnamed: 0,Difference_In_Median_Brca,P_Value_Brca,Difference_In_Median_Endo,P_Value_Endo,Difference_In_Median,P_Value
MRPL1,-0.8068,0.001484,0.4689,0.016136,,
MRPL10,-0.6607,0.009033,0.2864,0.028447,,
MRPL11,-0.5163,0.007482,,,,
MRPL12,-1.1011,0.000742,,,,
MRPL13,-0.73475,0.000691,,,,


In [7]:
# Reshape the wide MRP table into a long one: one frame per cancer type, each
# with the same three columns (Difference_In_Median, P_Value, cancer_type),
# stacked in the order colon, endo, brca.
# Columns are selected and renamed by name rather than by position, so the
# result does not depend on how many columns the input table happens to have.
brca = (
    MRP[['Difference_In_Median_Brca', 'P_Value_Brca']]
    .rename(columns={'Difference_In_Median_Brca': 'Difference_In_Median',
                     'P_Value_Brca': 'P_Value'})
    .assign(cancer_type="brca")
)

endo = (
    MRP[['Difference_In_Median_Endo', 'P_Value_Endo']]
    .rename(columns={'Difference_In_Median_Endo': 'Difference_In_Median',
                     'P_Value_Endo': 'P_Value'})
    .assign(cancer_type="endo")
)

# The unsuffixed columns are the ones labelled "colon" here.
colon = MRP[['Difference_In_Median', 'P_Value']].assign(cancer_type="colon")

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way and works on older pandas versions too.
c_and_e = pd.concat([colon, endo])
df = pd.concat([c_and_e, brca])

df.head()

Unnamed: 0,Difference_In_Median,P_Value,cancer_type
MRPL1,,,colon
MRPL10,,,colon
MRPL11,,,colon
MRPL12,,,colon
MRPL13,,,colon


In [8]:
# Copy the index labels into a regular column named 'comparison'.
df["comparison"] = df.index

# Log-scale the p-values for plotting: -ln(p)/10 grows as p shrinks, so a
# smaller p-value gives a larger value. Computed on the whole column at once;
# NaN p-values stay NaN.
df['size'] = -1 * (np.log(df['P_Value']) / 10)

df.head()

Unnamed: 0,Difference_In_Median,P_Value,cancer_type,comparison,size
MRPL1,,,colon,MRPL1,
MRPL10,,,colon,MRPL10,
MRPL11,,,colon,MRPL11,
MRPL12,,,colon,MRPL12,
MRPL13,,,colon,MRPL13,


In [9]:
# Circle heat map with cancer type on the x axis and the 'comparison' column on the y axis.
# NOTE(review): circle_var is the raw 'P_Value' column, not the 'size' column
# computed earlier — confirm whether plotCircleHeatMap rescales p-values itself.
plot_utils.plotCircleHeatMap(
    df,
    circle_var='P_Value',
    color_var='Difference_In_Median',
    x_axis='cancer_type',
    y_axis='comparison',
)




In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import gseapy as gp
from gseapy.plot import barplot, dotplot

In [11]:
# Gene symbols to send to the enrichment query.
# NOTE(review): df stacks the colon/endo/brca frames, so each symbol can appear
# up to three times in this Series — confirm the enrichment call tolerates duplicates.
gene_list = df.loc[:, 'comparison']

In [14]:
# Query Enrichr for the gene list against the Reactome_2016 gene-set library.
# NOTE(review): the outdir name mentions "kegg" although the library is Reactome — confirm intended.
enr = gp.enrichr(
    gene_list=gene_list,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg__pos_neg_corr',
)

In [19]:
# Show the first five rows of the enrichment results table.
enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Mitochondrial translation elongation Homo sapi...,63/84,1.507897e-161,2.307083e-158,0,0,230.769231,85455.111621,MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL...
1,Reactome_2016,Mitochondrial translation initiation Homo sapi...,63/84,1.507897e-161,1.153542e-158,0,0,230.769231,85455.111621,MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL...
2,Reactome_2016,Mitochondrial translation termination Homo sap...,63/84,1.507897e-161,7.690277e-159,0,0,230.769231,85455.111621,MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL...
3,Reactome_2016,Mitochondrial translation Homo sapiens R-HSA-5...,63/90,3.178254e-158,1.215682e-155,0,0,215.384615,78109.68582,MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL...
4,Reactome_2016,Organelle biogenesis and maintenance Homo sapi...,63/326,8.26245e-113,2.52831e-110,0,0,59.46201,15345.979086,MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL...


In [17]:
# Full (untruncated) term name of the result row labelled 1.
enr.res2d.loc[1, 'Term']

'Mitochondrial translation initiation Homo sapiens R-HSA-5368286'

In [18]:
# Full (untruncated) semicolon-separated gene string of the result row labelled 1.
enr.res2d.loc[1, 'Genes']

'MRPS17;MRPS15;MRPS16;MRPS11;MRPS12;MRPL38;MRPL39;MRPS10;MRPL37;MRPL34;MRPL35;MRPL32;MRPL41;MRPL42;MRPL3;MRPL2;MRPL40;MRPL1;MRPL9;MRPS28;MRPS26;MRPS25;MRPS22;MRPS23;MRPS18B;MRPL49;MRPS18A;MRPS21;MRPL47;MRPL48;MRPS7;MRPL45;MRPS6;MRPL46;MRPS5;MRPL43;MRPL44;MRPL52;MRPL53;MRPS9;MRPL51;MRPS35;MRPS36;MRPL18;MRPS33;MRPL19;MRPS34;MRPL16;MRPS31;MRPL17;MRPL14;MRPL15;MRPL12;MRPL13;MRPL10;MRPL54;MRPL11;MRPL20;MRPL27;MRPL28;MRPL24;MRPL22;MRPL30'