# PIK3CA trans phospho site analysis

This notebook performs t-tests of phosphorylation abundance between tumors with PIK3CA hotspot mutations (E542K, E545K, and H1047R) and wildtype tumors (no PIK3CA mutation of any kind) for a set of interacting genes. The first gene set is WikiPathways' list of genes interacting with PIK3CA. The second set consists of genes that are closer to PIK3CA in the EGFR pathway.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats
import matplotlib.pyplot as plt
import sys 
import re
import cptac
import statsmodels.stats.multitest
import operator
import cptac.utils as u
import warnings
warnings.filterwarnings('ignore')

  import pandas.util.testing as tm


In [9]:
# Instantiate the three cptac dataset objects used throughout the notebook:
# br (Brca), endo (Endometrial) and col (Colon). get_trans_results compares
# its `cancer` argument against `br` and `col`, so these names must exist
# before that function is called.
br = cptac.Brca()
endo = cptac.Endometrial()
col = cptac.Colon()

                                                

In [10]:
def get_all_interacting_proteins(gene, num_results_in = 25):
    """Merge the interacting-protein lists that four lookups report for `gene`.

    Calls the BioPlex, WikiPathways, BioGRID and STRING helpers of
    ``cptac.utils`` (imported as ``u``), in that order, and combines their
    results into one de-duplicated list.

    Parameters
    ----------
    gene
        Gene identifier, passed unchanged to every helper.
    num_results_in : int, default 25
        Forwarded as ``num_results`` to the BioGRID and STRING helpers only.

    Returns
    -------
    list
        Each reported protein once, in first-seen order (BioPlex hits first,
        then WikiPathways, BioGRID, STRING), so repeated runs return the
        same ordering.
    """
    per_source_hits = (
        u.get_interacting_proteins_bioplex(gene),
        u.get_interacting_proteins_wikipathways(gene),
        u.get_interacting_proteins_biogrid(gene, num_results=num_results_in),
        u.get_interacting_proteins_string(gene, num_results=num_results_in),
    )

    # A dict keeps insertion order, which gives a stable de-duplicated result
    # (a set would reorder the names from run to run).
    unique_hits = {}
    for hits in per_source_hits:
        # NOTE(review): assumes a helper may return None when it has no hits
        # for `gene` -- confirm against cptac.utils. Treated as "no hits"
        # instead of letting list(None) raise.
        if hits is None:
            continue
        unique_hits.update(dict.fromkeys(hits))

    return list(unique_hits)

In [11]:
def rename_duplicate_cols(df):
    """Make the column labels of `df` unique by suffixing repeated labels.

    The first occurrence of a label is kept as is; later occurrences become
    ``<label>_1``, ``<label>_2``, ... in column order. A suffix that would
    reproduce a label already present in the frame (for example a real
    ``x_1`` column next to two ``x`` columns) is skipped, so the resulting
    labels are always unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns may contain repeated labels. It is modified in
        place.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, with its columns replaced.
    """
    original_labels = list(df.columns)
    taken = set(original_labels)   # every label that must not be produced again
    occurrences = {}               # label -> next suffix number to try
    unique_labels = []

    for label in original_labels:
        suffix = occurrences.get(label, 0)
        if suffix == 0:
            # First time this label is seen: keep it untouched.
            occurrences[label] = 1
            unique_labels.append(label)
            continue

        candidate = f"{label}_{suffix}"
        while candidate in taken:
            # Suffix already used by a real column or an earlier rename.
            suffix += 1
            candidate = f"{label}_{suffix}"

        taken.add(candidate)
        occurrences[label] = suffix + 1
        unique_labels.append(candidate)

    # rename the columns with the de-duplicated labels
    df.columns = unique_labels
    return df

In [12]:
# Column names to keep from the joined phospho/mutation table: the two
# PIK3CA mutation annotation columns first, followed by one
# "<gene>_phosphoproteomics" entry per WikiPathways interactor of PIK3CA.
prot_list = list(u.get_interacting_proteins_wikipathways("PIK3CA"))
phos_list = ["PIK3CA_Location", "PIK3CA_Mutation"] + [
    interactor + "_phosphoproteomics" for interactor in prot_list
]
len(phos_list)


1599

In [13]:
# Second, hand-picked column list: "<gene>_phosphoproteomics" for a small
# panel of genes, plus the two PIK3CA mutation annotation columns that
# get_trans_results reads.
# NOTE(review): "PDK1" is the HGNC symbol for pyruvate dehydrogenase kinase 1;
# the PI3K/AKT-pathway kinase often called PDK1 has the symbol PDPK1 --
# confirm which gene is intended here.
phospho_sites = [
    gene_symbol + "_phosphoproteomics"
    for gene_symbol in (
        "PIK3CA",
        "AKT1", "AKT2", "AKT3",
        "MTOR",
        "PDK1",
        "CASP3", "CASP7", "CASP9",
        "EGFR",
        "HRAS", "NRAS", "KRAS",
        "DOCK1",
    )
] + ["PIK3CA_Mutation", "PIK3CA_Location"]


In [14]:
def get_trans_results(cancer, phos_list):
    """T-test phosphosite abundance between PIK3CA hotspot and wildtype tumors.

    Steps:
      1. Join the tumor phosphoproteomics table of `cancer` to its PIK3CA
         mutation columns.
      2. Keep only the columns whose "Name" level is listed in `phos_list`
         and flatten the column index.
      3. Label samples whose PIK3CA_Location mentions E542K, E545K or H1047R
         as "Hotspot"; keep samples whose PIK3CA_Mutation contains
         "Wildtype" as the comparison group. All other samples are dropped.
      4. Run ``u.wrap_ttest`` over every remaining phospho column with
         FDR_bh-corrected p-values, returning all comparisons.

    Parameters
    ----------
    cancer
        A cptac dataset object providing ``join_omics_to_mutations``.
        The function compares it against the notebook-level ``br`` and
        ``col`` objects to decide which column-index levels to drop, so
        those names must already be defined.
    phos_list : list of str
        "Name"-level column labels to keep. Must include "PIK3CA_Mutation"
        and "PIK3CA_Location", because both columns are read after the
        filter is applied.

    Returns
    -------
    The value returned by ``u.wrap_ttest`` (the notebook outputs show a
    table with ``Comparison`` and ``P_Value`` columns).
    """
    hotspot_changes = ("E542K", "E545K", "H1047R")
    annotation_cols = ("PIK3CA_Mutation", "PIK3CA_Location")

    phos = cancer.join_omics_to_mutations(
        omics_df_name="phosphoproteomics",
        mutations_genes="PIK3CA",
        tissue_type="tumor",
    )

    # Dataset-specific column levels are dropped before filtering by "Name".
    if cancer == br:
        phos = u.reduce_multiindex(df=phos, levels_to_drop=["Database_ID", "Peptide"])
    elif cancer == col:
        phos = u.reduce_multiindex(df=phos, levels_to_drop=["Database_ID"])

    genefilter = phos.columns.get_level_values("Name").isin(phos_list)
    phos_pik3ca = phos[phos.columns[genefilter]]
    phos_pik3ca = u.reduce_multiindex(phos_pik3ca, flatten=True)

    # Each annotation cell is iterated and joined, giving one comma-separated
    # string per sample that the .str filters below can search.
    for annotation in annotation_cols:
        phos_pik3ca[annotation] = [','.join(map(str, l)) for l in phos_pik3ca[annotation]]

    phos_pik3ca = rename_duplicate_cols(phos_pik3ca)

    # One regex alternation replaces three OR-ed str.contains calls.
    is_hotspot = phos_pik3ca.PIK3CA_Location.str.contains("|".join(hotspot_changes))
    # .copy() so the relabel below writes to an independent frame rather than
    # to a filtered view of phos_pik3ca.
    hotspot = phos_pik3ca[is_hotspot].copy()
    hotspot["PIK3CA_Mutation"] = "Hotspot"

    wt = phos_pik3ca[phos_pik3ca.PIK3CA_Mutation.str.contains('Wildtype')]
    hotspot_wt = pd.concat([hotspot, wt])

    # Every column except the two annotation columns is tested.
    cols = [c for c in hotspot_wt.columns if c not in annotation_cols]
    phos_pval = u.wrap_ttest(
        hotspot_wt,
        'PIK3CA_Mutation',
        cols,
        return_all=True,
        pval_return_corrected=True,
        correction_method="FDR_bh",
    )
    return phos_pval
    

# Brca

In [15]:
# Hotspot-vs-wildtype t-tests in the Brca dataset over the columns named in
# phos_list (built from the WikiPathways interactors of PIK3CA).
brca_results = get_trans_results(br,phos_list)
brca_results

Unnamed: 0,Comparison,P_Value
0,DOCK1_phosphoproteomics_S1764,0.034737
1,PAK2_phosphoproteomics_S141T143,0.034737
2,AKT2_phosphoproteomics_S34,0.117138
3,PPP1R12A_phosphoproteomics_T853,0.127249
4,NCOR1_phosphoproteomics_S2335,0.147162
...,...,...
4589,MED1_phosphoproteomics_S1207,0.999731
4590,FLNB_phosphoproteomics_T1278,0.999731
4591,NFIB_phosphoproteomics_S410_2,0.999735
4592,NFIB_phosphoproteomics_S410,0.999735


In [16]:
# Same Brca comparison restricted to the hand-picked phospho_sites columns;
# p-values are corrected over this smaller set of tests.
brca_results2 = get_trans_results(br,phospho_sites)
brca_results2

Unnamed: 0,Comparison,P_Value
0,DOCK1_phosphoproteomics_S1764,0.000696
1,AKT2_phosphoproteomics_S34,0.001759
2,DOCK1_phosphoproteomics_S1879,0.088654
3,DOCK1_phosphoproteomics_S1785,0.088654
4,AKT1_phosphoproteomics_S477,0.090415
5,DOCK1_phosphoproteomics_S1777,0.110194
6,MTOR_phosphoproteomics_S2478,0.189698
7,DOCK1_phosphoproteomics_S1702,0.220209
8,MTOR_phosphoproteomics_S2478S2481,0.220209
9,AKT1_phosphoproteomics_S129,0.327649


# Endo

In [17]:
# Hotspot-vs-wildtype t-tests in the Endometrial dataset over the columns
# named in phos_list (built from the WikiPathways interactors of PIK3CA).
Endo_results = get_trans_results(endo,phos_list)
Endo_results

Unnamed: 0,Comparison,P_Value
0,PPP1R12A_phosphoproteomics_Y769,0.974884
1,PKP2_phosphoproteomics_S183,0.974884
2,PPP1R12B_phosphoproteomics_S903,0.999730
3,PPP1R12B_phosphoproteomics_S900,0.999730
4,PPP1R12B_phosphoproteomics_S447,0.999730
...,...,...
4919,PARD3_phosphoproteomics_S1261,0.999894
4920,ARID4B_phosphoproteomics_Y795,0.999894
4921,TYK2_phosphoproteomics_S884,0.999894
4922,FLNA_phosphoproteomics_T2336,0.999894


In [18]:
# Same Endometrial comparison restricted to the hand-picked phospho_sites
# columns; p-values are corrected over this smaller set of tests.
Endo_results2 = get_trans_results(endo,phospho_sites)
Endo_results2

Unnamed: 0,Comparison,P_Value
0,AKT1_phosphoproteomics_S124,0.995437
1,EGFR_phosphoproteomics_S1026,0.995437
2,EGFR_phosphoproteomics_S1039,0.995437
3,EGFR_phosphoproteomics_S1042,0.995437
4,EGFR_phosphoproteomics_S1045,0.995437
5,EGFR_phosphoproteomics_S1064,0.995437
6,EGFR_phosphoproteomics_S1071,0.995437
7,EGFR_phosphoproteomics_S1166,0.995437
8,EGFR_phosphoproteomics_S991,0.995437
9,EGFR_phosphoproteomics_T1041,0.995437


# Colon 

In [19]:
# Hotspot-vs-wildtype t-tests in the Colon dataset over the columns named in
# phos_list (built from the WikiPathways interactors of PIK3CA).
Colon_results = get_trans_results(col,phos_list)
Colon_results

Unnamed: 0,Comparison,P_Value
0,RGL1_phosphoproteomics_T636,0.242819
1,SCRIB_phosphoproteomics_S1348,0.242819
2,FLNC_phosphoproteomics_S1728,0.242819
3,ARID4B_phosphoproteomics_S790,0.242819
4,TSC2_phosphoproteomics_S1420,0.242819
...,...,...
1728,PRKD1_phosphoproteomics_S249,0.999293
1729,PXN_phosphoproteomics_S130,0.999293
1730,PKN1_phosphoproteomics_S69,0.999293
1731,MAP4K4_phosphoproteomics_S842,0.999399


In [20]:
# Same Colon comparison restricted to the hand-picked phospho_sites columns;
# p-values are corrected over this smaller set of tests.
Colon_results2 = get_trans_results(col,phospho_sites)
Colon_results2

Unnamed: 0,Comparison,P_Value
0,EGFR_phosphoproteomics_Y1092,0.083389
1,AKT1_phosphoproteomics_S124,0.520953
2,MTOR_phosphoproteomics_S1261,0.520953
3,EGFR_phosphoproteomics_S991,0.520953
4,EGFR_phosphoproteomics_S1071,0.520953
5,EGFR_phosphoproteomics_S1064,0.520953
6,MTOR_phosphoproteomics_S2478,0.520953
7,MTOR_phosphoproteomics_S2481,0.520953
8,DOCK1_phosphoproteomics_S1704,0.520953
9,AKT2_phosphoproteomics_T451,0.520953


# Test Plots

In [None]:
# Pull the PAK2 S141 p-value row out of each cancer's result table.
# NOTE(review): reads the phos_list result tables defined above
# (Endo_results, Colon_results, brca_results), since no *_phos_pval tables
# are assigned anywhere in the visible notebook -- confirm these are the
# intended source.
pak2_site = "PAK2_phosphoproteomics_S141"
# In the Brca results the PAK2 site is reported as the doubly
# phosphorylated S141T143 label, so the plain S141 label would match nothing.
pak2_site_brca = "PAK2_phosphoproteomics_S141T143"

endo_pval = Endo_results.loc[Endo_results.Comparison == pak2_site]
endo_p = endo_pval.P_Value
col_pval = Colon_results.loc[Colon_results.Comparison == pak2_site]
col_p = col_pval.P_Value
brca_pval = brca_results.loc[brca_results.Comparison == pak2_site_brca]
brca_p = brca_pval.P_Value
brca_p

In [None]:
# Tag each per-cancer frame with its cancer label and stack them into one
# long frame for plotting.
# NOTE(review): col_phos_df and brca_phos_df are not assigned anywhere in the
# visible notebook, and endo_phos_df is only assigned in the cell after this
# one -- confirm where these frames come from before running.
col_phos_df = col_phos_df.assign(cancer = 'Colon')
brca_phos_df = brca_phos_df.assign(cancer = 'Brca')
# Brca reports the PAK2 site as S141T143; rename so all three frames share
# one column name.
brca_phos_df = brca_phos_df.rename(columns={"PAK2_phosphoproteomics_S141T143":"PAK2_phosphoproteomics_S141"})
endo_phos_df = endo_phos_df.assign(cancer = 'Endometrial')
# pd.concat instead of DataFrame.append (removed in pandas 2.0); row order is
# Brca, Colon, Endometrial as before.
df2_hotspot = pd.concat([brca_phos_df, col_phos_df, endo_phos_df])
df2_hotspot.columns


In [None]:
# Keep the mutation label plus the PAK2 S141 and DOCK1 S1764 phospho columns.
# NOTE(review): in the visible notebook `hotspot_wt` is only assigned inside
# get_trans_results, so this cell presumably relies on a notebook-level copy
# defined elsewhere -- confirm.
endo_phos_df = hotspot_wt[["PIK3CA_Mutation","PAK2_phosphoproteomics_S141","DOCK1_phosphoproteomics_S1764"]]


In [None]:
# Box + strip plot of PAK2 S141 phospho abundance per cancer, split by
# PIK3CA status (wildtype vs hotspot), with significance annotations.
gene = 'PIK3CA'
plt.rcParams['figure.figsize']=(11.7, 8.5) #size of plot
sns.set(font_scale = 1.4)

# Column name as produced by the rename/selection cells above (no leading
# space).
site_col = "PAK2_phosphoproteomics_S141"
hue_order = ["Wildtype_Tumor", "Hotspot"]

ax = sns.boxplot(x='cancer', y=site_col, data=df2_hotspot, hue='PIK3CA_Mutation',
                 hue_order=hue_order, showfliers=False)
#ax.set_title('Pancancer cis effect of PIK3CA Missense')
sns.stripplot(x='cancer', y=site_col, data=df2_hotspot, jitter=True,
              color=".3", hue='PIK3CA_Mutation', hue_order=hue_order, dodge=True, ax=ax)
ax.set(xlabel = "\nPIK3CA Mutation Status", ylabel = "PAK2_S141"+' phosphoproteomics')

# format legend: keep one entry per hue level (the strip plot would
# otherwise repeat them)
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles[:len(hue_order)], labels[:len(hue_order)])

# Create significance symbols:
# * P < 0.05   ** P < 0.001   *** P < 0.0001
# NOTE(review): lp_hotspot, col_hotspot and en_hotspot are not assigned in the
# visible notebook; they are compared against floats below, so each is
# presumably a single p-value -- confirm their source.
cancer_pvals = {'Brca':lp_hotspot, 'Colon':col_hotspot, 'Endo':en_hotspot}

# create pval annotations
symbols = {}
print('p-values: \n')
for cancer, pval in cancer_pvals.items():
    if pval < 0.0001:
        symbols[cancer] = '***'
    elif pval < 0.001:
        symbols[cancer] = '**'
    elif pval < 0.05:
        symbols[cancer] = '*'
    else:
        # Covers p >= 0.05 (including exactly 0.05) so every cancer gets a
        # symbol and the lookups below cannot fail.
        symbols[cancer] = 'ns'
    print(cancer, str(pval), '\n')


# NOTE(review): format_pval_annotation is not defined in the visible notebook
# -- confirm it is available before running this cell.
format_pval_annotation(symbols['Brca'], -0.5, 0.5, 2.5)  # Brca
format_pval_annotation(symbols['Colon'], 0.6, 1.4, 1.5)  # Colon
format_pval_annotation(symbols['Endo'], 1.7, 2.4, 1) # Endometrial
plt.show()
plt.clf()
plt.close()