# PIK3CA trans effect anaylsis: transcriptomics  

This notebook performs t-tests of transcriptomic abundance between PIK3CA hotspot mutations (E542K,E545K,and H1047R)and wildtype tumors (no PIK3CA mutation of any kind) for proteins within th PI3K-AKT wiki pathway.  

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats
import matplotlib.pyplot as plt
import sys 
import re
import cptac
import statsmodels.stats.multitest
import operator
import cptac.utils as u
import warnings
warnings.filterwarnings('ignore')

  import pandas.util.testing as tm


In [2]:
br = cptac.Brca()
endo = cptac.Endometrial()
col = cptac.Colon()

                                                

In [3]:
#cptac version
cptac.version()

'0.8.5'

In [6]:
prot = u.get_proteins_in_pathways('PI3K-Akt Signaling Pathway', 'wikipathways')
proteins = list(prot.member)

prot_list = [prot + "_transcriptomics" for prot in proteins]
prot_list.append("PIK3CA_Mutation")
prot_list.append("PIK3CA_Location")


In [7]:
def rename_duplicate_cols(df):
    cols = pd.Series(df.columns[:])

    for dup in cols[cols.duplicated()].unique(): 
        cols[cols[cols == dup].index.values.tolist()] = [dup + '_' + str(i) if i != 0 else dup for i in range(sum(cols == dup))]

    # rename the columns with the cols list.
    df.columns=cols
    return df

In [15]:
def get_trans_results(cancer, prot_list):
    rna = cancer.join_omics_to_mutations(omics_df_name = "transcriptomics",mutations_genes="PIK3CA", tissue_type="tumor")

    genefilter = rna.columns.get_level_values("Name").isin(prot_list)
    rna_pik3ca = rna[rna.columns[genefilter]]
    rna_pik3ca['PIK3CA_Mutation'] = [','.join(map(str, l)) for l in rna_pik3ca['PIK3CA_Mutation']]
    rna_pik3ca['PIK3CA_Location'] = [','.join(map(str, l)) for l in rna_pik3ca['PIK3CA_Location']]
    rna_pik3ca  = rename_duplicate_cols(rna_pik3ca)
    hotspot = rna_pik3ca[(rna_pik3ca.PIK3CA_Location.str.contains('E542K'))| 
                    rna_pik3ca.PIK3CA_Location.str.contains('E545K')| 
                    rna_pik3ca.PIK3CA_Location.str.contains('H1047R')]
    hotspot["PIK3CA_Mutation"] = "Hotspot"
    wt = rna_pik3ca[(rna_pik3ca.PIK3CA_Mutation.str.contains('Wildtype'))]
    hotspot_wt = pd.concat([hotspot, wt])
    cols = hotspot_wt.columns.to_list()
    cols.remove("PIK3CA_Mutation")
    cols.remove("PIK3CA_Location")
    rna_pval = u.wrap_ttest(hotspot_wt, 'PIK3CA_Mutation', cols,return_all=True, pval_return_corrected= True, correction_method= "FDR_bh")
    return rna_pval
    

# Brca

In [9]:
brca_results = get_trans_results(br,prot_list)
brca_results

Unnamed: 0,Comparison,P_Value
0,COL4A5_transcriptomics,0.262590
1,COL4A6_transcriptomics,0.383544
2,HGF_transcriptomics,0.383544
3,BCR_transcriptomics,0.383544
4,CDK2_transcriptomics,0.383544
...,...,...
303,FGF9_transcriptomics,0.996510
304,SGK1_transcriptomics,0.998960
305,CDK4_transcriptomics,0.998960
306,PDGFB_transcriptomics,0.998960


# Endo

In [16]:
Endo_results = get_trans_results(endo,prot_list)
Endo_results


Unnamed: 0,Comparison,P_Value
0,IFNB1_transcriptomics,0.402600
1,AKT1_transcriptomics,0.995599
2,MAP2K2_transcriptomics,0.995599
3,MAP2K1_transcriptomics,0.995599
4,LPAR6_transcriptomics,0.995599
...,...,...
333,FGFR4_transcriptomics,0.995599
334,G6PC_transcriptomics,0.995599
335,VWF_transcriptomics,0.995599
336,TLR2_transcriptomics,0.996988


# Colon 

In [17]:
Colon_results = get_trans_results(col,prot_list)
Colon_results

Unnamed: 0,Comparison,P_Value
0,ITGB3_transcriptomics,0.199389
1,IL4R_transcriptomics,0.199389
2,LAMB3_transcriptomics,0.199389
3,PPP2R5C_transcriptomics,0.199389
4,CDK4_transcriptomics,0.199389
...,...,...
232,GNG2_transcriptomics,0.987884
233,PIK3R5_transcriptomics,0.987884
234,IL2RA_transcriptomics,0.987884
235,SGK2_transcriptomics,0.987884
