In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import collections
import re

import CPTAC.Endometrial as en
import CPTAC.Algorithms as al

Welcome to the CPTAC data service package. Available datasets may be
viewed using CPTAC.list_data(). In order to access a specific data
set, import a CPTAC subfolder using either 'import CPTAC.Dataset' or
'from CPTAC import Dataset'.
******
Version: 0.3.1
******
Loading Endometrial CPTAC data:
Loading Dictionary...
Loading Clinical Data...
Loading Acetylation Proteomics Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Mutation Data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available
rapidly after generation for community research use. The embargo
allows exploring and utilizing the data, but the data may not be in a
publication until July 1, 2019. Please see
https://proteomics.cancer.gov/data-portal/about/data-use-agreement or
enter embargo() to open the webpage for more details.


In [2]:
# Fetch the endometrial tables used by the rest of the notebook.
somatic = en.get_mutations()
clinical = en.get_clinical()
prot = en.get_proteomics()

# Combine PTEN mutation data with the PTEN proteomics column, then keep only
# the rows whose Sample_Status is 'Tumor'.
gene = 'PTEN'
joined = en.append_mutations_to_omics(
    mutation_genes=gene,
    omics_df=prot,
    omics_genes=gene,
)
tumor_mask = joined['Sample_Status'] == 'Tumor'
omics_mutations = joined.loc[tumor_mask]

# NOTE(review): .15 is presumably the minimum mutation frequency for a gene to
# count as "frequently mutated" -- confirm against al.get_frequently_mutated.
fm = al.get_frequently_mutated(somatic, omics_mutations, .15, show_percentage=False)
print(fm)

ovarian
['PTEN', 'PIK3CA', 'ARID1A', 'PIK3R1', 'KRAS', 'CTNNB1', 'CTCF', 'KMT2B', 'TP53', 'ZFHX3', 'ZFHX4', 'DOCK3', 'INPPL1', 'FBXW7', 'MUC16', 'KMT2D', 'HERC2', 'RPL22', 'RBM27', 'NSD1', 'SYNE1', 'SCAF4', 'PCLO', 'JAK1', 'CCDC168', 'ZNF469', 'LMAN1', 'OBSCN', 'AHNAK', 'HUWE1', 'DNAH17']


In [3]:
# Show the frequently mutated genes one per line for easier scanning.
gene_names = iter(fm)
for gene_name in gene_names:
    print(gene_name)

PTEN
PIK3CA
ARID1A
PIK3R1
KRAS
CTNNB1
CTCF
KMT2B
TP53
ZFHX3
ZFHX4
DOCK3
INPPL1
FBXW7
MUC16
KMT2D
HERC2
RPL22
RBM27
NSD1
SYNE1
SCAF4
PCLO
JAK1
CCDC168
ZNF469
LMAN1
OBSCN
AHNAK
HUWE1
DNAH17


In [9]:
# For every frequently mutated gene, compare the proteomics levels of its
# interacting proteins between mutated and wildtype tumor samples.
proteomics = en.get_proteomics()

for protein in fm:
    print("\nGene: ", protein)
    try:
        # Keep only the interaction partners that have a proteomics column.
        interacting_proteins = al.get_interacting_proteins(protein)
        ips = [ip for ip in interacting_proteins if ip in proteomics.columns]

        protdf = en.append_mutations_to_omics(mutation_genes=[protein], omics_df=proteomics, omics_genes=ips)
        # .copy() so the Label column below is added to an independent frame
        # rather than to a filtered view of the joined table.
        protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

        # Binary label: a sample is 'Wildtype' when its mutation entry contains
        # 'Wildtype_Tumor', otherwise 'Mutated'. Assigned by position in one
        # pass, so repeated index labels cannot cross-contaminate rows.
        mutation_col = protein + "_Mutation"
        is_wildtype = protdf[mutation_col].apply(lambda calls: 'Wildtype_Tumor' in calls)
        protdf['Label'] = np.where(is_wildtype, 'Wildtype', 'Mutated')

        # Drop the bookkeeping columns so only Label and the omics columns remain.
        protdf = protdf.drop([mutation_col, protein + "_Location", "Sample_Status"], axis=1)

        columns_to_comp = [ip + "_proteomics" for ip in ips]
        wrap_results = al.wrap_ttest(protdf, 'Label', columns_to_comp)

        if wrap_results is not None:
            print(wrap_results)
            print("\n\n")

    except Exception as err:
        # Best-effort: one failing gene must not stop the scan, but report what
        # went wrong. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit still abort the loop.
        print("Error with: ", protein, "!")
        print("    ", type(err).__name__, ": ", err, sep="")



Gene:  PTEN
           Comparison   P_Value
0   PIK3CA_proteomics  0.000017
1  CSNK2A1_proteomics  0.000102
2     TP53_proteomics  0.000935




Gene:  PIK3CA
No significant comparisons.

Gene:  ARID1A
           Comparison       P_Value
0   ARID1A_proteomics  1.025018e-10
1     DPF2_proteomics  2.166609e-07
2  SMARCB1_proteomics  6.885135e-05
3  SMARCC2_proteomics  8.270473e-05
4  SMARCE1_proteomics  1.498027e-04
5  SMARCD1_proteomics  1.744915e-04
6    BCL7C_proteomics  4.379537e-04
7    CCND1_proteomics  1.499832e-03




Gene:  PIK3R1
Error with:  PIK3R1 !

Gene:  KRAS
No significant comparisons.

Gene:  CTNNB1
            Comparison       P_Value
0    CTNNB1_proteomics  1.374820e-11
1    CTNNA2_proteomics  3.221686e-09
2      LEF1_proteomics  1.083350e-07
3  CTNNBIP1_proteomics  2.340440e-06
4      CDH2_proteomics  1.718146e-04
5     AXIN1_proteomics  1.402113e-03




Gene:  CTCF
No significant comparisons.

Gene:  KMT2B
No significant comparisons.

Gene:  TP53
          Comparison

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


No significant comparisons.

Gene:  DNAH17
No significant comparisons.


In [11]:
# Minimal reproduction of the PIK3R1 failure: request the PIK3R1 mutation data
# joined to the PIK3R1 proteomics column. The known AttributeError is caught
# and displayed so the notebook still runs top to bottom.
failing_gene = 'PIK3R1'
try:
    testdf = en.append_mutations_to_omics(mutation_genes=[failing_gene], omics_df=proteomics, omics_genes=[failing_gene])
except AttributeError as err:
    print("AttributeError:", err)
else:
    print(testdf)

AttributeError: 'float' object has no attribute 'tolist'

In [7]:
# List every proteomics column whose name contains the substring 'PIK'.
pik_labels = (label for label in proteomics.columns if 'PIK' in label)
for label in pik_labels:
    print(label)

LOC110117498-PIK3R3
PIK3AP1
PIK3C2A
PIK3C2B
PIK3C3
PIK3CA
PIK3CB
PIK3CD
PIK3CG
PIK3IP1
PIK3R1
PIK3R2
PIK3R4
PIKFYVE
