In [1]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import re
import seaborn as sns
import statsmodels.stats.multitest
import sys
import urllib3
import json

import CPTAC.Endometrial as CPTAC

Welcome to the CPTAC data service package. Available datasets may be
viewed using CPTAC.list(). In order to access a specific data set,
import a CPTAC subfolder using either 'import CPTAC.Dataset' or 'from
CPTAC import Dataset'.
******
Version: 0.2.5
******
Loading Endometrial CPTAC data:
Loading Dictionary...
Loading Clinical Data...
Loading Acetylation Proteomics Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Mutation Data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available
rapidly after generation for community research use. The embargo
allows exploring and utilizing the data, but the data may not be in a
publication until July 1, 2019. Please see
https://proteomics.cancer.gov/data-portal/about/data-use-agreement or
enter embargo() to open the webpage for more details.


## Edit the following to configure the notebook

In [21]:
# ---------------------------------------------------------------------------
# Notebook configuration -- edit the values below.
# ---------------------------------------------------------------------------

# The gene you want to examine.
gene = "PTEN"

# The number of interacting proteins you want to look at.
number_of_interacting_proteins_to_examine = 30

# (True or False) Whether you want to examine the effect of hotspot
# mutations in this gene.
hotspot = True

# (True or False) Whether you want to see results of proteomic data.
proteomics = True

# (True or False) Whether you want to see results of phosphoproteomic data.
phosphoproteomics = True

# (True or False) Whether you want to see results of transcriptomic data.
transcriptomics = True

# (True or False) Whether you want to examine impact of subtype.
subtype = True

# Options are Endometrial, Ovarian, etc.
# NOTE(review): the import cell loads CPTAC.Endometrial directly; confirm
# that later cells actually consult this setting before changing it.
dataset = 'Endometrial'

In [22]:
# Query the STRING database API with urllib3 and gather the list of proteins
# that appear in the network returned for the configured gene.
urllib3.disable_warnings()
string_api_url = "https://string-db.org/api"
output_format = "json"
method = "network"

# Use the specified gene and the Homo sapiens species code.
my_gene = [gene]
species = "9606"

# Format the API request. Identifiers are joined with "%0d", the
# percent-encoded carriage return, so a multi-gene list would also work.
request_url = string_api_url + "/" + output_format + "/" + method + "?"
request_url += "identifiers=%s" % "%0d".join(my_gene)
request_url += "&" + "species=" + species
request_url += "&" + "limit=" + str(number_of_interacting_proteins_to_examine)

# Send the request to the API and print the response status.
# urllib3 keeps its exception classes in `urllib3.exceptions`, and they do not
# provide a `.read()` method, so the error itself is printed.
try:
    http = urllib3.PoolManager()
    response = http.request('GET', request_url)
    print("Accessing String database, response status: ", response.status)
except urllib3.exceptions.HTTPError as err:
    print("Could not reach the String database: ", err)
    sys.exit()

# Stop with a readable message instead of failing later on an unexpected
# payload when the server did not answer with 200 OK.
if response.status != 200:
    print("String database request failed, response body: ",
          response.data.decode("utf-8", errors="replace"))
    sys.exit()

# Get the data from the API's response and parse the JSON payload.
# `data` holds the raw bytes, `y` the decoded list of network entries.
data = response.data
y = json.loads(data.decode("utf-8"))

# Make a list of the resulting interacting proteins: every distinct name seen
# on either side of an entry, in order of first appearance. The query gene
# itself can be part of this list.
interacting_proteins = []
seen_names = set()
for entry in y:
    for name in (entry["preferredName_A"], entry["preferredName_B"]):
        if name not in seen_names:
            seen_names.add(name)
            interacting_proteins.append(name)

print("\nTop " + str(number_of_interacting_proteins_to_examine) + " " + gene + " interacting proteins: \n")
for ip in interacting_proteins:
    print(ip)

Accessing String database, response status:  200

Top 30 PTEN interacting proteins: 

MDM2
CSNK2A1
STUB1
PDGFRB
PIK3R2
PIK3C3
CSNK2A2
SLC9A3R1
PIK3R3
USP13
PIK3CA
TP53
EGFR
PIK3CB
INPPL1
PTK2
USP7
WWP2
XIAP
PTEN
PREX2
MAST2
OTUD3
PIK3CD
ROCK1
SHC1
ITPKB
NEDD4
INPP4B
PIK3R1
AKT1
