In [1]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import re
import seaborn as sns
import statsmodels.stats.multitest
import sys
import urllib3
import json
import operator
import collections

import cptac

In [31]:
'''
@Param protein:
    The name of the protein that you want to generate a list of interacting proteins for.

@Param number (default=25):
    The number of interacting proteins that you want to get.
    
@Return:
    A list of proteins known by the String api to be interacting partners with the specified protein.
    Returns None if specified protein isn't found in String database, or connection to String api fails.
    
    
This method takes as a parameter the name of a protein. It then accesses the STRING database, through
a call to their public API, and generates a list of proteins known to be interacting partners with the specified
protein. Optional second parameter is number (which by default is 25), which specifies in the API call how many
interacting partners to retrieve from the database. The list of interacting proteins is returned to the caller
as a python list.
'''

def get_interacting_proteins_string(protein, number=25):
    '''Use urllib3 to access the string database api, gather list of interacting proteins'''
    urllib3.disable_warnings()
    string_api_url = "https://string-db.org/api"
    output_format = "json"
    method = "network"

    '''Use the specified gene and homo sapiens species code'''
    my_protein = [protein]
    species = "9606"

    '''Format the api request to collect the appropriate information'''
    request_url = string_api_url + "/" + output_format + "/" + method + "?"
    request_url += "identifiers=%s" % "%0d".join(my_protein)
    request_url += "&" + "species=" + species
    request_url += "&" + "limit=" + str(number)

    '''Send a request to the API, print the response status'''
    try:
        http = urllib3.PoolManager()
        response = http.request('GET',request_url)
        '''Catch exception if it fails while accessing the api'''
    except urllib3.HTTPError as err:
        error_message = err.read()
        print("Error accessing STRING api, " , error_message)
        sys.exit()
    
    '''Get the data from the api response'''
    interacting_proteins = []
    if response.status == 200: 
        '''Get the data from the API's response'''
        data = response.data
        y = json.loads(data)
        
        print(data)

        '''Make a list of the resulting interacting proteins'''
        for entry in y:
            if entry["preferredName_A"] not in interacting_proteins:
                interacting_proteins.append(entry["preferredName_A"])
            if entry["preferredName_B"] not in interacting_proteins:
                interacting_proteins.append(entry["preferredName_B"])
        
        return interacting_proteins
        
        '''If we didnt get a successful response from the api, notify the caller and return None'''
    else:
        print("\nSpecified gene was not found in String database, double check that you have it correctly!")
        return None

In [50]:
def get_interacting_proteins_bioplex(gene_name, probability_interaction=.5):
    '''Store interacting proteins in a list'''
    interacting_proteins = []
    urllib3.disable_warnings()
    
    '''Configure url for request'''
    
    request_url = "http://bioplex.hms.harvard.edu/bioplexDisplay/api/api.php?geneQuery=" + gene_name + "&pintLow=" + str(probability_interaction)
    #request_url = "https://webservice.thebiogrid.org/interactions/?searchNames=true&excludeGenes=true&geneList=" + gene_name +"&includeInteractors=true&format=json&taxId=9606&start=0&max=" + str(number) + "&accesskey=0ff59dcf3511928e78aad499688381c9"
    try:
        '''Send request, get response'''
        http = urllib3.PoolManager()
        response = http.request('GET',request_url)
        
        '''If response was successful'''
        if response.status == 200: 
            '''Get the data from the API's response'''
            data = response.data
            #print(data)
            y = json.loads(data)
            
            '''Add name of each protein to list of interacting proteins'''
            for entry in y:
                if entry['SymbolA'] not in interacting_proteins:
                    interacting_proteins.append(entry['SymbolA'])
                if entry['SymbolB'] not in interacting_proteins:
                    interacting_proteins.append(entry['SymbolB'])
            
            '''Return this list to caller'''
            return interacting_proteins
        
        else:
            '''If response was not successful, notify caller of error, return None'''
            print("Error accessing api!")
            return None
        
        '''Catch exception, notify caller of errorm return None'''
    except Exception as err:
        print("Error accessing api, " , err)
        return None

In [51]:
def get_iteracting_proteins_biogrid(gene_name, number=25):
    '''Store interacting proteins in a list'''
    interacting_proteins = []
    urllib3.disable_warnings()
    
    '''Configure url for request'''
    request_url = "https://webservice.thebiogrid.org/interactions/?searchNames=true&excludeGenes=true&geneList=" + gene_name +"&includeInteractors=true&format=json&taxId=9606&start=0&max=" + str(number) + "&accesskey=0ff59dcf3511928e78aad499688381c9"
    try:
        '''Send request, get response'''
        http = urllib3.PoolManager()
        response = http.request('GET',request_url)
        
        '''If response was successful'''
        if response.status == 200: 
            '''Get the data from the API's response'''
            data = response.data
            y = json.loads(data)
            
            '''Add name of each protein to list of interacting proteins'''
            for entry in y:
                if y[entry]['OFFICIAL_SYMBOL_A'] not in interacting_proteins:
                    interacting_proteins.append(y[entry]['OFFICIAL_SYMBOL_A'])
            
            '''Return this list to caller'''
            return interacting_proteins
        
        else:
            '''If response was not successful, notify caller of error, return None'''
            print("Error accessing api!")
            return None
        
        '''Catch exception, notify caller of errorm return None'''
    except Exception as err:
        print("Error accessing api, " , err)
        return None

In [52]:
interacting_proteins = get_interacting_proteins_bioplex('8289')

interacting_proteins

['BCL7C',
 'ARID1A',
 'BCL7A',
 'DPF3',
 'DPF2',
 'SMARCE1',
 'SS18',
 'SMARCD1',
 'SMARCC2']

In [49]:
ips  = get_iteracting_proteins_biogrid('ARID1A', number=300)
ips

['MAP2K4',
 'MYPN',
 'ACVR1',
 'GATA2',
 'RPA2',
 'ARF1',
 'ARF3',
 'XRN1',
 'APP',
 'APLP1',
 'CITED2',
 'EP300',
 'APOB',
 'ARRB2',
 'CSF1R',
 'PRRC2A',
 'LSM1',
 'SLC4A1',
 'BCL3',
 'ADRB1',
 'BRCA1',
 'ARVCF',
 'PCBD1',
 'PSEN2',
 'CAPN3',
 'ITPR1',
 'MAGI1',
 'RB1',
 'TSG101',
 'ORC1',
 'ORC4',
 'MCM5',
 'CDC7',
 'ORC2',
 'DBF4',
 'AKAP8',
 'MCM2',
 'RPA1',
 'RPA3',
 'ORC5',
 'MCM3',
 'YWHAZ',
 'DOCK8',
 'TRIP10',
 'CDKN3',
 'RBBP8',
 'HAND2',
 'CLTC',
 'IGFBP3',
 'TXN',
 'ATF2',
 'CCND3',
 'RALBP1',
 'PPIB',
 'CSRP1',
 'CSK',
 'DCN',
 'SIM2',
 'SIM1',
 'NPAS4',
 'HBEGF',
 'PCNA',
 'TOPBP1',
 'FLNA',
 'LSM3',
 'LSM4',
 'LSM5',
 'CREBBP',
 'NUMA1',
 'EXOSC4',
 'PLCG1',
 'KTN1',
 'KDR',
 'PPARGC1B',
 'SMARCA4',
 'DDX54',
 'ESR1',
 'FGFR1',
 'TGFB1',
 'SUMO1',
 'MPP6',
 'GABRG2',
 'SFN',
 'TGFB1I1',
 'NR3C1',
 'GCK',
 'HSPA5',
 'SLC2A2',
 'SLC2A4',
 'GRIK2',
 'GRIK1',
 'CCNT1',
 'RGS19',
 'HSF2',
 'LIMK1',
 'FGF7',
 'C1QBP',
 'EIF3E',
 'HNRNPA1',
 'HNRNPM',
 'UBAP2L',
 'IMMT',
 'IFT8