In [17]:
#size=-1 to get all genes, but takes a long time
#if statements might need to be added in case keys not in dict when parsing

#this function queries WikiPathways for all interactions of a given gene in the species Homo sapiens
#some interactions won't be genes

import urllib3
import json

def findInteractions(geneName): #returns list of interacting elements
    """Query WikiPathways for elements that interact with ``geneName``.

    Only interactions whose ``species`` is exactly "Homo sapiens" are used.
    For each such interaction the ``fields -> right -> values`` entries are
    read first, then ``fields -> left -> values``.

    Args:
        geneName: Gene symbol to search for (e.g. "TP53"). It is URL-escaped
            before being placed in the query string.

    Returns:
        A list of unique values in order of first appearance, excluding any
        value that contains ``geneName`` (case-insensitive substring match).
        An empty list is returned when the response status is not 200 or
        when the request/parsing fails, so callers can always iterate the
        result. Some returned values may not be gene names.
    """
    from urllib.parse import quote  # local import: only used to escape the query term

    interactionList = []
    seenValues = set()  # O(1) duplicate check; the list itself keeps the order

    try:
        geneKey = geneName.casefold()  # hoisted: constant for every comparison below
        requestURL = "http://webservice.wikipathways.org/findInteractions?query=" + quote(geneName, safe="") + "&format=json"
        http = urllib3.PoolManager()
        response = http.request('GET', requestURL)

        if response.status != 200:
            print("Something went wrong with the response status")
            return interactionList

        responseDataDict = json.loads(response.data)
        # The interaction records are taken from the first value of the response object.
        resultList = list(responseDataDict.values())
        interactionsList = resultList[0] if resultList else []

        for interactionDict in interactionsList:
            if interactionDict.get("species") != "Homo sapiens":
                continue

            # Records with missing keys are skipped instead of aborting the whole parse.
            fieldsDict = interactionDict.get("fields", {})
            for side in ("right", "left"):
                for value in fieldsDict.get(side, {}).get("values", []):
                    if (value not in seenValues) and (geneKey not in value.casefold()):
                        seenValues.add(value)
                        interactionList.append(value)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt can still stop the cell.
        print("There was a problem getting a response from the WikiPathways API")
        return []

    return interactionList
        


In [18]:
#function to split each item containing a newline or tab into separate items

def fixNewlines(interactingList):
    """Split entries that contain newlines or tabs into separate entries.

    Args:
        interactingList: Iterable of strings, e.g. the result of
            ``findInteractions``.

    Returns:
        A new list in the original order where every item containing
        "\\n" and/or "\\t" has been replaced by its pieces. Items without
        either character are copied unchanged. Empty pieces produced by
        leading/trailing/adjacent separators are kept.
    """
    newlineFreeList = []
    for item in interactingList:
        # Normalise tabs to newlines first so an item that mixes both
        # separators is fully split (an if/elif would only handle one kind).
        newlineFreeList.extend(item.replace("\t", "\n").split("\n"))
    return newlineFreeList

In [19]:
#METHOD 2: GO THROUGH EACH GENE IN MY UNFILTERED LIST AND SEE IF IT IS IN UNIPROT
        
import certifi
import json
import urllib3
#import requests, sys

def geneInUniprot(geneName):  #pass in ONE name taken from the interacting list
    """Check whether the EBI Proteins API returns a human entry for ``geneName``.

    Args:
        geneName: A single candidate gene symbol. It is URL-escaped before
            being placed in the query string.

    Returns:
        ``geneName`` itself when the API answers with status 200 and a
        non-empty JSON list whose first element has an "accession" key.
        The sentinel string "n" in every other case: empty name, the
        literal name "DNA", request failure, non-200 status, or a body
        that is not the expected JSON list.
    """
    from urllib.parse import quote  # local import: only used to escape the gene name

    # These inputs are always rejected, so skip the network round trip.
    if (not geneName) or (geneName == "DNA"):
        return "n"

    try:
        requestURL = "https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=-1&gene=" + quote(geneName, safe="") + "&organism=homo%20sapiens&format=json"
        http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
        response = http.request('GET', requestURL)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt can still stop a long loop.
        return "n"

    if response.status != 200:
        return "n"

    try:
        responseDataList = json.loads(response.data)
    except ValueError:
        # Body was not valid JSON / not decodable text.
        return "n"

    # Guard the shape before indexing: an error payload could be a dict, not a list.
    if (
        isinstance(responseDataList, list)
        and len(responseDataList) != 0
        and isinstance(responseDataList[0], dict)
        and "accession" in responseDataList[0]
    ):
        return geneName
    return "n"
        
#p = geneInUniprot("TP53")
#print(p)

In [20]:
#Method 2: check individually if each gene in interacting list is a uniprot gene
#FASTER

import time
start_time = time.time()

# Fall back to an empty list if the lookup failed and produced None.
interactingList = findInteractions("TP53") or []

# Some entries hold several names joined by newlines/tabs; split them first.
fixedInteractingList = fixNewlines(interactingList)

# geneInUniprot returns the sentinel "n" for names it cannot confirm.
filteredGenes = [gene for gene in fixedInteractingList if geneInUniprot(gene) != "n"]
print(sorted(filteredGenes))

# Divide by 60 BEFORE formatting: "%" and "/" have equal precedence and bind
# left to right, so the division must sit inside the formatted operand.
print("--- %s minutes ---" % ((time.time() - start_time) / 60))

['APC', 'ARF', 'ARNT', 'ATAD2', 'ATF2', 'ATM', 'ATR', 'BAK1', 'BAX', 'BBC3', 'BCL2', 'BCL2L1', 'BCL2L11', 'BCL3', 'BID', 'BLM', 'BMF', 'BNIP3', 'BNIP3L', 'BOK', 'CAPN1', 'CAPN2', 'CASP8', 'CCNA2', 'CCNB1', 'CCNB2', 'CCNB3', 'CCND1', 'CCNE1', 'CDC2', 'CDC25C', 'CDC42', 'CDK1', 'CDK5R1', 'CDKN1A', 'CDKN1B', 'CDKN1C', 'CDKN2A', 'CEBPZ', 'CHEK2', 'COL9A1', 'COL9A3', 'CREBBP', 'CTNNB1', 'Chk1', 'Chk2', 'DDB2', 'DDIT4', 'EGFR', 'ELK1', 'EP300', 'FANCI', 'FAS', 'FASLG', 'FGF7', 'GADD45A', 'GADD45B', 'GADD45G', 'GRIN1', 'GSK3B', 'HBEGF', 'HIF1A', 'HMGB1', 'IFI16', 'ING1', 'ING2', 'JNK', 'JUN', 'LBR', 'MAPK10', 'MAPK11', 'MAPK12', 'MAPK13', 'MAPK14', 'MAPK8', 'MAPK9', 'MDM2', 'MDM4', 'MTA2', 'MTDH', 'MYC', 'NANOG', 'OTUD5', 'OTX2', 'PCNA', 'PIDD', 'PLAC8', 'PMAIP1', 'POLK', 'PRKAA1', 'PRKAB1', 'PRKAG1', 'RAD17', 'RB1', 'RCHY1', 'RFC3', 'RFC4', 'RHOA', 'RHOB', 'RHOC', 'RNF144B', 'ROCK2', 'ROS', 'RRM2B', 'S100A6', 'SAT1', 'SAT2', 'SERPINE1', 'SESN1', 'SESN2', 'SFN', 'SLC11A2', 'SNAI2', 'SNURF', '

In [21]:
#this function will filter out all interactions that aren't proteins in UNIPROT

import certifi
import json
import urllib3
#import requests, sys

def filterInteractionList(interactionList):  #pass in the interacting list from findInteractions
    """Keep only the entries of ``interactionList`` that are UniProt human gene names.

    Downloads the human protein entries from the EBI Proteins API
    (``size=-1``, i.e. everything, which is slow), collects every
    ``gene[*].name.value`` found in the response and intersects that with
    the input.

    Args:
        interactionList: List of candidate names, e.g. from ``findInteractions``.

    Returns:
        A list with the items of ``interactionList`` (original order,
        duplicates preserved) that exactly match a collected gene name.
        An empty list is returned for empty/None input and whenever the
        request fails, the status is not 200, or the body is not valid
        JSON, so callers can always take ``len()`` of the result.
    """
    # Nothing to filter: avoid the very large download entirely.
    if not interactionList:
        return []

    try:
        requestURL = "https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=-1&organism=Homo%20sapiens&format=json"
        http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
        response = http.request('GET', requestURL)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt can still stop the download.
        print("There was a problem getting a response from the UNIPROT API")
        return []

    if response.status != 200:
        print("Something went wrong with the response status")
        return []

    try:
        responseDataList = json.loads(response.data)
    except ValueError:
        print("The UNIPROT API response could not be parsed as JSON")
        return []

    # A set gives O(1) membership tests; the reference list can be very large.
    geneInteractionsUNIPROT = set()
    for accessionDict in responseDataList:
        if "gene" not in accessionDict:
            continue
        for aDict in accessionDict["gene"]:
            if ("name" in aDict) and ("value" in aDict["name"]):
                geneInteractionsUNIPROT.add(aDict["name"]["value"])

    return [gene for gene in interactionList if gene in geneInteractionsUNIPROT]

In [None]:
#Method 1: get a list of all uniprot genes and intersect them with interacting genes to filter out non-genes
#SLOWER
# Either helper may yield None when its request fails; substitute an empty
# list so len() and the prints below still work instead of raising TypeError.
interactingList = findInteractions("TP53") or []
finalGeneList = filterInteractionList(interactingList) or []
print(len(finalGeneList))
print(finalGeneList)

WikiPathways: query for interacting genes given one gene