### BioGRID interaction retrieval

In [1]:
import json
import os
import pickle
import sys

import pandas as pd
import requests

In [5]:
### BioGRID access key
# Read the key from the BIOGRID_ACCESS_KEY environment variable when it is set,
# so the credential does not have to live in the notebook. The literal is kept
# only as a fallback so existing runs keep working.
# NOTE(review): this key is stored in plain text here; consider rotating it
# and dropping the fallback once the environment variable is in place.
BIOGRID_ACCESS_KEY = os.environ.get(
    'BIOGRID_ACCESS_KEY', 'a207ffa23db79e55a7d7a5a69b79f0c9'
)

### BioGRID server
# Interactions endpoint with the access key already embedded in the query string.
biogrid_server = f'https://webservice.thebiogrid.org/interactions/?accesskey={BIOGRID_ACCESS_KEY}'

In [6]:
# Gene names to look up; they are joined with "|" when the query is built.
# NOTE(review): 'BRCA' may not be an official symbol (the BRCA genes are
# usually written BRCA1 / BRCA2) - confirm the intended gene.
gene_list = [
    "BRCA",
    "BRAF",
]

In [7]:
# Fetch BioGRID interactions for `gene_list` and load them into a DataFrame.
# Query parameters follow the rules outlined in the Wiki: https://wiki.thebiogrid.org/doku.php/biogridrest
params = {
    "accesskey": BIOGRID_ACCESS_KEY,
    "format": "json",  # Return results as JSON (parsed below with r.json())
    "geneList": "|".join(gene_list),  # Must be | separated
    "searchNames": "true",  # Search against official names
    "includeInteractors": "false",  # Set to true to get any interaction involving EITHER gene, set to false to get interactions between genes
    "includeHeader": "true",
}

# Additional options to try, you can uncomment them as necessary
# params["start"] = 5 # Specify where to start fetching results from if > 10,000 results being returned
# params["max"] = 10 # Specify the number of results to return, max is 10,000
# params["interSpeciesExcluded"] = "false" # true or false, If ‘true’, interactions with interactors from different species will be excluded (ex. no Human -> Mouse interactions)
# params["selfInteractionsExcluded"] = "false" # true or false, If ‘true’, interactions with one interactor will be excluded. (ex. no STE11 -> STE11 interactions)
# params["searchIds"] = "false" # true or false, If ‘true’, ENTREZ_GENE, ORDERED LOCUS and SYSTEMATIC_NAME (orf) will be examined for a match with the geneList
# params["searchSynonyms"] = "false" # true or false, If ‘true’, SYNONYMS will be examined for a match with the geneList
# params["searchBiogridIds"] = "false" # true or false, If ‘true’, BIOGRID INTERNAL IDS will be examined for a match with the geneList
# params["excludeGenes"] = "false" # true or false, If 'true' the geneList becomes a list of genes to EXCLUDE rather than to INCLUDE
# params["includeInteractorInteractions"] = "true" # true or false, If ‘true’ interactions between the geneList’s first order interactors will be included. Ignored if includeInteractors is ‘false’ or if excludeGenes is set to ‘true’.
# params["htpThreshold"] = 50 # Any publication with more than this many interactions will be excluded
# params["throughputTag"] = "any" # any, low, high. If set to low, only `low throughput` interactions will be returned, if set to high, only `high throughput` interactions will be returned
# params["additionalIdentifierTypes"] = "SGD|FLYBASE|REFSEQ" # You can specify a | separated list of additional identifier types to search against (see get_identifier_types.py)

# biogrid_server already carries "?accesskey=..." in its query string; query the
# bare endpoint so the key is sent once (through params) instead of twice.
request_url = biogrid_server.split("?", 1)[0]

# The timeout keeps the cell from hanging indefinitely if the service does not
# answer, and raise_for_status() surfaces HTTP error responses here instead of
# as a confusing JSON or KeyError further down.
r = requests.get(request_url, params=params, timeout=60)
r.raise_for_status()
interactions = r.json()

# Create a hash of results by interaction identifier
data = {}
if interactions:  # an empty payload (no matching interactions) has nothing to iterate
    for interaction_id, interaction in interactions.items():
        # Add the interaction ID to the interaction record, so we can reference it easier
        interaction["INTERACTION_ID"] = interaction_id
        data[interaction_id] = interaction

# Columns to keep, in display order
columns = [
    "INTERACTION_ID",
    "ENTREZ_GENE_A",
    "ENTREZ_GENE_B",
    "OFFICIAL_SYMBOL_A",
    "OFFICIAL_SYMBOL_B",
    "EXPERIMENTAL_SYSTEM",
    "PUBMED_ID",
    "PUBMED_AUTHOR",
    "THROUGHPUT",
    "QUALIFICATIONS",
]

# Load the data into a pandas dataframe (one row per interaction ID).
# With no results the frame built from `data` would have no columns and the
# column selection would raise KeyError, so build an empty frame that still
# has the expected columns.
if data:
    dataset = pd.DataFrame.from_dict(data, orient="index")[columns]
else:
    dataset = pd.DataFrame(columns=columns)
dataset


Unnamed: 0,INTERACTION_ID,ENTREZ_GENE_A,ENTREZ_GENE_B,OFFICIAL_SYMBOL_A,OFFICIAL_SYMBOL_B,EXPERIMENTAL_SYSTEM,PUBMED_ID,PUBMED_AUTHOR,THROUGHPUT,QUALIFICATIONS
830686,830686,673,673,BRAF,BRAF,Affinity Capture-MS,17979178,Gloeckner CJ (2007),Low Throughput,-
2640589,2640589,673,673,BRAF,BRAF,FRET,28205554,Li Z (2017),High Throughput,Time-resolved fluorescence energy transfer (TR...
2777812,2777812,673,673,BRAF,BRAF,Affinity Capture-MS,31980649,Kennedy SA (2020),High Throughput,-
