# Get resources from bio.tools

See the [bio.tools API reference](https://biotools.readthedocs.io/en/latest/api_reference.html) and [API usage guide](https://biotools.readthedocs.io/en/latest/api_usage_guide.html) on how to query [bio.tools](https://bio.tools/) records.

JSON schema for bio.tools: https://github.com/bio-tools/biotoolsSchema/blob/master/jsonschema/biotoolsj.json

In [1]:
import os
import requests
from pathlib import Path
from motbxtools import motbxschema
import pprint
# pretty-printer for displaying nested API responses
pp = pprint.PrettyPrinter(indent=2)

# the relative paths below are resolved against the current working directory
CWD = Path.cwd()
if not CWD.name == "notebooks":
    print("Make sure to run this notebook from the 'notebooks' directory.")

# parent of the working directory, treated as the home directory of this repository
MOTBX_DIR = CWD.parent
# directory where the resource YAML files are saved (created if missing)
RESOURCES_DIR = MOTBX_DIR / "resources/external/biotools"
RESOURCES_DIR.mkdir(parents=True, exist_ok=True)
# JSON Schema file defining the structure of MOTBX resources
SCHEMA_JSON = MOTBX_DIR / "schema/motbxschema.json"
# load the MOTBX schema used for validation
schema = motbxschema.MotbxSchema(SCHEMA_JSON)

# bio.tools REST API endpoints
base_url = "https://bio.tools/api"
url = base_url + "/tool/"
sign_in_url = base_url + "/rest-auth/login/"  # for submitting resources

# request headers: accept and declare JSON content
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
}

## Perform some test queries

In [2]:
# search for resources from the BBMRI collection
# (alternative collection IDs: "Rare Disease", "RD-connect";
#  a free-text filter can be added with "q": ["multi-omics"])
params = dict(collectionID="BBMRI")
response = requests.get(url, headers=headers, params=params)
payload = response.json()  # decode the JSON body once and reuse it below

# structure of result
print(payload.keys())

# number of results
print(payload["count"])

# print 1st result
first_record = payload["list"][0]
pp.pprint(first_record)

dict_keys(['count', 'next', 'previous', 'list'])
1
{ 'accessibility': 'Open access',
  'additionDate': '2015-12-02T11:00:49Z',
  'biotoolsCURIE': 'biotools:molgenis',
  'biotoolsID': 'molgenis',
  'collectionID': [ 'NBIC',
                    'Rare Disease',
                    'BBMRI',
                    'BioMedBridges Tools',
                    'ELIXIR-NL'],
  'community': None,
  'confidence_flag': None,
  'cost': 'Free of charge',
  'credit': [ { 'email': None,
                'fundrefid': None,
                'gridid': None,
                'name': 'UMC Groningen and collaborators',
                'note': None,
                'orcidid': None,
                'rorid': None,
                'typeEntity': 'Institute',
                'typeRole': ['Provider'],
                'url': 'http://www.rug.nl/'},
              { 'email': None,
                'fundrefid': None,
                'gridid': None,
                'name': 'BioMedBridges',
                'note': None,
        

In [3]:
# open-access "Genomics" resources from the Rare Disease collection
params = dict([
    ("q", "Genomics"),
    ("collectionID", "Rare Disease"),
    ("accessibility", "Open access"),
])
response = requests.get(url, headers=headers, params=params)
payload = response.json()

# total number of hits, followed by the record names on the returned page
print(payload["count"])
record_names = [entry["name"] for entry in payload["list"]]
pp.pprint(record_names)

44
[ 'ProtVar',
  'Oligogenic resource for variant analysis (ORVAL)',
  'Digenic Diseases Database (DIDA)',
  'Variant Combination Pathogenicity Predictor (VarCoPP) 2.0',
  'OLIDA: OLIgogenic diseases DAtabase',
  'ReMM score',
  'ANNOVAR',
  'UCSC Genome Browser',
  '1000Genomes',
  'CADD']


In [4]:
# free-text search with a general term; only the number of hits is reported
params = dict(q="translational medicine")
response = requests.get(url, headers=headers, params=params)
hit_count = response.json()["count"]
print(hit_count)

29


In [5]:
# open-access records of the EUCAIM collection; report the number of hits
params = dict([
    ("accessibility", "Open access"),
    ("collectionID", "EUCAIM"),
])
response = requests.get(url, headers=headers, params=params)
hit_count = response.json()["count"]
print(hit_count)

7


In [6]:
# summarise the previous query: available record fields, then (name, license) pairs
records = response.json()["list"]
print(records[0].keys())
name_license_pairs = [(entry["name"], entry["license"]) for entry in records]
pp.pprint(name_license_pairs)

dict_keys(['name', 'description', 'homepage', 'biotoolsID', 'biotoolsCURIE', 'version', 'otherID', 'relation', 'function', 'toolType', 'topic', 'operatingSystem', 'language', 'license', 'collectionID', 'maturity', 'cost', 'accessibility', 'elixirPlatform', 'elixirNode', 'elixirCommunity', 'link', 'download', 'documentation', 'publication', 'credit', 'community', 'owner', 'additionDate', 'lastUpdate', 'editPermission', 'validated', 'homepage_status', 'elixir_badge', 'confidence_flag'])
[ ('Trace4Harmonization', 'Proprietary'),
  ('MITK', 'BSD-3-Clause'),
  ('FAIR4Health Data Curation Tool', 'CC-BY-4.0'),
  ('Fed-BioMed', 'Apache-2.0'),
  ('RadiomicsEnabler', 'Proprietary'),
  ('Virtual Imaging Platform', 'CECILL-B'),
  ('MuGVRE', 'Apache-2.0')]


## Store resources from bio.tools in MOTBX format

In [7]:
# define query parameters
# NOTE(review): the earlier test query spells this collection "Rare Disease"
# (singular) — confirm that "Rare Diseases" is the intended value here
params = {
    "collectionID": "Rare Diseases",
    "accessibility": "Open access",
    "toolType": "Database portal",
    "maturity": "Mature",
    "operation": "Data retrieval",  # EDAM operation
    "topic": "Data integration and warehousing",  # EDAM topic
    "page": 1
}

# seconds to wait for the server before giving up on a request;
# without an explicit timeout `requests` may block indefinitely
REQUEST_TIMEOUT = 30


def _fetch_page(query):
    """Return the decoded JSON body of one result page for `query`.

    Sends a GET request to `url` with the notebook-wide `headers`.
    Raises `requests.HTTPError` on a 4xx/5xx status, so that an error page is
    reported as such instead of failing later while decoding or indexing it.
    """
    reply = requests.get(url, headers=headers, params=query, timeout=REQUEST_TIMEOUT)
    reply.raise_for_status()
    return reply.json()


# get response from API
response = _fetch_page(params)
print(f'Query yields {response["count"]} items')
if response["count"] > 0:
    pp.pprint([(r["name"], r["homepage"]) for r in response["list"]])

# collect records in a fresh list so that the first page's own list is not
# extended in place by the records of the following pages
all_items = list(response["list"])
# go through all available pages; "next" is None on the last page
while response["next"] is not None:
    params["page"] += 1
    response = _fetch_page(params)
    pp.pprint([(r["name"], r["homepage"]) for r in response["list"]])
    all_items.extend(response["list"])
    

Query yields 27 items
[ ('MINT', 'https://mint.bio.uniroma2.it/'),
  ('OMIM', 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=OMIM'),
  ('ClinVar', 'http://www.ncbi.nlm.nih.gov/clinvar/'),
  ('NCBI Resources', 'http://www.ncbi.nlm.nih.gov'),
  ('DisGeNET', 'http://www.disgenet.org/'),
  ('WikiPathways', 'http://www.wikipathways.org'),
  ('Ensembl', 'http://www.ensembl.org/'),
  ('dbGAP', 'http://www.ncbi.nlm.nih.gov/gap'),
  ('gnomAD', 'http://gnomad.broadinstitute.org/'),
  ('ORDO', 'http://www.ebi.ac.uk/ols/ontologies/ordo')]
[ ('Orphanet', 'http://www.orpha.net/consor/cgi-bin/index.php'),
  ('UMD', 'http://www.umd.be/'),
  ('PhenomeCentral', 'https://phenomecentral.org'),
  ('EGA', 'https://ega-archive.org/'),
  ('Diseasecard', 'http://bioinformatics.ua.pt/diseasecard/'),
  ('PharmGKB', 'http://www.pharmgkb.org'),
  ('LOVD', 'http://www.lovd.nl/3.0/home'),
  ('COEUS', 'http://bioinformatics.ua.pt/coeus/'),
  ('RD-Connect Sample Catalogue', 'https://samples.rd-connect.eu/'),
  ('DO

In [14]:
def _io_terms(entries):
    """Return the "term" of every dict-valued field of the given input/output entries.

    `entries` is the "input" or "output" list of one bio.tools function.
    Entries that are not dicts are skipped *before* their fields are accessed,
    and fields whose value is not a dict are ignored.
    """
    return [
        value["term"]
        for entry in entries
        if isinstance(entry, dict)
        for value in entry.values()
        if isinstance(value, dict)
    ]


for item in all_items:
    print(item["name"], item["homepage"], item["biotoolsID"])

    # keywords: collection IDs, then operation terms, then input terms and
    # finally output terms (each group gathered across all functions)
    functions = item["function"]
    keywords = list(item["collectionID"])
    keywords += [op["term"] for func in functions for op in func["operation"]]
    keywords += [term for func in functions for term in _io_terms(func["input"])]
    keywords += [term for func in functions for term in _io_terms(func["output"])]

    MOTBX_resource = {
        "resourceID": item["biotoolsCURIE"],
        "resourceTitle": f'External resource registered at bio.tools: {item["name"]}',
        "resourceDescription": item["description"],
        # NOTE(review): rewrites every http:// homepage to https:// — assumes
        # the sites are reachable over HTTPS; confirm
        "resourceUrl": item["homepage"].replace("http://", "https://"),
        "resourceCategory": "Data Management and Stewardship",
        "resourceSubcategory": "Databases and catalogues",
        "resourceTags": ["database"],
        "resourceKeywords": keywords
    }
    # make sure the description ends with a full stop before appending the note
    if not MOTBX_resource["resourceDescription"].endswith("."):
        MOTBX_resource["resourceDescription"] += "."
    MOTBX_resource["resourceDescription"] += " Metadata for this resource was programmatically retrieved from elixir's bio.tools registry."
    # create resource object
    resource = motbxschema.MotbxResource(
        RESOURCES_DIR.joinpath(f'{item["biotoolsID"]}.yaml'),
        resource=MOTBX_resource)
    try:
        resource.validate(schema)
    except Exception as err:
        # report why validation failed (first line of the error only, to keep
        # the output readable), then skip this record without saving it
        reason = str(err).partition("\n")[0]
        print(f"  Resource could not be validated: {type(err).__name__}: {reason}")
        continue
    # write to YAML file
    resource.save()
    

MINT https://mint.bio.uniroma2.it/ mint
OMIM http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=OMIM omim
ClinVar http://www.ncbi.nlm.nih.gov/clinvar/ clinvar
NCBI Resources http://www.ncbi.nlm.nih.gov ncbi_resources
DisGeNET http://www.disgenet.org/ disgenet
WikiPathways http://www.wikipathways.org wikipathways
Ensembl http://www.ensembl.org/ ensembl
dbGAP http://www.ncbi.nlm.nih.gov/gap dbgap
gnomAD http://gnomad.broadinstitute.org/ gnomad
ORDO http://www.ebi.ac.uk/ols/ontologies/ordo ordo
Orphanet http://www.orpha.net/consor/cgi-bin/index.php orphanet
UMD http://www.umd.be/ umd
  Resource could not be validated
PhenomeCentral https://phenomecentral.org phenomecentral
  Resource could not be validated
EGA https://ega-archive.org/ ega
Diseasecard http://bioinformatics.ua.pt/diseasecard/ diseasecard
PharmGKB http://www.pharmgkb.org pharmgkb
LOVD http://www.lovd.nl/3.0/home lovd
COEUS http://bioinformatics.ua.pt/coeus/ coeus
RD-Connect Sample Catalogue https://samples.rd-connect.eu/ rd-co