In [1]:
# Import requests and set the OBIS API base URL. 
import requests
import json
import pandas as pd
import urllib

# Convenience function to pretty print JSON objects
def print_json(myjson):
    """Print a JSON-serializable object as indented text.

    Keys are sorted, nesting is indented by four spaces, and the item and
    key separators are ',' and ': '.
    """
    formatted = json.dumps(myjson, sort_keys=True, indent=4, separators=(',', ': '))
    print(formatted)
    

# Base URL of the OBIS API (v3). Every OBIS API request in this notebook is built from this variable.
OBIS_URL = "https://api.obis.org/v3"

In [2]:
# We are not sure which node ID to query, so let's get all of the OBIS nodes.

# node
# A timeout keeps the cell from hanging forever if the API is unreachable.
req = requests.get(f'{OBIS_URL}/node', timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error response.
req.raise_for_status()
nodes_json = req.json()

# count the number of OBIS nodes
f"Total Nodes: {nodes_json['total']}"

'Total Nodes: 34'

In [3]:
# List every node, one per line, as "Name: <name> - ID: <id>"
for node in nodes_json['results']:
    print('Name: {} - ID: {}'.format(node["name"], node["id"]))

Name: AfrOBIS - ID: 14fc439c-707d-41d6-a3d4-b9d2696205fe
Name: Antarctic OBIS - ID: dc6c6ea2-83f5-4b18-985a-9efff6320d69
Name: Arctic OBIS - ID: da50007b-7871-46cf-8530-441b5836d2c1
Name: Caribbean OBIS - ID: 8385435b-bcf5-4bec-b827-8b480163d479
Name: ESP OBIS - ID: fd3a5df5-6a6f-46ca-a9c6-e61896a3f355
Name: EurOBIS - ID: 4bf79a01-65a9-4db6-b37b-18434f26ddfc
Name: Fish OBIS - ID: dcb0c76d-46a1-4e07-9a69-98cf3fd67576
Name: HAB OBIS - ID: 33dec23c-af65-4fb1-a437-79543c562ef0
Name: IndOBIS - ID: 1a3b0f1a-4474-4d73-9ee1-d28f92a83996
Name: International Seabed Authority - ID: 9d2d95be-32eb-4d81-8911-32cb8bc641c8
Name: MedOBIS - ID: 1ad35eb9-c615-4733-864a-b585aebcfb70
Name: OBIS Argentina - ID: 464a96d8-c17e-4bbb-b6b8-778e1fb687c4
Name: OBIS Australia - ID: 2a57cd59-6799-4579-955e-27c9af97aea4
Name: OBIS Black Sea - ID: bdb3b59b-7dad-4c06-a2d6-3e576158cc4c
Name: OBIS Brazil - ID: dde0dbd3-92fb-41e6-9f51-b1ae930a934b
Name: OBIS Canada - ID: 7dfb2d90-9317-434d-8d4e-64adf324579a
Name: OBIS Chi

In [4]:
# Oh look, OBIS USA is an OBIS node; let's fetch just its record using the id value:
nodeID = 'b7c47783-a020-4173-b390-7b57c4fa1426'
# node/{nodeID}
# A timeout keeps the cell from hanging forever if the API is unreachable.
req = requests.get(f'{OBIS_URL}/node/{nodeID}', timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error response.
req.raise_for_status()
obis_usa_json = req.json()

# Show OBIS-USA node record
print_json(obis_usa_json)

{
    "results": [
        {
            "contacts": [
                {
                    "email": "albenson@usgs.gov",
                    "givenname": "Abby",
                    "oceanexpert_id": 25483,
                    "surname": "Benson"
                },
                {
                    "email": "sbristol@usgs.gov",
                    "givenname": "Sky",
                    "oceanexpert_id": 25731,
                    "surname": "Bristol"
                }
            ],
            "description": "Ocean Biodiversity Information System USA (OBIS-USA) brings together marine biological observation data \u2013 recorded observations of identifiable marine species at a known time and place, collected primarily from U.S. Waters or with U.S. funding.",
            "feeds": [
                {
                    "id": "753ce293-c4c0-4ee5-bf22-361db7b89e3a",
                    "url": "https://ipt.geome-db.org/rss.do"
                },
                {
                    

In [5]:
# dataset?nodeid={nodeID}
# Passing the filter through `params` lets requests build and encode the query string.
req = requests.get(f'{OBIS_URL}/dataset', params={'nodeid': nodeID}, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error response.
req.raise_for_status()
datasets = req.json()
print('Number of datasets in OBIS-USA:', datasets['total'])

Number of datasets in OBIS-USA: 146


Let's print out the metadata from one of the datasets.

In [6]:
# Pretty-print the first record in the dataset listing's 'results' list.
print_json(datasets['results'][0])

{
    "abstract": "World Ocean Database 2009 (WOD09) is a collection of scientifically quality-controlled ocean profile and plankton data that includes measurements of temperature, salinity, oxygen, phosphate, nitrate, silicate, chlorophyll, alkalinity, pH, pCO2, TCO2, Tritium, delta-13Carbon, delta-14Carbon, delta-18Oxygen, Freons, Helium, delta-3Helium, Neon, and plankton. A discussion of data sources is provided. Data are both historical and modern with the most recent data from 2008.World Ocean Database 2009 is an update of World Ocean Database 2005. It expands on the older version by including new variables, data types, and additional historical, as well as modern, observations. It contains all data from earliest observation through our collection as of Dec. 31, 2009. The 2009 database, updated from the 2005 edition, is significantly larger providing approximately 9.1 million temperature profiles and 3.5 million salinity reports. The 2009 database also captures 29 categories of sc

Now, let's iterate through all the datasets and collect metadata into a pandas DataFrame. We're skipping the datasets hosted on ipt.geome-db.org because that IPT website doesn't load: https://ipt.geome-db.org/resource?r=dipnet

In [9]:
# Let's grab some metadata about each dataset: title, URL and archive size.

from bs4 import BeautifulSoup

columns = ['title','url','size_raw','size_MB']

# Multipliers that convert a size expressed in the given unit into MB.
# Decimal factors are used so that KB keeps the 0.001 scaling used so far.
# NOTE(review): only KB and MB appear in the results seen so far; the other
# units are included so they are scaled correctly instead of being counted as MB.
_MB_PER_UNIT = {
    'b': 1e-6,
    'byte': 1e-6,
    'bytes': 1e-6,
    'kb': 0.001,
    'mb': 1.0,
    'gb': 1000.0,
}


def _extract_size_raw(html_text, url):
    """Return the text inside the first parentheses of the page's first <td>.

    Raises ValueError (naming the URL) when the page has no <td> or the cell
    holds no parenthesised text, so a broken page is easy to identify.
    """
    first_cell = BeautifulSoup(html_text, 'html.parser').find('td')
    if first_cell is None:
        raise ValueError(f'No <td> element found at {url}')
    pieces = first_cell.text.split('(')
    if len(pieces) < 2:
        raise ValueError(f'No "(size)" text found in the first <td> at {url}')
    return pieces[1].split(')')[0]


def _size_to_mb(size_raw):
    """Convert a size string such as '1,234 KB' into a float number of MB.

    Raises ValueError when the string is not '<number> <unit>' or the unit is
    unknown, rather than silently treating the value as MB.
    """
    parts = size_raw.split()
    if len(parts) < 2:
        raise ValueError(f'Cannot parse size {size_raw!r}')
    # Drop thousands separators before converting, e.g. '1,234' -> 1234.0
    value = float(parts[0].replace(",", ""))
    unit = parts[1]
    try:
        factor = _MB_PER_UNIT[unit.lower()]
    except KeyError:
        raise ValueError(f'Unknown size unit {unit!r} in {size_raw!r}') from None
    return value * factor


# Collect one record per dataset and build the DataFrame once at the end;
# concatenating inside the loop copies the whole frame on every iteration.
records = []

# A session reuses the connection across the many requests to the same host.
with requests.Session() as session:
    for dataset in datasets['results']:
        # Skip the IPT at ipt.geome-db.org: its pages do not load.
        if 'ipt.geome-db.org' in dataset['url']:
            continue

        print(dataset['title'])
        print(dataset['url'])

        # A timeout keeps one unresponsive page from hanging the whole loop.
        response = session.get(dataset['url'], timeout=30)
        # Fail loudly on an HTTP error instead of scraping an error page.
        response.raise_for_status()

        size_raw = _extract_size_raw(response.text, dataset['url'])

        records.append(
            {"title": dataset['title'],
             "url": dataset['url'],
             "size_raw": size_raw,
             "size_MB": _size_to_mb(size_raw),
             })

df = pd.DataFrame(records, columns=columns)
df

World Ocean Database 2009
https://www1.usgs.gov/obis-usa/ipt/resource?r=wod_2009
NOAA Deep Sea Corals Research and Technology Program
https://www1.usgs.gov/obis-usa/ipt/resource?r=noaa_dsc_rtp
NOAA Pacific Islands Fisheries Science Center, Ecosystem Sciences Division, National Coral Reef Monitoring Program: Stratified random surveys (StRS) of reef fish in the U.S. Pacific Islands
https://www1.usgs.gov/obis-usa/ipt/resource?r=ncrmp_nspc_fish_pacific
SEFSC CAGES Alabama Fish Length Data with CPUE
https://www1.usgs.gov/obis-usa/ipt/resource?r=sefsc_cages_alabama_lengths_cpue_20141103
Northeast Fisheries Science Center Bottom Trawl Survey Data
https://www1.usgs.gov/obis-usa/ipt/resource?r=nefsc_bottom_trawl_surveys_coml
NOAA Southeast Fisheries Science Center (SEFSC) Fisheries Log Book System (FLS) Commercial Pelagic Logbook Data
https://www1.usgs.gov/obis-usa/ipt/resource?r=sefsc_logbook
NOAA AFSC North Pacific Groundfish Observer
https://www1.usgs.gov/obis-usa/ipt/resource?r=afsc_northpa

TPWD HARC Texas Coastal Fisheries Upper Laguna Madre Bag Seine
https://www1.usgs.gov/obis-usa/ipt/resource?r=tpwd_harc_texasupperlagunamadre_bagseine_20150202
TPWD HARC Texas Coastal Fisheries Aransas Bay Gill Net
https://www1.usgs.gov/obis-usa/ipt/resource?r=tpwd_harc_texasaransasbay_gillnet_20150130
VIMS Chesapeake Bay Multispecies Monitoring and Assessment Program
https://www1.usgs.gov/obis-usa/ipt/resource?r=vims_chesmmap
Florida Keys Reef Visual Census 1995
https://www1.usgs.gov/obis-usa/ipt/resource?r=floridakeysreefvisualcensus1995
TPWD HARC Texas Coastal Fisheries, Corpus Christi Bay Gill Net
https://www1.usgs.gov/obis-usa/ipt/resource?r=tpwd_harc_texascorpuschristibay_gillnet_20150130
TPWD HARC Texas Coastal Fisheries Lower Laguna Madre Gill Net
https://www1.usgs.gov/obis-usa/ipt/resource?r=tpwd_harc_texaslowerlagunamadre_gillnet_20150130
Florida Keys Reef Visual Census 2004
https://www1.usgs.gov/obis-usa/ipt/resource?r=floridakeysreefvisualcensus2004
TPWD HARC Texas Coastal F

Coral Reef Evaluation and Monitoring Project Florida Keys 2008
https://www1.usgs.gov/obis-usa/ipt/resource?r=coralreefevaluationandmonitoringproject-2008
Coral Reef Evaluation and Monitoring Project Florida Keys 2007
https://www1.usgs.gov/obis-usa/ipt/resource?r=coralreefevaluationandmonitoringproject-2007
BOEM Beaufort Sea, Alaska, Fish and Invertebrate Haul and Catch, with Oceanography, 2008
https://www1.usgs.gov/obis-usa/ipt/resource?r=boem_alaska_2008
NOAA coral reef monitoring invertebrate data from sites across Micronesia from 2009-09-29 to 2015-09-25
https://www1.usgs.gov/obis-usa/ipt/resource?r=noaa_micronesia_reef_monitoring_invert
Macrofauna collected on colonization surfaces at the East Pacific Rise 9 50 N hydrothermal vent field in 1998-2017
https://www1.usgs.gov/obis-usa/ipt/resource?r=whoi_epr_deepseavents_speciescounts
Stable Isotope Compositions of tubeworms, mussels, and their associated fauna in Gulf of Mexico hydrocarbon seeps
https://www1.usgs.gov/obis-usa/ipt/resou

Unnamed: 0,title,url,size_raw,size_MB
0,World Ocean Database 2009,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,220 MB,220.000
1,NOAA Deep Sea Corals Research and Technology P...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,15 MB,15.000
2,"NOAA Pacific Islands Fisheries Science Center,...",https://www1.usgs.gov/obis-usa/ipt/resource?r=...,67 MB,67.000
3,SEFSC CAGES Alabama Fish Length Data with CPUE,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,10 MB,10.000
4,Northeast Fisheries Science Center Bottom Traw...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,24 MB,24.000
...,...,...,...,...
140,Coral Reef Evaluation and Monitoring Project D...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,26 KB,0.026
141,Coral Reef Evaluation and Monitoring Project D...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,26 KB,0.026
142,Vessel line-transect surveys of Arctic cetacea...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,6 KB,0.006
143,Macrofauna collected on colonization panels at...,https://www1.usgs.gov/obis-usa/ipt/resource?r=...,4 KB,0.004


Print out statistics about the package sizes (in MB).

In [20]:
# Total size of all packages, followed by the summary statistics (both in MB)
size_mb = df['size_MB']
print('sum\t', size_mb.sum())
print(size_mb.describe())

sum	 849.5550000000001
count    145.000000
mean       5.859000
std       19.648869
min        0.004000
25%        0.097000
50%        0.843000
75%        7.000000
max      220.000000
Name: size_MB, dtype: float64
