<a href="https://colab.research.google.com/github/BCODMO/Data-Use-Examples/blob/master/notebooks/species_erddap_json.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Use the ERDDAP API to pull data in JSON format from BCO-DMO, then get a unique species list.

# Resources
* BCO-DMO ERDDAP Documentation https://erddap.bco-dmo.org/erddap/tabledap/documentation.html
* See workshop materials for more on ERDDAP: https://k-rns.github.io/workshop_data_reuse/

In [None]:
import requests
import pandas as pd

In [None]:
dataset_id = '752795'
# Dataset Landing Page (see metadata and file access)
# https://www.bco-dmo.org/dataset/752795

# Build the ERDDAP tabledap URL from dataset_id so that changing the ID above
# is all that is needed to request a different dataset. The ".json" suffix
# requests the table in JSON format.
url = f"https://erddap.bco-dmo.org/erddap/tabledap/bcodmo_dataset_{dataset_id}.json"

# Pass a timeout so the cell fails instead of hanging forever if the server
# does not respond.
response = requests.get(url, timeout=60)

# you can check the status code returned from your api call (200 means success)
response.status_code

200

In [None]:
# The response body is available as a string via response.text.
# Slice it so the cell previews the first 1000 characters instead of
# writing the entire dataset into the notebook output.
response.text[:1000]

'{\n  "table": {\n    "columnNames": ["System", "Group", "Species", "BM", "HR", "Refs"],\n    "columnTypes": ["String", "String", "String", "int", "float", "String"],\n    "columnUnits": ["unitless", "unitless", "unitless", "grams (g)", "square kilometers (km^2)", "unitless"],\n    "rows": [\n      ["M", "B", "Sterna forsteri", 149, 58, "(150)"],\n      ["M", "B", "Ptychoramphus aleuticus", 164, 3008, "(151|152)"],\n      ["M", "B", "Brachyramphus marmoratus", 220, 127, "(153)"],\n      ["M", "B", "Calonectris diomedea", 535, 485776, "(154)"],\n      ["M", "B", "Alca torda", 600, 2201, "(155)"],\n      ["M", "B", "Uria aalge", 907, 815, "(155|156)"],\n      ["M", "B", "Sula sula", 956, 5454, "(157)"],\n      ["M", "B", "Melanitta nigra", 1052, 1298, "(158)"],\n      ["M", "B", "Procellaria aequinoctialis", 1213, 683000, "(159)"],\n      ["M", "B", "Procellaria conspicillata", 1278, 596546, "(160)"],\n      ["M", "B", "Papasula abbotti", 1572, 108503, "(161)"],\n      ["M", "B", "Somate

In [None]:
# type() reports the type of a value; here it shows that response.text is a string
type(response.text)

str

In [None]:
# The data is in JSON format, so parse it into a Python dictionary
j = response.json()

# Preview only the first 1000 characters of the dictionary's text form;
# printing the whole dictionary would write every data row into the output.
print(str(j)[:1000])

{'table': {'columnNames': ['System', 'Group', 'Species', 'BM', 'HR', 'Refs'], 'columnTypes': ['String', 'String', 'String', 'int', 'float', 'String'], 'columnUnits': ['unitless', 'unitless', 'unitless', 'grams (g)', 'square kilometers (km^2)', 'unitless'], 'rows': [['M', 'B', 'Sterna forsteri', 149, 58, '(150)'], ['M', 'B', 'Ptychoramphus aleuticus', 164, 3008, '(151|152)'], ['M', 'B', 'Brachyramphus marmoratus', 220, 127, '(153)'], ['M', 'B', 'Calonectris diomedea', 535, 485776, '(154)'], ['M', 'B', 'Alca torda', 600, 2201, '(155)'], ['M', 'B', 'Uria aalge', 907, 815, '(155|156)'], ['M', 'B', 'Sula sula', 956, 5454, '(157)'], ['M', 'B', 'Melanitta nigra', 1052, 1298, '(158)'], ['M', 'B', 'Procellaria aequinoctialis', 1213, 683000, '(159)'], ['M', 'B', 'Procellaria conspicillata', 1278, 596546, '(160)'], ['M', 'B', 'Papasula abbotti', 1572, 108503, '(161)'], ['M', 'B', 'Somateria mollissima', 2067, 68, '(162)'], ['M', 'B', 'Eudyptes filholi', 2330, 2605360, '(163)'], ['M', 'B', 'Thalas

In [None]:
# Check the type of the parsed JSON object
type(j)

dict

In [None]:
# Get the column names out of that dictionary. In the JSON they are nested
# hierarchically under 'table', then 'columnNames'.
j['table']['columnNames']


['System', 'Group', 'Species', 'BM', 'HR', 'Refs']

In [None]:
# Now pull the data rows out of that dictionary. In the JSON they are nested
# hierarchically under 'table', then 'rows'.

# Show only the first 20 rows
j['table']['rows'][:20]

[['M', 'B', 'Sterna forsteri', 149, 58, '(150)'],
 ['M', 'B', 'Ptychoramphus aleuticus', 164, 3008, '(151|152)'],
 ['M', 'B', 'Brachyramphus marmoratus', 220, 127, '(153)'],
 ['M', 'B', 'Calonectris diomedea', 535, 485776, '(154)'],
 ['M', 'B', 'Alca torda', 600, 2201, '(155)'],
 ['M', 'B', 'Uria aalge', 907, 815, '(155|156)'],
 ['M', 'B', 'Sula sula', 956, 5454, '(157)'],
 ['M', 'B', 'Melanitta nigra', 1052, 1298, '(158)'],
 ['M', 'B', 'Procellaria aequinoctialis', 1213, 683000, '(159)'],
 ['M', 'B', 'Procellaria conspicillata', 1278, 596546, '(160)'],
 ['M', 'B', 'Papasula abbotti', 1572, 108503, '(161)'],
 ['M', 'B', 'Somateria mollissima', 2067, 68, '(162)'],
 ['M', 'B', 'Eudyptes filholi', 2330, 2605360, '(163)'],
 ['M', 'B', 'Thalassarche chrysostoma', 3508, 640000, '(164)'],
 ['M', 'B', 'Thalassarche melanophrys', 3564, 444000, '(164)'],
 ['M', 'B', 'Pygoscelis antarctica', 3800, 327, '(165)'],
 ['M', 'B', 'Gaviia immer', 5186, 547, '(166)'],
 ['M', 'B', 'Pygoscelis papua', 5190

In [None]:
# Build a pandas DataFrame from the parsed JSON table:
# 'rows' supplies the data and 'columnNames' supplies the column headers.
erddap_table = j['table']
df = pd.DataFrame(erddap_table['rows'], columns=erddap_table['columnNames'])

# Display the dataframe
df

Unnamed: 0,System,Group,Species,BM,HR,Refs
0,M,B,Sterna forsteri,149,58.0000,(150)
1,M,B,Ptychoramphus aleuticus,164,3008.0000,(151|152)
2,M,B,Brachyramphus marmoratus,220,127.0000,(153)
3,M,B,Calonectris diomedea,535,485776.0000,(154)
4,M,B,Alca torda,600,2201.0000,(155)
...,...,...,...,...,...,...
838,T,R,Conolophus pallidus,5600,0.0056,(291)
839,T,R,Varanus bengalensis,5744,0.0940,(291)
840,T,R,Varanus albigularis,7250,12.0000,(291)
841,T,R,Stigmochelys pardalis,10600,2.1000,(297)


In [None]:
# Get Unique Species List

# Select the Species column, keep each distinct value once with .unique(),
# and convert the result to a plain Python list with .tolist()
unique_species = df["Species"].unique().tolist()

# Keep only the first 20 species for demonstration purposes.
sp_list = unique_species[:20]
sp_list

['Sterna forsteri',
 'Ptychoramphus aleuticus',
 'Brachyramphus marmoratus',
 'Calonectris diomedea',
 'Alca torda',
 'Uria aalge',
 'Sula sula',
 'Melanitta nigra',
 'Procellaria aequinoctialis',
 'Procellaria conspicillata',
 'Papasula abbotti',
 'Somateria mollissima',
 'Eudyptes filholi',
 'Thalassarche chrysostoma',
 'Thalassarche melanophrys',
 'Pygoscelis antarctica',
 'Gaviia immer',
 'Pygoscelis papua',
 'Eudyptes chrysolophus',
 'Ctenochaetus striatus']