<a href="https://colab.research.google.com/github/BCODMO/Data-Use-Examples/blob/master/notebooks/species_erddap_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Use the ERDDAP API to pull data in CSV format from BCO-DMO, then get a unique species list.

# Resources
* BCO-DMO ERDDAP Documentation https://erddap.bco-dmo.org/erddap/tabledap/documentation.html
* See workshop materials for more on ERDDAP: https://k-rns.github.io/workshop_data_reuse/

In [None]:
import requests
import pandas as pd

In [None]:
# BCO-DMO dataset to download. Other ids tried previously: '850202', '850190'
dataset_id = '752795'
# Dataset Landing Page (see metadata and file access)
# https://www.bco-dmo.org/dataset/752795

# Build the ERDDAP tabledap csv url from dataset_id so that changing the id
# above actually changes which dataset gets downloaded.
url_csv = f"https://erddap.bco-dmo.org/erddap/tabledap/bcodmo_dataset_{dataset_id}.csv"

# A timeout keeps this cell from hanging forever if the server never answers.
response = requests.get(url_csv, timeout=60)

# Fail here with an HTTPError on a 4xx/5xx response, rather than trying to
# parse an error page as csv in a later cell.
response.raise_for_status()

# 200 means the request succeeded
response.status_code

200

In [None]:
# The body of the response (the csv file contents) is available as text via .text
csv_as_string = response.text

# type() reports what kind of object a variable holds; here it confirms we have a string
type(csv_as_string)


str

In [None]:

# Peek at the first 500 characters to see how the csv text is laid out
print(csv_as_string[:500])

System,Group,Species,BM,HR,Refs
unitless,unitless,unitless,grams (g),square kilometers (km^2),unitless
M,B,Sterna forsteri,149,58.0,(150)
M,B,Ptychoramphus aleuticus,164,3008.0,(151|152)
M,B,Brachyramphus marmoratus,220,127.0,(153)
M,B,Calonectris diomedea,535,485776.0,(154)
M,B,Alca torda,600,2201.0,(155)
M,B,Uria aalge,907,815.0,(155|156)
M,B,Sula sula,956,5454.0,(157)
M,B,Melanitta nigra,1052,1298.0,(158)
M,B,Procellaria aequinoctialis,1213,683000.0,(159)
M,B,Procellaria conspicillata,1278,59


In [None]:
# StringIO wraps a string in a file-like object. read_csv() needs that here
# because the csv text lives in memory rather than in a file we can name.
from io import StringIO

csv_buffer = StringIO(csv_as_string)

# Parse the csv text into a pandas DataFrame
# (a comma is already read_csv()'s default separator)
df = pd.read_csv(csv_buffer)

# Display the DataFrame
df

Unnamed: 0,System,Group,Species,BM,HR,Refs
0,unitless,unitless,unitless,grams (g),square kilometers (km^2),unitless
1,M,B,Sterna forsteri,149,58.0,(150)
2,M,B,Ptychoramphus aleuticus,164,3008.0,(151|152)
3,M,B,Brachyramphus marmoratus,220,127.0,(153)
4,M,B,Calonectris diomedea,535,485776.0,(154)
...,...,...,...,...,...,...
839,T,R,Conolophus pallidus,5600,0.0056,(291)
840,T,R,Varanus bengalensis,5744,0.094,(291)
841,T,R,Varanus albigularis,7250,12.0,(291)
842,T,R,Stigmochelys pardalis,10600,2.1,(297)


In [None]:
# Drop the units row. This csv has a second header line holding the units of
# each column, and read_csv() loaded that line as the first data row
# (index label 0).
#
# Dropping by label and assigning the result (instead of dropping the
# positional df.index[0] with inplace=True) makes this cell safe to re-run:
# once label 0 is gone, errors="ignore" turns a second run into a no-op
# rather than removing a real data row.
df = df.drop(index=0, errors="ignore")
df

Unnamed: 0,System,Group,Species,BM,HR,Refs
1,M,B,Sterna forsteri,149,58.0,(150)
2,M,B,Ptychoramphus aleuticus,164,3008.0,(151|152)
3,M,B,Brachyramphus marmoratus,220,127.0,(153)
4,M,B,Calonectris diomedea,535,485776.0,(154)
5,M,B,Alca torda,600,2201.0,(155)
...,...,...,...,...,...,...
839,T,R,Conolophus pallidus,5600,0.0056,(291)
840,T,R,Varanus bengalensis,5744,0.094,(291)
841,T,R,Varanus albigularis,7250,12.0,(291)
842,T,R,Stigmochelys pardalis,10600,2.1,(297)


In [None]:
# Build a short list of unique species names

# Reading the chain left to right:
#   df["Species"]  -> the Species column
#   .unique()      -> each distinct value once, in order of first appearance
#   .tolist()      -> convert the result to a plain Python list
#   [:20]          -> keep only the first 20 entries, for demonstration purposes
sp_list = df["Species"].unique().tolist()[:20]
sp_list

['Sterna forsteri',
 'Ptychoramphus aleuticus',
 'Brachyramphus marmoratus',
 'Calonectris diomedea',
 'Alca torda',
 'Uria aalge',
 'Sula sula',
 'Melanitta nigra',
 'Procellaria aequinoctialis',
 'Procellaria conspicillata',
 'Papasula abbotti',
 'Somateria mollissima',
 'Eudyptes filholi',
 'Thalassarche chrysostoma',
 'Thalassarche melanophrys',
 'Pygoscelis antarctica',
 'Gaviia immer',
 'Pygoscelis papua',
 'Eudyptes chrysolophus',
 'Ctenochaetus striatus']