In [None]:
%%capture
!pip install --no-cache-dir shapely
!pip install -U folium

%matplotlib inline
# NOTE: order is kept as in the original on purpose -- the two wildcard imports
# below can shadow names, so the explicit imports that follow them must stay after.
import os
import time
import folium
from datetime import datetime
from io import StringIO
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import matplotlib as mpl
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from datascience import *
from shapely import geometry as sg, wkt
from scripts.espm_module import *
import json
import random
from IPython.core.display import display, HTML
import ipywidgets as widgets
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# 3.8 dropped the old names, so use whichever spelling this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

---

# Sagehen Creek Field Station<a id='sagehen'></a>

![sagehen](http://gk12calbio.berkeley.edu/images/sagehen_view.jpg)

What are the specimens that occur in Sagehen Creek FS?

In [None]:
# Query the GBIF API for every record whose institution code is 'scfs'.
req = GBIFRequest()
params = dict(institutionCode='scfs')  # filter on the institution, not on a species
pages = req.get_pages(params)  # run the request with those filters
# Flatten each page's 'results' list, keeping only records with a truthy latitude.
records = [hit for page in pages for hit in page['results'] if hit.get('decimalLatitude')]
records[:5]  # show the first 5 records

Let's make this a `DataFrame` again:

In [None]:
# Round-trip through JSON so pandas applies its usual read_json type inference.
# The text is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated in recent pandas releases.
records_df = pd.read_json(StringIO(json.dumps(records)))
records_df.head()

How many records did we get?

In [None]:
# Row count of the frame, i.e. the number of records retrieved.
records_df.shape[0]

We can plot the different classes:

In [None]:
# Count the records per taxonomic class and draw them as horizontal bars.
ax = records_df['class'].value_counts().plot.barh()
# Label the figure so it stands alone; the trailing ';' hides the repr echo.
ax.set(title='Records by class', xlabel='Number of records', ylabel='Class');

And species, but there are a lot, so we'll look at the most common 10:

In [None]:
# value_counts() sorts in descending order, so head(10) is the 10 most common species.
ax = records_df['species'].value_counts().head(10).plot.barh()
# Label the figure so it stands alone; the trailing ';' hides the repr echo.
ax.set(title='10 most common species', xlabel='Number of records', ylabel='Species');

And genus:

In [None]:
# value_counts() sorts in descending order, so head(10) is the 10 most common genera.
ax = records_df['genus'].value_counts().head(10).plot.barh()
# Label the figure so it stands alone; the trailing ';' hides the repr echo.
ax.set(title='10 most common genera', xlabel='Number of records', ylabel='Genus');

So what are all the unique species observed?

In [None]:
# Distinct species names among the records; missing values are dropped first.
unique_obs = set(records_df['species'].dropna().unique())
unique_obs

In [None]:
# Show only the rows that carry a species name.
records_df.dropna(subset=["species"])

We can also map these observations in their reserve:

In [None]:
# Build a colour per species (rows without a species are excluded) and get the
# matching HTML legend back from the helper.
color_dict, html_key = assign_colors(
    records_df.loc[records_df["species"].notna()],
    "species",
)
# Render the legend in the notebook output.
display(HTML(html_key))

In [None]:
# Base map; the hard-coded centre is presumably the Sagehen Creek area -- confirm.
mape = folium.Map([39.274061, -120.394561], zoom_start=10)

# Only rows with both coordinates and a species name can be drawn and colour-coded.
plottable = records_df.dropna(subset=['decimalLatitude', 'decimalLongitude', 'species'])
for _, rec in plottable.iterrows():
    species = rec['species']
    try:
        folium.CircleMarker(
            (rec['decimalLatitude'], rec['decimalLongitude']),
            color=color_dict[species],
            popup=species,
        ).add_to(mape)
    except (KeyError, ValueError):
        # Best-effort plotting: skip a species with no assigned colour (KeyError)
        # or values the marker may reject (ValueError), but let anything
        # unexpected surface instead of being silently swallowed.
        continue
mape

### EcoEngine API

We can use UCB's [EcoEngine API](https://ecoengine.berkeley.edu/) just like we used the GBIF API to get back a checklist of species for a specific station. We'll ask for what's on the checklist for Sagehen:

In [None]:
# Ask the EcoEngine API for the scientific names on the checklists that match
# the "sagehen" footprint.
eco_req = EcoEngineRequest()

params = dict(footprint="sagehen")
checklist_sagehen = eco_req.get_scientific_names_from_checklists(params)
checklist_sagehen

How much does this overlap with what's been observed and recorded in the GBIF API? Let's grab all the observations from the Sagehen Creek FS:

In [None]:
req = GBIFRequest()  # fresh request object for the GBIF API
# Search filters: human observations in California whose locality is the field station.
params = {'basisOfRecord': "HUMAN_OBSERVATION",
          'stateProvince': "California",
          'locality': "Sagehen Creek Field Station"}
# NOTE(review): `thresh=500` presumably caps how much is fetched -- confirm in the helper.
pages = req.get_pages(params, thresh=500)  # run the request with those filters
# Flatten each page's 'results' list, keeping only records with a truthy latitude.
records = [rec for page in pages for rec in page['results'] if rec.get('decimalLatitude')]
# Wrap the JSON text in StringIO: passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
records_df = pd.read_json(StringIO(json.dumps(records)))
records_df.head()

How many do we have?

In [None]:
# Row count of the frame, i.e. the number of observation records retrieved.
records_df.shape[0]

That's not too many and it looks mostly like birds, so it probably won't overlap with any of the checklists:

In [None]:
# NOTE(review): `stn_features` is not defined in the visible part of this
# notebook -- presumably it comes from an earlier cell or a wildcard import; confirm.
sagehen_geom = stn_features[1]["geometry"]
# Trim each scientific name: keep the text before the first "(", strip the
# surrounding whitespace, then keep the text before the first ",".
observed_scientific_names = [
    full_name.partition("(")[0].strip().partition(",")[0]
    for full_name in records_df['scientificName']
]
observed_scientific_names

In [None]:
# Names present in both the earlier species set and the trimmed observation names.
set(unique_obs) & set(observed_scientific_names)

:(