In [1]:
import sys
import os
import pandas as pd
import numpy as np
import datetime as dt
import json
import requests
import base64
import sqlite3 as db
sys.path.append('../')

pd.options.display.max_rows = 99

In [2]:
github_userName = 'Tanag3r'
# The eBird API token is a credential, so it is read from the environment rather
# than stored in the notebook. Export EBIRD_API_TOKEN before starting the kernel;
# an empty token makes the eBird requests fail with an HTTP authorization error.
# Any token that was ever committed to this notebook should be rotated.
ebird_token = os.environ.get('EBIRD_API_TOKEN', '')
db_name = 'trailheadDirectBirds_sous.db'

In [3]:
##connect to database
def connectDB(database=None):
    """Open a SQLite connection to the project database.

    Args:
        database: Optional path (or ``':memory:'``) to connect to. When omitted,
            the module-level ``db_name`` is used, which is the original behaviour.

    Returns:
        sqlite3.Connection: an open connection; the caller must close it.

    Raises:
        UserWarning: if the connection cannot be opened. The underlying error is
            attached as ``__cause__`` so the original traceback is not lost.
    """
    # Resolve the default at call time so a later change to db_name is honoured.
    target = db_name if database is None else database
    try:
        return db.connect(target)
    except Exception as cnxError:
        raise UserWarning(f'Unable to connect to database due to: {cnxError}') from cnxError

In [4]:
def getTrailheadRef():
    """Load the ``trailheadRef`` table into a DataFrame indexed by its ``index`` column.

    Returns:
        pandas.DataFrame: every column of ``trailheadRef`` with the dtypes that
        ``pd.read_sql`` infers (no dtype conversion is applied).

    Raises:
        UserWarning: if the query fails. The database error is attached as
            ``__cause__``.
        KeyError: if the table has no ``index`` column.
    """
    cnx = connectDB()
    try:
        trailheadRef = pd.read_sql('select * from trailheadRef;',con=cnx)
    # pandas wraps driver failures in its own DatabaseError, which is not a
    # subclass of sqlite3.DatabaseError, so both have to be listed.
    except (db.DatabaseError, pd.io.sql.DatabaseError) as dbError:
        raise UserWarning(dbError.__cause__,dbError) from dbError
    finally:
        # Close on success and on failure so a failed query cannot leak the handle.
        cnx.close()
    return trailheadRef.set_index('index')

In [6]:
##  For the sake of demonstration, please assume the data in the 'trailheadRef' table comes from a King County Metro service
##  and/or is user-provided, so some cleaning is required.
def clean_trailheadRef(Latitude: float,Longitude: float, Address: str):
    """Normalise the trailhead table and set aside rows that still need geocoding.

    The table is always reloaded through ``getTrailheadRef()``. The three
    parameters are kept for backward compatibility and are not read.

    A row "needs geocoding" when it has a non-blank address but both coordinates
    are 0.0 (missing coordinates are filled with 0.0 first). Those rows are
    appended to the ``geo_errorLogs`` table and left out of the result.

    Args:
        Latitude: unused.
        Longitude: unused.
        Address: unused.

    Returns:
        pandas.DataFrame: the trailheads that do not need geocoding, with
        ``Latitude``/``Longitude`` cast to float64 where possible and
        ``Address`` cast to ``str``.

    Raises:
        sqlite3.Error: if logging a failed write to ``sql_errorLogs`` also fails.
    """
    trailheads = getTrailheadRef()
    ##set types for error checking, other cleaning
    trailheads['Latitude'] = trailheads['Latitude'].fillna(0.00).astype('float64',errors='ignore')
    trailheads['Longitude'] = trailheads['Longitude'].fillna(0.00).astype('float64',errors='ignore')
    trailheads['Address'] = trailheads['Address'].astype(str)

    # Vectorised mask: always a boolean Series, including for an empty table.
    updateMask = (
        trailheads['Address'].str.strip().ne('')
        & trailheads['Latitude'].eq(0.00)
        & trailheads['Longitude'].eq(0.00)
    )

    cnx = connectDB()
    try:
        ##write stops that match the updateMask (address but no coordinates) to the db as logs
        try:
            trailheads[updateMask].to_sql('geo_errorLogs',con=cnx,if_exists='append')
        except (db.DatabaseError, pd.io.sql.DatabaseError) as dbError:
            # Parameters go in a single tuple and must be bindable values, so the
            # exception is stored as text. Commit, or the row is lost on close().
            cnx.execute(
                'INSERT INTO sql_errorLogs VALUES (?, ?)',
                (str(dbError), str(dt.date.today())),
            )
            cnx.commit()
    finally:
        cnx.close()
    return trailheads[~updateMask]
    

    ##needsGeo = trailheads[updateMask].copy()
    ##TODO: #25 fetch coordinates of trailheads in the update mask using Nominatim address service
    ##TODO #48 merge or call results of clean_trailheadRef() with dropCases()
#def dropCases():
    #dropCase = clean_trailheadRef()
    #dropMask = dropCase.apply(lambda y: (y.Address == '') | (y.Address == 'None'),axis=1)
    #noGeo = [y for y in list(dropCase[dropMask].StopName) if y!='']
    #if noGeo:
        #raise Exception('unable to fetch coordinates for {",".join(noGeo)}')
    #return dropCase[~dropMask]
    ##if there is no address or coordinates, drop the line


Trailhead eBird hotspot update

1. Ask the eBird API for the latest list of hotspots for each trailhead
2. Add new eBird hotspots to the table 'Hotspots' in the trailheadDirectBirds_sous database
3. Update hotspot data

In [7]:
def get_trailheadHotspots(radius: int = 4,fmt: str = 'json'):
    """Ask the eBird API for the hotspots around every cleaned trailhead.

    One request is sent per row returned by ``clean_trailheadRef``, with a
    0.3 second pause before each request.

    Args:
        radius: value sent as the ``dist`` query parameter.
        fmt: value sent as the ``fmt`` query parameter. The response body is
            always parsed with ``.json()``, so only ``'json'`` can succeed.

    Returns:
        pandas.DataFrame: all hotspot records concatenated, each tagged with the
        ``StopName`` of the trailhead it was requested for. An empty DataFrame
        is returned when there are no trailheads to query.

    Raises:
        UserWarning: wraps any error raised while requesting or parsing a
            response (HTTP error status, timeout, invalid JSON). The original
            exception is attached as ``__cause__``.
    """
    import time

    trailheadRef = getTrailheadRef()
    cleanTrailheads = clean_trailheadRef(trailheadRef.Latitude,trailheadRef.Longitude,trailheadRef.Address)

    # Loop-invariant request settings.
    ebird_url = 'https://api.ebird.org/v2/ref/hotspot/geo?'
    ebird_auth_header = {'X-eBirdApiToken': ebird_token}
    request_timeout = 30  # seconds; without it a stalled connection hangs the notebook

    trailheadHotspots = []
    try:
        for trailhead in cleanTrailheads.itertuples():
            time.sleep(0.3)
            ebird_params = {
                'lat': str(trailhead.Latitude),
                'lng': str(trailhead.Longitude),
                'dist': str(radius),
                'fmt': str(fmt)
            }
            ebird_request = requests.get(
                ebird_url,
                headers=ebird_auth_header,
                params=ebird_params,
                timeout=request_timeout,
            )
            # Check the status before touching the body: an error payload is not
            # a list of hotspots and must not be fed to the DataFrame constructor.
            ebird_request.raise_for_status()
            if ebird_request.status_code != requests.codes.ok:
                continue
            ebird_response = pd.DataFrame(ebird_request.json())
            ebird_response['StopName'] = trailhead.StopName
            trailheadHotspots.append(ebird_response)
    except Exception as ee:
        raise UserWarning(ee) from ee

    # pd.concat raises ValueError on an empty list, so handle "no trailheads" explicitly.
    if not trailheadHotspots:
        return pd.DataFrame()
    return pd.concat(trailheadHotspots,ignore_index=True)

In [8]:
##hotspots = get_trailheadHotspots()

def update_trailheadHotspots():
    """Merge freshly fetched eBird hotspots with the stored ``hotspots`` table.

    Steps:
      1. Fetch the current hotspots with ``get_trailheadHotspots()`` and stamp
         them with ``latestUpdate`` = now.
      2. Read the existing rows from the ``hotspots`` table.
      3. Concatenate both, sort descending by StopName, locId, latestObsDt and
         latestUpdate, and keep the first row per (StopName, locId).
      4. Append the result to the ``Hotspots_cooking`` table.

    NOTE(review): the data is read from ``hotspots`` but written to
    ``Hotspots_cooking`` with ``if_exists='append'``, so repeated runs append
    the full merged set again — confirm that this is the intended staging flow.

    Returns:
        pandas.DataFrame: the de-duplicated merged hotspots.

    Raises:
        UserWarning: if the ``hotspots`` table cannot be read; the original
            error is included in the arguments and attached as ``__cause__``.
    """
    # Fetch first so the database connection is not held open during the HTTP calls.
    new_hotspots = pd.DataFrame(get_trailheadHotspots())
    new_hotspots['latestUpdate'] = dt.datetime.today()
    if 'latestObsDt' in new_hotspots:
        # The API delivers latestObsDt as text while the stored rows are parsed
        # into datetimes; convert here so the sort below compares dates with dates.
        new_hotspots['latestObsDt'] = pd.to_datetime(new_hotspots['latestObsDt'])

    cnx = connectDB()
    try:
        try:
            current_hotspots = pd.read_sql('select * from hotspots',cnx,parse_dates=["latestObsDt","latestUpdate"])
            current_hotspots = current_hotspots.set_index('index')
        except Exception as ex:
            # Keep the error itself in the message; ex.__cause__ alone is often None.
            raise UserWarning(ex.__cause__,ex) from ex

        all_hotspots = (
            pd.concat([new_hotspots,current_hotspots],ignore_index=True)
            .sort_values(by=['StopName','locId','latestObsDt','latestUpdate'],ascending=False)
            .drop_duplicates(subset=['StopName','locId'],keep='first')
        )
        all_hotspots.to_sql(name='Hotspots_cooking',con=cnx,if_exists='append')
    finally:
        # Runs on success and on every failure path, so the handle never leaks.
        cnx.close()
    return all_hotspots

In [9]:
# Run the full refresh (fetch hotspots, merge them with the stored 'hotspots' rows, append
# the result to 'Hotspots_cooking') and keep the merged DataFrame for inspection.
gogo = update_trailheadHotspots()

Unnamed: 0,locId,locName,countryCode,subnational1Code,subnational2Code,lat,lng,latestObsDt,numSpeciesAllTime,StopName,latestUpdate
38,L854460,Squak Mountain SP,US,US-WA,US-WA-033,47.510302,-122.047720,2022-01-23 10:16,72.0,SquakMountain,2022-03-03 20:15:16.470153
39,L8312096,Tiger Mountain State Forest--Poo Poo Point Trail,US,US-WA,US-WA-033,47.500638,-122.021262,2022-02-01 10:00,59.0,SquakMountain,2022-03-03 20:15:16.470153
40,L5915435,"stakeout Hooded Oriole, East Renton Highlands ...",US,US-WA,US-WA-033,47.481462,-122.086774,2021-04-19 18:15,32.0,SquakMountain,2022-03-03 20:15:16.470153
37,L4980186,Lake Kathleen,US,US-WA,US-WA-033,47.478255,-122.088232,2022-02-13 15:12,114.0,SquakMountain,2022-03-03 20:15:16.470153
35,L4381196,Cedar Grove Natual Area,US,US-WA,US-WA-033,47.462973,-122.080936,2021-08-10 19:11,32.0,SquakMountain,2022-03-03 20:15:16.470153
...,...,...,...,...,...,...,...,...,...,...,...
31,L8312096,Tiger Mountain State Forest--Poo Poo Point Trail,US,US-WA,US-WA-033,47.500638,-122.021262,2022-02-01 10:00,59.0,ChiricoTrail_PooPooPoint,2022-03-03 20:15:16.470153
29,L8102503,Issaquah Salmon Hatchery,US,US-WA,US-WA-033,47.529503,-122.039512,2022-01-27 08:01,35.0,ChiricoTrail_PooPooPoint,2022-03-03 20:15:16.470153
33,L424117,Tradition Plateau,US,US-WA,US-WA-033,47.529496,-121.996205,2022-02-11 05:59,106.0,ChiricoTrail_PooPooPoint,2022-03-03 20:15:16.470153
34,L3438602,"stakeout Brambling Mt. Fury Circle SW, Issaqua...",US,US-WA,US-WA-033,47.525718,-122.047998,2020-07-29 13:03,45.0,ChiricoTrail_PooPooPoint,2022-03-03 20:15:16.470153
