In [1]:
import pandas as pd
import requests
import datetime as dt
import json
import sys
import os
import sqlite3 as db

In [2]:
# eBird API key, sent in the `X-eBirdApiToken` request header by get_histObs().
# Prefer supplying it through the EBIRD_API_TOKEN environment variable so the
# secret does not have to live in the notebook; the literal below is kept only
# as a backward-compatible fallback.
# NOTE(review): this key is stored in plain text -- consider rotating it and
# removing the fallback once the environment variable is in place.
ebird_token = os.environ.get('EBIRD_API_TOKEN', 'j6c7l80ga2ib')
# SQLite database file opened by connectDB().
db_name = 'trailheadDirectBirds_sous.db'

In [3]:
##connect to database
def connectDB():
    """Open a connection to the SQLite file named by the module-level ``db_name``.

    Returns:
        The connection object produced by ``db.connect(db_name)``.

    Raises:
        UserWarning: if ``db.connect`` raises for any reason; the message
            carries the text of the underlying error.
    """
    try:
        connection = db.connect(db_name)
    except Exception as cnxError:
        # Surface every connection failure as a single, readable error type.
        raise UserWarning(f'Unable to connect to database due to: {cnxError}')
    else:
        return connection

In [25]:
def get_hotspots(min_gap_days=7, max_gap_days=1500):
    """Select hotspots whose stored observation history lags their latest checklist.

    Reads the ``Hotspots`` table (rows whose ``StopName`` starts with
    ``DiscoveryPark`` are excluded) and the ``historicObservations`` table,
    keeps the newest row per ``locId`` from each, left-joins them on
    ``locId`` and returns the hotspots whose gap between ``latestObsDt``
    and the newest stored ``obsDt`` lies in ``[min_gap_days, max_gap_days]``.

    Args:
        min_gap_days: smallest gap, in whole days, that qualifies a hotspot
            (inclusive). Defaults to 7.
        max_gap_days: largest gap, in whole days, that qualifies a hotspot
            (inclusive). Defaults to 1500, which skips the very large gaps.

    Returns:
        pandas.DataFrame with the columns ``locId``, ``latestObsDt`` and
        ``obsDt``. Hotspots without a stored ``obsDt`` get the earliest
        ``obsDt`` found in ``historicObservations``; hotspots whose
        ``latestObsDt`` is missing or older than ``obsDt`` get the current
        local time. The frame is empty when nothing qualifies (including when
        ``historicObservations`` has no rows).

    Raises:
        UserWarning: if the connection cannot be opened (raised by
            ``connectDB``) or if any query/transformation step fails; in the
            latter case the original exception is chained as the cause.
    """
    cnx = connectDB()
    try:
        # get the latest hotspots from the database
        # Discovery Park stops are excluded until a later date; drop the WHERE
        # clause to include them again. The LIKE pattern uses single quotes
        # because SQLite reads a double-quoted token as an identifier first.
        trailheadHotspots = pd.read_sql(
            "select * from Hotspots where Hotspots.StopName not like 'DiscoveryPark%'",
            con=cnx,
            parse_dates=['latestObsDt', 'latestUpdate'],
        )
        # Newest row per hotspot: sort descending, then keep the first locId seen.
        uniqueHotspots = (
            trailheadHotspots
            .set_index('index')
            .sort_values(by=['locId', 'latestObsDt'], ascending=False, ignore_index=True)
            .loc[:, ['locId', 'latestObsDt']]
            .drop_duplicates(subset=['locId'], keep='first')
        )

        # get hotspots from the observations in the database
        histHotObs = pd.read_sql(
            'select locId,obsDt,loadDate from historicObservations',
            con=cnx,
            parse_dates=['obsDt', 'loadDate'],
        )
        # Series.min() skips NaT and yields NaT on an empty table, whereas the
        # builtin min() is order-dependent with NaT and raises on empty input.
        min_obsDt = histHotObs['obsDt'].min()
        latestHistObs = (
            histHotObs
            .sort_values(by=['locId', 'obsDt'], ascending=False, ignore_index=True)
            .drop_duplicates(subset=['locId'], keep='first')
        )

        # merge the tables
        hotspots = pd.merge(left=uniqueHotspots, right=latestHistObs, how='left', on='locId')

        # remove hotspots without a recent checklist
        # NOTE(review): the previous row-wise test was
        #   `loadDate is not NaT or latestObsDt < loadDate`
        # whose second term could only be evaluated when loadDate was NaT (and
        # is then always False), so it reduced to "loadDate is present". That
        # behaviour is kept as-is here; if the intent was `and` (drop only
        # hotspots already loaded after their latest checklist), confirm and
        # change the mask accordingly.
        agingMask = hotspots['loadDate'].notna()
        # .copy() so the assignments below write to an independent frame
        # instead of a slice of the merge result.
        hotspots = hotspots.loc[~agingMask].copy()

        # replace null, NaT observation date values with the earliest value found in the table
        hotspots.loc[hotspots['obsDt'].isna(), 'obsDt'] = min_obsDt
        needsToday = hotspots['latestObsDt'].isna() | (hotspots['latestObsDt'] < hotspots['obsDt'])
        hotspots.loc[needsToday, 'latestObsDt'] = dt.datetime.today()

        # recency check (computed once for the whole frame)
        hotspots['dateDiff'] = (hotspots['latestObsDt'] - hotspots['obsDt']).dt.days
        ##avoiding the big gaps for now....
        inWindow = hotspots['dateDiff'].between(min_gap_days, max_gap_days, inclusive='both')
        dfk = hotspots.loc[inWindow, ['locId', 'latestObsDt', 'obsDt']]
    except Exception as ee:
        raise UserWarning(ee) from ee
    finally:
        cnx.close()
    return dfk

In [26]:
# Display the hotspots currently selected by get_hotspots() (locId, latestObsDt, obsDt).
get_hotspots()

Unnamed: 0,locId,latestObsDt,obsDt
0,L854460,2022-03-25 16:12:00.000000,2022-01-23 10:16:00
1,L8365620,2022-03-24 10:30:00.000000,2022-02-08 09:20:00
3,L8102503,2022-03-28 07:32:00.000000,2022-01-21 14:43:00
4,L7672326,2022-03-11 12:05:00.000000,2021-05-31 15:27:00
5,L6448904,2022-03-06 20:44:00.000000,2022-02-10 14:40:00
8,L4980186,2022-03-08 20:29:00.000000,2022-02-13 15:12:00
17,L3199734,2022-03-30 13:22:14.962816,2019-01-01 08:18:00
22,L11323884,2022-03-30 13:22:14.962816,2019-01-01 08:18:00
23,L10445835,2022-02-19 09:10:00.000000,2020-01-07 12:30:00


In [33]:
##TODO #23 recency refactor: pare down results of hotspot batches to match known Trailhead Direct eBird hotspots

In [28]:
def get_histObs(max_hotspots=10, request_timeout=30):
    """Download historic eBird observations for the selected hotspots and store them.

    Takes the first ``max_hotspots`` rows returned by ``get_hotspots()`` and,
    for every calendar day from the day after ``obsDt`` through
    ``latestObsDt``, requests
    ``https://api.ebird.org/v2/data/obs/<locId>/historic/<y>/<m>/<d>``.
    Each non-empty response is stamped with a ``loadDate`` column, and the
    combined result is appended to the ``historicObservations_cooking`` table.

    Args:
        max_hotspots: number of hotspots (from the top of ``get_hotspots()``)
            to process in one call. Defaults to 10.
        request_timeout: seconds to wait for each eBird response before
            ``requests`` gives up. Defaults to 30.

    Returns:
        pandas.DataFrame with every observation fetched during this call.
        An empty DataFrame is returned, and nothing is written, when the API
        returned no observations at all.

    Raises:
        Exception: if ``get_hotspots()`` returns no rows.
        requests.HTTPError: if any request comes back with an error status;
            nothing is written to the database in that case.
        UserWarning: propagated from ``get_hotspots()`` / ``connectDB()``.
    """
    import time  # kept local: `time` is not imported at notebook level

    # Request pieces that do not change between calls.
    ebird_baseUrl = 'https://api.ebird.org/v2/data/obs/'
    ebird_auth = {'X-eBirdApiToken': ebird_token}
    ebird_params = {
        'fmt': 'json',
        'detail': 'simple'
    }

    dfk_test = get_hotspots().head(max_hotspots)
    if dfk_test.empty:
        raise Exception('The provided dataframe is empty. There are either no eBird hotspots with new checklists or an error has occurred in the evaluation of the get_hotspots() function.')

    ObsHist = []
    for hotspot in dfk_test.itertuples():
        # Start the day after the newest observation already stored.
        startDate = hotspot.obsDt + dt.timedelta(days=1)
        for day in pd.date_range(start=startDate, end=hotspot.latestObsDt, freq='D'):
            time.sleep(0.5)  # pause between consecutive API calls
            ymd = '{}/{}/{}'.format(day.year, day.month, day.day)
            ebird_url = ebird_baseUrl + hotspot.locId + '/historic/' + ymd
            ebird_request = requests.get(
                ebird_url,
                params=ebird_params,
                headers=ebird_auth,
                timeout=request_timeout,
            )
            ebird_request.raise_for_status()
            if ebird_request.status_code != requests.codes.ok:
                # Only a plain 200 carries a payload we know how to parse.
                continue
            ebird_resp = pd.DataFrame(ebird_request.json())
            if ebird_resp.empty:
                # A day without checklists yields an empty payload; skipping it
                # keeps column-less frames out of the concat and the table.
                continue
            ebird_resp['loadDate'] = dt.datetime.today()
            ObsHist.append(ebird_resp)

    if not ObsHist:
        # pd.concat raises on an empty list; report "nothing fetched" instead.
        return pd.DataFrame()

    historicObservations = pd.concat(ObsHist)

    # Open the connection only for the write and always release it.
    cnx = connectDB()
    try:
        historicObservations.to_sql(name='historicObservations_cooking', con=cnx, if_exists='append')
    finally:
        cnx.close()
    return historicObservations

In [29]:
# Run the historic download (this calls the eBird API and appends to the
# database), then preview the first rows of what was fetched.
tgr = get_histObs()
tgr.head()

Unnamed: 0,loadDate,speciesCode,comName,sciName,locId,locName,obsDt,howMany,lat,lng,obsValid,obsReviewed,locationPrivate,subId
0,2022-03-30 13:23:40.362758,stejay,Steller's Jay,Cyanocitta stelleri,L854460,Squak Mountain SP,2022-03-06 14:33,1.0,47.510302,-122.04772,True,False,False,S104347104
1,2022-03-30 13:23:40.362758,amerob,American Robin,Turdus migratorius,L854460,Squak Mountain SP,2022-03-06 14:33,3.0,47.510302,-122.04772,True,False,False,S104347104
2,2022-03-30 13:23:40.362758,daejun,Dark-eyed Junco,Junco hyemalis,L854460,Squak Mountain SP,2022-03-06 14:33,4.0,47.510302,-122.04772,True,False,False,S104347104
0,2022-03-30 13:23:51.182500,brdowl,Barred Owl,Strix varia,L854460,Squak Mountain SP,2022-03-18 18:29,1.0,47.510302,-122.04772,True,False,False,S105108845
1,2022-03-30 13:23:51.182500,haiwoo,Hairy Woodpecker,Dryobates villosus,L854460,Squak Mountain SP,2022-03-18 18:29,1.0,47.510302,-122.04772,True,False,False,S105108845
