In [1]:
import pandas as pd
import requests
import datetime as dt
import json
import sys
import os

In [2]:
##get latest trailhead hotspot dataset

trailheadHotspots = pd.DataFrame()
trailheadHotspots = pd.read_csv('https://raw.githubusercontent.com/Tanag3r/trailheadDirect_birds/main/trailheadHotspots/trailheadHotspots_Y2022_M1_D11.csv')
trailheadHotspots.head(5)

Unnamed: 0,locId,locName,countryCode,subnational1Code,subnational2Code,lat,lng,latestObsDt,numSpeciesAllTime,StopName
0,L10128988,Berntsen Park,US,US-WA,US-WA-033,47.538439,-122.043281,2021-12-15 11:46,15.0,EastSunsetWay
1,L10129014,"Confluence Park, Issaquah",US,US-WA,US-WA-033,47.536469,-122.039342,2022-01-08 12:54,44.0,EastSunsetWay
2,L8365620,Issaquah High School,US,US-WA,US-WA-033,47.52249,-122.028687,2021-12-17 08:00,58.0,EastSunsetWay
3,L8102503,Issaquah Salmon Hatchery,US,US-WA,US-WA-033,47.529503,-122.039512,2021-12-03 08:22,33.0,EastSunsetWay
4,L3352189,"Maple Street ponds, Issaquah",US,US-WA,US-WA-033,47.543027,-122.053715,2021-11-30 10:12,46.0,EastSunsetWay


#Duplicate Check
##Duplicate checking is done here rather than in the hotspot factory to prevent shelling of the eBird API while still allowing trailheads in close proximity to each other to pull the same observation data. 

In [3]:
##pull in trailhead reference as base for duplicate hotspot removal

trailheadRef = pd.DataFrame()
trailheadRef_json = 'https://raw.githubusercontent.com/Tanag3r/trailheadDirect_birds/main/trailheadsRef.json'
response = requests.get(trailheadRef_json)
trailheadRef = pd.DataFrame(response.json())

trailheadRef.head()

Unnamed: 0,Route,StopName,Address,Latitude,Longitude
0,IssaquahAlps,EastSunsetWay,"661-831 E Sunset Way, Issaquah, WA 98027",47.529635,-122.025119
1,IssaquahAlps,HighSchool,"Parking lot, The Rainier Trail, Issaquah, WA 9...",47.519345,-122.029801
2,IssaquahAlps,ChiricoTrail_PooPooPoint,"11400 Issaquah-Hobart Road Southeast, Issaquah...",47.499949,-122.02173
3,IssaquahAlps,SquakMountain,"13201 Squak Mountain Rd SE, Issaquah, WA 98027",47.481465,-122.053997
4,IssaquahAlps,NullMountain,,,


In [4]:
trailheadHotspots['StopName'].value_counts()

DiscoveryParkSouth          26
DiscoveryParkNorth          22
EastSunsetWay               14
HighSchool                  14
MargaretsWay                10
ChiricoTrail_PooPooPoint     7
SquakMountain                6
MountTeneriffe               6
MountSi                      5
LittleSi                     1
Name: StopName, dtype: int64

In [5]:
dfk = pd.merge(
    left=trailheadHotspots,
    right=trailheadRef,
    how='left',
    left_on='StopName',
    right_on='StopName')

In [6]:
##calculate absolute distance from stop location
##why did I do this, I don't need relevancy
for i in dfk.itertuples():
    latInt = dfk['lat']
    latitudeInt = dfk['Latitude']
    lngInt = dfk['lng']
    longitudeInt = dfk['Longitude']
    dfk['dist_fromTH'] = abs(latitudeInt-latInt) + abs(longitudeInt-lngInt)
dfk['dist_fromTH'].astype('int32')

0      0
1      0
2      0
3      0
4      0
      ..
106    0
107    0
108    0
109    0
110    0
Name: dist_fromTH, Length: 111, dtype: int32

In [7]:
##sort to push to the hotspot closest to the trailhead to the top of the subset
dfk.sort_values(by=['locName','dist_fromTH'],inplace=True)
##remove any duplicate hotspots except for the hotspot closest to the trailhead
dfk_cooked = dfk.drop_duplicates(subset=['locName'],keep='first')

dfk_cooked['StopName'].value_counts()

DiscoveryParkSouth          14
DiscoveryParkNorth          13
EastSunsetWay                9
MargaretsWay                 8
MountTeneriffe               5
HighSchool                   3
ChiricoTrail_PooPooPoint     2
SquakMountain                1
MountSi                      1
Name: StopName, dtype: int64

In [8]:
##TODO #7: write engine to check for new hotspots; also supports addition of new stops

In [9]:
##todo #8: write recency check engine and updater. Check for a gap between the latest run time and current date
dateRange_historical = []

today = dt.date.today()
yearStart = today.year-3
yearStop = today.year-1

for i in range(yearStart,yearStop):
    days = pd.date_range(
        start=dt.date(i,1,1),
        end=dt.date(i,12,31)
    )
    dateRange_historical.append(days)

dateList_hist = []
for sublist in dateRange_historical:
    for item in sublist:
        dateList_hist.append(item)

In [10]:
ebird_token = 'j6c7l80ga2ib'

##Build historical observation list
Run for each trailhead/StopName, then merge files and delete this comment

Progress on baseline TH history:
[] DiscoveryParkNorth
[] DiscoveryParkSouth
[X] EastSunsetWay
[X] MargaretsWay
[X] MountTeneriffe
[X] HighSchool
[] ChiricoTrail_PooPooPoint
[] SquakMountain
[] MountSi


In [11]:
THhotspots_cooked = dfk_cooked[['locId','locName','StopName']]
xCookedHotspots = THhotspots_cooked.loc[THhotspots_cooked['StopName'] == 'ChiricoTrail_PooPooPoint']

In [13]:
import time

ObsHist = []

for locId in xCookedHotspots.itertuples():
    for date in dateList_hist:
        time.sleep(0.5)
        date = date
        ymd = '{}/{}/{}'.format(date.year,date.month,date.day)
        ebird_baseUrl = 'https://api.ebird.org/v2/data/obs/'
        ebird_url = ebird_baseUrl + locId.locId + '/historic/' + ymd
        ebird_auth = {'X-eBirdApiToken': ebird_token}
        ebird_params = {
            'fmt':'json',
            'detail':'simple'
        }
        ebird_request = requests.get(ebird_url,params=ebird_params,headers=ebird_auth)
        if ebird_request.status_code == requests.codes.ok:
            ebird_resp = pd.DataFrame(ebird_request.json())
            ObsHist.append(ebird_resp)
        ebird_request.raise_for_status()

In [14]:
batch_ObsHist = pd.concat(ObsHist)
batch_ObsHist.head()

Unnamed: 0,speciesCode,comName,sciName,locId,locName,obsDt,howMany,lat,lng,obsValid,obsReviewed,locationPrivate,subId
0,amecro,American Crow,Corvus brachyrhynchos,L8365620,Issaquah High School,2019-01-02 15:47,3.0,47.52249,-122.028687,True,False,False,S51233769
1,bkcchi,Black-capped Chickadee,Poecile atricapillus,L8365620,Issaquah High School,2019-01-02 15:47,5.0,47.52249,-122.028687,True,False,False,S51233769
2,chbchi,Chestnut-backed Chickadee,Poecile rufescens,L8365620,Issaquah High School,2019-01-02 15:47,1.0,47.52249,-122.028687,True,False,False,S51233769
3,daejun,Dark-eyed Junco,Junco hyemalis,L8365620,Issaquah High School,2019-01-02 15:47,7.0,47.52249,-122.028687,True,False,False,S51233769
4,annhum,Anna's Hummingbird,Calypte anna,L8365620,Issaquah High School,2019-01-02 14:37,2.0,47.52249,-122.028687,True,False,False,S51235925


In [15]:
batch_ObsHist.to_csv('ChiricoTrail_PooPooPoint_Obs_2019to2021.csv',sep=',',index=False)