In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import datetime as dt
import sqlite3 as db
import geopy
#import matplotlib.pyplot as pp

In [2]:
github_userName = 'Tanag3r'
ebird_token = 'j6c7l80ga2ib'
db_name = 'trailheadDirectBirds_sous.db'

In [3]:
##connect to database
def connectDB():
    try:
        cnx = db.connect(db_name)
    except Exception as cnxError:
        raise UserWarning(f'Unable to connect to database due to: {cnxError}')
    return cnx

In [61]:
def stopQualityMask(comName: str,closestStop: str):
    cnx = connectDB()
    try:
        gap = dt.date.today().year-2018
        query = f'SELECT COUNT(DISTINCT(year)) as "frq" FROM coefficients_bySpecies WHERE comName = "{comName}" AND closestStop = "{closestStop}"'
        coeficients = pd.read_sql(query,con=cnx)
        coeficients['frq'] = coeficients.apply(lambda g: (g.frq/gap),axis=1)
    except Exception as maskExc:
        raise maskExc
    return coeficients

In [62]:
msk = stopQualityMask("Canada Jay","ChiricoTrail_PooPooPoint")

In [None]:
##TODO #77 identify birds that express seasonality/can only be found in the observed regions during specific times of year to inform the method of interpolation

In [None]:
##TODO #78 calculate R2 scores for each relative abundance interpolation method

In [105]:
def weeklyAbundance():
    cnx = connectDB()
    try:
        #note that the query filters out birds only seen by two people
        ObsDataset = pd.read_sql('SELECT comName,locId,closestStop,obsDt,howMany,subId FROM historicObservations AS FX WHERE (SELECT count(distinct(subId)) FROM historicObservations AS QA WHERE QA.comName = FX.comName) > 2 AND FX.comName in ("Canada Jay","Orange-crowned Warbler") LEFT JOIN closestStop on FX.locId = closestStop.locId;',con=cnx,parse_dates=['obsDt']) #Canada Jay filter is for testing purposes only
        ObsDataset['obsDt_week'] = ObsDataset['obsDt'].dt.isocalendar().week
        #group up data
        ObsDataset = ObsDataset.groupby(['comName','locId','obsDt_week'])['howMany'].mean()
        ObsDataset = pd.DataFrame(ObsDataset)
        ObsDataset['relativeAbundance'] = (ObsDataset['howMany']/ObsDataset['howMany'].mean())/max(ObsDataset['howMany']/ObsDataset['howMany'].mean())  #integrity of locId preserved by dividing per-week mean by total observed mean
        ObsDataset.sort_values(by=['comName','obsDt_week'],ascending=True,inplace=True)   #not needed
        ObsDataset.reset_index(inplace=True)   #not needed
    except Exception as calcEx:
        raise calcEx
    finally: cnx.close()
    return ObsDataset

In [128]:
##TODO #71 apply polynomial interpolation to the weeklyAbundance values for use in forecasting --DONE

def forecastWklyAbd():
    realObsAbd = weeklyAbundance()
    try:
        keyList = realObsAbd.drop_duplicates(subset=['comName'])
        realObsAbd = realObsAbd.groupby(['comName','obsDt_week'])['relativeAbundance'].mean()
        realObsAbd = realObsAbd.reset_index()
        forecastList = []
        for i in keyList.itertuples():
            allweek = pd.DataFrame({'obsDt_week':range(1,53)})
            obsAbd = realObsAbd[realObsAbd['comName'] == i.comName]
            obsAbd.drop(columns=['comName'],inplace=True)
            ptForxt = pd.merge(left=obsAbd,right=allweek,left_on='obsDt_week',right_on='obsDt_week',how='outer')
            ptForxt.set_index('obsDt_week',inplace=True)
            ptForxt.sort_index(axis='index',ascending=True,inplace=True)
            ptForxt.interpolate(method='spline',order=3,inplace=True,limit=3,limit_direction='both')
            ptForxt['comName'] = i.comName
            ptForxt = ptForxt.reset_index()
            forecastList.append(ptForxt)
        forecastAbd = pd.DataFrame()
        forecastAbd = pd.concat(forecastList,ignore_index=True)
    except Exception as abdForxExc:
        raise UserWarning(abdForxExc)
    return forecastAbd

In [118]:
trx = weeklyAbundance()

In [129]:
forx = forecastWklyAbd()

In [None]:
##TODO #66 split out birds into quartiles based on the number of observations available and the YOY frequency. Results will determine the strength of forecasting required

In [71]:
##TODO #64 using the relative abundance value from weeklyAbundance(), write a function that returns FAO land coverage preference --DONE
##TODO #68 add seasons to relative abundance function, then add in a loop over each season in the function seasonalHabitatPreference
def seasonalHabitatPref():
    cnx = connectDB()
    try:
        wklyAbd = weeklyAbundance()
        #start season loop here
        rawList = []
        for locId in wklyAbd.itertuples():
            query = f'SELECT Barren,Urban,Grasslands,Savannas,waterBodies,evergreenNeedleleafForest,evergreenBroadleafForests,openForests,mixedBroadleafandNeedleleafForests,sparseForests,denseHerbaceous,sparseHerbaceous FROM FAO_by_locId WHERE FAO_by_locId.locId = "{locId.locId}"'
            lx = pd.read_sql(query,con=cnx)
            lx = lx.applymap(lambda x: locId.relativeAbundance*x,na_action='ignore')
            lx['comName'] = locId.comName
            lx['obsDt_week'] = locId.obsDt_week
            rawList.append(lx)
        results = pd.DataFrame()
        results = pd.concat(rawList,ignore_index=True)
        results = results.groupby(['comName','obsDt_week']).mean()
        results.reset_index(inplace=True)
        results = pd.DataFrame(results)
    except Exception as prefExc:
        raise UserWarning(prefExc)
    finally: cnx.close()
    return results

In [103]:
##TODO #65 using historic habitat preference data, forecast the seasonal habitat preference for each species of bird --DONE
##TODO #74 update the interpolation method used in forecasting habitat preference to polynomial or just use the forecasted relative abundance
def forecastHabtPref():
    realHabtPref = seasonalHabitatPref()
    habtKeys = realHabtPref.drop_duplicates(subset=['comName'])
    try:
        forxHabtList = []
        for i in habtKeys.itertuples():
            allweek = pd.DataFrame({'obsDt_week':range(1,53)})
            obsPref = realHabtPref[realHabtPref['comName'] == i.comName]
            obsPref.drop(columns=['comName'],inplace=True)
            forxPref = pd.merge(left=obsPref,right=allweek,left_on='obsDt_week',right_on='obsDt_week',how='outer')
            forxPref.set_index('obsDt_week',inplace=True)
            forxPref.sort_index(axis='index',ascending=True,inplace=True)
            #forxPref.interpolate(method='polynomial',order=3,inplace=True,limit=3,limit_direction='both')
            forxPref.interpolate(method='linear',inplace=True,limit=3,limit_direction='both')
            forxPref['comName'] = i.comName
            forxPref = forxPref.reset_index()
            forxHabtList.append(forxPref)
        forecastPref = pd.DataFrame()
        forecastPref = pd.concat(forxHabtList,ignore_index=True)
    except Exception as habtPrefForxExcp:
        raise UserWarning(habtPrefForxExcp)
    return forecastPref

In [None]:
##TODO #65 using historic habitat preference data, forecast the seasonal habitat preference for each species of bird --DONE
##TODO #74 update the interpolation method used in forecasting habitat preference to polynomial or just use the forecasted relative abundance
def forecastHabtPref():
    realHabtPref = seasonalHabitatPref()
    habtKeys = realHabtPref.drop_duplicates(subset=['comName'])
    try:
        forxHabtList = []
        for i in habtKeys.itertuples():
            allweek = pd.DataFrame({'obsDt_week':range(1,53)})
            obsPref = realHabtPref[realHabtPref['comName'] == i.comName]
            obsPref.drop(columns=['comName'],inplace=True)
            forxPref = pd.merge(left=obsPref,right=allweek,left_on='obsDt_week',right_on='obsDt_week',how='outer')
            forxPref.set_index('obsDt_week',inplace=True)
            forxPref.sort_index(axis='index',ascending=True,inplace=True)
            #forxPref.interpolate(method='polynomial',order=3,inplace=True,limit=3,limit_direction='both')
            forxPref.interpolate(method='linear',inplace=True,limit=3,limit_direction='both')
            forxPref['comName'] = i.comName
            forxPref = forxPref.reset_index()
            forxHabtList.append(forxPref)
        forecastPref = pd.DataFrame()
        forecastPref = pd.concat(forxHabtList,ignore_index=True)
    except Exception as habtPrefForxExcp:
        raise UserWarning(habtPrefForxExcp)
    return forecastPref

In [104]:
forgex = forecastHabtPref()
forgex

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,obsDt_week,Urban,Savannas,evergreenNeedleleafForest,evergreenBroadleafForests,openForests,mixedBroadleafandNeedleleafForests,sparseForests,comName
0,1,,,0.010000,,,0.067692,,Canada Jay
1,2,,,0.060171,,,0.067692,,Canada Jay
2,3,,,0.110342,,,0.067692,,Canada Jay
3,4,,,0.160513,,,0.067692,,Canada Jay
4,5,,,,,,,,Canada Jay
...,...,...,...,...,...,...,...,...,...
100,48,,,,,,,,Orange-crowned Warbler
101,49,,,,,,,,Orange-crowned Warbler
102,50,,,,,,,,Orange-crowned Warbler
103,51,,,,,,,,Orange-crowned Warbler
