In [None]:
import datetime as dt
import getpass
import io
import json
import os
import sqlite3 as db
import sys
import time

import geojson
import numpy as np
import pandas as pd
import requests
##add the parent directory to the module search path
sys.path.append('../')

##allow up to 99 rows when a dataframe is displayed
pd.options.display.max_rows=99

In [None]:
github_userName = 'Tanag3r'
##SECURITY: the eBird API token is read from the EBIRD_API_TOKEN environment variable
##instead of being stored in the notebook; ebird_token is None when the variable is unset.
##Any token that was previously committed with this notebook should be revoked and reissued.
ebird_token = os.environ.get('EBIRD_API_TOKEN')
##open the project's SQLite database and keep a cursor for ad-hoc statements
cnx = db.connect('trailheadDirectBirds_sous.db')
cur = cnx.cursor()

NASA Data Products:
- LAI: LAI is a measure for the total area of leaves per unit ground area and directly related to the amount of light that can be intercepted by plants. It is defined as the one-sided green leaf area per unit ground surface area (LAI = leaf area / ground area, m2 / m2) in broadleaf canopies. There are three methods used to measure LAI for conifers; this project uses projected (or one-sided, in accordance with the definition for broadleaf canopies) needle area per unit ground area.
    - In general, a higher LAI value indicates more leaf coverage
- fPAR: Fraction of absorbed photosynthetically active radiation (fPAR) is the fraction of incoming solar radiation in the spectrum of 400–700 nm that is absorbed by vegetation canopy. Data is provided as a percentage.
- Land Cover Type 3: Annual Leaf Area Index (LAI) classification
- FAO-Land Cover Classification System 1 (LCCS1) land cover layer

In [None]:
##half-width, in decimal degrees, of the box drawn around each hotspot by NW/NE/SE/SW
##NOTE(review): originally labelled "equivalent to 0.5 miles"; 0.005 degrees of latitude is
##closer to 0.35 miles and a degree of longitude is shorter still -- confirm the intended size
diff = 0.005
##one row per hotspot: its identifier and its coordinates
hotspotsGeo = pd.read_sql('SELECT locId,lat,lng FROM Hotspots', con=cnx)
##TODO #34 remove duplicate hotspots before building table of polygons for each hotspot --DONE
##sort, keep the first row for each locId, then renumber the rows from 0.
##The result is assigned back: reset_index() returns a new frame, so calling it without
##keeping the result (as before) left the index untouched.
hotspotsGeo = (
    hotspotsGeo
    .sort_values(by=['locId'], ascending=True)
    .drop_duplicates(subset=['locId'], keep='first')
    .reset_index(drop=True)
)
##TODO #33 write a function to increment the lat & lng values as needed then output the results to a single column in the format lat,lng --DONE

##Define functions to produce a corner of a polygon for each hotspot (NE,SE,SW,NW)
def NW(x, y, offset=None):
    """Return the north-west corner of the box around the point (x, y).

    x is decreased and y is increased by the offset. The notebook calls this
    with x=longitude and y=latitude.

    offset: distance to shift by; when omitted, the module-level `diff` is used.
    Returns the tuple (x - offset, y + offset).
    """
    if offset is None:
        offset = diff
    return x - offset, y + offset
def NE(x, y, offset=None):
    """Return the north-east corner of the box around the point (x, y).

    Both x and y are increased by the offset. The notebook calls this with
    x=longitude and y=latitude.

    offset: distance to shift by; when omitted, the module-level `diff` is used.
    Returns the tuple (x + offset, y + offset).
    """
    if offset is None:
        offset = diff
    return x + offset, y + offset
def SE(x, y, offset=None):
    """Return the south-east corner of the box around the point (x, y).

    x is increased and y is decreased by the offset. The notebook calls this
    with x=longitude and y=latitude.

    offset: distance to shift by; when omitted, the module-level `diff` is used.
    Returns the tuple (x + offset, y - offset).
    """
    if offset is None:
        offset = diff
    return x + offset, y - offset
def SW(x, y, offset=None):
    """Return the south-west corner of the box around the point (x, y).

    Both x and y are decreased by the offset. The notebook calls this with
    x=longitude and y=latitude.

    offset: distance to shift by; when omitted, the module-level `diff` is used.
    Returns the tuple (x - offset, y - offset).
    """
    if offset is None:
        offset = diff
    return x - offset, y - offset

##add one column per box corner; each cell holds the tuple returned by the corner function
##NOTE the corner functions are called with (lng, lat): appEEARS expects longitude first,
##which is also the coordinate order GeoJSON (RFC 7946) specifies
for cornerName, cornerFunc in (('NW', NW), ('NE', NE), ('SE', SE), ('SW', SW)):
    hotspotsGeo[cornerName] = hotspotsGeo.apply(
        lambda row, corner=cornerFunc: corner(row.lng, row.lat),
        axis = 1,
    )
##TODO #35 (script to support) write the results to the database as a new table with a 'latestUpdate' column --DONE
##stamp every row with today's date, then convert the column to datetime64[ns]
today = dt.datetime.today()
hotspotsGeo['latestUpdate'] = today.date()
hotspotsGeo['latestUpdate'] = hotspotsGeo['latestUpdate'].astype('datetime64[ns]')
##hotspotsGeo.to_sql(name='hotspotsGeo',con=cnx,if_exists='append')

##TODO #37 compile the polygon generated for each hotspot into a coordinate pack formatted for JSON insert --DONE
##no longer necessary, geoJSON handles packaging
##for locId in hotspotsGeo.itertuples():
    ##hotspotsGeo['polygon'] = '[[{},{},{},{},{}]]'.format(locId.NW,locId.NE,locId.SE,locId.SW,locId.NW).replace('(','[').replace(')',']')

In [None]:
##TODO #40 remove the getpass prompt and replace with a credential manager call
earthdata_baseUrl = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/'
##SECURITY: no password is stored in the notebook any more. The appEEARS password comes from
##the APPEEARS_PASSWORD environment variable (None when unset); the username may be overridden
##with APPEEARS_USERNAME. Any password previously committed here should be changed.
appEEARS_username = os.environ.get('APPEEARS_USERNAME', 'lwylie')
appEEARS_password = os.environ.get('APPEEARS_PASSWORD')
##Earthdata login: use EARTHDATA_USERNAME / EARTHDATA_PASSWORD when they are set so the notebook
##can run unattended, otherwise fall back to the interactive hidden prompts
NASA_username = os.environ.get('EARTHDATA_USERNAME') or getpass.getpass(prompt = 'Enter NASA Earthdata Login Username: ')
NASA_password = os.environ.get('EARTHDATA_PASSWORD') or getpass.getpass(prompt = 'Enter NASA Earthdata Login Password: ')

In [None]:
##obtain an Earthdata token
##TODO #36 the earthdata API is often under maintenance; this cell now aborts when no token is returned
earthdata_loginURL = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/login'
earthdata_loginRequest = requests.post(earthdata_loginURL,auth=(NASA_username,NASA_password),timeout=60)
##stop here with the HTTP error (bad credentials, service down) instead of a KeyError further on
earthdata_loginRequest.raise_for_status()
earthdata_loginResponse = earthdata_loginRequest.json()
if 'token' not in earthdata_loginResponse:
    raise RuntimeError('Earthdata login did not return a token: {}'.format(earthdata_loginResponse))
##show the response without the token itself so the bearer token is not saved in the notebook output
print({key: value for key, value in earthdata_loginResponse.items() if key != 'token'})
##Transcribe token, build header
earthdata_token = earthdata_loginResponse['token']
earthdata_head = {'Authorization': 'Bearer {}'.format(earthdata_token)}

In [None]:
##TODO #43 write a cooked table of locId's with stats to check against list of hotspots. Drive fetch off difference

Architecture:

1. Make all requests in a loop, producing a list of {'locId': 'earthdata_taskID'}
2. For each locId, pull each .csv into a dataframe then load that dataframe into a database table bearing the name that corresponds with the layer and product. Append the locId.
    EXAMPLE: the contents of the .csv file for 'MCD12Q1-006-LC-Prop1-Statistics.csv' go into the table 'MCD12Q1-006-LC-Prop1-Statistics' in the database

In [None]:
##first two hotspots only, used to exercise the task requests without submitting every hotspot
hotspotsGeo_short = hotspotsGeo.iloc[:2]

In [None]:
##TODO #44 resolve error: {'message': "RecursionError('maximum recursion depth exceeded in __instancecheck__',)"} --DONE
##TODO #45 once all hotspots/locId's have Prop1 and Type3 MCD12Q1 data, convert this block to check for LAI and fPAR
def _hotspot_feature_collection(hotspot):
    """Build a GeoJSON FeatureCollection dict holding one polygon for a hotspot row.

    hotspot: a row from itertuples() with NW, NE, SE and SW attributes, each an
        (x, y) pair as produced by the corner functions (x=longitude, y=latitude).
    Returns a plain dict. The ring starts and ends on the NW corner so it is closed.
    """
    corners = (hotspot.NW, hotspot.NE, hotspot.SE, hotspot.SW, hotspot.NW)
    ##real coordinate values (previously a string literal naming the variables was sent instead)
    ring = [[float(x), float(y)] for x, y in corners]
    return {
        "type": "FeatureCollection",
        "features": [{
            "type": "Feature",
            "geometry": {"type": "Polygon", "coordinates": [ring]},
            "properties": {},
        }],
    }


def _area_task_request(task_name, geo):
    """Build the JSON body of an 'area' task request for the two MCD12Q1.006 layers.

    task_name: value sent as 'task_name' (the hotspot's locId in this notebook).
    geo: the GeoJSON object sent as params['geo'].
    """
    return {
        'task_type': 'area',
        'task_name': task_name,
        'params': {
            'dates': [{
                "endDate": "12-31",
                "recurring": True,
                "startDate": "01-01",
                "yearRange": [2017, 2019],
            }],
            'layers': [
                {"layer": "LC_Prop1", "product": "MCD12Q1.006"},
                {"layer": "LC_Type3", "product": "MCD12Q1.006"},
            ],
            'output': {
                'format': {'type': 'netcdf4'},
                'projection': 'geographic',
            },
            'geo': geo,
        },
    }


##one single-row dataframe per accepted task / per failed request
earthdataTaskList = []
earthdata_errorLog = []
for hotspot in hotspotsGeo_short.itertuples():
    ##package polygon as a geoJSON and compile the JSON task request
    glhf_json = geojson.GeoJSON(_hotspot_feature_collection(hotspot))
    earthdata_task = _area_task_request(hotspot.locId, glhf_json)
    ##submit the task request -- inside the loop, so every hotspot is submitted (not only the last one)
    earthdata_taskReq = requests.post('{}task'.format(earthdata_baseUrl),json=earthdata_task,headers=earthdata_head)
    try:
        taskPayload = earthdata_taskReq.json()
    except ValueError:
        ##the body was not JSON; keep the raw text so it still reaches the error log
        taskPayload = {'message': earthdata_taskReq.text}
    ##TODO #47 wrapping the payload in a list yields one row without needing an explicit index --DONE
    ED_taskResp = pd.DataFrame([taskPayload])
    if earthdata_taskReq.status_code == 202:
        ##remember which hotspot the accepted task belongs to
        ED_taskResp['locId'] = hotspot.locId
        earthdataTaskList.append(ED_taskResp)
    else:
        ED_taskResp['statusCode'] = earthdata_taskReq.status_code
        ED_taskResp['request'] = json.dumps(earthdata_task)
        ED_taskResp['reqDate'] = dt.datetime.today()
        earthdata_errorLog.append(ED_taskResp)
        ##TODO #42 anything other than an accepted task stops further submissions; the issue is kept in earthdata_errorLog --DONE
        break

In [None]:
##compile the accepted tasks into one frame and persist any logged errors
##earthdataTaskList and earthdata_errorLog stay lists, so this cell can be re-run without a TypeError
EDTaskList = pd.DataFrame()
if len(earthdata_errorLog) > 0:
    ##write failures to the database even when some tasks were accepted, so none are silently dropped
    earthdata_errorFrame = pd.concat(earthdata_errorLog)
    earthdata_errorFrame.to_sql('earthdata_errorlog',con=cnx,if_exists='append')
if len(earthdataTaskList) > 0:
    EDTaskList = pd.concat(earthdataTaskList)
if len(earthdataTaskList) == 0 and len(earthdata_errorLog) == 0:
    ##nothing was submitted at all: fail with a message rather than a bare Exception
    raise RuntimeError('no Earthdata task was accepted and no error was logged; run the task submission cell first')

In [None]:
##fetch the task list from the API's task endpoint, asking for at most 15 entries, pretty-printed
##TODO #42 if the request returns anything other than a task_id, stop the script and write the returned issue to the error log
params = {'limit': 15, 'pretty': True}
earthdata_taskListUrl = '{}task'.format(earthdata_baseUrl)
earthdata_taskListResp = requests.get(earthdata_taskListUrl,params=params,headers=earthdata_head)
earthdata_tasks = earthdata_taskListResp.json()

In [None]:
##TODO #39 check status of the appEEARS task request --DONE
def _fetch_earthdata_task(task_id):
    """Return the JSON document the API reports for one task id."""
    return requests.get('{}task/{}'.format(earthdata_baseUrl,task_id),headers=earthdata_head).json()


##earthdataTaskList is a list of single-row dataframes, one per accepted task
##NOTE(review): assumes each frame has a 'task_id' column taken from the submit response -- confirm
for taskFrame in earthdataTaskList:
    for earthdata_taskID in taskFrame['task_id']:
        starttime = time.time()
        earthdata_tasksResp = _fetch_earthdata_task(earthdata_taskID)
        while earthdata_tasksResp['status'] !='done':
            print(earthdata_tasksResp['status'])
            ##NOTE(review): assumes a failed task is reported with status 'error' -- confirm against the API docs
            if earthdata_tasksResp['status'] == 'error':
                raise RuntimeError('Earthdata task {} failed: {}'.format(earthdata_taskID,earthdata_tasksResp))
            ##sleep up to the next 20-second mark measured from starttime
            time.sleep(20.0 - ((time.time() - starttime) % 20.0))
            ##refresh the status that the loop condition tests (it was previously never updated)
            earthdata_tasksResp = _fetch_earthdata_task(earthdata_taskID)
        print(earthdata_tasksResp['status'])
##earthdata_taskID is left holding the last task polled; later cells read it

In [None]:
##TODO #38 get the results of the appEEARS task as a bundle
##send the same Authorization header as the other API calls and stop on an HTTP error,
##so a failed request is reported here rather than as a KeyError on 'files' in the next cell
earthdata_bundleResp = requests.get('{}bundle/{}'.format(earthdata_baseUrl,earthdata_taskID),headers=earthdata_head)
earthdata_bundleResp.raise_for_status()
earthdata_bundle = earthdata_bundleResp.json()

In [None]:
##filter for .csv's: map file_id -> file_name for every bundle entry whose file_type is exactly 'csv'
##(the earlier `in 'csv'` check was a substring test, so '', 'c', 'sv', ... also matched)
earthdata_files = {
    bundleFile['file_id']: bundleFile['file_name']
    for bundleFile in earthdata_bundle['files']
    if bundleFile['file_type'] == 'csv'
}

earthdata_files

In [None]:
##download every .csv listed in earthdata_files and stack the rows into one dataframe
##(the loop previously only re-created an empty frame on each pass and never used file_id)
##file URL layout: earthdata_baseUrl + 'bundle/' + taskID + '/' + file_id + '/' + file_name
csvFrames = []
for file_id, file_name in earthdata_files.items():
    fileResp = requests.get(
        '{}bundle/{}/{}/{}'.format(earthdata_baseUrl,earthdata_taskID,file_id,file_name),
        headers=earthdata_head)
    fileResp.raise_for_status()
    fileFrame = pd.read_csv(io.StringIO(fileResp.text))
    ##record which file each row came from; columns that differ between files are filled with NaN by concat
    fileFrame['file_id'] = file_id
    fileFrame['file_name'] = file_name
    csvFrames.append(fileFrame)
##an empty frame when the bundle holds no .csv files
allresults = pd.concat(csvFrames,ignore_index=True) if csvFrames else pd.DataFrame()

In [None]:
##IT WORKS
##a result file can be read straight into a dataframe from its bundle URL, which is laid out as
##earthdata_baseUrl + 'bundle/' + taskID + '/' + file_id + '/' + file_name
qrf_exampleUrl = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/bundle/098dbb7a-0dfc-4f19-8410-a1db4f91170c/5a51d0f8-362a-4ea3-9e8e-ce1bd783aa62/MCD12Q1-006-LC-Prop1-Statistics.csv'
##start from an empty frame, then replace it with the parsed file
qrf = pd.DataFrame()
qrf = pd.read_csv(qrf_exampleUrl)

In [None]:
##close the SQLite connection opened earlier in the notebook as `cnx`;
##cells that read from or write to the database must run before this one
cnx.close()