In [17]:
import sys
import os
import pandas as pd
import numpy as np
import datetime as dt
import json
import geojson
import requests
import sqlite3 as db

pd.options.display.max_rows=99

In [18]:
github_userName = 'Tanag3r'
ebird_token = 'j6c7l80ga2ib'
db_name = 'trailheadDirectBirds_sous.db'

In [19]:
##connect to database
def connectDB():
    try:
        cnx = db.connect(db_name)
    except Exception as cnxError:
        raise UserWarning(f'Unable to connect to database due to: {cnxError}')
    return cnx

NASA Data Products:
- LAI: LAI is a measure for the total area of leaves per unit ground area and directly related to the amount of light that can be intercepted by plants. It is defined as the one-sided green leaf area per unit ground surface area (LAI = leaf area / ground area, m2 / m2) in broadleaf canopies. There are three methods used to measure LAI for conifers; this project uses projected (or one-sided, in accordance the definition for broadleaf canopies) needle area per unit ground area.
    - In general, a higher LAI value indicates more leaf coverage
- fPAR: Fraction of absorbed photosynthetically active radiation (fPAR) is the fraction of incoming solar radiation in the spectrum of 400–700 nm that is absorbed by vegetation canopy. Data is provided as a percentage.
- Land Cover Type 3: Annual Leaf Area Index (LAI) classification
- FAO-Land Cover Classification System 1 (LCCS1) land cover layer

In [20]:
##TODO #40 remove the getpass prompt and replace with a credential manager call
#earthdata_baseUrl = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/'  #depracated
earthdata_baseUrl = 'https://appeears.earthdatacloud.nasa.gov/api/'
appEEARS_username = 'lwylie'
appEEARS_password = 'BdiUPBhUa7ma5ds'
import getpass
NASA_username = getpass.getpass(prompt = 'Enter NASA Earthdata Login Username: ')
NASA_password = getpass.getpass(prompt = 'Enter NASA Earthdata Login Password: ')

In [21]:
##obtain an Earthdata token
##TODO #36 the earthdata API is often under maintenance, write a script to abort this process if a new token cannot be obtain
##TODO #56 refactor block into a function that requests a token from the earthdata service and returns the header on success
earthdata_loginURL = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/login/'
earthdata_cloudLoginURL = 'https://appeears.earthdatacloud.nasa.gov/api/login'
earthdata_loginRequest = requests.post(earthdata_cloudLoginURL,auth=(NASA_username,NASA_password))
earthdata_loginResponse = earthdata_loginRequest.json()
print(earthdata_loginResponse)
##Transcribe token, builder header
earthdata_token = earthdata_loginResponse['token']
earthdata_head = {'Authorization': 'Bearer {}'.format(earthdata_token)}

{'token_type': 'Bearer', 'token': 'IdDetAD5JKgBniBlXWAOlRH_eglvr0eqOQOqU9e_KGTME-koik9MAjyvlme_FThXbc0dK2o-QgFwXACcexhVrQ', 'expiration': '2022-04-13T22:23:30Z'}


In [22]:
#get the earthdata dictionary from the database and return a list of locId's without Prop1 and Type3 data
def list_needMODIS(testList: list):
    cnx = connectDB()
    diff = 0.005    #equivalent to half a mile
    try:
        queryList = []
        for i in testList:
            i = str(i)
            queryList.append(i)
        queryList = str(queryList).strip('[]')
        #dataframe of hotspots from the database
        hotspotsGeo = pd.read_sql(f'SELECT locId,lat,lng FROM Hotspots WHERE locId not in (SELECT DISTINCT locId from lpdaac_dictionary WHERE LPDAACkey in ({queryList}));', con=cnx)
        if hotspotsGeo.empty == True:
            raise Exception(f'An empty dataframe has been returned')
        else:
            hotspotsGeo.sort_values(by=['locId'],ascending=True,inplace=True)
            hotspotsGeo.drop_duplicates(subset=['locId'],keep='first',inplace=True)
            hotspotsGeo.reset_index()
        #build out squares around each hotspot
            def NW(x,y):
                return x-diff,y+diff
            def NE(x,y):
                return x+diff,y+diff
            def SE(x,y):
                return x+diff,y-diff
            def SW(x,y):
                return x-diff,y-diff
        ##apply the functions as new columns
        ##NOTE that appEEARS only accepts coordinates as (longitude,latitude) which is contrary to geoJSON documentation
            hotspotsGeo['NW'] = hotspotsGeo.apply(lambda i: NW(i.lng,i.lat), axis = 1)
            hotspotsGeo['NE'] = hotspotsGeo.apply(lambda i: NE(i.lng,i.lat), axis = 1)
            hotspotsGeo['SE'] = hotspotsGeo.apply(lambda i: SE(i.lng,i.lat), axis = 1)
            hotspotsGeo['SW'] = hotspotsGeo.apply(lambda i: SW(i.lng,i.lat), axis = 1)
    
    except Exception as exd:
        raise UserWarning(f'An unexpected error occurred in the function list_needMODIS: {exd}')
    return hotspotsGeo

Architecture:

1. Make all requests in a loop, producing a list of {'locId': 'earthdata_taskID'}
2. For each locId, pull each .csv into a dataframe then load that dataframe into a database table bearing the name that corresponds with the layer and product. Append the locId.
    EXAMPLE: the contents of the .csv file for 'MCD12Q1-006-LC-Prop1-Statistics.csv' go into the table 'MCD12Q1-006-LC-Prop1-Statistics' in the database

In [23]:
#TODO #59 update input on geoPack function to take a list of coordinates
def geoPack(NW,NE,SE,SW):
    try:
        geoPack_wrap = {"type": "FeatureCollection", "features":
            [{
            "type":"Feature",
                "geometry":
                    {"type": "Polygon",
                    "coordinates":
                        [[NW,NE,SE,SW,NW]]
                    },
                "properties": {}}]
            }
    except geojson.GeoJSON(geoPack_wrap).is_valid == False:
        print(geojson.GeoJSON(geoPack_wrap).errors)
    return geojson.GeoJSON(geoPack_wrap)

In [24]:
#TODO #60 update 'task' function to include layers as an input
def task(taskName: str,endDate: str,startDate: str,recurring: bool,yearRange: list, geoPack_wrap = geojson.GeoJSON):
    try:
        edTask = {
            'task_type': 'area',
            'task_name': taskName,
            'params': {
                'dates': 
                    [{"endDate": endDate, 
                    "recurring": recurring, 
                    "startDate": startDate, 
                    "yearRange": yearRange}],
                'layers': 
                    [{"layer": "FparExtra_QC", "product": "MOD15A2H.006"}, 
                    {"layer": "FparLai_QC", "product": "MOD15A2H.006"}, 
                    {"layer": "FparStdDev_500m", "product": "MOD15A2H.006"}, 
                    {"layer": "Fpar_500m", "product": "MOD15A2H.006"}, 
                    {"layer": "LaiStdDev_500m", "product": "MOD15A2H.006"}, 
                    {"layer": "Lai_500m", "product": "MOD15A2H.006"}],
                'output': {
                    'format': {
                        'type': 'netcdf4'}, 
                        'projection': 'geographic'},
            'geo':geoPack_wrap}}
    except Exception as JSONerror:
        raise UserWarning(JSONerror)
    return edTask

In [25]:
##error logger function
def log_earthdataError(message,statusCode: int,request: str,reqDate):
    cnx = connectDB()
    cur = cnx.cursor()
    try:
        sqliteInsert = 'INSERT INTO earthdata_errorlog (message,statusCode,request,reqDate) VALUES (?, ?, ?, ?)'
        logTuple = (message,statusCode,request,reqDate)
        cur.execute(sqliteInsert,logTuple)
        cnx.commit()
        cur.close()
    except db.Error as sqlError:
        raise UserWarning(sqlError)
    finally:
        if cnx:
            cnx.close()

In [26]:
##TODO #45 once all hotspots/locId's have Prop1 and Type3 MCD12Q1 data, convert this block to check for LAI and fPAR

#the testList variable requires a str list of tests from the LPDAAC keys table
def post_earthdataTask(testList: list):
    import time
    #hotspotsGeo = hotspotsGeo_short    #small frame of hotspots for testing
    try:
        hotspotsGeo = pd.DataFrame(list_needMODIS(testList=testList)).head(2)
        earthdataTaskList = []
        for locId in hotspotsGeo.itertuples():
            time.sleep(0.5)
            coords = geoPack(NW = locId.NW,NE = locId.NE,SE = locId.SE,SW = locId.SW)
            earthdataTask = task(taskName = locId.locId,endDate = "12-31",startDate="01-01",recurring=True,yearRange=[2019,2021],geoPack_wrap=coords)
            taskReq = requests.post(f'{earthdata_baseUrl}task',json=earthdataTask,headers=earthdata_head)
            if taskReq.status_code != 200:
                log_earthdataError(str(taskReq),int(taskReq.status_code),json.dumps(earthdataTask),dt.datetime.today())
            earthdataTaskList.append(taskReq.json())
    except Exception as ee:
        raise ee
    return earthdataTaskList

In [27]:
post_earthdataTask(testList=["FparExtra_QC","FparLai_QC","FparStdDev_500m","Fpar_500m","LaiStdDev_500m","Lai_500m"])

[{'task_id': 'a9a2331d-188c-44f9-9ede-a9670c08428e', 'status': 'pending'},
 {'task_id': 'a4c4a771-36e9-4c43-8ca0-15d1601cfd1c', 'status': 'pending'}]

In [36]:
def get_earthdataTask(testList: list):
    #postedTasks = pd.DataFrame(help) #for testing
    taskInfo = []
    fileList = []
    cnx = connectDB()
    postedTasks = pd.DataFrame(post_earthdataTask(testList=testList))
    import time
    try:
        for task_id in postedTasks.itertuples():
            while requests.get(f'{earthdata_baseUrl}task/{task_id.task_id}',headers=earthdata_head).json()['status'] !='done':
                requests.get(f'{earthdata_baseUrl}task/{task_id.task_id}',headers=earthdata_head).json()['status']
                time.sleep(20.0)
            taskStatus = requests.get(f'{earthdata_baseUrl}task/{task_id.task_id}',headers=earthdata_head).json()
            taskInfo.append(taskStatus)
        taskBatch = pd.DataFrame(data=taskInfo,columns=['task_id','task_name','status','completed'])

        for task_id in taskBatch.itertuples():
            earthdata_bundle = requests.get(f'{earthdata_baseUrl}bundle/{task_id.task_id}',headers=earthdata_head).json()
            ##filter down to .csv files
            for x in earthdata_bundle['files']:
                if x['file_type'] in 'csv':
                    time.sleep(0.3)
                    fileID = x['file_id']
                    fileName = x['file_name']
                    ##To read results to a dataframe, pass in earthdata_baseUrl + 'bundle/ + taskID/ + file_id/ + file_name
                    getIt_url = requests.get(f'https://appeears.earthdatacloud.nasa.gov/api/bundle/{task_id.task_id}/{fileID}',headers=earthdata_head,allow_redirects=True,stream=True).url

                    getIt = pd.read_csv(getIt_url)
                    getIt['locId'] = task_id.task_name
                    try:
                        getIt.to_sql(name=f'{fileName}_MODIS_cooking',con=cnx,if_exists='append')
                    ##TODO #53 test the exception --DONE
                    ##TODO #95 update exception in get_earthdataTask to get the AWS URL if one is produced and log the url in the db
                    except db.DatabaseError as resultError:
                        fileList.append({resultError: resultError.__cause__,task_id.task_name: f'{earthdata_baseUrl}bundle/{task_id.task_id}/{fileID}/{fileName}'})
                        pass
                else: continue  
    except Exception as ex:
        raise ex
    cnx.close()
    return fileList

In [37]:
boots = get_earthdataTask(testList=["FparExtra_QC","FparLai_QC","FparStdDev_500m","Fpar_500m","LaiStdDev_500m","Lai_500m"])
boots

  sql.to_sql(


[{sqlite3.OperationalError('table MOD15A2H-006-FparExtra-QC-Statistics-QA.csv_MODIS_cooking has no column named 1'): None,
  'L10128988': 'https://appeears.earthdatacloud.nasa.gov/api/bundle/06b8a10d-8ba4-46ef-884c-0dfe401fd510/8f196def-014b-446d-b1b5-0725a7f9b0f9/MOD15A2H-006-FparExtra-QC-Statistics-QA.csv'},
 {sqlite3.OperationalError('table MOD15A2H-006-FparLai-QC-Statistics-QA.csv_MODIS_cooking has no column named 48'): None,
  'L10128988': 'https://appeears.earthdatacloud.nasa.gov/api/bundle/06b8a10d-8ba4-46ef-884c-0dfe401fd510/cfd6f5a6-38b2-4bb1-be6f-7863867789e4/MOD15A2H-006-FparLai-QC-Statistics-QA.csv'},
 {sqlite3.OperationalError('table MOD15A2H-006-FparExtra-QC-Statistics-QA.csv_MODIS_cooking has no column named 1'): None,
  'L10129002': 'https://appeears.earthdatacloud.nasa.gov/api/bundle/37bdd0e6-58fe-4a8e-9327-23cc3e4e7a6a/a768bfa6-c549-4536-b2ae-ede25bc4ba93/MOD15A2H-006-FparExtra-QC-Statistics-QA.csv'},
 {sqlite3.OperationalError('table MOD15A2H-006-FparLai-QC-Statistic