In [3]:
import sys
import os
import pandas as pd
import numpy as np
import datetime as dt
import json
import geojson
import requests
import sqlite3 as db
sys.path.append('../')

pd.options.display.max_rows=99

In [4]:
github_userName = 'Tanag3r'
##SECURITY: the eBird API token is read from the EBIRD_API_TOKEN environment variable
##instead of being committed with the notebook. Any token that was ever hard-coded
##here should be treated as leaked and regenerated.
##An empty string means the environment variable is not set.
ebird_token = os.environ.get('EBIRD_API_TOKEN', '')
db_name = 'trailheadDirectBirds_sous.db'

In [5]:
##connect to database
def connectDB(database=None):
    """Open a connection to the project's SQLite database.

    Parameters
    ----------
    database : str, optional
        Path of the SQLite file to open. When omitted, the module-level
        ``db_name`` is used, which is what a call without arguments has
        always done.

    Returns
    -------
    sqlite3.Connection
        An open connection. The caller is responsible for closing it.

    Raises
    ------
    UserWarning
        If the connection cannot be opened; the original error is chained
        as ``__cause__``.
    """
    ##db_name is only looked up when no explicit path is given
    target = db_name if database is None else database
    try:
        cnx = db.connect(target)
    except Exception as cnxError:
        raise UserWarning(f'Unable to connect to database due to: {cnxError}') from cnxError
    return cnx

NASA Data Products:
- LAI: Leaf Area Index (LAI) is a measure of the total area of leaves per unit ground area and is directly related to the amount of light that can be intercepted by plants. It is defined as the one-sided green leaf area per unit ground surface area (LAI = leaf area / ground area, m2 / m2) in broadleaf canopies. There are three methods used to measure LAI for conifers; this project uses projected (or one-sided, in accordance with the definition for broadleaf canopies) needle area per unit ground area.
    - In general, a higher LAI value indicates more leaf coverage
- fPAR: Fraction of absorbed photosynthetically active radiation (fPAR) is the fraction of incoming solar radiation in the spectrum of 400–700 nm that is absorbed by vegetation canopy. Data is provided as a percentage.
- Land Cover Type 3: Annual Leaf Area Index (LAI) classification
- FAO-Land Cover Classification System 1 (LCCS1) land cover layer

In [7]:
##TODO #40 remove the getpass prompt and replace with a credential manager call
earthdata_baseUrl = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/'
import getpass
##SECURITY: credentials are never stored in the notebook. Any username/password that
##was previously hard-coded here should be treated as leaked and changed.
##The appEEARS_* names are kept so that cells referring to them still resolve; they
##are empty strings unless APPEEARS_USERNAME / APPEEARS_PASSWORD are set.
appEEARS_username = os.environ.get('APPEEARS_USERNAME', '')
appEEARS_password = os.environ.get('APPEEARS_PASSWORD', '')
##Earthdata Login: use EARTHDATA_USERNAME / EARTHDATA_PASSWORD when set, otherwise
##fall back to the interactive prompt.
NASA_username = os.environ.get('EARTHDATA_USERNAME') or getpass.getpass(prompt = 'Enter NASA Earthdata Login Username: ')
NASA_password = os.environ.get('EARTHDATA_PASSWORD') or getpass.getpass(prompt = 'Enter NASA Earthdata Login Password: ')

In [8]:
##obtain an Earthdata token
##TODO #56 refactor block into a function that requests a token from the earthdata service and returns the header on success
earthdata_loginURL = 'https://lpdaacsvc.cr.usgs.gov/appeears/api/login'
earthdata_loginRequest = requests.post(earthdata_loginURL,auth=(NASA_username,NASA_password))
##TODO #36: abort here when the service is unavailable or rejects the credentials,
##instead of failing later with a KeyError on the missing 'token' field
if not earthdata_loginRequest.ok:
    raise UserWarning(f'Earthdata login failed with HTTP status {earthdata_loginRequest.status_code}')
earthdata_loginResponse = earthdata_loginRequest.json()
if 'token' not in earthdata_loginResponse:
    raise UserWarning('Earthdata login response did not contain a token')
##show only the non-secret fields: printing the whole response stores the bearer token in the notebook output
print({key: value for key, value in earthdata_loginResponse.items() if key != 'token'})
##Transcribe token, build header
earthdata_token = earthdata_loginResponse['token']
earthdata_head = {'Authorization': 'Bearer {}'.format(earthdata_token)}

{'token_type': 'Bearer', 'token': '<redacted>', 'expiration': '2022-02-27T20:34:16Z'}


In [9]:
#get the hotspots that are not yet in earthdata_dictionary and return them as a dataframe with bounding-box corners
def list_needMODIS():
    """Return the hotspots that still need MODIS data, with a square around each one.

    Reads ``locId``, ``lat`` and ``lng`` from the ``Hotspots`` table for every
    ``locId`` that does not appear in ``earthdata_dictionary``, keeps one row
    per ``locId`` (sorted ascending) and adds the four corners of a square
    centred on the hotspot.

    Returns
    -------
    pandas.DataFrame
        Columns ``locId``, ``lat``, ``lng``, ``NW``, ``NE``, ``SE``, ``SW``.
        Each corner is a ``(longitude, latitude)`` tuple. The index is
        renumbered from 0 after sorting and de-duplication.

    Raises
    ------
    UserWarning
        If the connection cannot be opened, if the query returns no rows, or
        if anything else fails while building the frame.
    """
    diff = 0.005    #half-width of the square in decimal degrees (original note: equivalent to half a mile)
    cnx = connectDB()
    try:
        #dataframe of hotspots from the database
        hotspotsGeo = pd.read_sql('SELECT locId,lat,lng FROM Hotspots WHERE locId not in (SELECT DISTINCT locId from earthdata_dictionary)', con=cnx)
        if hotspotsGeo.empty:
            raise Exception('An empty dataframe has been returned')
        #reset_index returns a new frame, so the result has to be kept
        hotspotsGeo = (
            hotspotsGeo
            .sort_values(by=['locId'],ascending=True)
            .drop_duplicates(subset=['locId'],keep='first')
            .reset_index(drop=True)
        )
        #edges of the square around each hotspot, as plain Python floats
        west = (hotspotsGeo['lng'] - diff).tolist()
        east = (hotspotsGeo['lng'] + diff).tolist()
        north = (hotspotsGeo['lat'] + diff).tolist()
        south = (hotspotsGeo['lat'] - diff).tolist()
        ##NOTE that appEEARS only accepts coordinates as (longitude,latitude) which is contrary to geoJSON documentation
        hotspotsGeo['NW'] = list(zip(west,north))
        hotspotsGeo['NE'] = list(zip(east,north))
        hotspotsGeo['SE'] = list(zip(east,south))
        hotspotsGeo['SW'] = list(zip(west,south))
    except Exception as exd:
        raise UserWarning(f'An unexpected error occurred in the function list_needMODIS: {exd}') from exd
    finally:
        #the connection is only needed for the query; release it on success and on failure
        cnx.close()
    return hotspotsGeo

Architecture:

1. Make all requests in a loop, producing a list of {'locId': 'earthdata_taskID'}
2. For each locId, pull each .csv into a dataframe then load that dataframe into a database table bearing the name that corresponds with the layer and product. Append the locId.
    EXAMPLE: the contents of the .csv file for 'MCD12Q1-006-LC-Prop1-Statistics.csv' go into the table 'MCD12Q1-006-LC-Prop1-Statistics' in the database

In [51]:
##two-row sample of the hotspots that still need MODIS data, used while testing
hotspotsGeo_short = list_needMODIS().head(2)

In [19]:
def geoPack(NW,NE,SE,SW):
    """Wrap four corner coordinates in a GeoJSON FeatureCollection holding one Polygon.

    Parameters
    ----------
    NW, NE, SE, SW
        The corners of the area, used in that order as the polygon ring.
        Each is placed in the ``coordinates`` array as given.

    Returns
    -------
    geojson.GeoJSON
        A FeatureCollection with a single Feature whose geometry is the
        polygon and whose ``properties`` are empty.
    """
    geoPack_wrap = geojson.GeoJSON({
        "type": "FeatureCollection",
        "features": [{
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                #the ring is closed by repeating the first corner at the end
                "coordinates": [[NW,NE,SE,SW,NW]]
            },
            "properties": {}
        }]
    })
    #The validity check used to sit in the expression of an `except` clause, which
    #is only evaluated when the `try` body raises, so it never ran.
    #NOTE(review): confirm that the generic GeoJSON wrapper validates the nested
    #geometry; a typed geojson object may be needed for a meaningful check.
    if not geoPack_wrap.is_valid:
        print(geoPack_wrap.errors())
    return geoPack_wrap

In [20]:
def task(taskName: str,endDate: str,startDate: str,recurring: bool,yearRange: list, geoPack_wrap = geojson.GeoJSON):
    """Assemble the request body for an 'area' task.

    Parameters
    ----------
    taskName : str
        Stored under ``task_name``.
    endDate, startDate : str
        Stored in the single ``dates`` entry.
    recurring : bool
        Stored in the single ``dates`` entry.
    yearRange : list
        Stored in the single ``dates`` entry.
    geoPack_wrap
        Stored under ``params['geo']``. Note that the default is the
        ``geojson.GeoJSON`` class itself, not an instance, so callers are
        expected to pass a value.

    Returns
    -------
    dict
        The task description, requesting the ``LC_Prop1`` and ``LC_Type3``
        layers of product ``MCD12Q1.006`` as netcdf4 in geographic projection.

    Raises
    ------
    UserWarning
        If building the dictionary raises.
    """
    try:
        dateWindow = {
            "endDate": endDate,
            "recurring": recurring,
            "startDate": startDate,
            "yearRange": yearRange,
        }
        requestedLayers = [
            {"layer": name, "product": "MCD12Q1.006"}
            for name in ("LC_Prop1", "LC_Type3")
        ]
        outputSpec = {'format': {'type': 'netcdf4'}, 'projection': 'geographic'}
        taskParams = {
            'dates': [dateWindow],
            'layers': requestedLayers,
            'output': outputSpec,
            'geo': geoPack_wrap,
        }
        edTask = {'task_type': 'area', 'task_name': taskName, 'params': taskParams}
    except Exception as buildError:
        raise UserWarning(buildError)
    return edTask

In [21]:
##error logger function
def log_earthdataError(message,statusCode: int,request: str,reqDate):
    """Insert one row into the ``earthdata_errorlog`` table.

    Parameters
    ----------
    message
        Stored in the ``message`` column.
    statusCode : int
        Stored in the ``statusCode`` column.
    request : str
        Stored in the ``request`` column.
    reqDate
        Stored in the ``reqDate`` column.

    Raises
    ------
    UserWarning
        If the connection cannot be opened or SQLite rejects the insert.
    """
    cnx = connectDB()
    try:
        cur = cnx.cursor()
        sqliteInsert = 'INSERT INTO earthdata_errorlog (message,statusCode,request,reqDate) VALUES (?, ?, ?, ?)'
        logTuple = (message,statusCode,request,reqDate)
        cur.execute(sqliteInsert,logTuple)
        cur.close()
        ##without a commit the pending INSERT is rolled back when the connection is closed
        cnx.commit()
    except db.Error as sqlError:
        raise UserWarning(sqlError) from sqlError
    finally:
        cnx.close()

In [53]:
##TODO #45 once all hotspots/locId's have Prop1 and Type3 MCD12Q1 data, convert this block to check for LAI and fPAR
def post_earthdataTask(hotspots=None):
    """Submit one earthdata task per hotspot and collect the service's replies.

    Parameters
    ----------
    hotspots : pandas.DataFrame, optional
        Frame with ``locId``, ``NW``, ``NE``, ``SE`` and ``SW`` columns, as
        produced by ``list_needMODIS``. When omitted, the notebook-level
        ``hotspotsGeo_short`` test frame is used, which is what a call without
        arguments has always done.

    Returns
    -------
    list
        The decoded JSON body of every accepted submission.

    Raises
    ------
    UserWarning
        If anything fails while building or sending a request.
    """
    import time
    earthdataTaskList = []
    try:
        if hotspots is None:
            ##small frame of hotspots for testing
            hotspots = hotspotsGeo_short
        for hotspot in hotspots.itertuples():
            ##pause between submissions
            time.sleep(0.5)
            coords = geoPack(NW = hotspot.NW,NE = hotspot.NE,SE = hotspot.SE,SW = hotspot.SW)
            earthdataTask = task(taskName = hotspot.locId,endDate = "12-31",startDate="01-01",recurring=True,yearRange=[2017,2019],geoPack_wrap=coords)
            taskReq = requests.post(f'{earthdata_baseUrl}task',json=earthdataTask,headers=earthdata_head)
            ##any 2xx status is a success; comparing against 200 alone reports other
            ##success codes (for example 202 Accepted) as errors
            if not taskReq.ok:
                log_earthdataError(taskReq.text,int(taskReq.status_code),json.dumps(earthdataTask),dt.datetime.today())
                ##a rejected submission has no task to follow up on, so keep it out of the result
                continue
            earthdataTaskList.append(taskReq.json())
    except Exception as ee:
        raise UserWarning(ee) from ee
    return earthdataTaskList

In [54]:
##NOTE: this assignment shadows the builtin help(); get_earthdataTask() reads this global, so the name must stay in sync with that function
help = post_earthdataTask()
help

[{'task_id': '4e5f730c-b0f5-4c74-9e53-6a59b9367196', 'status': 'pending'},
 {'task_id': '9565e075-7b19-46bf-8bcd-b1581e5f10ac', 'status': 'pending'}]

In [None]:
##TODO #55 test implementation of list_needMODIS() within get_earthdataTask function

In [55]:
##TODO #57 the function get_earthdataTask does not pass 'done' tasks to the file reader --DONE
def get_earthdataTask(postedTasks=None, poll_interval: float = 10.0):
    """Wait for posted tasks to finish and load their .csv results into the database.

    Each task is polled until it reports ``done``. For every finished task the
    bundle listing is fetched, each ``csv`` file is read into a dataframe,
    tagged with the task name in a ``locId`` column and appended to the table
    ``<file_name>_MODIS_cooking``.

    Parameters
    ----------
    postedTasks : list or pandas.DataFrame, optional
        Submitted tasks, each carrying a ``task_id``. When omitted, the
        notebook-level ``help`` variable is used, which is what a call without
        arguments has always done.
    poll_interval : float, optional
        Seconds to wait between status checks.

    Returns
    -------
    list
        One dict per problem: a file that could not be written to the
        database, or a task that ended in a failure state. Empty when
        everything was loaded.

    Raises
    ------
    UserWarning
        If a request, a download or any other step fails unexpectedly.
    """
    import time
    if postedTasks is None:
        postedTasks = help #for testing
    postedTasks = pd.DataFrame(postedTasks)
    ##statuses after which a task will never become 'done'
    ##NOTE(review): confirm the complete list of terminal states against the AppEEARS API documentation
    failedStates = ('error',)
    taskInfo = []
    fileList = []
    cnx = connectDB()
    try:
        for posted in postedTasks.itertuples():
            statusUrl = f'{earthdata_baseUrl}task/{posted.task_id}'
            ##one request per poll; stop on success and on terminal failure, so a
            ##failed task cannot keep the loop spinning forever
            while True:
                taskStatus = requests.get(statusUrl,headers=earthdata_head).json()
                status = taskStatus['status']
                if status == 'done' or status in failedStates:
                    break
                time.sleep(poll_interval)
            if status != 'done':
                fileList.append({posted.task_id: f'task ended with status {status}'})
                continue
            taskInfo.append(taskStatus)
        taskBatch = pd.DataFrame(data=taskInfo,columns=['task_id','task_name','status','completed'])

        for finished in taskBatch.itertuples():
            bundleUrl = f'{earthdata_baseUrl}bundle/{finished.task_id}'
            earthdata_bundle = requests.get(bundleUrl,headers=earthdata_head).json()
            ##filter down to .csv files
            for bundleFile in earthdata_bundle['files']:
                ##exact match: `in 'csv'` is a substring test that also accepts '', 'c', 'sv', ...
                if bundleFile['file_type'] != 'csv':
                    continue
                time.sleep(0.3)
                fileID = bundleFile['file_id']
                fileName = bundleFile['file_name']
                ##To read results to a dataframe, pass in earthdata_baseUrl + 'bundle/ + taskID/ + file_id/ + file_name
                fileUrl = f'{bundleUrl}/{fileID}/{fileName}'
                ##NOTE(review): this download is unauthenticated; confirm the service still allows that
                getIt = pd.read_csv(fileUrl)
                getIt['locId'] = finished.task_name
                try:
                    getIt.to_sql(name=f'{fileName}_MODIS_cooking',con=cnx,if_exists='append')
                ##TODO #53 test the exception
                ##pandas raises its own DatabaseError for statements it executes itself, so catch both kinds
                except (db.DatabaseError, pd.io.sql.DatabaseError) as resultError:
                    fileList.append({resultError: resultError.__cause__,finished.task_name: fileUrl})
    except Exception as ex:
        raise UserWarning(ex) from ex
    finally:
        ##close the connection on failure as well as on success
        cnx.close()
    return fileList

In [56]:
##run the download/load step; the returned list collects the problems recorded while loading results (empty when none were recorded)
tg = get_earthdataTask()
tg

  sql.to_sql(


[]

In [50]:
##one-off manual load of a single statistics file for hotspot L10129054
lxx = pd.read_csv('https://lpdaacsvc.cr.usgs.gov/appeears/api/bundle/ac740f79-fc5f-42a9-9c43-709caffe4f47/9514c98f-107f-4be1-8d8b-7e3c6905c77a/MCD12Q1-006-LC-Prop1-Statistics.csv')
lxx['locId'] = 'L10129054'
##keep a handle on the connection so it can be closed; passing connectDB() inline leaves it open
lxx_cnx = connectDB()
try:
    lxx.to_sql(name='MCD12Q1-006-LC-Prop1-Statistics.csv_MODIS_cooking',con=lxx_cnx,if_exists='append')
finally:
    lxx_cnx.close()