# This notebook was adapted from the [instructions](https://disc.gsfc.nasa.gov/information/howto?keywords=MERRA&title=How%20to%20Use%20the%20Web%20Services%20API%20for%20Subsetting%20MERRA-2%20Data) found on the NASA Earthdata website

In [None]:
import getpass
import json
import os
import sys
import urllib.error
import urllib.request
from http.cookiejar import CookieJar
from time import sleep
from urllib.parse import urlencode

import certifi
import numpy as np
import requests
import urllib3
import xarray as xr

    ##helpers

def get_http_data(request):
    """POST a JSON-WSP request to the GES DISC subset service and decode the reply.

    Args:
        request: JSON-serialisable request body (a dict with ``methodname``,
            ``type``, ``version`` and ``args`` in the callers in this file).

    Returns:
        The decoded JSON response.

    Exits:
        Prints an error and calls ``sys.exit(1)`` when the response ``type``
        is ``'jsonwsp/fault'``.
    """
    endpoint = 'https://disc.gsfc.nasa.gov/service/subset/jsonwsp'
    headers = {'Content-Type': 'application/json',
               'Accept': 'application/json'}

    # Verify the server certificate against the certifi CA bundle.
    pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
    payload = json.dumps(request)
    raw_reply = pool.request('POST', endpoint, body=payload, headers=headers)
    reply = json.loads(raw_reply.data)

    # Happy path first: anything that is not a fault is handed back as-is.
    if reply['type'] != 'jsonwsp/fault':
        return reply

    print('API Error: faulty %s request' % reply['methodname'])
    sys.exit(1)

def agg_long_lat(xarr_wide):
    """Collapse the ``lat`` and ``lon`` dimensions into a latitude-weighted mean.

    Args:
        xarr_wide: Object exposing a ``lat`` coordinate (treated as degrees)
            and a ``weighted()`` method, e.g. an xarray Dataset/DataArray.

    Returns:
        The result of the weighted mean over ``lat`` and ``lon``; any other
        dimensions are left untouched.
    """
    # Weight each row by cos(latitude); presumably this compensates for
    # grid cells covering less area toward the poles.
    weights = np.cos(np.deg2rad(xarr_wide.lat))
    return xarr_wide.weighted(weights).mean(dim=["lat", "lon"])

def open_ncfile(filename_arr):
    """Open a NetCDF file and add a derived ``pm25`` variable.

    ``pm25`` is the sum of the dust, sea-salt, black-carbon and
    organic-carbon surface mass fields plus 1.375 times the sulfate field,
    with the whole sum multiplied by the scale factor.

    Args:
        filename_arr: Path (or anything ``xr.open_dataset`` accepts) of a file
            containing ``DUSMASS25``, ``SSSMASS25``, ``BCSMASS``, ``OCSMASS``
            and ``SO4SMASS``.

    Returns:
        The opened dataset with the extra ``pm25`` variable assigned.
    """
    aq_xarr = xr.open_dataset(filename_arr)

    # NOTE(review): if the inputs are in kg m-3 (as MERRA-2 documents them),
    # 1e6 yields mg m-3 and ug m-3 would need 1e9 -- confirm intended unit.
    scale = 1000000

    # The scale must apply to the whole sum; previously operator precedence
    # applied it to the sulfate term only, mixing scaled and unscaled species.
    pm25 = (
        aq_xarr['DUSMASS25']
        + aq_xarr['SSSMASS25']
        + aq_xarr['BCSMASS']
        + aq_xarr['OCSMASS']
        + 1.375 * aq_xarr['SO4SMASS']
    ) * scale
    return aq_xarr.assign(pm25=pm25)


In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

def _build_subset_request(start_date, end_date, box, product, var_names):
    """Build the JSON-WSP ``subset`` request body for the given window, box and variables."""
    return {
        'methodname': 'subset',
        'type': 'jsonwsp/request',
        'version': '1.0',
        'args': {
            'role': 'subset',
            'start': start_date,
            'end': end_date,
            'box': box,
            'crop': True,
            # The optional 'mapping' / 'grid' regridding arguments are
            # intentionally not sent.
            'data': [{'datasetId': product, 'variable': name}
                     for name in var_names],
        },
    }


def _wait_for_job(response, job_id, poll_seconds=5):
    """Poll ``GetStatus`` until the job is no longer Accepted/Running; return the last reply."""
    status_request = {
        'methodname': 'GetStatus',
        'version': '1.0',
        'type': 'jsonwsp/request',
        'args': {'jobId': job_id},
    }
    while response['result']['Status'] in ['Accepted', 'Running']:
        sleep(poll_seconds)
        response = get_http_data(status_request)
        status = response['result']['Status']
        percent = response['result']['PercentCompleted']
        print('Job status: %s (%d%c complete)' % (status, percent, '%'))
    return response


def _fetch_all_results(job_id, batchsize=20):
    """Page through ``GetResult`` and return ``(items, total_expected)``."""
    results_request = {
        'methodname': 'GetResult',
        'version': '1.0',
        'type': 'jsonwsp/request',
        'args': {
            'jobId': job_id,
            'count': batchsize,
            'startIndex': 0,
        },
    }
    response = get_http_data(results_request)
    total = response['result']['totalResults']
    results = list(response['result']['items'])
    count = response['result']['itemsPerPage']

    # Keep advancing startIndex until the reported page sizes add up to the total.
    while count < total:
        results_request['args']['startIndex'] += batchsize
        response = get_http_data(results_request)
        page_size = response['result']['itemsPerPage']
        results.extend(response['result']['items'])
        if page_size <= 0:
            # A page that reports no items would otherwise loop forever.
            break
        count += page_size
    return results, total


def _download_files(items, username, pw, out_dir="./data/"):
    """Download each item's ``link`` into *out_dir* and return the saved paths."""
    # Password manager answers the 401 challenge returned by Earthdata Login.
    password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, pw)

    # Cookie jar stores and returns the session cookie handed out by the data server.
    cookie_jar = CookieJar()

    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(password_manager),
        urllib.request.HTTPCookieProcessor(cookie_jar),
    )
    urllib.request.install_opener(opener)

    # Make sure the target directory exists before the first write.
    os.makedirs(out_dir, exist_ok=True)

    saved_files = []
    for item in items:
        file_name = out_dir + item['label']
        try:
            with urllib.request.urlopen(urllib.request.Request(item['link'])) as data_response:
                data_body = data_response.read()
        except urllib.error.URLError as e:
            # Best effort: report the failed download and move on to the next file.
            print(e)
            continue

        with open(file_name, 'wb') as file_:
            file_.write(data_body)
        print(file_name, "is downloaded")
        saved_files.append(file_name)
    return saved_files


def get_pm25_data(username, pw, start_date, end_date, minlong, maxlong, minlat, maxlat, product = 'M2TUNXAER_5.12.4'):
    """Request a GES DISC subset of the PM2.5 component variables and download it.

    Submits a ``subset`` job for ``OCSMASS``, ``BCSMASS``, ``SO4SMASS``,
    ``DUSMASS25`` and ``SSSMASS25``, polls until it finishes, collects the
    result list, prints documentation links and downloads every data file
    into ``./data/``.

    Args:
        username: Earthdata Login user name.
        pw: Earthdata Login password.
        start_date: Start of the window, sent to the service unchanged.
        end_date: End of the window as ``"%Y-%m-%d"``; treated as exclusive
            (one day is subtracted before the request is sent).
        minlong, maxlong, minlat, maxlat: Bounding box, sent as
            ``[minlong, minlat, maxlong, maxlat]``.
        product: Dataset id to subset.

    Returns:
        List of paths of the files that were written.

    Exits:
        Calls ``sys.exit(1)`` when the job does not end in ``Succeeded``
        (and, via ``get_http_data``, on an API fault).
    """
    varNames = ['OCSMASS', 'BCSMASS', 'SO4SMASS', 'DUSMASS25', 'SSSMASS25']
    end_date = (datetime.strptime(end_date, "%Y-%m-%d")-relativedelta(days=1)).strftime("%Y-%m-%d")

    subset_request = _build_subset_request(
        start_date, end_date, [minlong, minlat, maxlong, maxlat], product, varNames)

    # Submit the subset request and report the job id and initial status.
    response = get_http_data(subset_request)
    myJobId = response['result']['jobId']
    print('Job ID: '+myJobId)
    print('Job status: '+response['result']['Status'])

    response = _wait_for_job(response, myJobId)
    result = response['result']
    if result['Status'] == 'Succeeded':
        print('Job Finished:  %s' % result['message'])
    else:
        # A non-fault reply may carry no 'fault' entry; fall back to the
        # result message or the raw status rather than raising KeyError.
        fault = response.get('fault') or {}
        detail = fault.get('code') or result.get('message') or result['Status']
        print('Job Failed: %s' % detail)
        sys.exit(1)

    results, total = _fetch_all_results(myJobId)

    # Check on the bookkeeping
    print('Retrieved %d out of %d expected items' % (len(results), total))

    # Items carrying 'start' and 'end' are data files; items lacking those
    # keys are documentation.
    docs = []
    urls = []
    for item in results:
        try:
            is_data = item['start'] and item['end']
        except KeyError:
            docs.append(item)
        else:
            if is_data:
                urls.append(item)

    # Print out the documentation links, but do not download them
    print('\nDocumentation:')
    for item in docs:
        print(item['label']+': '+item['link'])

    print('\nHTTP_services output:')
    saved_files = _download_files(urls, username, pw)

    print('Downloading is done and find the downloaded files in your current working directory')

    return saved_files