In [1]:
import base64
from datetime import datetime
from datetime import timedelta
import getpass
import json
import os
import requests
from requests.auth import HTTPBasicAuth
import shutil

# Programmatic Access to NASA Data via Earthdata Login Tokens

This notebook details how to create and use EDL tokens to request data from NASA.

There are a few ways to handle access from a user perspective.

1. Users could have their own EDL username and password, and use those credentials to generate tokens or use them directly in requests for data.
2. The system stores a system username and password, such as username `esa_maap_user` along with an associated password. These credentials are kept in a secret store that only the algorithm development and async worker systems have access to, for example the system that launches new Jupyter Lab instances (such as a Kubernetes cluster). The credentials are used to fetch or generate a token, which is then set as an environment variable in Jupyter Lab or worker instances for making requests for data.

We are assuming option 2 is more desirable for the ESA MAAP system, so that ESA MAAP users and workers don't have to create a new username and log in with Earthdata Login in addition to their ESA MAAP login. The approach detailed below generates fresh-enough tokens for users as the system launches their environments.

# Creating new tokens

Given an active Jupyter Lab system or worker with a lifetime of `active_session_time` (`active_session_days` in the code below), we want to ensure that the token deployed to an active session remains valid for the whole session. EDL tokens are valid for 90 days and there can be at most 2 active tokens at a time (see https://urs.earthdata.nasa.gov/documentation/for_users/user_token). When a system (development environment or worker) boots up:

1. If there are no active tokens, generate a token.
2. If there are active tokens and one of those active tokens has an expiration that is greater than the current datetime + active session time, that token can be used since it won't expire in the lifetime of the active session.
3. If there is only 1 active token but it will expire within the time of an active session, generate a new token. This token should be used by any new development environments or workers that boot up afterwards (until that token is also about to expire).

# Considerations:

* The below are taken from https://urs.earthdata.nasa.gov/users/aimeeb/user_tokens
    * You can generate a bearer token for federated token access sharing. Not all EDL applications support Federated token access sharing.
    * You may have up to 2 active tokens at a time.
    * The token can then be passed into an application by using an Authorization: Bearer header
    * The token will only authorize for applications that are EDL compliant and do not have unapproved EULAs
* So, not all DAACs support token-based access. For example, this access was tested and found to work for GEDI02_A.002 but not for SENTINEL-1A_SLC.

# Code to generate a token when the user logs in

The following steps should happen server-side when a new Jupyter Lab session is starting.

The credentials would normally be stored in a secret store; prompting for them here is just for demonstration.

In [2]:
# Demonstration only: a real deployment would read these credentials from the
# secret store instead of prompting for them interactively.
username = input('EDL Username')
# getpass prompts for the password without echoing what is typed.
password = getpass.getpass('EDL Password')

EDL Username aimeeb
EDL Password ··················


In [3]:
active_session_days = 1 # for example

def find_or_get_token():
    """Return an EDL bearer token that outlives the current session.

    Lists the tokens that already exist for the module-level ``username`` /
    ``password`` and returns the first one whose expiration date is more than
    ``active_session_days`` days away. When no existing token qualifies, a new
    token is requested and returned instead.

    Returns:
        str: the ``access_token`` value of the reused or newly created token.

    Raises:
        RuntimeError: if listing the tokens or creating a token does not
            answer with HTTP 200. Raising (rather than returning the error
            text) keeps an error message from being exported and later sent
            as if it were a bearer token.
    """
    urs_endpoint = 'https://urs.earthdata.nasa.gov/api/users/token'
    auth = HTTPBasicAuth(username, password)
    # Without a timeout `requests` waits indefinitely, which would stall the
    # launch of a new environment if the EDL service stops responding.
    request_timeout_seconds = 30

    # first try and retrieve an active token (the listing endpoint is ".../tokens")
    token_response = requests.get(f"{urs_endpoint}s", auth=auth, timeout=request_timeout_seconds)
    if token_response.status_code != 200:
        error_message = "Attempting to get a existing tokens returned an error: {status_code} - {content}"
        raise RuntimeError(
            error_message.format(status_code=token_response.status_code, content=token_response.text)
        )

    # A token is usable only if it expires after now + active session time,
    # so it cannot expire during the lifetime of an active session.
    must_outlive = datetime.now() + timedelta(days=active_session_days)
    for token in json.loads(token_response.content):
        # expiration_date is parsed as month/day/year, i.e. midnight of that day
        expiration_datetime = datetime.strptime(token['expiration_date'], '%m/%d/%Y')
        if expiration_datetime > must_outlive:
            return token['access_token']

    # no active tokens were found or active tokens will soon expire, generate a new token
    # NOTE(review): EDL allows 2 active tokens; if both exist and both are about
    # to expire this request presumably fails and now surfaces as RuntimeError.
    token_response = requests.post(urs_endpoint, auth=auth, timeout=request_timeout_seconds)
    if token_response.status_code != 200:
        error_message = "Attempting to create a new token returned an error: {status_code} - {content}"
        raise RuntimeError(
            error_message.format(status_code=token_response.status_code, content=token_response.text)
        )
    return json.loads(token_response.content)['access_token']

# Set the environment for new Jupyter Lab sessions

The following `TOKEN` variable should be set in the user's environment.

In [4]:
# Publish the token through the process environment; get_data() reads it back
# from os.environ['TOKEN'] when building the Authorization header.
os.environ["TOKEN"] = find_or_get_token()

# Provide a function for accessing data using the token

For the NASA MAAP, this type of function is part of the `maap-py` library.

In [5]:
# Simplified version of https://github.com/MAAP-Project/maap-py/blob/master/maap/Result.py#L89-L111
def get_data(url, destfile, destpath = '.'):
    """Download ``url`` into ``destpath/destfile`` using the EDL bearer token.

    The token is read from the ``TOKEN`` environment variable and sent as an
    ``Authorization: Bearer`` header. The body is streamed to disk rather than
    loaded into memory.

    Args:
        url: address of the file to download.
        destfile: name of the file to write.
        destpath: directory to write into (defaults to the current directory).

    Returns:
        str: the path of the written file, ``destpath + '/' + destfile``.

    Raises:
        KeyError: if the ``TOKEN`` environment variable is not set.
        ValueError: if the server does not answer with HTTP 200.
    """
    api_header = {
        'Authorization': f"Bearer {os.environ['TOKEN']}"
    }
    destination = destpath + '/' + destfile

    # With stream=True the connection stays checked out until the response is
    # closed; the context manager releases it on success and on the error path.
    with requests.get(
        url=url,
        headers=api_header,
        stream=True
    ) as r:
        if r.status_code != 200:
            raise ValueError('Bad search response for url {}: {}'.format(url, r.text))
        print(r.status_code)

        # Ask the raw stream to undo any content-encoding (e.g. gzip) so the
        # file on disk holds the decoded bytes.
        r.raw.decode_content = True

        with open(destination, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    return destination

In [None]:
# Example granule from the GEDI02_A.002 collection.
url = 'https://e4ftl01.cr.usgs.gov//GEDI_L1_L2/GEDI/GEDI02_A.002/2022.08.31/GEDI02_A_2022243234134_O21062_01_T10906_02_003_02_V002.h5'
# The local file name is the final path segment of the URL.
destfile = url.rsplit('/', 1)[-1]
get_data(url, destfile)

200
