# Download MODIS MCD12Q1_V6
Script based on example provided on: https://git.earthdata.nasa.gov/projects/LPDUR/repos/daac_data_download_python/browse

Requires a `.netrc` file in the user's home directory containing the login credentials for `urs.earthdata.nasa.gov`. See: https://lpdaac.usgs.gov/resources/e-learning/how-access-lp-daac-data-command-line/

In [None]:
# modules
import os
import time
import shutil
import requests
from netrc import netrc
from pathlib import Path
from shutil import copyfile
from datetime import datetime
import concurrent
from concurrent.futures.thread import ThreadPoolExecutor

#### Control file handling

In [None]:
# Folder that holds the workflow control files, given relative to the directory
# this notebook is run from
controlFolder = Path('../../../0_control_files')

In [None]:
# Name of the 'active' control file inside `controlFolder`; all settings below are read from it
controlFile = 'control_active.txt'

In [None]:
# Function to extract a given setting from the control file
def read_from_control( file, setting ):
    """Return the value of `setting` from a control file.

    The file is scanned top to bottom and the first line that contains
    `setting` and does not start with '#' is used. Such a line is expected
    to look like ``<name> | <value>  # optional comment``.

    Parameters
    ----------
    file : str or path-like
        Path of the control file to read.
    setting : str
        Text identifying the setting; matched as a substring of the line.

    Returns
    -------
    str
        The text between the first '|' and the first following '#',
        stripped of surrounding whitespace.

    Raises
    ------
    ValueError
        If no non-comment line contains `setting`, or if the matching line
        has no '|' separator. Without this check the last line of the file
        would be parsed instead and a wrong value returned silently.
    """

    with open(file) as contents:
        for line in contents:

            # Skip comment lines and lines that do not mention the setting
            if line.startswith('#') or setting not in line:
                continue

            # Split into name and value at the first '|'
            parts = line.split('|', 1)
            if len(parts) < 2:
                raise ValueError(
                    f"Line for setting '{setting}' in control file '{file}' "
                    f"has no '|' separator: {line.rstrip()!r}"
                )

            # Drop a trailing comment (no-op if there is no '#') and whitespace
            return parts[1].split('#', 1)[0].strip()

    # The loop finished without finding the setting
    raise ValueError(f"Setting '{setting}' not found in control file '{file}'")

In [None]:
# Function to specify a default path
def make_default_path(suffix):
    """Build the default location `<root_path>/domain_<domain_name>/<suffix>`.

    `root_path` and `domain_name` are read from the active control file
    (`controlFolder / controlFile`). Returns a `Path`.
    """

    # Location of the active control file
    control_path = controlFolder / controlFile

    # Look up the two settings that define the domain folder
    root = Path(read_from_control(control_path, 'root_path'))
    domain = read_from_control(control_path, 'domain_name')

    # Root / domain folder / requested sub-path
    return root / ('domain_' + domain) / suffix

In [None]:
#Function to request and download data
def request_get(file_url,output_file, usr, pwd):
    """Download `file_url` and write the response body to `output_file`.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    output_file : str or path-like
        Destination file; opened in binary write mode and overwritten.
    usr, pwd : str
        User name and password passed to `requests` as basic-auth credentials.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. This is checked before
        the output file is opened, so an error page is never saved as data.
    """
    # The context manager releases the streamed connection even if writing fails
    with requests.get(file_url, verify=True, stream=True, auth=(usr, pwd)) as res:

        # Fail loudly on authentication or not-found errors
        res.raise_for_status()

        # Let urllib3 undo any gzip/deflate content encoding while streaming
        res.raw.decode_content = True

        # Stream the body to disk without loading it into memory
        with open(output_file, 'wb') as data:
            shutil.copyfileobj(res.raw, data)

    return None

#### Get the download settings

In [None]:
# Read the folder and file name of the download-links list from the control file
links_path, links_file = (
    read_from_control(controlFolder / controlFile, key)
    for key in ('parameter_land_list_path', 'parameter_land_list_name')
)

In [None]:
# Use the current directory when the control file says 'default';
# either way the result is a Path()
links_path = Path('./') if links_path == 'default' else Path(links_path)

In [None]:
# Read the destination folder for the raw MODIS files from the control file
modis_path = read_from_control(
    controlFolder / controlFile,
    'parameter_land_raw_path',
)

In [None]:
# Use the standard location inside the domain folder when the control file
# says 'default'; either way the result is a Path()
modis_path = (
    make_default_path('parameters/landclass/1_MODIS_raw_data')
    if modis_path == 'default'
    else Path(modis_path)
)

In [None]:
# Make the output dir, including any missing parent folders; no error if it already exists
modis_path.mkdir(parents=True, exist_ok=True)

#### Get the authentication info


In [None]:
# Host name of the authentication server; used below to look up the matching entry in .netrc
url = 'urs.earthdata.nasa.gov'

In [None]:
# Resolve the full path of the user's .netrc file ('~' expands to the home directory).
# NOTE: despite its name, `netrc_folder` holds the path of the file itself, not a folder;
# nothing is created here.
netrc_folder = os.path.expanduser("~/.netrc")

In [None]:
# Get user name and password - not great, but these are stored as plain text on the user's machine regardless..
# Parse the .netrc file once; authenticators() returns (login, account, password),
# or None when there is neither an entry for this host nor a default entry.
credentials = netrc(netrc_folder).authenticators(url)
if credentials is None:
    raise RuntimeError(f"No login entry for '{url}' found in '{netrc_folder}'")
usr = credentials[0]
pwd = credentials[2]

#### Do the downloads

In [None]:
# Get the download links from file: one URL per line.
# The file is looked up inside `links_path` (the current directory by default),
# closed again after reading, and blank lines are dropped so they are not
# mistaken for URLs.
with open(links_path / links_file, 'r') as links:
    file_list = [line.strip() for line in links if line.strip()]

In [None]:
# Retry settings: connection can be unstable, so specify a number of retries
# NOTE(review): `retries_max` is not referenced by any code visible in this file,
# so failed downloads are currently not retried - confirm whether a retry loop was intended.
retries_max = 100 

In [None]:

# Download needed files with threading
# Each URL is submitted as a separate task; leaving the `with` block waits
# until every task has finished.
with ThreadPoolExecutor() as executor:
    futures = []
    submitted_names = []
    for file_url_raw in file_list:

        file_url = file_url_raw.strip()

        # Skip blank lines in the links file, they are not URLs
        if not file_url:
            continue

        file_name = file_url.split('/')[-1].strip()  # Get the last part of the url, i.e. the file name
        output_file = os.path.join(modis_path, file_name)
        print(f'Downloading file: {file_name} from: {file_url}')
        futures.append(executor.submit(request_get, file_url,output_file, usr, pwd))
        submitted_names.append(file_name)

# An exception raised inside a worker thread is stored on its future and is
# otherwise never shown, so inspect every future and report the failures.
failed_names = []
for file_name, future in zip(submitted_names, futures):
    error = future.exception()
    if error is not None:
        failed_names.append(file_name)
        print(f'Download failed for file: {file_name} ({error!r})')

if failed_names:
    print(f'{len(failed_names)} of {len(futures)} downloads failed.')

#### Code provenance
Generates a basic log file in a `_workflow_log` subfolder of the download folder and copies this notebook there.

In [None]:
# Set the log path and file name
# The log is kept alongside the downloaded data; the suffix is appended to a date stamp below
logPath = modis_path
log_suffix = '_modis_download_log.txt'

In [None]:
# Make sure the log sub-folder exists inside the log path
logFolder = '_workflow_log'
(logPath / logFolder).mkdir(parents=True, exist_ok=True)

In [None]:
# Copy this script into the log folder
# `thisFile` is a hard-coded file name resolved against the current working directory.
# The trailing ';' keeps the notebook from echoing copyfile's return value.
thisFile = 'download_modis_mcd12q1_v6.ipynb'
copyfile(thisFile, logPath / logFolder / thisFile);

In [None]:
# Get current date and time (local time, no timezone attached); used for the log file name and header
now = datetime.now()

In [None]:
# Write a date-stamped log file describing what was downloaded and by which script
logFile = now.strftime('%Y%m%d') + log_suffix
log_lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n',
             'Downloaded MODIS MCD12Q1_V6 data with global coverage.']
with open(logPath / logFolder / logFile, 'w') as log_handle:
    log_handle.writelines(log_lines)