# Download MODIS MCD12Q1_V6
This script is based on the example provided at: https://git.earthdata.nasa.gov/projects/LPDUR/repos/daac_data_download_python/browse

Requires a `.netrc` file in user's home directory with login credentials for `urs.earthdata.nasa.gov`. See: https://lpdaac.usgs.gov/resources/e-learning/how-access-lp-daac-data-command-line/

In [1]:
# Import Modules
import os,sys,glob
import time
import shutil
import requests
from netrc import netrc
from pathlib import Path
from shutil import copyfile
from concurrent.futures import ThreadPoolExecutor
import subprocess
import ipynbname
thisFile = ipynbname.name()+'.ipynb'
#Import local modules
from workflow_utility_functions import read_from_control,make_default_path, create_log_file


#### Control file handling

In [2]:
# Folder that holds the workflow control files.
# The path is relative, so it is resolved against the current working directory.
controlFolder = Path('../../../0_control_files')

In [3]:
# Name of the 'active' control file inside controlFolder; all settings below are read from it
controlFile = 'control_active.txt'

In [4]:
def request_get(file_url,output_file, usr, pwd):
    '''Download `file_url` to `output_file` using HTTP basic authentication.

    The download is best effort: any failure is logged as a warning and the
    function returns normally, so that the caller can decide whether to retry.
    A failed or interrupted download never leaves a file at `output_file`.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    output_file : str or path-like
        Destination of the downloaded file.
    usr, pwd : str
        User name and password sent with the request.

    Returns
    -------
    None
    '''

    # Stream into a temporary sibling file first. A partial download must not
    # end up under the final name, where it would look like a finished file.
    partial_file = f'{output_file}.part'

    try:
        with requests.get(file_url, verify=True, stream=True, auth=(usr, pwd)) as res:

            # Turn HTTP errors (401, 404, 5xx, ...) into exceptions so that an
            # error page is never stored as if it were data
            res.raise_for_status()

            # Ask the raw stream to undo any content encoding while reading
            res.raw.decode_content = True

            # Write to file
            with open(partial_file, 'wb') as data:
                shutil.copyfileobj(res.raw, data)

        # Only expose the file under its final name once it is complete
        os.replace(partial_file, output_file)

    except Exception as err:
        # Deliberately broad: this runs inside a worker thread and reading the
        # raw stream can raise errors that are not requests exceptions.
        logger.warning(f'File {file_url} was not downloaded correctly: {err}')

        # Remove whatever was written before the failure
        if os.path.exists(partial_file):
            os.remove(partial_file)

    return None

In [5]:
def run_modis_download(file_list,usr,pwd,modis_path):
    '''Download every file in `file_list` that is not yet in `modis_path`, using threads.

    Parameters
    ----------
    file_list : iterable of str
        Download URLs, one per entry. Surrounding whitespace (such as a
        trailing newline) is ignored and blank entries are skipped.
    usr, pwd : str
        Credentials passed on to `request_get`.
    modis_path : pathlib.Path
        Folder the files are written to. The file name is the last
        component of the URL.

    Returns
    -------
    None
    '''

    # Maps each submitted download to its URL so failures can be reported
    futures = {}

    with ThreadPoolExecutor() as executor:
        for file_url_raw in file_list:

            file_url = file_url_raw.strip()
            if not file_url:
                continue  # blank line in the links file

            # The last part of the url is the file name
            file_name = file_url.split('/')[-1]
            if not file_name:
                # A url ending in '/' has no file name to save under
                logger.warning(f'Cannot derive a file name from: {file_url}, skipping download')
                continue

            # Check if file already exists and move to next file if so
            output_file = modis_path / file_name
            if output_file.is_file():
                logger.debug(f'File {file_name} exists, skipping download')
                continue

            # Submit the download request
            logger.info(f'Downloading file: {file_name} from: {file_url}')
            future = executor.submit(request_get, file_url, str(output_file), usr, pwd)
            futures[future] = file_url

    # Leaving the `with` block waits for all downloads to finish. Report any
    # exception that escaped a worker, which would otherwise be lost silently.
    for future, file_url in futures.items():
        err = future.exception()
        if err is not None:
            logger.warning(f'Download of {file_url} raised an error: {err!r}')

    return None

In [6]:
def download_check(file_list, modis_path,retries_cur):
    '''Check that every file named in `file_list` is present in `modis_path`.

    Files are matched by name (the last component of each URL) rather than
    by counting files in the folder, so blank or duplicate entries in the
    list and unrelated files in the folder do not affect the result.
    `file_list` itself is not modified.

    Parameters
    ----------
    file_list : iterable of str
        Download URLs, one per entry.
    modis_path : str or path-like
        Folder the files are expected in.
    retries_cur : int
        Current attempt counter.

    Returns
    -------
    download_complete_bool : bool
        True when no expected file is missing.
    retries_cur : int
        The counter passed in, increased by one if files are missing.
    '''

    # Expected file names; blank lines and urls ending in '/' yield '' and are dropped
    expected_names = {link.strip().split('/')[-1] for link in file_list}
    expected_names.discard('')

    missing = sorted(
        name for name in expected_names
        if not os.path.isfile(os.path.join(modis_path, name))
    )

    if not missing:
        logger.info('All required files have been downloaded')
        download_complete_bool = True
    else:
        logger.warning('Required files were not downloaded, another attempt will be made')
        logger.debug(f'Missing files: {missing}')
        download_complete_bool = False
        retries_cur += 1

    return download_complete_bool,retries_cur

#### Get the download settings

In [7]:
# Folder of the text file that lists the download links, as set in the control file
links_path = read_from_control(
    controlFolder/controlFile,
    'parameter_land_list_path',
)

# Name of that text file, as set in the control file
links_file = read_from_control(
    controlFolder/controlFile,
    'parameter_land_list_name',
)

In [8]:
# 'default' means the current folder; anything else is a user-specified location.
# Either way the result is a Path().
links_path = Path('./') if links_path == 'default' else Path(links_path)

In [9]:
# Read from the control file where the raw MODIS data needs to go
# (may be the placeholder 'default', which is resolved in the next step)
modis_path = read_from_control(controlFolder/controlFile,'parameter_land_raw_path')

In [10]:
# Resolve the download folder: build the default location when requested,
# otherwise make sure the user-specified path is a Path()
modis_path = (
    make_default_path('parameters/landclass/1_MODIS_raw_data',controlFolder,controlFile)
    if modis_path == 'default'
    else Path(modis_path)
)

In [11]:
# Make the output dir, including any missing parent folders; no error if it already exists
modis_path.mkdir(parents=True, exist_ok=True)

In [12]:
# Name of the log sub-folder and the suffix used in the log file name
logFolder = '_workflow_log'
log_suffix = '_modis_download_'

# Logs are kept next to the downloaded data
logPath = modis_path

# Make sure the log folder exists
(logPath / logFolder).mkdir(parents=True, exist_ok=True)

# Create the logger that the rest of this notebook writes to
logger = create_log_file(logPath / logFolder,thisFile,suffix=log_suffix)

2021-05-10 10:21:25,241 - INFO - Log file /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/20210510_modis_download__log.txt generated by download_modis_mcd12q1_v6.ipynb on 2021/05/10 10:21:25


#### Get the authentication info


In [13]:
# Host name of the authentication service; used to look up the matching entry in the .netrc file
url = 'urs.earthdata.nasa.gov'

In [14]:
# Full path to the .netrc file in the user's home directory.
# Despite the variable name this is a file, not a folder, and nothing is created here.
netrc_folder = os.path.expanduser("~/.netrc")

In [15]:
# Get user name and password - not great, but these are stored as plain text on the user's machine regardless..
# The file is parsed once; authenticators() returns None when it holds no matching entry.
credentials = netrc(netrc_folder).authenticators(url)
if credentials is None:
    raise RuntimeError(f'No login credentials for {url} found in {netrc_folder}')

# The entry is a (login, account, password) tuple
usr = credentials[0]
pwd = credentials[2]

#### Do the downloads

In [16]:
# Get the download links from file, looking in the folder given by the control file.
# Blank lines are dropped so that they are not mistaken for files to download.
with open(links_path / links_file, 'r') as links:
    file_list = [line for line in links if line.strip()]

In [17]:
logger.info('Downloading MODIS MCD12Q1_V6 data with global coverage.')

# Snapshot of the .hdf files already present in the download folder, in sorted order
# (download_check performs its own scan of the folder when it is called)
check_folder = str(modis_path) + "/*.hdf"
file_list_check = sorted(glob.glob(check_folder))

# Put the download links in sorted order as well, in place
file_list[:] = sorted(file_list)

# State of the download loop: attempts are counted from 1
retries_max = 10
retries_cur = 1
download_complete_bool = False

2021-05-10 10:21:25,262 - INFO - Downloading MODIS MCD12Q1_V6 data with global coverage.


In [18]:
"""This is the main download loop"""
while not download_complete_bool:

    #Run download given complete list
    run_modis_download(file_list, usr, pwd, modis_path)

    #Check if all files are there; on failure this also moves the counter on to the next attempt
    download_complete_bool,retries_cur = download_check(file_list, modis_path,retries_cur)

    #Break when all files are downloaded
    if download_complete_bool:
        break

    #If there are too many retries, then break.
    #The counter now holds the number of the NEXT attempt, so only stop once it exceeds the maximum
    if retries_cur > retries_max:
        logger.error(f'Maximum number of tries ({retries_max}) has been reached, aborting')
        break

2021-05-10 10:21:25,681 - INFO - All required files have been downloaded


#### Code provenance
Generates a basic log file in the domain folder and copies the control file and itself there.

In [20]:
# Copy the control file and this notebook into the log folder for provenance.
# Both files are copied, not moved, so the originals stay in place.

# Copy the control file
copyfile(controlFolder / controlFile, logPath / logFolder / controlFile)
logger.info(f'File: ({controlFolder / controlFile}) has been copied to {logPath / logFolder / controlFile}')

# Copy this notebook; thisFile is a bare file name, so this relies on the
# working directory being the folder that contains the notebook
copyfile(thisFile, logPath / logFolder / thisFile)
logger.info(f'File: ({thisFile}) has been copied to {logPath / logFolder / thisFile}')


2021-05-10 10:24:36,701 - INFO - File: (../../../0_control_files/control_active.txt) has been moved to /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/control_active.txt
2021-05-10 10:24:36,703 - INFO - File: (download_modis_mcd12q1_v6.ipynb) has been moved to /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/download_modis_mcd12q1_v6.ipynb
