# Downloading GPM-IMERG Data

This notebook was created to download IMERG data using the data end points from NASA’s Earth Science Data Systems.

---

 - Author:          
                    Luis F Patino Velasquez - MA
 - Date:            
                    Jun 2020
 - Version:         
                    1.0
 - Notes:            
                    Files downloaded are in netCDF format,
                    This code is based on the code developed by Peter Smith, last modified by Srikanth Davu
                    https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
 - Jupyter version: 
                    jupyter core     : 4.7.1
                    jupyter-notebook : 6.4.0
                    qtconsole        : 5.1.1
                    ipython          : 7.25.0
                    ipykernel        : 6.0.3
                    jupyter client   : 6.1.12
                    jupyter lab      : 3.0.16
                    nbconvert        : 6.1.0
                    ipywidgets       : 7.6.3
                    nbformat         : 5.1.3
                    traitlets        : 5.0.5
 - Python version:  
                    3.8.5 

---

**Importing Modules**

In [None]:
from http.cookiejar import CookieJar
from urllib.parse import urlencode
from pathlib import Path
import urllib.request as urllib2
import requests
import datetime
import xarray as xr
import pydap

**Adding data directory**

In [None]:
# Local directory where the downloaded Earthdata files are saved; the download
# and check cells build their file paths from it.
# NOTE(review): absolute, machine-specific path — presumably it must already
# exist before the download cells are run; confirm and adjust per machine.
FILEPATH = Path('/mnt/d/MRes_dataset/search_data/gpm_imerg_nasa_uk/') # path for Earthdata data

**Functions**

In [2]:
def data_download(data_url, file_2save, timeout=60):
    ''' download netCDF file from end point using the data URL

        Failures are reported with a printed message instead of an exception,
        so a loop over many URLs keeps going when a single request fails.

        :data_url: url string
        :file_2save: string (or path object) including path and file name with extension
        :timeout: seconds passed to requests.get() as its timeout
        :return: True when the file was written, False otherwise
    '''
    # The request itself can fail (DNS, connection reset, timeout); report it
    # rather than letting one bad request abort the whole run.
    try:
        result = requests.get(data_url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        print('requests.get() failed for {}: {}'.format(data_url, err))
        return False

    # HTTP-level failure (4xx / 5xx)
    try:
        result.raise_for_status()
    except requests.exceptions.HTTPError:
        print('requests.get() returned an error code: {}'.format(str(result.status_code)))
        return False

    # Write the payload; the context manager closes the file even if the
    # write fails, and the error is reported for what it is.
    try:
        with open(file_2save, 'wb') as f:
            f.write(result.content)
    except OSError as err:
        print('could not write {}: {}'.format(file_2save, err))
        return False

    print('contents of URL written to: {} '.format(file_2save))
    return True

**Set Earthdata information**

In [None]:
# Earthdata Login credentials (left blank here; fill in before running)
username = ""
password = ""

# Password manager that answers the 401 challenge from the Earthdata login host
password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)

# Cookie jar that keeps the session cookies, so the user is not
# re-authenticated on every data request
cookie_jar = CookieJar()

# Handlers for the global urllib opener.
# To see details of the requests/responses, add
# urllib2.HTTPHandler(debuglevel=1) and urllib2.HTTPSHandler(debuglevel=1) to this list.
handlers = [
    urllib2.HTTPBasicAuthHandler(password_manager),
    urllib2.HTTPCookieProcessor(cookie_jar),
]
opener = urllib2.build_opener(*handlers)

# NOTE(review): install_opener() only affects urllib.request.urlopen();
# data_download() calls requests.get(), which does not go through this opener —
# confirm how those requests are authenticated.
urllib2.install_opener(opener)


# Working with Earthdata data
**Create list with dates**

In [None]:
# Create dates list ready to pass in URL

# One entry per day, from 1 January of `year` up to and including 31 December 2019
dates_lst = []
year = 2001

# create date objects
begin_year = datetime.date(year, 1, 1)
end_year = datetime.date(2019, 12, 31)
one_day = datetime.timedelta(days=1)

# A single pass over the range is enough: the loop runs until end_year is
# reached, so no outer per-day loop is needed.
next_day = begin_year
while next_day <= end_year:
    if next_day > datetime.date(2000, 5, 31): # gpm imerg data only start in 2000/06
        dates_lst.append(next_day)
    # increment date object by one day
    next_day += one_day

**Working with the date list to request the data**


In [None]:
# Request one subset file per date in dates_lst and save it under FILEPATH

# Fixed pieces of the request URL: base address, file-name prefix,
# file-name suffix and the query string passed to the server
url_p1 = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/GPM_L3/GPM_3IMERGDF.06/'
url_p2 = '3B-DAY.MS.MRG.3IMERG.'
url_p3 = '-S000000-E235959.V06.nc4.nc4'
url_p4 = '?precipitationCal[0:0][1709:1819][1389:1509],time,lon[1709:1819],lat[1389:1509]'

for date in dates_lst:
    # strftime gives the zero-padded year / month / full date stamp
    day_stamp = date.strftime('%Y%m%d')
    FILENAME = ''.join([url_p2, day_stamp, url_p3])

    # <base>/<year>/<month>/<file name><query>
    url = '{}{}/{}/{}{}'.format(url_p1, date.strftime('%Y'), date.strftime('%m'), FILENAME, url_p4)
    print(url)

    # Local target for this date's file
    SAVEFILE = FILEPATH / FILENAME

    # Run request using function
    data_download(url, SAVEFILE)

# Closing summary
for line in ('--------------',
             '--------------',
             'Check files here: {}'.format(FILEPATH ),
             '--------------',
             '--------------',
             'This is the URL to check'):
    print(line)

## Data download check

- The code below only needs to be run after the data download has finished
- It checks the downloaded files against the files that are expected for the requested dates
- If a file has not been downloaded, the code will try to download it again


In [None]:
# Names of the files that are expected: one per date in dates_lst
part1 = '3B-DAY.MS.MRG.3IMERG.'
part2 = '-S000000-E235959.V06.nc4.nc4'
name_lst = [part1 + f"{date:%Y%m%d}" + part2 for date in dates_lst]

# Names of the .nc4 files found under FILEPATH (searched recursively).
# pf.name is the final path component, so this works whatever the depth of
# FILEPATH and for files in sub-folders; a fixed index into pf.parts is only
# right for one specific directory depth.
file_lst = [pf.name for pf in FILEPATH.glob('**/*.nc4')]

# Compare the two lists and get missing values
# the main list is name_lst as it contains names of expected files
missing = sorted(set(name_lst) - set(file_lst))
print ('There are {} missing files in the folder \n'.format(len(missing)))

# Retry the download of every file listed in `missing`.
# File names look like: 3B-DAY.MS.MRG.3IMERG.20000604-S000000-E235959.V06.nc4.nc4
if len(missing) > 0:
    print ('Trying to download the files again')

    url_p1 = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/GPM_L3/GPM_3IMERGDF.06/'
    # NOTE(review): this query also requests time_bnds, which the initial
    # download cell does not — confirm that the difference is intended.
    url_p3 = '?precipitationCal[0:0][1709:1819][1389:1509],time_bnds[0:0][0:1],time,lon[1709:1819],lat[1389:1509]'

    for file_name in missing:
        # The fifth dot-separated field starts with the date as YYYYMMDD
        date_stamp = file_name.split('.')[4]
        file_year = date_stamp[:4]
        file_month = date_stamp[4:6]

        # build URL
        url = url_p1 + file_year + '/' + file_month + '/' + file_name + url_p3
        print(url)

        # Create file to save data in local drive
        FILENAME = file_name
        SAVEFILE = FILEPATH / FILENAME

        # Run request using the download function defined in this notebook
        data_download(url, SAVEFILE)

    print ('--------------')
    print ('--------------')
    print ('Check files here: {}'.format(FILEPATH ))
    print ('--------------')
    print ('--------------')
else:
    print ('--------------')
    print ('--------------')
    print ('All the files have been downloaded!')
    print ('--------------')
    print ('--------------')
    
