# Downloading HadUK-Grid Data

This notebook was created to download HadUK-Grid data using the data end points from the CEDA Archive.

---

 - Author:          
                    Luis F Patino Velasquez - MA
 - Date:            
                    Jun 2020
 - Version:         
                    1.0
 - Notes:            
                    Files downloaded are in netCDF format,
                    This code is based on the code developed by the Centre for Environmental Data Analysis developers
                    https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
 - Jupyter version: 
                    jupyter core     : 4.7.1
                    jupyter-notebook : 6.4.0
                    qtconsole        : 5.1.1
                    ipython          : 7.25.0
                    ipykernel        : 6.0.3
                    jupyter client   : 6.1.12
                    jupyter lab      : 3.0.16
                    nbconvert        : 6.1.0
                    ipywidgets       : 7.6.3
                    nbformat         : 5.1.3
                    traitlets        : 5.0.5
 - Python version:  
                    3.8.5 

---

In [1]:
# Import standard libraries
import os
import sys
import datetime
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from pathlib import Path
from dateutil.relativedelta import relativedelta
import calendar

# Import third-party libraries
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from contrail.security.onlineca.client import OnlineCaClient

# Silence urllib3's InsecureRequestWarning messages for requests made in this
# notebook.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Directory under the user's home that holds the certificate material;
# created on first use.
CERTS_DIR = os.path.expanduser('~/.certs')
os.makedirs(CERTS_DIR, exist_ok=True)

# Locations of the CA trust roots directory and the PEM credentials file.
TRUSTROOTS_DIR = os.path.join(CERTS_DIR, 'ca-trustroots')
CREDENTIALS_FILE_PATH = os.path.join(CERTS_DIR, 'credentials.pem')

# CEDA online-CA service end points.
TRUSTROOTS_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/trustroots/'
CERT_SERVICE = 'https://slcs.ceda.ac.uk/onlineca/certificate/'

# Two-line separator printed between progress messages.
sep = '------------\n------------'


def cert_is_valid(cert_file, min_lifetime=0):
    """
    Returns boolean - True if the certificate is in date.
    Optional argument min_lifetime is the number of seconds
    which must remain.

    False is also returned when the file cannot be read or does not
    contain a parsable PEM certificate.

    :param cert_file: certificate file path.
    :param min_lifetime: minimum lifetime (seconds)
    :return: boolean
    """
    try:
        with open(cert_file, 'rb') as f:
            crt_data = f.read()
    except IOError:
        # Missing or unreadable file: treat as "no valid certificate".
        return False

    try:
        cert = x509.load_pem_x509_certificate(crt_data, default_backend())
    except ValueError:
        # File exists but is not a valid PEM certificate.
        return False

    # The certificate's validity bounds are naive datetimes expressed in UTC,
    # so they must be compared with a naive UTC "now" rather than local time;
    # otherwise the result is wrong by the machine's UTC offset.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    required_until = now + datetime.timedelta(seconds=min_lifetime)

    return cert.not_valid_before <= now and cert.not_valid_after > required_until


def setup_credentials():
    """
    Download and create required credentials files.
    Return True if credentials were set up.
    Return False if credentials were already set up.

    The CEDA account details are read from the CEDA_USERNAME and
    CEDA_PASSWORD environment variables.

    :return: boolean
    :raises KeyError: if CEDA_USERNAME or CEDA_PASSWORD is not set
    """

    # Nothing to do while the stored certificate is still in date.
    if cert_is_valid(CREDENTIALS_FILE_PATH):
        print('[INFO] Security credentials already set up.')
        return False

    # Get CEDA username and password from environment variables.
    # A missing variable raises KeyError, which main() reports to the user.
    username = os.environ['CEDA_USERNAME']
    password = os.environ['CEDA_PASSWORD']

    onlineca_client = OnlineCaClient()
    onlineca_client.ca_cert_dir = TRUSTROOTS_DIR

    # Set up trust roots (written into TRUSTROOTS_DIR)
    onlineca_client.get_trustroots(
        TRUSTROOTS_SERVICE,
        bootstrap=True,
        write_to_ca_cert_dir=True)

    # Write certificate credentials file
    onlineca_client.get_certificate(
        username,
        password,
        CERT_SERVICE,
        pem_out_filepath=CREDENTIALS_FILE_PATH)

    print('[INFO] Security credentials set up.')
    return True


def main(list_dates):
    """
    Main controller function.

    Sets up the security credentials, then downloads one rainfall file per
    entry in list_dates into the current working directory. The file for
    an entry covers the entry's date up to the last day of its month.
    A download that does not return a successful HTTP status is reported
    and skipped, so no error page is saved under a .nc file name.

    :param list_dates: list with all the dates required for the end-point
    :return: None
    """

    try:
        setup_credentials()
    except KeyError:
        print("CEDA_USERNAME and CEDA_PASSWORD environment variables required")
        return

    # Fixed parts of every download URL (same for all dates).
    url_p1 = 'https://dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.2.1/5km/rainfall/day/v20200731/'
    url_p2 = 'rainfall_hadukgrid_uk_5km_day_'

    # Download file to current working directory
    for fecha in list_dates:
        # Last calendar day of the month, used as the end of the date range.
        lst_day = calendar.monthrange(fecha.year, fecha.month)[1]

        # Use f-strings to get padded date and month
        url = url_p1 + url_p2 + f"{fecha:%Y%m%d}" + '-' + f"{fecha:%Y%m}" + str(lst_day) + '.nc'
        print('Downloading: {}'.format(url))

        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original - confirm this is still required.
        response = requests.get(url, cert=(CREDENTIALS_FILE_PATH), verify=False)

        # Do not write an error response to disk as if it were netCDF data.
        if not response.ok:
            print('[ERROR] Download failed (HTTP {}): {}'.format(response.status_code, url))
            print(sep)
            continue

        filename = url.rsplit('/', 1)[-1]

        with open(filename, 'wb') as file_object:
            file_object.write(response.content)
            print('contents of URL written to: {} '.format(filename))
            print(sep)


if __name__ == '__main__':

    try:
        # Build the list of dates handed to main(): the first day of every
        # month from January 2001 through December 2019.
        year = 2001
        begin_year = datetime.date(year, 1, 1)
        end_year = datetime.date(2019, 12, 31)

        dates_lst = []
        next_year = begin_year
        while not next_year > end_year:
            dates_lst += [next_year]
            # Step forward one calendar month at a time.
            next_year += relativedelta(months=+1)

        # Fetch one file per collected date.
        main(dates_lst)

    except IndexError:
        print("Please provide a file URL as input")

# Final status message, printed once the cell has finished.
print(sep)
print('All done!!. Check files')

[INFO] Security credentials already set up.
Downloading: https://dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.2.1/5km/rainfall/day/v20200731/rainfall_hadukgrid_uk_5km_day_20010101-20010131.nc
contents of URL written to: rainfall_hadukgrid_uk_5km_day_20010101-20010131.nc 
------------
------------
Downloading: https://dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.2.1/5km/rainfall/day/v20200731/rainfall_hadukgrid_uk_5km_day_20010201-20010228.nc
contents of URL written to: rainfall_hadukgrid_uk_5km_day_20010201-20010228.nc 
------------
------------
Downloading: https://dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.2.1/5km/rainfall/day/v20200731/rainfall_hadukgrid_uk_5km_day_20010301-20010331.nc
contents of URL written to: rainfall_hadukgrid_uk_5km_day_20010301-20010331.nc 
------------
------------
Downloading: https://dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.2.1/5km/rainfall/day

NameError: name 'fld_out' is not defined

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/home/lfpv/.local/lib/python3.8/site-packages/IPython/core/history.py", line 780, in writeout_cache
    self._writeout_input_cache(conn)
  File "/home/lfpv/.local/lib/python3.8/site-packages/IPython/core/history.py", line 763, in _writeout_input_cache
    conn.execute("INSERT INTO history VALUES (?, ?, ?, ?)",
sqlite3.DatabaseError: database disk image is malformed
