## Getting the API token for the Copernicus Climate Data Store (CDS)

In [1]:
import psycopg2
import pandas.io.sql as sqlio
import getpass
import cdsapi
from fabric import Connection
from joblib import Parallel,delayed
import datetime
from tqdm import tqdm
import os

#Both secrets are typed interactively so that they never end up in the notebook file.
#`key` opens the vault database on mobiserver; `vaultKey` is handed to niva.getToken further down.
key = getpass.getpass(prompt='mobiserver password: ')
vaultKey = getpass.getpass(prompt='vault password: ')
#Querying necessary tokens
def query(query,fetch=True,params=None):
    """Run one SQL statement against the ``vault`` database on mobiserver.

    A new connection is opened for every call, using the module-level ``key``
    as password, and it is always closed again before returning.

    Parameters
    ----------
    query : str
        SQL text to execute.
    fetch : bool, default True
        When true the statement is run through pandas and its result set is
        returned. When false the statement is only executed.
    params : sequence or mapping, optional
        Values bound to the placeholders in ``query`` by the database driver.
        Prefer this over formatting values into the SQL string.

    Returns
    -------
    pandas.DataFrame or None
        The result set when ``fetch`` is true, otherwise ``None``.
    """
    db = psycopg2.connect(user='jose-luis', host='mobiserver.niva.no', port=5432, database='vault',password=key)
    try:
        #`with db` only wraps the transaction (commit on success, rollback on error);
        #it does not close the connection, hence the explicit close() below.
        with db:
            if fetch:
                #The statement is executed exactly once, by pandas.
                return sqlio.read_sql_query(query, db, params=params)
            with db.cursor() as cursor:
                cursor.execute(query, params)
    finally:
        db.close()
            
#Fetch the Copernicus API key from the vault.
#NOTE(review): vaultKey is formatted straight into the SQL text, so a password containing
#a single quote breaks (or alters) the statement. Bind it as a query parameter if query() allows it.
copernicusKey = query('''select niva.getToken('copernicusKey','{}');'''.format(vaultKey)).iloc[0,0]

#The database secrets are no longer needed. query() reads `key` when it is called,
#so it cannot be used again after this line.
del key,vaultKey

#Write the cdsapi configuration file from Python instead of through shell `echo` commands:
#the key never appears on a command line, it is not subject to shell expansion, and the
#file is readable by its owner only.
cdsapirc = '/home/jovyan/.cdsapirc'
with os.fdopen(os.open(cdsapirc, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as rc:
    rc.write('url: https://cds.climate.copernicus.eu/api/v2\n')
    rc.write('key: {}\n'.format(copernicusKey))
os.chmod(cdsapirc, 0o600)  #also tightens the mode when the file already existed

del copernicusKey

mobiserver password:  ·······
vault password:  ···············


## Querying data from the climate datastore

With the key obtained above, we can download data from the Copernicus Climate Data Store (CDS). 
The available datasets are listed in https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset 

Let's try getting the potential evapotranspiration (the `potential_evaporation` variable) directly from the ERA5-Land dataset: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview

The ERA5-Land dataset covers the whole world, but we will only store data for Europe. Each daily global file is downloaded from the data store and then cropped to the European bounding box locally with the fimex tool.  

In [2]:
#Bounding box for Europe (degrees: south/north latitude, west/east longitude).
#fimex reads the extent from this .cfg file, so it is written to disk here.
cfgName = 'extractEurope.cfg'

cfgTemplate = (
    '[extract]\n'
    'reduceToBoundingBox.south= 33\n'
    'reduceToBoundingBox.north= 82\n'
    'reduceToBoundingBox.west= -14\n'
    'reduceToBoundingBox.east= 35\n'
)

with open(cfgName, mode='w') as cfgFile:
    cfgFile.write(cfgTemplate)

    
#Main function to extract the (remote) european data from the climate datastore
def getFile(inFile,outFile,date,max_retries=None,retry_wait=5):
    """Download one day of ERA5-Land potential evaporation and crop it to Europe.

    Nothing happens when ``outFile`` already exists. Otherwise the hourly data
    for ``date`` is retrieved from the climate data store into ``inFile``
    (unless that file is already on disk) and run through three local tools:

    1. ``ncks`` selects the longitude ranges 180.1..359.9 and 0.0..180.0, in that order.
    2. ``ncap2`` replaces every longitude above 180 by itself minus 360,
       rounded to one decimal.
    3. ``fimex-1.5`` crops to the bounding box in the file named by ``cfgName``.

    The intermediate files (``a<date>.nc``, ``ba<date>.nc``, ``cba<date>.nc``)
    live in the current working directory. An intermediate that already exists
    is reused, so an interrupted call resumes where it stopped. Once the last
    one has been moved to ``outFile`` the others and ``inFile`` are deleted.

    Any failing step restarts the sequence after a pause.

    Parameters
    ----------
    inFile : str
        Path for the downloaded global file.
    outFile : str
        Path of the final, cropped file.
    date : datetime.date or datetime.datetime
        Day to download.
    max_retries : int, optional
        Number of retries allowed after the first attempt. ``None`` (default)
        retries until the file is produced.
    retry_wait : float, default 5
        Seconds to sleep before each retry.

    Raises
    ------
    RuntimeError
        Only when ``max_retries`` is set and has been used up.
    """
    import time  #local import: only needed for the pause between attempts

    def discard(path):
        #Best-effort removal of a possibly incomplete file (same intent as `rm -f`)
        try:
            os.remove(path)
        except OSError:
            pass

    if os.path.isfile(outFile):
        return

    dummyA = 'a' + date.strftime("%Y-%m-%d")  + '.nc'
    dummyB = 'b' + dummyA
    dummyC = 'c' + dummyB

    #Other dataset ids that were left commented out next to this one:
    #'reanalysis-era5-single-levels', 'reanalysis-uerra-europe-single-levels'
    dataset = 'reanalysis-era5-land'
    request = {
        'product_type':'reanalysis',
        'variable':'potential_evaporation',
        'year':  date.strftime("%Y"),
        'month': date.strftime("%m"),
        'day':   date.strftime("%d"),
        'time':  ['{:02d}:00'.format(hour) for hour in range(24)],  #'00:00' ... '23:00'
        'format':'netcdf'
    }

    #(shell command, file it produces, extra keyword arguments for c.local)
    steps = [
        #this line will be dependent on the resolution of the nc file
        ('ncks -O --msa -d longitude,180.1,359.9 -d longitude,0.0,180.0 {} {}'.format(inFile,dummyA),
         dummyA, {}),
        #hack to avoid loss of precision
        ('''ncap2 -O -s "where(longitude>180)longitude=round(longitude.double()*10.0-3600)/10.0" {} {}'''.format(dummyA,dummyB),
         dummyB, {}),
        ("fimex-1.5 -c {} --input.file {} --output.file {}".format(cfgName,dummyB,dummyC),
         dummyC, {'replace_env': False}),
    ]

    attempt = 0
    while True:
        if attempt:
            if max_retries is not None and attempt > max_retries:
                raise RuntimeError('giving up on {} after {} attempts'.format(outFile, attempt))
            time.sleep(retry_wait)  #do not hammer the data store or the disk
        attempt += 1

        if not os.path.isfile(inFile):
            try:
                cdsapi.Client().retrieve(dataset, request, inFile)
            except Exception:  #not a bare except: Ctrl-C / SystemExit must still stop the loop
                discard(inFile)
                continue

        try:
            with Connection('localhost') as c:
                for command, product, options in steps:
                    if os.path.isfile(product):
                        continue  #produced by an earlier attempt
                    try:
                        c.local(command, **options)
                    except Exception:
                        #A half-written product would be mistaken for a finished one next time round
                        discard(product)
                        raise
                if os.path.isfile(dummyC):
                    #Move the result first and clean up afterwards, so that a failed move
                    #does not throw away the download
                    c.local('mv {} {} && rm -f {} {} {}'.format(dummyC,outFile,inFile,dummyA,dummyB))
        except Exception:
            continue
        return


In [3]:
#Period to download, one file per day. Dates are written day-month-year.
date_format = "%d-%m-%Y"
start = datetime.datetime.strptime("01-01-1981", date_format)
end = datetime.datetime.strptime("28-02-2021", date_format)
#Note: `end` itself is not part of the list, the last generated day is the one before it.
date_generated = [start + datetime.timedelta(days=offset) for offset in range((end - start).days)]

#The downloads are split over this many parallel workers (rule of thumb: twice as many workers as processors)
max_workers = 16
num_files = len(date_generated)

#Directory that receives the cropped files. It is expected to exist already;
#the commands that wipe and recreate it are left disabled on purpose:
download_dir = '/home/jovyan/shared/era5-land'
# with Connection('localhost') as c:
#     c.local('rm -rf {}'.format(download_dir))
#     c.local('mkdir {}'.format(download_dir))   

#Download function: the date is included in the generated filename   
def getData(date):
    """Download and crop the file for one day into ``download_dir``.

    The day is printed as a progress marker. The raw download is named
    ``<dd-mm-YYYY>.nc`` (relative to the working directory) and the cropped
    result ``<download_dir>/europe_<dd-mm-YYYY>.nc``.

    Parameters
    ----------
    date : datetime.datetime
        Day to fetch.
    """
    stamp = date.strftime('%d-%m-%Y')
    print(stamp)
    #getFile moves its result into download_dir, so the directory has to exist.
    #exist_ok keeps this safe when several workers get here at the same time.
    os.makedirs(download_dir, exist_ok=True)
    filename = '{}.nc'.format(stamp)
    out = '{}/europe_{}.nc'.format(download_dir, stamp)
    getFile(filename,out,date)

In [None]:
#%%time
#Actually downloading the data.
#getData returns nothing, so the list that Parallel hands back holds one None per date.
#It is bound to a name so that the notebook does not echo it as the cell output.
download_results = Parallel(n_jobs=max_workers)(delayed(getData)(date) for date in date_generated)