In [1]:
import cdsapi
from fabric2 import Connection
import datetime
from pathlib import Path
import os
import time
import re 
from joblib import Parallel,delayed
import numpy as np


# Write the CDS API client configuration file (~/.cdsapirc).
#
# The API key is a personal credential and must not be stored in the notebook:
# it is read from the CDSAPI_KEY environment variable instead.
# NOTE(review): a key was previously committed in this cell; treat it as leaked
# and regenerate it in the CDS account settings.
cds_key = os.environ.get('CDSAPI_KEY')
if not cds_key:
    raise RuntimeError(
        'Set the CDSAPI_KEY environment variable to "<UID>:<API key>" '
        'before running this notebook.'
    )

cdsapirc_path = Path.home() / '.cdsapirc'
# write_text truncates any previous file, which replaces the old rm/touch/echo sequence.
cdsapirc_path.write_text(
    'url: https://cds.climate.copernicus.eu/api/v2\n'
    'key: {}\n'.format(cds_key)
)
# The file holds a secret: make it readable by the current user only.
cdsapirc_path.chmod(0o600)
    
    
# fimex extraction settings: the bounding box (degrees) that every downloaded
# file is reduced to. The file is written to the current working directory.
cfgTemplate='''[extract]
reduceToBoundingBox.south= 55
reduceToBoundingBox.north=85
reduceToBoundingBox.west=2
reduceToBoundingBox.east=33
'''

Path('extractNorway.cfg').write_text(cfgTemplate)
    
    
def getFile(inFile, outFile, date, max_attempts=5, retry_wait=30):
    """Download one day of ERA5 single-level data and subset it with fimex.

    The raw file is retrieved from the CDS API into ``inFile``, passed through
    ``fimex-1.3 -c extractNorway.cfg`` to produce ``outFile``, and then
    deleted. Nothing is done when ``outFile`` already exists, so an
    interrupted run can be resumed.

    Parameters
    ----------
    inFile : str
        Path for the raw download; reused if it already exists.
    outFile : str
        Path of the subset file to produce.
    date : datetime.date or datetime.datetime
        Day to download (all 24 hourly steps are requested).
    max_attempts : int, optional
        How many times download + extraction are tried before giving up.
    retry_wait : float, optional
        Seconds to wait between attempts.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    Exception
        The error of the last attempt, once ``max_attempts`` is exhausted.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')
    if os.path.isfile(outFile):
        return

    request = {
        'product_type': 'reanalysis',
        'variable': [
            '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_dewpoint_temperature',
            '2m_temperature', 'mean_sea_level_pressure', 'surface_net_solar_radiation',
            'total_cloud_cover', 'total_precipitation'
        ],
        'year': date.strftime("%Y"),
        'month': date.strftime("%m"),
        'day': date.strftime("%d"),
        # '00:00', '01:00', ..., '23:00'
        'time': ['{:02d}:00'.format(hour) for hour in range(24)],
        'format': 'netcdf'
    }
    extract_command = "fimex-1.3 -c extractNorway.cfg --input.file {} --output.file {}".format(inFile, outFile)

    for attempt in range(1, max_attempts + 1):
        # File written by the stage currently running; it is discarded on
        # failure so that a partial file is never mistaken for a finished one.
        stage_output = inFile
        try:
            if not os.path.isfile(inFile):
                cdsapi.Client().retrieve('reanalysis-era5-single-levels', request, inFile)
            stage_output = outFile
            with Connection('localhost') as c:
                result = c.local(extract_command, replace_env=False)
        except Exception as error:  # not a bare except: Ctrl-C must still stop the loop
            if os.path.isfile(stage_output):
                os.remove(stage_output)
            if attempt == max_attempts:
                raise
            print('Attempt {}/{} for {} failed ({}); retrying'.format(attempt, max_attempts, outFile, error))
            time.sleep(retry_wait)
            continue

        # print() rather than display(): it is a builtin, so it is available
        # wherever this function runs.
        print(result)
        # The raw download is only an intermediate product.
        if os.path.isfile(inFile):
            os.remove(inFile)
        return

## Defining dates to download

In [2]:
# One datetime per day from `start` (inclusive) up to `end` (exclusive).
day_format = "%d-%m-%Y"
start = datetime.datetime.strptime("01-01-1979", day_format)
end = datetime.datetime.strptime("01-08-2019", day_format)
n_days = (end - start).days
date_generated = [start + datetime.timedelta(days=offset) for offset in range(n_days)]

## Setting parallel pool

In [3]:
# One file is downloaded per generated date; show how many that is.
num_files = len(date_generated)
display(num_files)

# Number of parallel workers; also used below to size the per-worker chunks.
max_workers = 2

def chunks(l, n):
    """Generate consecutive slices of ``l``, each holding at most ``n`` items.

    Every slice has exactly ``n`` items except possibly the last one, which
    holds whatever remains.
    """
    yield from (l[offset:offset + n] for offset in range(0, len(l), n))

# Split the dates into consecutive runs of ceil(num_files / max_workers) days.
chunk_size = int(np.ceil(num_files / max_workers))
divided_list = list(chunks(date_generated, chunk_size))


14822

In [4]:
# Start from an empty working directory: drop the previous download folder and
# any stale out.nc, then recreate the folder.
reset_commands = (
    'rm -rf /home/jovyan/common/',
    'rm -f out.nc',
    'mkdir /home/jovyan/common',
)
with Connection('localhost') as c:
    for command in reset_commands:
        c.local(command)
    
def getList(chunk_number):
    """Fetch every date of one chunk of ``divided_list``.

    For each date in ``divided_list[chunk_number]`` the raw file path and the
    ``norway_``-prefixed output path are derived from the date (formatted as
    DD-MM-YYYY) and handed to ``getFile``.
    """
    for day in divided_list[chunk_number]:
        stamp = day.strftime('%d-%m-%Y')
        raw_path = '/home/jovyan/common/{0}.nc'.format(stamp)
        subset_path = '/home/jovyan/common/norway_{0}.nc'.format(stamp)
        getFile(raw_path, subset_path, day)

## Downloading data and extracting for Norway

In [5]:
%%time
# One job per chunk that actually exists: rounding the chunk size up can leave
# fewer chunks than workers (e.g. 4 files with 3 workers gives 2 chunks), and
# indexing divided_list with range(max_workers) would then raise IndexError.
Parallel(n_jobs=max_workers)(delayed(getList)(chunk_number) for chunk_number in range(len(divided_list)))

CPU times: user 718 ms, sys: 48.8 ms, total: 767 ms
Wall time: 1min


[None, None]

In [None]:
from time import sleep
import concurrent.futures as cf  # `cf` was used below without ever being imported

def return_after_5_secs(message):
    """Sleep for five seconds, then return ``message`` unchanged."""
    sleep(5)
    return message

# The context manager shuts the worker processes down when the demo finishes.
with cf.ProcessPoolExecutor(3) as pool:
    future = pool.submit(return_after_5_secs, "hello")
    print(future.done())
    # The task and this wait both last 5 seconds, so the second done() check
    # may still print False; result() below blocks until the value is ready.
    sleep(5)
    print(future.done())
    print("Result: " + future.result())

## Merging nc files into a single one

In [None]:
# Directory holding the per-day norway*.nc files that are renamed and merged below.
folder = '/home/jovyan/common/'
# Disabled: would recreate `folder` and copy the norway*.nc files into it.
# NOTE(review): `folder` is currently the same directory the copy reads from,
# so enabling this as-is would delete the files before copying them.
#with Connection('localhost') as c:
#    c.local('rm -rf {}'.format(folder))
#   c.local('mkdir {}'.format(folder))
#    c.local('cp -r /home/jovyan/common/norway*.nc {}'.format(folder))

def expandDim(file, max_attempts=3):
    """Turn ``time`` into a record dimension of one NetCDF file, in place.

    Runs ``ncks -O --mk_rec_dmn time`` into a temporary ``out.nc`` in the
    current working directory and moves the result over ``file``, so that the
    individual files can be concatenated afterwards.

    Parameters
    ----------
    file : str
        Path of the NetCDF file to rewrite.
    max_attempts : int, optional
        How many times the two commands are tried before giving up.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    Exception
        The error of the last attempt, once ``max_attempts`` is exhausted.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')
    display('Processing file {}'.format(file))
    for attempt in range(1, max_attempts + 1):
        try:
            with Connection('localhost') as c:
                c.local('ncks -O --mk_rec_dmn time {} out.nc'.format(file))
                c.local('mv out.nc {}'.format(file))
            return
        except Exception:  # not a bare except: Ctrl-C must still stop the loop
            # Bounded retry: a persistent failure is reported instead of
            # looping forever.
            if attempt == max_attempts:
                raise
        
# Collect the matches up front so that the renames below cannot disturb a
# directory walk that is still in progress.
pathlist = sorted(Path(folder).glob('**/norway*.nc'))
#for path in pathlist:
#     expandDim(str(path))

# Concatenating files with ncrcat: they will be ordered in the order they are
# fed to the utility. Rename norway_DD-MM-YYYY.nc to YYYY_MM_DD.nc so that
# alphabetical order is chronological order.
for path in pathlist:
    oldfile = str(path)
    stamp = re.fullmatch(r'norway_(\d{2})-(\d{2})-(\d{4})\.nc', path.name)
    if stamp is None:
        # Not a per-day file: leave it alone rather than build a name from
        # unrelated pieces of the path.
        print('Skipping {}: name is not norway_DD-MM-YYYY.nc'.format(oldfile))
        continue
    day, month, year = stamp.groups()
    newfile = os.path.join(folder, '{}_{}_{}.nc'.format(year, month, day))
    if os.path.isfile(newfile):
        # Target already present (e.g. the cell was re-run): keep both files
        # untouched, as before.
        continue
    # A failure here is raised instead of being retried forever.
    os.rename(oldfile, newfile)
    

In [None]:
# Concatenate every .nc file in `folder` (in shell glob order) into ./norway.nc,
# overwriting an existing output file (-O).
merge_command = "ncrcat -O {}*.nc ./norway.nc".format(folder)
with Connection('localhost') as c:
    c.local(merge_command)