### Compute derivatives once!

In [1]:
import os
import sys
import json
import time
import numpy as np
import xarray as xr

sys.path.insert(0, '~/workspace_icon-ml/cloud_cover_parameterization/')
import my_classes
from my_classes import load_data

sys.path.insert(0, '~/workspace_icon-ml/symbolic_regression/')
from functions import add_derivatives
from functions import append_dict_to_json

# Possible days and years
years = np.arange(1979, 2022)
days = ['-01-01', '-04-01', '-07-01', '-10-01']

In [None]:
for ind_year in range(len(years)):
    for ind_day in range(len(days)):
        day = str(years[ind_year])+days[ind_day]
        print(day)
        
        existing_files = os.listdir('~/bd1179_work/ERA5/hvcg_data/clwc_z/')
        if not 'int_var_%s_clwc_z_R02B05.npy'%day in existing_files:

            t0 = time.time()
            
            # Create placeholder
            np.save('~/bd1179_work/ERA5/hvcg_data/clwc_z/int_var_%s_clwc_z_R02B05.npy'%day, np.zeros(1))

            order_of_vars = ['q', 'clwc', 'ciwc', 't', 'pa', 'u', 'v', 'zg', 'fr_land', 'cc']
            data_dict = load_data(source='era5', days=day, order_of_vars=order_of_vars)

            TIMESTEPS, VLAYERS, HFIELDS = data_dict['q'].shape

            data_dict['fr_land'] = np.repeat(np.expand_dims(data_dict['fr_land'], axis=1), VLAYERS, axis=1)

            print('CP1')

            time.time() - t0

            #---------------------------------------------------------------------------------------------------

            # data_dict['cc'].shape
            # (24, 31, 66655)

            #---------------------------------------------------------------------------------------------------

            # Add magnitude of horizontal wind
            data_dict['U'] = np.sqrt(data_dict['u']**2 + data_dict['v']**2)
            del data_dict['u']
            del data_dict['v']

            # Add RH
            T0 = 273.15
            r = 0.00263*data_dict['pa']*data_dict['q']*np.exp((17.67*(data_dict['t']-T0))/(data_dict['t']-29.65))**(-1)
            data_dict['rh'] = r

            # Add ps
            ps = np.repeat(np.expand_dims(data_dict['pa'][:, -1], axis=1), VLAYERS, axis=1)
            data_dict['ps'] = ps

            # Removing four upper-most levels
            for key in data_dict.keys():
                data_dict[key] = data_dict[key][:, 4:]

            # Data output
            data_output = data_dict['cc']
            del data_dict['cc']

            print('CP2')

            #---------------------------------------------------------------------------------------------------

#             # OPTIONAL CELL - Don't consider all horizontal fields #!
#             for key in data_dict.keys():
#                 data_dict[key] = data_dict[key][:, :, :1000]
#             data_output = data_output[:, :, :1000]

#             # Otherwise it takes up to one hour for one day of data

            #---------------------------------------------------------------------------------------------------

            from contextlib import contextmanager
            import multiprocessing as mlp
            import gc

            import time
            t0 = time.time()

            print('CP3')

            @contextmanager
            def poolcontext(*args, **kwargs):
                pool = mlp.Pool(*args, **kwargs)
                yield pool
                pool.terminate()

            def add_derivatives_par(data_dict):
                # Define variables for add_derivatives (Add 'zg' at the end)
                base_variables = ['q', 'clwc', 'ciwc', 't', 'pa', 'U', 'rh', 'zg']
                return add_derivatives(data_dict, base_variables)

            print('CP4')

            procs = 128
            with poolcontext(processes=procs) as pool:
                # Every process received a part of data_dict
                results = pool.map(add_derivatives_par, [{key: data_dict[key][k*TIMESTEPS//procs:(k+1)*TIMESTEPS//procs] for key in data_dict.keys()} for k in range(procs)])

            print('CP5')    

            data_dict = {}
            for key in results[0].keys():
                data_dict[key] = np.concatenate([results[k][key] for k in range(procs)])

            print('CP6')      

            del results
            gc.collect()

            print(time.time() - t0)

            #---------------------------------------------------------------------------------------------------

            print('CP7') 

            for key in data_dict.keys():
                if key[-1] == 'z':
                    np.save('~/bd1179_work/ERA5/hvcg_data/%s/int_var_%s_%s_R02B05.npy'%(key,day,key), data_dict[key])

1979-01-01
1979-04-01
1979-07-01
1979-10-01
1979-10-01
100.0
CP1
CP2
CP3
CP4


Process ForkPoolWorker-580:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-611:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-558:
Process ForkPoolWorker-549:
Process ForkPoolWorker-613:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-573:
Process ForkPoolWorker-593:
Process ForkPoolWorker-638:
Process ForkPoolWorker-610:
Process ForkPoolWorker-542:
Process ForkPoolWorker-533:
Process ForkPoolWorker-514:
Process ForkPoolWorker-563:
Process ForkPoolWorker-621:
Process ForkPoolWorker-584:
Process ForkPoolWorker-633:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-618:
Process ForkPoolWorker-543:
Process ForkPoolWorker-581:
Process ForkPoolWorker-548:
Process ForkPoolWorker-631:
Process ForkPoolWorker-586:
Process ForkPoolWorker-561:
Process ForkPoolWorker-574:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-515:
Process ForkPoolWorker-529:
Process ForkPoolWorker-522:
Process ForkPoolWorker-520:
Process ForkPoolWorker-551:
Process ForkPoolWorker-605:
Process ForkPoolWorker-532:
Process ForkPoolWorker-591:
Process ForkPoolWorker-615:
Process ForkPoolWorker-524:
Process ForkPoolWorker-639:
Process ForkPoolWorker-527:
Process ForkPoolWorker-553:
Process ForkPoolWorker-516:


An exception occurred: The argument is not treated as a dictionary but as a np array instead.

Process ForkPoolWorker-606:
