In [1]:
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
import re

import os
import sys
from pathlib import Path

os.chdir(Path(sys.path[0]).parent)
import modules.utils as utils

import multiprocess as mp

### Without parallel computing

In [4]:
# Serial reference version: for every time step found in `Directory`, load the
# u, v, w and theta fields, coarse-grain them with utils.coarse_array, derive
# the sub-grid "tke" and "wtheta" terms, and write one file per time step.
Directory = "data"
L = 32  # second argument of utils.coarse_array; also forwarded to utils.write_nc_file

# Only files whose name contains "uxy" are used to discover time steps.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group keeps the digit runs in the result, so
# index 3 is the second run of digits in the file name.
temp = [int(re.split(r'(\d+)', name)[3]) for name in files]
# Unique time steps in ascending order.
times = sorted(set(temp))

for t in times:
    u_ds = utils.concatenate_alt(Directory, 'u', t)
    v_ds = utils.concatenate_alt(Directory, 'v', t)
    w_ds = utils.concatenate_alt(Directory, 'w', t)
    theta_ds = utils.concatenate_alt(Directory, 'theta', t)
    assert u_ds.shape == v_ds.shape == w_ds.shape == theta_ds.shape, 'u,v,w,theta have different shape'

    u_coarse = utils.coarse_array(u_ds, L)
    v_coarse = utils.coarse_array(v_ds, L)
    w_coarse = utils.coarse_array(w_ds, L)
    theta_coarse = utils.coarse_array(theta_ds, L)

    # Target column: coarse-grained product w*theta.
    wtheta_ds = w_ds*theta_ds
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # coarse(x*x) - coarse(x)*coarse(x), summed over u, v and w (evaluated
    # left to right, exactly as before).
    # NOTE(review): there is no 0.5 factor here, so this is twice the usual
    # TKE definition -- confirm that this is intended.
    tke_ds = (
        utils.coarse_array(u_ds*u_ds, L) - u_coarse*u_coarse
        + utils.coarse_array(v_ds*v_ds, L) - v_coarse*v_coarse
        + utils.coarse_array(w_ds*w_ds, L) - w_coarse*w_coarse
    )
    tke_in = utils.variable_samples(tke_ds)

    datasets = [u_coarse, v_coarse, w_coarse, theta_coarse]  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column order: coarse inputs, then tke, then wtheta.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    # Names in the same order as the column groups of tot_ds.
    variables = ['u', 'v', 'w', 'theta'] + ['tke', 'wtheta']  # add 's'
    print(variables)

    # NOTE(review): the trailing 4 is presumably the number of vertical levels
    # (the check cell uses nz=4) -- confirm against utils.write_nc_file.
    utils.write_nc_file(tot_ds,L,variables,t,4)

['u', 'v', 'w', 'theta', 'tke', 'wtheta']
writing out


### With synchronous parallel computing

In [5]:
# Same pipeline as the serial cell, but the loading and coarse-graining calls
# are dispatched to a worker pool with the blocking Pool.apply.
Directory = "data"  #"/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
L = 32  # second argument of utils.coarse_array; also forwarded to utils.write_nc_file

# Only files whose name contains "uxy" are used to discover time steps.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group keeps the digit runs in the result, so
# index 3 is the second run of digits in the file name.
temp = [int(re.split(r'(\d+)', name)[3]) for name in files]
# Unique time steps in ascending order.
times = sorted(set(temp))

for t in times:
    variables = ['u', 'v', 'w', 'theta']  # add 's'

    # One pool per time step, released by the context manager even if a worker
    # call or the shape assertion raises. Pool.apply blocks until its result is
    # ready, so the calls below still run one after another.
    with mp.Pool(mp.cpu_count()) as pool:
        raw_ds = [pool.apply(utils.concatenate_alt, args=(Directory,variables[i],t)) for i in trange(len(variables))]

        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape ,'u,v,w,theta have different shape'

        coarse_ds = [pool.apply(utils.coarse_array, args=(raw_ds[i], L)) for i in trange(len(raw_ds))]

    # Target column: coarse-grained product w*theta (indices follow `variables`).
    wtheta_ds = raw_ds[2]*raw_ds[3]
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # coarse(x*x) - coarse(x)*coarse(x), summed over u, v and w (evaluated
    # left to right, exactly as before).
    # NOTE(review): there is no 0.5 factor here, so this is twice the usual
    # TKE definition -- confirm that this is intended.
    tke_ds = (
        utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
        + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
        + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
    )
    tke_in = utils.variable_samples(tke_ds)

    # coarse_ds holds exactly one entry per name in `variables`.
    datasets = list(coarse_ds)  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column order: coarse inputs, then tke, then wtheta.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    # `variables` is rebuilt at the top of every iteration, so extending it
    # here does not accumulate across time steps.
    variables = variables + ['tke', 'wtheta']
    print(variables)

    # NOTE(review): the trailing 4 is presumably the number of vertical levels
    # (the check cell uses nz=4) -- confirm against utils.write_nc_file.
    utils.write_nc_file(tot_ds,L,variables,t,4)

100%|██████████| 4/4 [00:00<00:00,  7.47it/s]
100%|██████████| 4/4 [00:00<00:00, 25.83it/s]


['u', 'v', 'w', 'theta', 'tke', 'wtheta']
writing out


### With asynchronous parallel computing

In [2]:
# Same pipeline as the serial cell, but the loading and coarse-graining calls
# are submitted with Pool.apply_async and collected afterwards.
Directory = "data"  #"/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
L = 32  # second argument of utils.coarse_array; also forwarded to utils.write_nc_file
variables = ['u', 'v', 'w', 'theta', 's']  # add 's'
# Frozen copy of the raw field names. The loop used to append 'tke' and
# 'wtheta' to `variables` itself, so from the second time step on it tried to
# load those derived names as raw fields and the list kept growing.
input_variables = tuple(variables)

# Only files whose name contains "uxy" are used to discover time steps.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group keeps the digit runs in the result, so
# index 3 is the second run of digits in the file name.
temp = [int(re.split(r'(\d+)', name)[3]) for name in files]
# Unique time steps in ascending order.
times = sorted(set(temp))

for t in times:
    # One pool per time step, released by the context manager even if a worker
    # or the shape assertion raises. All results are fetched inside the block,
    # so nothing is pending when the pool is torn down.
    with mp.Pool(mp.cpu_count()) as pool:
        # The progress bar only tracks task submission, not completion.
        result_objects_raw = [pool.apply_async(utils.concatenate_alt, args=(Directory,input_variables[i],t,i,False)) for i in trange(len(input_variables))]
        # Element [1] of each result is used as the data.
        # NOTE(review): presumably the helper returns (index, data) when given
        # the extra arguments -- confirm against utils.concatenate_alt.
        raw_ds = [r.get()[1] for r in result_objects_raw]

        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape ,'u,v,w,theta have different shape'

        result_objects_coarse = [pool.apply_async(utils.coarse_array, args=(raw_ds[i], L,i,False)) for i in trange(len(raw_ds))]
        coarse_ds = [r.get()[1] for r in result_objects_coarse]

    # Target column: coarse-grained product w*theta (indices follow input_variables).
    wtheta_ds = raw_ds[2]*raw_ds[3]
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # coarse(x*x) - coarse(x)*coarse(x), summed over u, v and w (evaluated
    # left to right, exactly as before).
    # NOTE(review): there is no 0.5 factor here, so this is twice the usual
    # TKE definition -- confirm that this is intended.
    tke_ds = (
        utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
        + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
        + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
    )
    tke_in = utils.variable_samples(tke_ds)

    # coarse_ds holds exactly one entry per name in input_variables.
    datasets = list(coarse_ds)  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column order: coarse inputs, then tke, then wtheta.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    # Re-bind (rather than extend) the name list so each time step writes the
    # same names; later cells read `variables` to recover the column layout.
    variables = list(input_variables) + ['tke', 'wtheta']
    print(variables)

    # NOTE(review): the trailing 4 is presumably the number of vertical levels
    # (the check cell uses nz=4) -- confirm against utils.write_nc_file.
    utils.write_nc_file(tot_ds,str(L),variables,t,4)

100%|██████████| 5/5 [00:00<00:00, 13851.73it/s]
100%|██████████| 5/5 [00:00<00:00, 15352.50it/s]


['u', 'v', 'w', 'theta', 's', 'tke', 'wtheta']
writing out


PermissionError: [Errno 13] Permission denied: b'data/L_32/input_ds_for_simple_nn_T10_L_32.nc'

## Check results

In [7]:
# Open the NetCDF file at path_data with xarray and convert it to a pandas
# DataFrame so its contents can be inspected.
# NOTE(review): presumably this is the file produced by utils.write_nc_file in
# the preprocessing cells -- confirm the path matches what that helper writes.
path_data = 'data/L_32/input_ds_for_simple_nn_T10_L_32.nc'
ds_init = xr.open_dataset(path_data)
df_init = ds_init.to_dataframe()
# Last expression of the cell: displays the (rows, columns) of the DataFrame.
df_init.shape

(6144, 1)

In [14]:
# Fold the single-column DataFrame back into a 2-D array with one row per
# sample and nz entries per variable.
# NOTE(review): this relies on `variables` as left behind by whichever
# preprocessing cell ran last -- it is not defined in this cell.
nz = 4
len_samples = len(variables) * nz
# The DataFrame has one row per stored value, so its length is the flat size.
n_samples = len(df_init) // len_samples
tot_ds = df_init.to_numpy().reshape(n_samples, len_samples)
tot_ds.shape

(256, 24)

In [9]:
# Display the last four columns of every row of the reshaped array.
# NOTE(review): with nz=4 these are presumably the entries of the final name in
# `variables` ('wtheta') -- confirm against the layout written by utils.write_nc_file.
tot_ds[:,-4:]

array([[-0.11928583, -0.1221244 , -0.12504954, -0.12783829],
       [-0.0998517 , -0.10368239, -0.10765759, -0.11176325],
       [-0.1558731 , -0.15841225, -0.16097671, -0.16369624],
       ...,
       [ 0.38581366,  0.39047901,  0.39499218,  0.39963546],
       [ 0.27523937,  0.28560953,  0.29628333,  0.30696426],
       [ 0.13272098,  0.13817734,  0.14384872,  0.14971746]])