In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
import re
import netCDF4 as nc

import os
import sys
from pathlib import Path

# Switch the working directory to the parent of sys.path[0] before importing the
# project-local `modules` package; the relative "data" paths used in later cells
# are then resolved against that directory as well.
# NOTE(review): assumes sys.path[0] is the directory containing this notebook — confirm
# for the kernel/launcher in use.
os.chdir(Path(sys.path[0]).parent)
import modules.utils as utils

import multiprocessing as mp

### Without parallel computing

In [4]:
# Serial pipeline: for every time index found in `Directory`, load u, v, w and theta,
# coarse-grain them with factor L, derive the sub-grid variance sum ("tke") and the
# coarse-grained w*theta product, stack everything column-wise and write it out.
Directory = "data"
L = 32

# Only the "uxy" files are used to discover which time indices exist.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group alternates text/digits, so element 3 is the second
# run of digits in the file name. A set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

for t in times:
    u_ds = utils.concatenate_alt(Directory, 'u', t)
    v_ds = utils.concatenate_alt(Directory, 'v', t)
    w_ds = utils.concatenate_alt(Directory, 'w', t)
    theta_ds = utils.concatenate_alt(Directory, 'theta', t)
    assert u_ds.shape == v_ds.shape == w_ds.shape == theta_ds.shape, 'u,v,w,theta have different shape'

    u_coarse = utils.coarse_array(u_ds, L)
    v_coarse = utils.coarse_array(v_ds, L)
    w_coarse = utils.coarse_array(w_ds, L)
    theta_coarse = utils.coarse_array(theta_ds, L)

    # Target column(s): coarse-grained product w*theta.
    wtheta_ds = w_ds*theta_ds
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # Sum over u, v, w of coarse(x*x) - coarse(x)*coarse(x). The evaluation order is
    # kept strictly left-to-right so floating-point results match the original.
    # NOTE(review): no 1/2 factor is applied, so this is twice the conventional TKE
    # definition — confirm this is intended.
    tke_ds = (
        utils.coarse_array(u_ds*u_ds, L) - u_coarse*u_coarse
        + utils.coarse_array(v_ds*v_ds, L) - v_coarse*v_coarse
        + utils.coarse_array(w_ds*w_ds, L) - w_coarse*w_coarse
    )
    tke_in = utils.variable_samples(tke_ds)

    datasets = [u_coarse, v_coarse, w_coarse, theta_coarse]  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column layout: coarse inputs, then tke, then the wtheta target.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    variables = ['u', 'v', 'w', 'theta', 'tke', 'wtheta']  # add 's'
    print(variables)

    # NOTE(review): the trailing 4 presumably matches the number of raw input
    # variables — confirm against utils.write_nc_file.
    utils.write_nc_file(tot_ds,L,variables,t,4)

['u', 'v', 'w', 'theta', 'tke', 'wtheta']
writing out


### With synchronous parallel computing

In [2]:
# Same pipeline as the serial cell, but the per-variable loading and coarse-graining
# are distributed over a process pool with the blocking (synchronous) Pool.starmap.
Directory = "data"  #"/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
L = 32

# Only the "uxy" files are used to discover which time indices exist.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group alternates text/digits, so element 3 is the second
# run of digits in the file name. A set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

for t in times:
    # Time indices <= 1 are skipped.
    if t <= 1:
        continue
    variables = ['u', 'v', 'w', 'theta']  # add 's'

    # One pool per time step, released by the context manager even if the shape
    # assertion fails. Pool.apply blocks until each call returns, so calling it in
    # a loop ran the tasks one at a time; starmap submits them all at once and still
    # returns the results in input order.
    with mp.Pool(mp.cpu_count()) as pool:
        raw_ds = pool.starmap(utils.concatenate_alt, [(Directory, var, t) for var in variables])

        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape ,'u,v,w,theta have different shape'

        coarse_ds = pool.starmap(utils.coarse_array, [(raw, L) for raw in raw_ds])

    # Target column(s): coarse-grained product w*theta (indices 2 and 3 of `variables`).
    wtheta_ds = raw_ds[2]*raw_ds[3]
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # Sum over u, v, w of coarse(x*x) - coarse(x)*coarse(x). The evaluation order is
    # kept strictly left-to-right so floating-point results match the original.
    # NOTE(review): no 1/2 factor is applied, so this is twice the conventional TKE
    # definition — confirm this is intended.
    tke_ds = (
        utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
        + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
        + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
    )
    tke_in = utils.variable_samples(tke_ds)

    datasets = list(coarse_ds)  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column layout: coarse inputs, then tke, then the wtheta target.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    variables.append('tke')
    variables.append('wtheta')
    print(variables)

    # NOTE(review): the trailing 4 presumably matches the number of raw input
    # variables — confirm against utils.write_nc_file.
    utils.write_nc_file(tot_ds,L,variables,t,4)

### With asynchronous parallel computing

In [5]:
# Same pipeline as the other cells, but the per-variable loading and coarse-graining
# are submitted with Pool.apply_async so the tasks of one stage run concurrently.
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
Directory = "/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
L = 32

# Only the "uxy" files are used to discover which time indices exist.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group alternates text/digits, so element 3 is the second
# run of digits in the file name. A set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

for t in times:
    # Time indices <= 2 are skipped.
    if t <= 2:
        continue
    variables = ['u', 'v', 'w', 'theta', 's']  # add 's'

    # A single pool serves both stages of this time step. The context manager
    # releases the workers even when a task or the shape assertion raises, and every
    # result is collected with .get() before the block exits.
    with mp.Pool(mp.cpu_count()) as pool:
        result_objects_raw = [
            pool.apply_async(utils.concatenate_alt, args=(Directory, var, t, i, False))
            for i, var in enumerate(variables)
        ]
        # Each task result is indexed with [1] to obtain the array; results are
        # gathered in submission order, so raw_ds lines up with `variables`.
        raw_ds = [r.get()[1] for r in result_objects_raw]

        # NOTE(review): only the first four arrays (u, v, w, theta) are compared;
        # the shape of 's' is not checked here.
        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape ,'u,v,w,theta have different shape'

        result_objects_coarse = [
            pool.apply_async(utils.coarse_array, args=(raw, L, i, False))
            for i, raw in enumerate(raw_ds)
        ]
        coarse_ds = [r.get()[1] for r in result_objects_coarse]

    # Target column(s): coarse-grained product w*theta (indices 2 and 3 of `variables`).
    wtheta_ds = raw_ds[2]*raw_ds[3]
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # Sum over u, v, w of coarse(x*x) - coarse(x)*coarse(x). The evaluation order is
    # kept strictly left-to-right so floating-point results match the original.
    # NOTE(review): no 1/2 factor is applied, so this is twice the conventional TKE
    # definition — confirm this is intended.
    tke_ds = (
        utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
        + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
        + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
    )
    tke_in = utils.variable_samples(tke_ds)

    datasets = list(coarse_ds)  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column layout: coarse inputs, then tke, then the wtheta target.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    variables.append('tke')
    variables.append('wtheta')

    utils.write_nc_file(tot_ds,str(L),variables,t)

100%|██████████| 5/5 [00:00<00:00, 11161.00it/s]
100%|██████████| 5/5 [00:00<00:00, 28187.53it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16070.13it/s]
100%|██████████| 5/5 [00:00<00:00, 42886.54it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 36033.54it/s]
100%|██████████| 5/5 [00:00<00:00, 30305.66it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 17036.17it/s]
100%|██████████| 5/5 [00:00<00:00,  5.19it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 26715.31it/s]
100%|██████████| 5/5 [00:00<00:00, 30885.89it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 11066.77it/s]
100%|██████████| 5/5 [00:00<00:00, 10.39it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 18204.44it/s]
100%|██████████| 5/5 [00:00<00:00, 33182.78it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16723.70it/s]
100%|██████████| 5/5 [00:00<00:00, 29371.88it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 35544.95it/s]
100%|██████████| 5/5 [00:00<00:00,  5.06it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 19599.55it/s]
100%|██████████| 5/5 [00:00<00:00,  5.25it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 20661.60it/s]
100%|██████████| 5/5 [00:00<00:00, 33130.36it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 31631.25it/s]
100%|██████████| 5/5 [00:00<00:00, 10.53it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 40485.56it/s]
100%|██████████| 5/5 [00:00<00:00,  5.20it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 30437.62it/s]
100%|██████████| 5/5 [00:00<00:00,  5.23it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 17218.00it/s]
100%|██████████| 5/5 [00:00<00:00,  5.17it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 21799.92it/s]
100%|██████████| 5/5 [00:00<00:00,  5.35it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 17564.09it/s]
100%|██████████| 5/5 [00:00<00:00,  5.30it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 24499.44it/s]
100%|██████████| 5/5 [00:00<00:00,  5.24it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 30305.66it/s]
100%|██████████| 5/5 [00:00<00:00, 31583.61it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 35069.43it/s]
100%|██████████| 5/5 [00:00<00:00,  5.34it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 15580.62it/s]
100%|██████████| 5/5 [00:00<00:00, 31068.92it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 9062.89it/s]
100%|██████████| 5/5 [00:00<00:00, 24132.93it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 14315.03it/s]
100%|██████████| 5/5 [00:00<00:00,  5.15it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 31068.92it/s]
100%|██████████| 5/5 [00:00<00:00, 10.41it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 27776.85it/s]
100%|██████████| 5/5 [00:00<00:00, 29831.47it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16552.11it/s]
100%|██████████| 5/5 [00:00<00:00,  5.26it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 20145.55it/s]
100%|██████████| 5/5 [00:00<00:00,  5.18it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 11637.91it/s]
100%|██████████| 5/5 [00:00<00:00, 15229.86it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 33825.03it/s]
100%|██████████| 5/5 [00:00<00:00,  5.24it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 20321.24it/s]
100%|██████████| 5/5 [00:00<00:00, 10.67it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 29873.96it/s]
100%|██████████| 5/5 [00:00<00:00,  5.08it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 29873.96it/s]
100%|██████████| 5/5 [00:00<00:00,  5.13it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 10629.25it/s]
100%|██████████| 5/5 [00:00<00:00, 29662.69it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 34155.57it/s]
100%|██████████| 5/5 [00:00<00:00,  5.11it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 27413.75it/s]
100%|██████████| 5/5 [00:00<00:00, 33662.15it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 29579.01it/s]
100%|██████████| 5/5 [00:00<00:00, 31871.61it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 31488.77it/s]
100%|██████████| 5/5 [00:00<00:00,  5.20it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 34155.57it/s]
100%|██████████| 5/5 [00:00<00:00, 27630.46it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 30481.86it/s]
100%|██████████| 5/5 [00:00<00:00, 36727.71it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 20763.88it/s]
100%|██████████| 5/5 [00:00<00:00, 10.23it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16844.59it/s]
100%|██████████| 5/5 [00:00<00:00, 29127.11it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 30437.62it/s]
100%|██████████| 5/5 [00:00<00:00, 32313.59it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16082.45it/s]
100%|██████████| 5/5 [00:00<00:00,  5.26it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 21098.11it/s]
100%|██████████| 5/5 [00:00<00:00, 22429.43it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 17955.07it/s]
100%|██████████| 5/5 [00:00<00:00, 31347.56it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 28532.68it/s]
100%|██████████| 5/5 [00:00<00:00,  5.11it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 19082.37it/s]
100%|██████████| 5/5 [00:00<00:00, 29371.88it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 10200.16it/s]
100%|██████████| 5/5 [00:00<00:00, 11.00it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 27924.79it/s]
100%|██████████| 5/5 [00:00<00:00, 26613.60it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 16294.89it/s]
100%|██████████| 5/5 [00:00<00:00,  5.22it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 12686.94it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 33235.37it/s]
100%|██████████| 5/5 [00:00<00:00, 27235.74it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 27557.84it/s]
100%|██████████| 5/5 [00:00<00:00,  5.29it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 18591.77it/s]
100%|██████████| 5/5 [00:00<00:00,  5.30it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 18267.87it/s]
100%|██████████| 5/5 [00:00<00:00, 27630.46it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 17447.19it/s]
100%|██████████| 5/5 [00:00<00:00,  5.19it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 12066.47it/s]
100%|██████████| 5/5 [00:00<00:00, 28728.11it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 35128.17it/s]
100%|██████████| 5/5 [00:00<00:00,  5.10it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 31207.62it/s]
100%|██████████| 5/5 [00:00<00:00, 27271.16it/s]


writing out


100%|██████████| 5/5 [00:00<00:00, 15923.71it/s]
100%|██████████| 5/5 [00:00<00:00, 10.69it/s]


writing out


## Check results

In [6]:
# Sanity check: read the 'sample' variable back from one written file and show its shape.
path_data = 'data/L_32_new/input_ds_for_simple_nn_T10_L_32.nc'
# Open the file in a context manager so the handle is closed once the data is in memory.
with nc.Dataset(path_data) as nc_init:
    # .filled() converts the masked array returned by netCDF4 into a plain ndarray.
    arr = nc_init['sample'][:].filled()
arr.shape

(256, 2632)