In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
import re
import netCDF4 as nc

import os
import sys
from pathlib import Path

# Switch the working directory to the parent of sys.path[0] before importing the
# project code.
# NOTE(review): this presumably makes the repository root the cwd so that the
# `modules` package and the relative "data" paths used below resolve — confirm.
os.chdir(Path(sys.path[0]).parent)
import modules.utils as utils

import multiprocessing as mp

### Without parallel computing

In [None]:
Directory = "data"
L = 32  # coarse-graining factor handed to utils.coarse_array

# Only the "uxy" files are used to discover which time indices exist on disk.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# re.split with a capturing group returns [text, number, text, number, ...], so
# element 3 is the second integer found in the file name; it is used as the time
# index. The set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

for t in times:
    # Full-resolution fields for this time index.
    u_ds = utils.concatenate_alt(Directory, 'u', t)
    v_ds = utils.concatenate_alt(Directory, 'v', t)
    w_ds = utils.concatenate_alt(Directory, 'w', t)
    theta_ds = utils.concatenate_alt(Directory, 'theta', t)
    assert u_ds.shape == v_ds.shape == w_ds.shape == theta_ds.shape, 'u,v,w,theta have different shape'

    # Coarse version of every input field.
    # NOTE(review): coarse_array presumably averages over blocks of size L — confirm in modules.utils.
    u_coarse = utils.coarse_array(u_ds, L)
    v_coarse = utils.coarse_array(v_ds, L)
    w_coarse = utils.coarse_array(w_ds, L)
    theta_coarse = utils.coarse_array(theta_ds, L)

    # Target: coarse-grained product w*theta.
    wtheta_ds = w_ds * theta_ds
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # Extra input named "tke": for each velocity component, coarse(x*x) - coarse(x)**2,
    # summed over u, v and w (no 1/2 factor is applied here).
    tke_ds = (
        utils.coarse_array(u_ds*u_ds, L) - u_coarse*u_coarse
        + utils.coarse_array(v_ds*v_ds, L) - v_coarse*v_coarse
        + utils.coarse_array(w_ds*w_ds, L) - w_coarse*w_coarse
    )
    tke_in = utils.variable_samples(tke_ds)

    variables = ['u', 'v', 'w', 'theta']  # add 's'
    datasets = [u_coarse, v_coarse, w_coarse, theta_coarse]  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column layout of the written table: inputs, then tke, then the wtheta target.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    variables.extend(['tke', 'wtheta'])
    print(variables)

    # The trailing positional 4 is forwarded unchanged; its meaning is defined by
    # utils.write_nc_file.
    utils.write_nc_file(tot_ds, L, variables, t, 4)

### With synchronous parallel computing

In [2]:
Directory = "data"  #"/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
L = 32  # coarse-graining factor handed to utils.coarse_array

# Only the "uxy" files are used to discover which time indices exist on disk.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# Element 3 of the split is the second integer in the file name (the time index);
# the set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

for t in times:
    variables = ['u', 'v', 'w', 'theta']  # add 's'

    # One pool per time step, shared by both stages. pool.apply would block on each
    # task in turn (no parallelism at all); starmap is still a blocking call but
    # spreads the tasks over the workers and returns the results in input order.
    # The context manager tears the pool down even if the shape check fails.
    with mp.Pool(mp.cpu_count()) as pool:
        raw_ds = pool.starmap(utils.concatenate_alt, [(Directory, name, t) for name in variables])

        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape ,'u,v,w,theta have different shape'

        coarse_ds = pool.starmap(utils.coarse_array, [(ds, L) for ds in raw_ds])

    # Target: coarse-grained product w*theta (indices follow the order of `variables`).
    wtheta_ds = raw_ds[2]*raw_ds[3]
    output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

    # Extra input named "tke": for each velocity component, coarse(x*x) - coarse(x)**2,
    # summed over u, v and w (no 1/2 factor is applied here).
    tke_ds = (
        utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
        + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
        + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
    )
    tke_in = utils.variable_samples(tke_ds)

    datasets = list(coarse_ds)  # add 's'
    input_ds = utils.input_dataset(datasets)
    # Column layout of the written table: inputs, then tke, then the wtheta target.
    tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

    variables.extend(['tke', 'wtheta'])
    print(variables)

    # The trailing positional 4 is forwarded unchanged; its meaning is defined by
    # utils.write_nc_file.
    utils.write_nc_file(tot_ds, L, variables, t, 4)

### With asynchronous parallel computing

In [2]:
Directory = "/glade/scratch/sshamekh/LES_512_ug16wtspt01_data"
coarse_factors=[16,64,128]  # one output file is written per factor and per time index

# Only the "uxy" files are used to discover which time indices exist on disk.
files = [name for name in os.listdir(Directory) if "uxy" in name]
# Element 3 of the split is the second integer in the file name (the time index);
# the set removes duplicates before sorting.
times = sorted({int(re.split(r'(\d+)', name)[3]) for name in files})

variables = ['u', 'v', 'w', 'theta', 's']  # add 's' to predict tracer flux
# Names of the columns actually written: the inputs plus the two derived fields.
output_names = variables + ['tke', 'wtheta']

# Iterate over the time indices found on disk (as the serial and synchronous cells
# do) instead of assuming they are exactly 1..len(times); trange keeps the progress bar.
for step in trange(len(times)):
    t = times[step]

    # One pool per time step, reused for loading and for every coarse factor. The
    # context manager tears it down even if the shape check or a worker raises.
    with mp.Pool(mp.cpu_count()) as pool:
        result_objects_raw = [
            pool.apply_async(utils.concatenate_alt, args=(Directory, name, t, i, False))
            for i, name in enumerate(variables)
        ]
        # Called with these extra arguments the helper returns a sequence whose
        # element 1 is the data. raw_ds[0..4] hold u, v, w, theta, s; each has
        # shape (1, lz, 512, 512), with lz usually 376.
        raw_ds = [r.get()[1] for r in result_objects_raw]

        # theta' : variations of theta instead of whole theta. The mean over the
        # last two axes is removed level by level, then the result is scaled by 300.
        # NOTE(review): 300 is presumably a reference-temperature scale — confirm.
        theta = raw_ds[3][0]
        level_mean = theta.mean(axis=(1, 2)).reshape(theta.shape[0], 1, 1)
        raw_ds[3][0] = 300*(theta - level_mean)

        assert raw_ds[0].shape == raw_ds[1].shape == raw_ds[2].shape == raw_ds[3].shape == raw_ds[4].shape ,'u,v,w,theta,s have different shape'

        # Full-resolution product w*theta'; coarse-grained below for each factor.
        wtheta_ds = raw_ds[2]*raw_ds[3]

        for L in coarse_factors:
            result_objects_coarse = [
                pool.apply_async(utils.coarse_array, args=(ds, L, i, False))
                for i, ds in enumerate(raw_ds)
            ]
            coarse_ds = [r.get()[1] for r in result_objects_coarse]

            output_ds = utils.variable_samples(utils.coarse_array(wtheta_ds, L))

            # Extra input named "tke": for each velocity component,
            # coarse(x*x) - coarse(x)**2, summed over u, v and w (no 1/2 factor).
            tke_ds = (
                utils.coarse_array(raw_ds[0]*raw_ds[0], L) - coarse_ds[0]*coarse_ds[0]
                + utils.coarse_array(raw_ds[1]*raw_ds[1], L) - coarse_ds[1]*coarse_ds[1]
                + utils.coarse_array(raw_ds[2]*raw_ds[2], L) - coarse_ds[2]*coarse_ds[2]
            )
            tke_in = utils.variable_samples(tke_ds)

            datasets = list(coarse_ds)
            input_ds = utils.input_dataset(datasets)
            # Column layout of the written table: inputs, then tke, then wtheta.
            tot_ds = np.concatenate((input_ds, tke_in, output_ds), axis=1)

            # A fresh copy of the names is passed, as the original built a new list
            # for every write.
            utils.write_nc_file(tot_ds, str(L), list(output_names), t)

100%|██████████| 63/63 [1:40:10<00:00, 95.40s/it]


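We notice that `coarse_wtheta` is very close to `coarse_w*coarse_theta`: each term is of order 10e-2, while their difference is only of order 10e-5.

In other words, the product of the coarse fields already accounts for almost all of the coarse flux, which means that it might be hard to do better with ML.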

## Check results

In [4]:
path_data = 'data/L_32/input_ds_for_simple_nn_T10_L_32.nc'
# Open the file in a context manager so the handle is released as soon as the
# samples have been copied into memory.
with nc.Dataset(path_data) as nc_init:
    # .filled() converts the masked array read from the 'sample' variable into a
    # plain ndarray; the [:, :] slice requires the data to be at least 2-D.
    arr = nc_init['sample'][:].filled()[:, :]
arr.shape

(256, 24)