# Initialization

In [1]:
from cbrain.imports import *
from cbrain.data_generator import *
from cbrain.cam_constants import *
from cbrain.losses import *
from cbrain.utils import limit_mem
from cbrain.layers import *
from cbrain.data_generator import DataGenerator

# [https://stackoverflow.com/questions/43990046/tensorflow-blas-gemm-launch-failed] If using more than 1 GPU at a time

import tensorflow as tf
# Enable on-demand memory allocation on every visible GPU. Iterating over the
# list (instead of indexing devices 0, 1 and 2) keeps this cell working on
# machines that expose fewer -- or more -- than three GPUs.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

from tensorflow import math as tfm
import tensorflow_probability as tfp
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import xarray as xr
import numpy as np
from cbrain.model_diagnostics import ModelDiagnostics
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as imag
import scipy.integrate as sin
import cartopy.crs as ccrs
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pickle
import xarray as xr

#TRAINDIR = '/local/Tom.Beucler/SPCAM_PHYS/'
TRAINDIR = '/DFS-L/DATA/pritchard/tbeucler/SPCAM/SPCAM_PHYS/'
DATADIR = '/project/meteo/w2w/A6/S.Rasp/SP-CAM/fluxbypass_aqua/'
PREFIX = '8col009_01_'
#%cd /filer/z-sv-pool12c/t/Tom.Beucler/SPCAM/CBRAIN-CAM
%cd /export/home/tbeucler/CBRAIN-CAM
# Otherwise tensorflow will use ALL your GPU RAM for no reason
#limit_mem()

/nfspool-0/home/tbeucler/CBRAIN-CAM/notebooks/tbeucler_devlog
/nfspool-0/home/tbeucler/CBRAIN-CAM


# Custom generator (all outputs minus the residual ones)

## Build custom generator class

The custom generator takes an additional argument, **out_cut_off**, which specifies the *output* indices the network will not be trained on (**var_cut_off** plays the same role for the *input* indices). **out_cut_off** is a dictionary that maps a variable name to the single integer index, within that variable, to exclude from the output vector.

In [2]:
def return_var_idxs_outputcutoff(ds, var_list, out_cut_off=None):
    """
    Locate the columns of the stacked variable dimension that belong to
    the requested variables, optionally dropping entries per variable.

    Parameters
    ----------
    ds: xarray dataset (anything exposing a ``var_names`` array works)
    var_list: list of variable names, in the order the indices are returned
    out_cut_off: optional dict {variable name: position(s)}; the position is
        relative to that variable's own block of indices and is removed
        with ``np.delete``

    Returns
    -------
    var_idxs: 1-D integer array of column indices
    """
    collected = []
    for name in var_list:
        positions = np.where(ds.var_names == name)[0]
        # Only variables listed in the cut-off dictionary lose an entry
        if out_cut_off is not None and name in out_cut_off:
            positions = np.delete(positions, out_cut_off[name])
        collected.append(positions)
    return np.concatenate(collected)

In [3]:
class DictNormalizer_outputcutoff(object):
    """Normalizer that multiplies each output by a factor taken from a dictionary.

    Parameters
    ----------
    norm_ds : normalization dataset. Kept for signature compatibility with the
        other normalizers; this class does not read it.
    var_list : list of output variable names, in output-vector order.
    dic : dict {variable name: scalar or 1-D array of scale factors}.
        Falls back to the module-level ``conversion_dict`` when None.
    out_cut_off : optional dict {variable name: position(s)} of the scale
        entries to drop for that variable, so that ``scale`` lines up with an
        output vector from which the same entries were removed.

    Attributes
    ----------
    scale : float32 1-D array, the concatenated scale factors.
    transform_arrays : dict holding ``scale`` under the key ``'scale'``.
    """
    def __init__(self, norm_ds, var_list, dic=None,out_cut_off=None):
        if dic is None: dic = conversion_dict
        # The default (None) means "cut nothing"; the original code called
        # .keys() on None and crashed in that case.
        if out_cut_off is None: out_cut_off = {}
        scale = []
        for v in var_list:
            s = np.atleast_1d(dic[v])
            # Delete the scaling factor of outputs that have been cut off
            if v in out_cut_off: s = np.delete(s, out_cut_off[v])
            scale.append(s)
        self.scale = np.concatenate(scale).astype('float32')
        self.transform_arrays = {
            'scale': self.scale,
        }

    def transform(self, x):
        """Return ``x`` multiplied element-wise by ``scale``."""
        return x * self.scale

    def inverse_transform(self, x):
        """Return ``x`` divided element-wise by ``scale``."""
        return x / self.scale

In [4]:
class DataGenerator_outputcutoff(tf.keras.utils.Sequence):
    """
    Keras ``Sequence`` serving (input, output) batches from a stacked-variable
    file, with the option of removing selected entries from the output vector.

    Pattern follows
    https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

    Parameters
    ----------
    data_fn : path of the data file. It is opened with xarray to read the
        ``vars`` array (samples along axis 0) and the variable names, then
        re-opened with h5py for batch reads unless ``xarray=True``.
    input_vars, output_vars : lists of variable names.
    norm_fn : optional path of the normalization file (opened with xarray).
    input_transform : None (default ``Normalizer()``), a 2-tuple forwarded to
        ``InputNormalizer``, or an already initialized normalizer.
    output_transform : None (default ``Normalizer()``), a dict of scale
        factors forwarded to ``DictNormalizer_outputcutoff``, or an already
        initialized normalizer.
    batch_size : number of samples per batch.
    shuffle : stored and used by ``on_epoch_end`` only.
        NOTE(review): ``__getitem__`` never reads ``self.indices``, so this
        flag does not change which rows a given batch index returns.
    xarray : keep the xarray handle instead of switching to h5py.
    var_cut_off : forwarded to ``return_var_idxs`` / ``InputNormalizer``.
    out_cut_off : dict {variable name: position} of output entries to drop.
    """

    def __init__(self, data_fn, input_vars, output_vars,
                 norm_fn=None, input_transform=None, output_transform=None,
                 batch_size=1024, shuffle=True, xarray=False, var_cut_off=None,
                out_cut_off=None):
        # Just copy over the attributes
        self.data_fn, self.norm_fn = data_fn, norm_fn
        self.input_vars, self.output_vars = input_vars, output_vars
        self.batch_size, self.shuffle = batch_size, shuffle

        # Open datasets; norm_ds is always defined so that a missing
        # normalization file can be reported explicitly below
        self.data_ds = xr.open_dataset(data_fn)
        self.norm_ds = xr.open_dataset(norm_fn) if norm_fn is not None else None

        # Number of samples and of complete batches (a trailing partial
        # batch is not counted)
        self.n_samples = self.data_ds.vars.shape[0]
        self.n_batches = self.n_samples // self.batch_size

        # Get input and output variable indices
        self.input_idxs = return_var_idxs(self.data_ds, input_vars, var_cut_off)
        self.output_idxs = return_var_idxs_outputcutoff(self.data_ds, output_vars, out_cut_off=out_cut_off)
        self.n_inputs, self.n_outputs = len(self.input_idxs), len(self.output_idxs)

        # Initialize input and output normalizers/transformers
        if input_transform is None:
            self.input_transform = Normalizer()
        elif isinstance(input_transform, tuple):
            self._require_norm_ds('input_transform')
            self.input_transform = InputNormalizer(
                self.norm_ds, input_vars, input_transform[0], input_transform[1], var_cut_off)
        else:
            self.input_transform = input_transform  # Assume an initialized normalizer is passed

        if output_transform is None:
            self.output_transform = Normalizer()
        elif isinstance(output_transform, dict):
            self._require_norm_ds('output_transform')
            self.output_transform = DictNormalizer_outputcutoff(self.norm_ds, output_vars, output_transform,
                                                                out_cut_off=out_cut_off)
        else:
            self.output_transform = output_transform  # Assume an initialized normalizer is passed

        # Now close the xarray file and load it as an h5 file instead
        # This significantly speeds up the reading of the data...
        if not xarray:
            self.data_ds.close()
            self.data_ds = h5py.File(data_fn, 'r')

    def _require_norm_ds(self, option_name):
        """Fail early, with a clear message, if a normalizer must be built without norm_fn."""
        if self.norm_ds is None:
            raise ValueError(
                '%s was given as a specification, which requires norm_fn to be set' % option_name)

    def __len__(self):
        """Number of complete batches."""
        return self.n_batches

    def __getitem__(self, index):
        """Return the normalized (X, Y) pair for the contiguous batch number ``index``."""
        # Compute start and end indices for batch
        start_idx = index * self.batch_size
        end_idx = start_idx + self.batch_size

        # Grab batch from data
        batch = self.data_ds['vars'][start_idx:end_idx]

        # Split into inputs and outputs
        X = batch[:, self.input_idxs]
        Y = batch[:, self.output_idxs]

        # Normalize
        X = self.input_transform.transform(X)
        Y = self.output_transform.transform(Y)

        return X, Y

    def on_epoch_end(self):
        """Rebuild (and optionally shuffle) ``self.indices``; see the class note on ``shuffle``."""
        self.indices = np.arange(self.n_batches)
        if self.shuffle: np.random.shuffle(self.indices)

## Build custom generator and compare to standard generator

For the manuscript's purposes, we will choose the lowest levels as the residuals for direct comparison with the reference ACnet.

### Standard generator

In [5]:
TRAINFILEQ = '8col009_01_train.nc'
VALIDFILEQ = '8col009_01_valid.nc'
NORMFILEQ = '8col009_01_norm.nc'
TESTFILEQ = '8col009_01_test.nc'

In [6]:
scale_dictQ = load_pickle('./nn_config/scale_dicts/009_Wm2_scaling.pkl')

In [7]:
in_varsQ = ['QBP', 'QCBP', 'QIBP', 'TBP', 'VBP', 
           'Qdt_adiabatic', 'QCdt_adiabatic', 'QIdt_adiabatic', 'Tdt_adiabatic', 'Vdt_adiabatic',
           'PS', 'SOLIN', 'SHFLX', 'LHFLX']
out_varsQ = ['PHQ', 'PHCLDLIQ', 'PHCLDICE', 'TPHYSTND', 'QRL', 'QRS', 'DTVKE', 
            'FSNT', 'FSNS', 'FLNT', 'FLNS', 'PRECT', 'PRECTEND', 'PRECST', 'PRECSTEN']

In [8]:
train_genQ = DataGenerator(
    data_fn = TRAINDIR+TRAINFILEQ,
    input_vars = in_varsQ,
    output_vars = out_varsQ,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=8192,
    shuffle=True
)

In [9]:
valid_genQ = DataGenerator(
    data_fn = TRAINDIR+VALIDFILEQ,
    input_vars = in_varsQ,
    output_vars = out_varsQ,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=8192,
    shuffle=True
)

In [10]:
test_genQ = DataGenerator(
    data_fn = TRAINDIR+TESTFILEQ,
    input_vars = in_varsQ,
    output_vars = out_varsQ,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=8192,
    shuffle=True
)

### Custom generator

In [11]:
in_vars_custom = ['QBP', 'QCBP', 'QIBP', 'TBP', 'VBP', 
           'Qdt_adiabatic', 'QCdt_adiabatic', 'QIdt_adiabatic', 'Tdt_adiabatic', 'Vdt_adiabatic',
           'PS', 'SOLIN', 'SHFLX', 'LHFLX']
out_vars_custom = ['PHQ', 'PHCLDLIQ', 'PHCLDICE', 'TPHYSTND', 'QRL', 'QRS', 'DTVKE', 
            'FSNT', 'FLNT', 'PRECT', 'PRECTEND', 'PRECST', 'PRECSTEN']

In [12]:
out_cut_off_low = {}

In [13]:
out_cut_off_low = {}
out_cut_off_low['PHQ'] = 29
out_cut_off_low['TPHYSTND'] = 29

In [14]:
out_cut_off_low

{'PHQ': 29, 'TPHYSTND': 29}

In [15]:
train_gen_custom = DataGenerator_outputcutoff(
    data_fn = TRAINDIR+TRAINFILEQ,
    input_vars = in_vars_custom,
    output_vars = out_vars_custom,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=1024,
    shuffle=True,
    out_cut_off=out_cut_off_low
)

In [16]:
valid_gen_custom = DataGenerator_outputcutoff(
    data_fn = TRAINDIR+VALIDFILEQ,
    input_vars = in_vars_custom,
    output_vars = out_vars_custom,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=1024,
    shuffle=True,
    out_cut_off=out_cut_off_low
)

In [17]:
test_gen_custom = DataGenerator_outputcutoff(
    data_fn = TRAINDIR+TESTFILEQ,
    input_vars = in_vars_custom,
    output_vars = out_vars_custom,
    norm_fn = TRAINDIR+NORMFILEQ,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dictQ,
    batch_size=1024,
    shuffle=True,
    out_cut_off=out_cut_off_low
)

This is where notebook 061 starts separating from [https://github.com/tbeucler/CBRAIN-CAM/blob/master/notebooks/tbeucler_devlog/060_Constraints_Post_Processing_vs_ACNet.ipynb]

# Diagnostics: Load instead of Train Neural network 

## Load models

In [18]:
# Directory holding the saved network files
path_HDF5 = '/DFS-L/DATA/pritchard/tbeucler/SPCAM/HDF5_DATA/'

# Earlier configuration, kept for reference:
#NNarray = ['UCnet_214_1.hdf5','JNNC.h5']
#FlagINC = [1,0] # Flags if the NN is missing outputs that must be calculated in post-processing
#NNname = ['UCnet_{214}','ACnet'] # TODO: Add UCnet_NL

# The five lists below are parallel: entry k of each list describes the same network.
# NNarray: file names, also used as dictionary keys for the loaded models and statistics
NNarray = ['UCnet_214_q18T28.hdf5','UCnet_214_q3T26.hdf5',
           'UCnet_214_q4T4.hdf5','UCnet_214_q5T5.hdf5',
           'UCnet_214_q8T4.hdf5']
# FlagINC: truthy when the network output must go through output_postprocessing
FlagINC = [1,1,1,1,1]
# NNname: display labels
NNname = ['UCnet_{q18T28}','UCnet_{q3T26}','UCnet_{q4T4}','UCnet_{q5T5}','UCnet_{q8T4}']
# Mcutoff / Ecutoff: levels passed as lvl_choice_mas / lvl_choice_ent to output_postprocessing
Mcutoff = [18,3,4,5,8]
Ecutoff = [28,26,4,5,4]

# Custom layer classes handed to load_model through custom_objects
dict_lay = {'SurRadLayer':SurRadLayer,'MassConsLayer':MassConsLayer,'EntConsLayer':EntConsLayer}

In [19]:
# Load every network listed in NNarray into the dictionary NN, keyed by file name.
# NOTE(review): `md` is not filled anywhere in this part of the notebook.
NN = {}; md = {};
for i,NNs in enumerate(NNarray):
    print('NN name is ',NNs)
    path = path_HDF5+NNs
    NN[NNs] = load_model(path,custom_objects=dict_lay)
# After the loop `NNs` stays bound to the LAST entry of NNarray; the single-batch
# cells below that call NN[NNs] therefore use that last network.

NN name is  UCnet_214_q18T28.hdf5
NN name is  UCnet_214_q3T26.hdf5
NN name is  UCnet_214_q4T4.hdf5
NN name is  UCnet_214_q5T5.hdf5
NN name is  UCnet_214_q8T4.hdf5


In [20]:
gen = test_gen_custom

In [21]:
sample = 5
inp = gen[sample][0]
truth = gen[sample][1]

In [22]:
NN[NNs].predict_on_batch(inp)

<tf.Tensor: id=4663, shape=(1024, 214), dtype=float32, numpy=
array([[-8.6733829e-03, -3.2105796e-02,  6.1838023e-02, ...,
        -7.1555565e+01, -2.6726155e+00, -1.8707504e+01],
       [ 7.8717116e-03, -8.9696702e-04,  6.5471850e-02, ...,
        -3.7608677e+01, -1.3191630e+00, -3.3747487e+00],
       [ 2.4816066e-02,  7.1093719e-04,  1.4633943e-01, ...,
         1.2512340e+01, -4.3135433e+00, -3.3440247e+00],
       ...,
       [-9.5730033e-03, -8.7316651e-03, -1.4988841e-01, ...,
        -1.4162773e+00, -1.7971500e+00, -2.1660233e+00],
       [-1.4475256e-02, -3.4214780e-03, -1.5505002e-01, ...,
        -1.9624866e+00, -1.7847141e+00, -2.0972819e+00],
       [-1.4197432e-02, -9.6753072e-03, -1.3704923e-01, ...,
        -3.3366709e+00, -1.4479843e+00, -3.1109478e+00]], dtype=float32)>

## Physical constraints layer in Numpy

In [21]:
def compute_dP_tilde(PS, PS_div, PS_sub, norm_q, hyai, hybi):
    """
    Computes dP_tilde in Tom's notation.

    PS is the normalized surface pressure as it is used in the input;
    PS_div and PS_sub are the values used to un-normalize it
    (PS * PS_div + PS_sub).
    hyai and hybi are combined with the module-level constant P0 into
    interface pressures P0 * hyai + PS * hybi, whose differences along
    axis 1 give the layer thicknesses.
    The thicknesses are divided by norm_q * G / L_V.
    WARNING (from the original author): really not sure about norm_q,
    nor about why L_V appears in the normalization.
    """
    surface_pressure = PS * PS_div + PS_sub
    interface_pressure = P0 * hyai + surface_pressure[:, None] * hybi
    layer_thickness = interface_pressure[:, 1:] - interface_pressure[:, :-1]
    normalization = norm_q * G / L_V
    return layer_thickness / normalization

In [22]:
def SurRadLayer_np(inp,prior,inp_sub,inp_div,norm_q):
    """
    NumPy version of the surface-radiation layer: inserts FSNS and FLNS,
    computed as residuals of the column radiative budgets, into `prior`.

    inp: normalized input batch (column 300 is read as PS)
    prior: network output without FSNS/FLNS and without the two Q/T
        residual levels (columns are read up to index 209)
    inp_sub, inp_div: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    Reads `hyai` and `hybi` from the enclosing namespace.

    Returns `prior` with two extra columns: FSNS placed right after FSNT,
    FLNS placed right after FLNT.
    """
    # Input column
    PS_idx = 300
    # Output columns (shifted because the Q and T residual levels are absent)
    QRL_idx = slice(118, 148)
    QRS_idx = slice(148, 178)
    FSNT_idx = 208
    FLNT_idx = 209

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Column integrals of the shortwave and longwave heating
    SWINT = np.sum(dP_tilde * prior[:, QRS_idx], axis=1)
    LWINT = np.sum(dP_tilde * prior[:, QRL_idx], axis=1)

    # Surface fluxes as residuals
    FSNS = prior[:, FSNT_idx] - SWINT
    FLNS = prior[:, FLNT_idx] + LWINT

    # Re-assemble: [... FSNT] + FSNS + FLNT + FLNS + [rest]
    pieces = (
        prior[:, :FLNT_idx],
        FSNS[:, None],
        prior[:, FLNT_idx][:, None],
        FLNS[:, None],
        prior[:, (FLNT_idx + 1):],
    )
    return np.concatenate(pieces, axis=1)

In [23]:
def MassConsLayer_choice_np(inp,prior,lvl_choice,inp_sub,inp_div,norm_q):
    """
    NumPy version of the mass-conservation layer: computes the water vapor
    tendency at level `lvl_choice` as the residual of the column water
    budget and inserts it into `prior` at column `lvl_choice`.

    inp: normalized input batch (columns 300 and 303 are read as PS, LHFLX)
    prior: output of SurRadLayer_np (columns are read up to index 213)
    lvl_choice: level of the missing water vapor tendency
    inp_sub, inp_div: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    Reads `hyai` and `hybi` from the enclosing namespace.

    Returns `prior` with one extra column.
    """
    # Input columns
    PS_idx = 300
    LHFLX_idx = 303
    # Output columns (water vapor only has 29 entries at this stage)
    PHCLDLIQ_idx = slice(29, 59)
    PHCLDICE_idx = slice(59, 89)
    PRECT_idx = 212
    PRECTEND_idx = 213

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Column integral of the cloud liquid + cloud ice tendencies
    cloud_tendency = prior[:, PHCLDLIQ_idx] + prior[:, PHCLDICE_idx]
    CLDINT = np.sum(dP_tilde * cloud_tendency, axis=1)

    # Column integral of water vapor without the residual level. The pressure
    # vector has one more entry than the vapor vector, hence the index shift
    # for the levels located after lvl_choice.
    vapor_before = np.sum(dP_tilde[:, :lvl_choice] * prior[:, :lvl_choice], 1)
    vapor_after = np.sum(dP_tilde[:, lvl_choice+1:30] * prior[:, lvl_choice:29], 1)
    VAPINT = vapor_before + vapor_after

    # Forcing terms: un-normalized latent heat flux and precipitation
    LHFLX = inp[:, LHFLX_idx] * inp_div[LHFLX_idx] + inp_sub[LHFLX_idx]
    PREC = prior[:, PRECT_idx] + prior[:, PRECTEND_idx]

    # Residual water vapor tendency at lvl_choice
    PHQ_LVL = (LHFLX - PREC - CLDINT - VAPINT) / dP_tilde[:, lvl_choice]

    # Insert the residual at its level
    return np.concatenate(
        [prior[:, :lvl_choice], PHQ_LVL[:, None], prior[:, lvl_choice:]],
        axis=1)

In [24]:
def EntConsLayer_choice_np(inp,prior,lvl_choice,inp_sub,inp_div,norm_q):
    """
    NumPy version of the enthalpy-conservation layer: computes the
    temperature tendency at level `lvl_choice` as the residual of the column
    enthalpy budget and inserts it into `prior` at column 90 + lvl_choice.

    inp: normalized input batch (columns 300, 302, 303 are read as PS,
        SHFLX, LHFLX)
    prior: output of MassConsLayer_choice_np (columns are read up to
        index 216)
    lvl_choice: level of the missing temperature tendency
    inp_sub, inp_div: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    Reads `hyai`, `hybi`, `L_I`, `L_V` and `L_S` from the enclosing namespace.

    Returns `prior` with one extra column. Every reduction is done with
    NumPy; the previous version used the Keras backend (`K.sum`) for one of
    the two temperature integrals, which mixed tensors with ndarrays.
    """
    # Define variable indices here
    # Input
    PS_idx = 300
    SHFLX_idx = 302
    LHFLX_idx = 303

    # Output
    PHQ_idx = slice(0, 30)
    PHCLDLIQ_idx = slice(30, 60)
    Tbef_idx = slice(90, 90+lvl_choice)  # temperature levels before the residual
    DTVKE_idx = slice(179, 209)
    FSNT_idx = 209
    FSNS_idx = 210
    FLNT_idx = 211
    FLNS_idx = 212
    PRECT_idx = 213
    PRECTEND_idx = 214
    PRECST_idx = 215
    PRECSTEND_idx = 216

    # 1. Compute dP_tilde
    dP_tilde = compute_dP_tilde(
        inp[:, PS_idx],
        inp_div[PS_idx], inp_sub[PS_idx],
        norm_q, hyai, hybi
    )

    # 2. Compute net energy input from phase change and precipitation
    PHAS = L_I / L_V * (
            (prior[:, PRECST_idx] + prior[:, PRECSTEND_idx]) -
            (prior[:, PRECT_idx] + prior[:, PRECTEND_idx])
    )

    # 3. Compute net energy input from radiation, SHFLX and TKE
    RAD = (prior[:, FSNT_idx] - prior[:, FSNS_idx] -
           prior[:, FLNT_idx] + prior[:, FLNS_idx])
    SHFLX = (inp[:, SHFLX_idx] * inp_div[SHFLX_idx] +
             inp_sub[SHFLX_idx])
    KEDINT = np.sum(dP_tilde * prior[:, DTVKE_idx], 1)

    # 4. Compute tendency of vapor due to phase change
    LHFLX = (inp[:, LHFLX_idx] * inp_div[LHFLX_idx] +
             inp_sub[LHFLX_idx])
    VAPINT = np.sum(dP_tilde * prior[:, PHQ_idx], 1)
    SPDQINT = (VAPINT - LHFLX) * L_S / L_V

    # 5. Same for cloud liquid water tendency
    SPDQCINT = np.sum(dP_tilde * prior[:, PHCLDLIQ_idx], 1) * L_I / L_V

    # 6. Same for T, skipping the residual level: the pressure vector has one
    # more entry than the temperature vector, hence the shifted second slice
    DTINT = np.sum(dP_tilde[:, :lvl_choice] *
                   prior[:, Tbef_idx], 1) + \
            np.sum(dP_tilde[:, lvl_choice+1:30] *
                   prior[:, 90+lvl_choice:119], 1)

    # 7. Compute the temperature tendency at lvl_choice as residual
    DT_LVL = (
                   PHAS + RAD + SHFLX + KEDINT - SPDQINT - SPDQCINT - DTINT
           ) / dP_tilde[:, lvl_choice]

    # 8. Concatenate output vector
    post = np.concatenate([
        prior[:, :(90+lvl_choice)], DT_LVL[:, None],
        prior[:, (90+lvl_choice):]
    ], axis=1)

    return post

In [25]:
def mass_res_diagno(inp_div,inp_sub,norm_q,inp,pred):
    """
    Squared residual of the column water budget, one value per sample.

    inp_div, inp_sub: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    inp: normalized input batch (columns 300 and 303 are read as PS, LHFLX)
    pred: full output batch (columns are read up to index 215)
    Reads `hyai` and `hybi` from the enclosing namespace.
    """
    # Input columns
    PS_idx = 300
    LHFLX_idx = 303

    # Output columns
    PHQ_idx = slice(0, 30)
    PHCLDLIQ_idx = slice(30, 60)
    PHCLDICE_idx = slice(60, 90)
    PRECT_idx = 214
    PRECTEND_idx = 215

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Column integral of vapor + cloud liquid + cloud ice tendencies
    water_tendency = pred[:, PHQ_idx] + pred[:, PHCLDLIQ_idx] + pred[:, PHCLDICE_idx]
    WATINT = np.sum(dP_tilde * water_tendency, axis=1)

    # Forcing terms: un-normalized latent heat flux and precipitation
    LHFLX = inp[:, LHFLX_idx] * inp_div[LHFLX_idx] + inp_sub[LHFLX_idx]
    PREC = pred[:, PRECT_idx] + pred[:, PRECTEND_idx]

    # Water mass residual, squared
    return np.square(LHFLX - PREC - WATINT)

def ent_res_diagno(inp_div,inp_sub,norm_q,inp,pred):
    """
    Squared residual of the column enthalpy budget, one value per sample.

    inp_div, inp_sub: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    inp: normalized input batch (columns 300, 302, 303 are read as PS,
        SHFLX, LHFLX)
    pred: full output batch (columns are read up to index 217)
    Reads `hyai`, `hybi`, `L_I`, `L_V` and `L_S` from the enclosing namespace.
    """
    # Input columns
    PS_idx, SHFLX_idx, LHFLX_idx = 300, 302, 303

    # Output columns
    PHQ_idx = slice(0, 30)
    PHCLDLIQ_idx = slice(30, 60)
    TPHYSTND_idx = slice(90, 120)
    DTVKE_idx = slice(180, 210)
    FSNT_idx, FSNS_idx, FLNT_idx, FLNS_idx = 210, 211, 212, 213
    PRECT_idx, PRECTEND_idx, PRECST_idx, PRECSTEND_idx = 214, 215, 216, 217

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Net energy input from phase change and precipitation
    snow_terms = pred[:, PRECST_idx] + pred[:, PRECSTEND_idx]
    precip_terms = pred[:, PRECT_idx] + pred[:, PRECTEND_idx]
    PHAS = L_I / L_V * (snow_terms - precip_terms)

    # Net energy input from radiation, sensible heat flux and TKE dissipation
    RAD = (pred[:, FSNT_idx] - pred[:, FSNS_idx] -
           pred[:, FLNT_idx] + pred[:, FLNS_idx])
    SHFLX = inp[:, SHFLX_idx] * inp_div[SHFLX_idx] + inp_sub[SHFLX_idx]
    KEDINT = np.sum(dP_tilde * pred[:, DTVKE_idx], 1)

    # Vapor tendency due to phase change
    LHFLX = inp[:, LHFLX_idx] * inp_div[LHFLX_idx] + inp_sub[LHFLX_idx]
    VAPINT = np.sum(dP_tilde * pred[:, PHQ_idx], 1)
    SPDQINT = (VAPINT - LHFLX) * L_S / L_V

    # Cloud liquid water tendency
    SPDQCINT = np.sum(dP_tilde * pred[:, PHCLDLIQ_idx], 1) * L_I / L_V

    # Temperature tendency (all levels are available here)
    DTINT = np.sum(dP_tilde * pred[:, TPHYSTND_idx], 1)

    # Enthalpy residual, squared
    ENTRES = SPDQINT + SPDQCINT + DTINT - RAD - SHFLX - PHAS - KEDINT
    return np.square(ENTRES)

def lw_res_diagno(inp_div,inp_sub,norm_q,inp,pred):
    """
    Squared residual of the column longwave budget, one value per sample.

    inp_div, inp_sub: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    inp: normalized input batch (column 300 is read as PS)
    pred: full output batch (columns 120-149, 212 and 213 are read)
    Reads `hyai` and `hybi` from the enclosing namespace.
    """
    # Input column
    PS_idx = 300

    # Output columns
    QRL_idx = slice(120, 150)
    FLNT_idx = 212
    FLNS_idx = 213

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Column integral of the longwave heating
    LWINT = np.sum(dP_tilde * pred[:, QRL_idx], axis=1)

    # Net longwave flux from the surface and top-of-atmosphere fluxes
    LWNET = pred[:, FLNS_idx] - pred[:, FLNT_idx]

    # Longwave residual, squared
    return np.square(LWINT - LWNET)

def sw_res_diagno(inp_div,inp_sub,norm_q,inp,pred):
    """
    Squared residual of the column shortwave budget, one value per sample.

    inp_div, inp_sub: arrays used to un-normalize the input columns
    norm_q: normalization passed on to compute_dP_tilde
    inp: normalized input batch (column 300 is read as PS)
    pred: full output batch (columns 150-179, 210 and 211 are read)
    Reads `hyai` and `hybi` from the enclosing namespace.
    """
    # Input column
    PS_idx = 300

    # Output columns
    QRS_idx = slice(150, 180)
    FSNT_idx = 210
    FSNS_idx = 211

    # Normalized pressure thickness of each layer
    dP_tilde = compute_dP_tilde(inp[:, PS_idx], inp_div[PS_idx], inp_sub[PS_idx],
                                norm_q, hyai, hybi)

    # Column integral of the shortwave heating
    SWINT = np.sum(dP_tilde * pred[:, QRS_idx], axis=1)

    # Net shortwave flux from the top-of-atmosphere and surface fluxes
    SWNET = pred[:, FSNT_idx] - pred[:, FSNS_idx]

    # Shortwave residual, squared
    return np.square(SWINT - SWNET)

def tot_res_diagno(inp_div,inp_sub,norm_q,inp,pred):
    """Average of the four squared residuals (mass, enthalpy, longwave, shortwave)."""
    args = (inp_div, inp_sub, norm_q, inp, pred)
    mass_sq = mass_res_diagno(*args)
    ent_sq = ent_res_diagno(*args)
    lw_sq = lw_res_diagno(*args)
    sw_sq = sw_res_diagno(*args)
    return 0.25*(mass_sq + ent_sq + lw_sq + sw_sq)

## Test: Can we close the budgets using UCnet's predictions?

### One batch using the custom generator

In [28]:
gen = test_gen_custom

In [29]:
sample = 5
inp = gen[sample][0]
truth = gen[sample][1]

In [30]:
pred = NN[NNs].predict_on_batch(inp)

In [31]:
pred.numpy()

array([[-8.6733829e-03, -3.2105796e-02,  6.1838023e-02, ...,
        -7.1555565e+01, -2.6726155e+00, -1.8707504e+01],
       [ 7.8717116e-03, -8.9696702e-04,  6.5471850e-02, ...,
        -3.7608677e+01, -1.3191630e+00, -3.3747487e+00],
       [ 2.4816066e-02,  7.1093719e-04,  1.4633943e-01, ...,
         1.2512340e+01, -4.3135433e+00, -3.3440247e+00],
       ...,
       [-9.5730033e-03, -8.7316651e-03, -1.4988841e-01, ...,
        -1.4162773e+00, -1.7971500e+00, -2.1660233e+00],
       [-1.4475256e-02, -3.4214780e-03, -1.5505002e-01, ...,
        -1.9624866e+00, -1.7847141e+00, -2.0972819e+00],
       [-1.4197432e-02, -9.6753072e-03, -1.3704923e-01, ...,
        -3.3366709e+00, -1.4479843e+00, -3.1109478e+00]], dtype=float32)

In [32]:
se = (pred.numpy()-truth)**2

In [33]:
se.shape

(1024, 214)

In [34]:
se.mean()

117.43607

### Now use the q generator

In [26]:
gen = test_genQ

In [27]:
sample = 250
inpQ = gen[sample][0]
truthQ = gen[sample][1]

In [28]:
out_cut_off_low = {}
out_cut_off_low['PHQ'] = 29
out_cut_off_low['TPHYSTND'] = 29

In [29]:
prior = np.array(NN[NNs].predict_on_batch(inpQ))

In [30]:
postrad = SurRadLayer_np(inpQ,prior,gen.input_transform.sub,gen.input_transform.div,gen.output_transform.scale[:30])

In [31]:
postmas = MassConsLayer_choice_np(inpQ,postrad,out_cut_off_low['PHQ'],
                                  gen.input_transform.sub,gen.input_transform.div,
                                  gen.output_transform.scale[:30])

In [32]:
postent = EntConsLayer_choice_np(inpQ,postmas,out_cut_off_low['TPHYSTND'],
                                 gen.input_transform.sub,gen.input_transform.div,
                                 gen.output_transform.scale[:30])

In [33]:
def output_postprocessing(output_preprocessing,inp,inp_sub,inp_div,out_scale_PHQ,lvl_choice_mas,lvl_choice_ent):
    """
    Chain the three NumPy constraint layers on a raw network output.

    output_preprocessing: raw network output
    inp: normalized input batch
    inp_sub, inp_div: arrays used to un-normalize the input columns
    out_scale_PHQ: normalization handed to each layer as norm_q
    lvl_choice_mas: residual level for MassConsLayer_choice_np
    lvl_choice_ent: residual level for EntConsLayer_choice_np

    Returns the output of EntConsLayer_choice_np.
    """
    # Step 1: surface radiative fluxes
    after_radiation = SurRadLayer_np(inp, output_preprocessing,
                                     inp_sub, inp_div, out_scale_PHQ)
    # Step 2: water vapor residual level
    after_mass = MassConsLayer_choice_np(inp, after_radiation, lvl_choice_mas,
                                         inp_sub, inp_div, out_scale_PHQ)
    # Step 3: temperature residual level
    return EntConsLayer_choice_np(inp, after_mass, lvl_choice_ent,
                                  inp_sub, inp_div, out_scale_PHQ)

In [34]:
post = output_postprocessing(prior,inpQ,gen.input_transform.sub,gen.input_transform.div,
                             gen.output_transform.scale[:30],
                             out_cut_off_low['PHQ'],
                             out_cut_off_low['TPHYSTND'])

In [35]:
post.shape

(8192, 218)

In [36]:
postent.shape

(8192, 218)

In [37]:
se = ((post-truthQ)**2).mean() 

In [38]:
se

144.65526

### Multiple batches

In [93]:
dataset = 'test'

In [94]:
# Pick the standard generator matching `dataset`. An unknown name is an error:
# silently keeping whatever `gen` was bound to before would compute the
# statistics on the wrong data and save them under a misleading file name.
if dataset=='test': gen = test_genQ
elif dataset=='valid': gen = valid_genQ
elif dataset=='train': gen = train_genQ
else: raise ValueError("dataset must be 'test', 'valid' or 'train', got %r" % (dataset,))

In [95]:
# Accumulators, one entry per network (keyed by file name)
TRES = {}      # physical-constraint residuals
PSUM = {}      # running sum of predictions
TSUM = {}      # running sum of truth
PSQSUM = {}    # running sum of squared predictions
TSQSUM = {}    # running sum of squared truth
SSE = {}       # running sum of squared errors

# Statistics derived from the accumulators
PRED_MEAN = {}
TRUE_MEAN = {}
TRUE_SQMEAN = {}
MSE_SSE = {}
TRUE_VAR = {}
R2 = {}

In [96]:
# Accumulate, batch by batch and for every network, the constraint residuals
# and the running sums needed for the MSE and R2 statistics.
#
# Changes with respect to the previous version of this cell:
#  * each batch is read once (it used to be read three times per iteration);
#  * the loop stops at len(gen) instead of relying on an out-of-range slice
#    coming back empty, so a trailing partial batch can no longer break the
#    element-wise accumulation;
#  * residual chunks are collected in lists and concatenated once at the end
#    instead of re-copying the growing array at every batch;
#  * the unused per-batch `se` array is no longer computed.
spl = 0  # batch counter; after the loop it equals the number of batches processed
n_batches_total = len(gen)
tres_chunks = {nn_key: [] for nn_key in NNarray}

while spl < n_batches_total:

    print('spl=',spl,'                  ',end='\r')

    inp, truth = gen[spl]

    for iNNs,NNs in enumerate(NNarray):

        prior = np.array(NN[NNs].predict_on_batch(inp))
        if FlagINC[iNNs]:
            # Network lacks the residual outputs: rebuild them in post-processing
            pred = output_postprocessing(prior,inp,gen.input_transform.sub,
                                         gen.input_transform.div,
                                         gen.output_transform.scale[:30],
                                         Mcutoff[iNNs],Ecutoff[iNNs])
        else: pred=prior

        tresid = tot_res_diagno(gen.input_transform.div,
                                gen.input_transform.sub,
                                gen.output_transform.scale[:30],
                                inp,pred)
        tres_chunks[NNs].append(tresid)

        if spl==0:
            # First batch: (re)initialize the running sums
            PSUM[NNs] = pred
            TSUM[NNs] = truth
            PSQSUM[NNs] = pred**2
            TSQSUM[NNs] = truth**2
            SSE[NNs] = (truth-pred)**2
        else:
            PSUM[NNs] = PSUM[NNs] + pred
            TSUM[NNs] = TSUM[NNs] + truth
            PSQSUM[NNs] = PSQSUM[NNs] + pred**2
            TSQSUM[NNs] = TSQSUM[NNs] + truth**2
            SSE[NNs] = SSE[NNs] + (truth-pred)**2

    spl += 1

# One residual array per network, batches stacked along axis 0
for nn_key, chunks in tres_chunks.items():
    if chunks: TRES[nn_key] = np.concatenate(chunks, axis=0)

spl= 5747                   

In [97]:
# Turn the running sums into per-network statistics.
# `spl` is the number of batches accumulated by the loop above, so every
# division by `spl` is an average over batches.
# NOTE(review): the sums keep the shape of one batch, so these means look like
# they are taken per within-batch position rather than over all samples -- confirm
# that the downstream consumer averages over the first axis.
for iNNs,NNs in enumerate(NNarray):
    print('NNs=',NNs)
    
    PRED_MEAN[NNs] = PSUM[NNs]/spl
    TRUE_MEAN[NNs] = TSUM[NNs]/spl
    
    TRUE_SQMEAN[NNs] = TSQSUM[NNs]/spl
    
    MSE_SSE[NNs] = SSE[NNs]/spl
    # Variance of the truth as E[x^2] - E[x]^2
    TRUE_VAR[NNs] = TRUE_SQMEAN[NNs] - (TRUE_MEAN[NNs]**2)
    # Entries whose TRUE_VAR is zero divide by zero here (inf/nan plus a RuntimeWarning)
    R2[NNs] = 1-(MSE_SSE[NNs]/TRUE_VAR[NNs])

NNs= UCnet_214_q18T28.hdf5
NNs= UCnet_214_q3T26.hdf5
NNs= UCnet_214_q4T4.hdf5
NNs= UCnet_214_q5T5.hdf5
NNs= UCnet_214_q8T4.hdf5


  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()


In [98]:
pathPKL = '/DFS-L/DATA/pritchard/tbeucler/SPCAM/HDF5_DATA/'

In [99]:
# Bundle the diagnostics and save them. The dictionary is built before the
# file is opened, and the context manager closes the file even if pickling
# fails. The file name (double underscore included) is unchanged so that
# existing loaders keep working.
S = {"TRES":TRES,"MSE_SSE":MSE_SSE,"NNarray":NNarray,
    "R2":R2}
with open(pathPKL+'2021_01_09_'+dataset+'_custom_'+'_061.pkl','wb') as hf:
    pickle.dump(S,hf)