In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os, sys, glob, string, argparse
sys.path.append('/global/cfs/cdirs/m4334/jerry/climsim3_dev')
from climsim_utils.data_utils import *



In [2]:
# Open the grid description, the v2_rh_mc per-variable input statistics and the
# output scaling file that data_utils is constructed from.
grid_info = xr.open_dataset(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/grid_info/ClimSim_low-res_grid-info.nc'
)
input_mean = xr.open_dataset(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/inputs/input_mean_v2_rh_mc_pervar.nc'
)
input_max = xr.open_dataset(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/inputs/input_max_v2_rh_mc_pervar.nc'
)
input_min = xr.open_dataset(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/inputs/input_min_v2_rh_mc_pervar.nc'
)
output_scale = xr.open_dataset(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/outputs/output_scale_std_lowerthred_v6.nc'
)
# Comma-delimited text table; loaded here but not referenced by any later cell
# shown in this notebook.
lbd_qn = np.loadtxt(
    '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/inputs/qn_exp_lambda_large.txt',
    delimiter=',',
)

# data_utils is provided by the star import from climsim_utils.data_utils.
data = data_utils(
    grid_info=grid_info,
    input_mean=input_mean,
    input_max=input_max,
    input_min=input_min,
    output_scale=output_scale,
    qinput_log=False,
    normalize=False,
)
data.set_to_v2_rh_mc_vars()

# NOTE(review): write=False presumably returns the arrays without writing any
# file -- confirm against data_utils.save_norm.
input_sub, input_div, out_scale = data.save_norm(write=False)
# All three normalization arrays are expected to be double precision.
assert all(arr.dtype == np.float64 for arr in (input_sub, input_div, out_scale))

# (variable name, number of values per sample) for every input variable, in
# the order data.input_vars lists them.
input_var_pairs = [(name, data.var_lens[name]) for name in data.input_vars]

In [3]:
# Display the (variable name, length) pairs built in the setup cell; the column
# slices used for the statistics below are derived from this ordering.
input_var_pairs

[('state_t', 60),
 ('state_rh', 60),
 ('state_qn', 60),
 ('liq_partition', 60),
 ('state_u', 60),
 ('state_v', 60),
 ('pbuf_ozone', 60),
 ('pbuf_CH4', 60),
 ('pbuf_N2O', 60),
 ('state_ps', 1),
 ('pbuf_SOLIN', 1),
 ('pbuf_LHFLX', 1),
 ('pbuf_SHFLX', 1),
 ('pbuf_TAUX', 1),
 ('pbuf_TAUY', 1),
 ('pbuf_COSZRS', 1),
 ('cam_in_ALDIF', 1),
 ('cam_in_ALDIR', 1),
 ('cam_in_ASDIF', 1),
 ('cam_in_ASDIR', 1),
 ('cam_in_LWUP', 1),
 ('cam_in_ICEFRAC', 1),
 ('cam_in_LANDFRAC', 1),
 ('cam_in_OCNFRAC', 1),
 ('cam_in_SNOWHICE', 1),
 ('cam_in_SNOWHLAND', 1)]

In [4]:
# Each sub-folder of the training set holds one 'train_input.npy' shard.
train_set_path = '/pscratch/sd/j/jerrylin/hugging/E3SM-MMF_ne4/preprocessing/v2_rh_mc/train_set/'
sub_folders = ['11', '12', '21', '22', '31', '32', '41', '42', '51', '52', '61', '62', '71', '72']
shard_files = [
    os.path.join(train_set_path, sub_folder, 'train_input.npy')
    for sub_folder in sub_folders
]
# Stack the shards along axis 0 (np.concatenate default) and promote to float64
# so the statistics below are accumulated in double precision.
train_input = np.concatenate(
    [np.load(shard_file) for shard_file in shard_files]
).astype(np.float64)

In [None]:
# Normalization statistics for every input variable, reduced over all rows of
# `train_input` (axis 0).
#
# Column layout is assumed to follow the order of data.input_vars shown in the
# listing above (TODO confirm against the preprocessing that wrote
# train_input.npy):
#     0:60  state_t    |  60:120 state_rh   | 120:180 state_qn
#   180:240 liq_partition (not touched here; the dataset cell assigns it a
#           fixed sub of 0 and div of 1)
#   240:300 state_u    | 300:360 state_v    | 360:420 pbuf_ozone
#   420:480 pbuf_CH4   | 480:540 pbuf_N2O   | 540..556 single-value variables
cols_state_t = slice(0, 60)
cols_state_rh = slice(60, 120)
cols_state_qn = slice(120, 180)
cols_state_u = slice(240, 300)
cols_state_v = slice(300, 360)
cols_pbuf_ozone = slice(360, 420)
cols_pbuf_CH4 = slice(420, 480)
cols_pbuf_N2O = slice(480, 540)


def _min_standardized_per_lev(profile, mean_per_lev, std_per_lev):
    """Return the per-level minimum of the standardized profile.

    The profile is standardized as (profile - mean_per_lev) / std_per_lev,
    i.e. with the same per-level sub/div pair that is saved as
    per_lev_sub / per_lev_div, and the minimum is then taken over axis 0.

    Args:
        profile: 2-D array indexed as (sample, level).
        mean_per_lev: 1-D array holding one mean per level.
        std_per_lev: 1-D array holding one standard deviation per level.

    Returns:
        1-D array with one minimum per level. Levels whose std is exactly 0
        (no spread in the data) yield 0 instead of NaN/inf.
    """
    anomaly = profile - mean_per_lev[None, :]
    has_spread = (std_per_lev != 0)[None, :]
    standardized = np.divide(
        anomaly,
        std_per_lev[None, :],
        out=np.zeros_like(anomaly),  # value kept wherever the division is skipped
        where=has_spread,
    )
    return np.min(standardized, axis=0)


# per level mean

input_mean_state_t_per_lev = np.mean(train_input[:, cols_state_t], axis=0, dtype=np.float64)
input_mean_state_rh_per_lev = np.mean(train_input[:, cols_state_rh], axis=0, dtype=np.float64)
input_mean_state_qn_per_lev = np.mean(train_input[:, cols_state_qn], axis=0, dtype=np.float64)
input_mean_state_u_per_lev = np.mean(train_input[:, cols_state_u], axis=0, dtype=np.float64)
input_mean_state_v_per_lev = np.mean(train_input[:, cols_state_v], axis=0, dtype=np.float64)
input_mean_pbuf_ozone_per_lev = np.mean(train_input[:, cols_pbuf_ozone], axis=0, dtype=np.float64)
input_mean_pbuf_CH4_per_lev = np.mean(train_input[:, cols_pbuf_CH4], axis=0, dtype=np.float64)
input_mean_pbuf_N2O_per_lev = np.mean(train_input[:, cols_pbuf_N2O], axis=0, dtype=np.float64)
# Single-value variables: one scalar mean each.
input_mean_state_ps = np.mean(train_input[:, 540], dtype=np.float64)
input_mean_pbuf_SOLIN = np.mean(train_input[:, 541], dtype=np.float64)
input_mean_pbuf_LHFLX = np.mean(train_input[:, 542], dtype=np.float64)
input_mean_pbuf_SHFLX = np.mean(train_input[:, 543], dtype=np.float64)
input_mean_pbuf_TAUX = np.mean(train_input[:, 544], dtype=np.float64)
input_mean_pbuf_TAUY = np.mean(train_input[:, 545], dtype=np.float64)
input_mean_pbuf_COSZRS = np.mean(train_input[:, 546], dtype=np.float64)
input_mean_cam_in_ALDIF = np.mean(train_input[:, 547], dtype=np.float64)
input_mean_cam_in_ALDIR = np.mean(train_input[:, 548], dtype=np.float64)
input_mean_cam_in_ASDIF = np.mean(train_input[:, 549], dtype=np.float64)
input_mean_cam_in_ASDIR = np.mean(train_input[:, 550], dtype=np.float64)
input_mean_cam_in_LWUP = np.mean(train_input[:, 551], dtype=np.float64)
input_mean_cam_in_ICEFRAC = np.mean(train_input[:, 552], dtype=np.float64)
input_mean_cam_in_LANDFRAC = np.mean(train_input[:, 553], dtype=np.float64)
input_mean_cam_in_OCNFRAC = np.mean(train_input[:, 554], dtype=np.float64)
input_mean_cam_in_SNOWHICE = np.mean(train_input[:, 555], dtype=np.float64)
input_mean_cam_in_SNOWHLAND = np.mean(train_input[:, 556], dtype=np.float64)

# per level std

input_std_state_t_per_lev = np.std(train_input[:, cols_state_t], axis=0, dtype=np.float64)
input_std_state_rh_per_lev = np.std(train_input[:, cols_state_rh], axis=0, dtype=np.float64)
input_std_state_qn_per_lev = np.std(train_input[:, cols_state_qn], axis=0, dtype=np.float64)
input_std_state_u_per_lev = np.std(train_input[:, cols_state_u], axis=0, dtype=np.float64)
input_std_state_v_per_lev = np.std(train_input[:, cols_state_v], axis=0, dtype=np.float64)
input_std_pbuf_ozone_per_lev = np.std(train_input[:, cols_pbuf_ozone], axis=0, dtype=np.float64)
input_std_pbuf_CH4_per_lev = np.std(train_input[:, cols_pbuf_CH4], axis=0, dtype=np.float64)
input_std_pbuf_N2O_per_lev = np.std(train_input[:, cols_pbuf_N2O], axis=0, dtype=np.float64)
# Single-value variables: one scalar std each.
input_std_state_ps = np.std(train_input[:, 540], dtype=np.float64)
input_std_pbuf_SOLIN = np.std(train_input[:, 541], dtype=np.float64)
input_std_pbuf_LHFLX = np.std(train_input[:, 542], dtype=np.float64)
input_std_pbuf_SHFLX = np.std(train_input[:, 543], dtype=np.float64)
input_std_pbuf_TAUX = np.std(train_input[:, 544], dtype=np.float64)
input_std_pbuf_TAUY = np.std(train_input[:, 545], dtype=np.float64)
input_std_pbuf_COSZRS = np.std(train_input[:, 546], dtype=np.float64)
input_std_cam_in_ALDIF = np.std(train_input[:, 547], dtype=np.float64)
input_std_cam_in_ALDIR = np.std(train_input[:, 548], dtype=np.float64)
input_std_cam_in_ASDIF = np.std(train_input[:, 549], dtype=np.float64)
input_std_cam_in_ASDIR = np.std(train_input[:, 550], dtype=np.float64)
input_std_cam_in_LWUP = np.std(train_input[:, 551], dtype=np.float64)
input_std_cam_in_ICEFRAC = np.std(train_input[:, 552], dtype=np.float64)
input_std_cam_in_LANDFRAC = np.std(train_input[:, 553], dtype=np.float64)
input_std_cam_in_OCNFRAC = np.std(train_input[:, 554], dtype=np.float64)
input_std_cam_in_SNOWHICE = np.std(train_input[:, 555], dtype=np.float64)
input_std_cam_in_SNOWHLAND = np.std(train_input[:, 556], dtype=np.float64)

# per col mean: a single mean over all samples AND all 60 columns of the
# variable, repeated 60 times so it has the same length as the per-level arrays.

input_mean_state_t_per_col = np.repeat(np.mean(train_input[:, cols_state_t], dtype=np.float64), 60)
input_mean_state_rh_per_col = np.repeat(np.mean(train_input[:, cols_state_rh], dtype=np.float64), 60)
input_mean_state_qn_per_col = np.repeat(np.mean(train_input[:, cols_state_qn], dtype=np.float64), 60)
input_mean_state_u_per_col = np.repeat(np.mean(train_input[:, cols_state_u], dtype=np.float64), 60)
input_mean_state_v_per_col = np.repeat(np.mean(train_input[:, cols_state_v], dtype=np.float64), 60)
input_mean_pbuf_ozone_per_col = np.repeat(np.mean(train_input[:, cols_pbuf_ozone], dtype=np.float64), 60)
input_mean_pbuf_CH4_per_col = np.repeat(np.mean(train_input[:, cols_pbuf_CH4], dtype=np.float64), 60)
input_mean_pbuf_N2O_per_col = np.repeat(np.mean(train_input[:, cols_pbuf_N2O], dtype=np.float64), 60)

# per col std: same reduction as the per col mean, using the standard deviation.

input_std_state_t_per_col = np.repeat(np.std(train_input[:, cols_state_t], dtype=np.float64), 60)
input_std_state_rh_per_col = np.repeat(np.std(train_input[:, cols_state_rh], dtype=np.float64), 60)
input_std_state_qn_per_col = np.repeat(np.std(train_input[:, cols_state_qn], dtype=np.float64), 60)
input_std_state_u_per_col = np.repeat(np.std(train_input[:, cols_state_u], dtype=np.float64), 60)
input_std_state_v_per_col = np.repeat(np.std(train_input[:, cols_state_v], dtype=np.float64), 60)
input_std_pbuf_ozone_per_col = np.repeat(np.std(train_input[:, cols_pbuf_ozone], dtype=np.float64), 60)
input_std_pbuf_CH4_per_col = np.repeat(np.std(train_input[:, cols_pbuf_CH4], dtype=np.float64), 60)
input_std_pbuf_N2O_per_col = np.repeat(np.std(train_input[:, cols_pbuf_N2O], dtype=np.float64), 60)

# per lev min norm: minimum over samples of the per-level standardized values.
# The divisor is the per-level std (the "div" that pairs with the per-level
# mean), not the mean itself; dividing by the mean gives a value that does not
# correspond to the saved sub/div normalization and is ill-defined for signed
# fields such as the winds.

input_norm_min_state_t_per_lev = _min_standardized_per_lev(
    train_input[:, cols_state_t], input_mean_state_t_per_lev, input_std_state_t_per_lev)
input_norm_min_state_rh_per_lev = _min_standardized_per_lev(
    train_input[:, cols_state_rh], input_mean_state_rh_per_lev, input_std_state_rh_per_lev)
input_norm_min_state_qn_per_lev = _min_standardized_per_lev(
    train_input[:, cols_state_qn], input_mean_state_qn_per_lev, input_std_state_qn_per_lev)
input_norm_min_state_u_per_lev = _min_standardized_per_lev(
    train_input[:, cols_state_u], input_mean_state_u_per_lev, input_std_state_u_per_lev)
input_norm_min_state_v_per_lev = _min_standardized_per_lev(
    train_input[:, cols_state_v], input_mean_state_v_per_lev, input_std_state_v_per_lev)
input_norm_min_pbuf_ozone_per_lev = _min_standardized_per_lev(
    train_input[:, cols_pbuf_ozone], input_mean_pbuf_ozone_per_lev, input_std_pbuf_ozone_per_lev)
input_norm_min_pbuf_CH4_per_lev = _min_standardized_per_lev(
    train_input[:, cols_pbuf_CH4], input_mean_pbuf_CH4_per_lev, input_std_pbuf_CH4_per_lev)
input_norm_min_pbuf_N2O_per_lev = _min_standardized_per_lev(
    train_input[:, cols_pbuf_N2O], input_mean_pbuf_N2O_per_lev, input_std_pbuf_N2O_per_lev)

In [None]:
# Package the statistics into xarray Datasets keyed by input-variable name.
# NOTE(review): the 60-element arrays are passed bare, without dimension names.
# xarray appears to give each such variable its own dimension named after the
# variable rather than a shared level dimension -- confirm that whatever reads
# these files expects that layout.

# Per-level "sub" (means). liq_partition gets zeros, i.e. it is not shifted.
lev_sub_profiles = {
    'state_t': input_mean_state_t_per_lev,
    'state_rh': input_mean_state_rh_per_lev,
    'state_qn': input_mean_state_qn_per_lev,
    'liq_partition': np.zeros(60),
    'state_u': input_mean_state_u_per_lev,
    'state_v': input_mean_state_v_per_lev,
    'pbuf_ozone': input_mean_pbuf_ozone_per_lev,
    'pbuf_CH4': input_mean_pbuf_CH4_per_lev,
    'pbuf_N2O': input_mean_pbuf_N2O_per_lev,
}
lev_sub_scalars = {
    'state_ps': input_mean_state_ps,
    'pbuf_SOLIN': input_mean_pbuf_SOLIN,
    'pbuf_LHFLX': input_mean_pbuf_LHFLX,
    'pbuf_SHFLX': input_mean_pbuf_SHFLX,
    'pbuf_TAUX': input_mean_pbuf_TAUX,
    'pbuf_TAUY': input_mean_pbuf_TAUY,
    'pbuf_COSZRS': input_mean_pbuf_COSZRS,
    'cam_in_ALDIF': input_mean_cam_in_ALDIF,
    'cam_in_ALDIR': input_mean_cam_in_ALDIR,
    'cam_in_ASDIF': input_mean_cam_in_ASDIF,
    'cam_in_ASDIR': input_mean_cam_in_ASDIR,
    'cam_in_LWUP': input_mean_cam_in_LWUP,
    'cam_in_ICEFRAC': input_mean_cam_in_ICEFRAC,
    'cam_in_LANDFRAC': input_mean_cam_in_LANDFRAC,
    'cam_in_OCNFRAC': input_mean_cam_in_OCNFRAC,
    'cam_in_SNOWHICE': input_mean_cam_in_SNOWHICE,
    'cam_in_SNOWHLAND': input_mean_cam_in_SNOWHLAND,
}
# Merging keeps insertion order: the nine profile variables, then the scalars.
per_lev_sub = xr.Dataset({**lev_sub_profiles, **lev_sub_scalars})

# Per-level "div" (standard deviations). liq_partition gets ones, i.e. it is
# not rescaled.
lev_div_profiles = {
    'state_t': input_std_state_t_per_lev,
    'state_rh': input_std_state_rh_per_lev,
    'state_qn': input_std_state_qn_per_lev,
    'liq_partition': np.ones(60),
    'state_u': input_std_state_u_per_lev,
    'state_v': input_std_state_v_per_lev,
    'pbuf_ozone': input_std_pbuf_ozone_per_lev,
    'pbuf_CH4': input_std_pbuf_CH4_per_lev,
    'pbuf_N2O': input_std_pbuf_N2O_per_lev,
}
lev_div_scalars = {
    'state_ps': input_std_state_ps,
    'pbuf_SOLIN': input_std_pbuf_SOLIN,
    'pbuf_LHFLX': input_std_pbuf_LHFLX,
    'pbuf_SHFLX': input_std_pbuf_SHFLX,
    'pbuf_TAUX': input_std_pbuf_TAUX,
    'pbuf_TAUY': input_std_pbuf_TAUY,
    'pbuf_COSZRS': input_std_pbuf_COSZRS,
    'cam_in_ALDIF': input_std_cam_in_ALDIF,
    'cam_in_ALDIR': input_std_cam_in_ALDIR,
    'cam_in_ASDIF': input_std_cam_in_ASDIF,
    'cam_in_ASDIR': input_std_cam_in_ASDIR,
    'cam_in_LWUP': input_std_cam_in_LWUP,
    'cam_in_ICEFRAC': input_std_cam_in_ICEFRAC,
    'cam_in_LANDFRAC': input_std_cam_in_LANDFRAC,
    'cam_in_OCNFRAC': input_std_cam_in_OCNFRAC,
    'cam_in_SNOWHICE': input_std_cam_in_SNOWHICE,
    'cam_in_SNOWHLAND': input_std_cam_in_SNOWHLAND,
}
per_lev_div = xr.Dataset({**lev_div_profiles, **lev_div_scalars})

# The remaining three datasets cover only these eight 60-value variables
# (no liq_partition, no single-value variables).
profile_names = [
    'state_t', 'state_rh', 'state_qn', 'state_u',
    'state_v', 'pbuf_ozone', 'pbuf_CH4', 'pbuf_N2O',
]

# Per-column "sub": one mean per variable, repeated 60 times.
per_col_sub = xr.Dataset(dict(zip(profile_names, [
    input_mean_state_t_per_col,
    input_mean_state_rh_per_col,
    input_mean_state_qn_per_col,
    input_mean_state_u_per_col,
    input_mean_state_v_per_col,
    input_mean_pbuf_ozone_per_col,
    input_mean_pbuf_CH4_per_col,
    input_mean_pbuf_N2O_per_col,
])))

# Per-column "div": one std per variable, repeated 60 times.
per_col_div = xr.Dataset(dict(zip(profile_names, [
    input_std_state_t_per_col,
    input_std_state_rh_per_col,
    input_std_state_qn_per_col,
    input_std_state_u_per_col,
    input_std_state_v_per_col,
    input_std_pbuf_ozone_per_col,
    input_std_pbuf_CH4_per_col,
    input_std_pbuf_N2O_per_col,
])))

# Per-level minimum of the normalized values computed in the previous cell.
per_lev_min_norm = xr.Dataset(dict(zip(profile_names, [
    input_norm_min_state_t_per_lev,
    input_norm_min_state_rh_per_lev,
    input_norm_min_state_qn_per_lev,
    input_norm_min_state_u_per_lev,
    input_norm_min_state_v_per_lev,
    input_norm_min_pbuf_ozone_per_lev,
    input_norm_min_pbuf_CH4_per_lev,
    input_norm_min_pbuf_N2O_per_lev,
])))

In [None]:
# Write the five normalization datasets as NetCDF files under save_path.
save_path = '/global/cfs/cdirs/m4334/jerry/climsim3_dev/preprocessing/normalizations/inputs/multirepresentation/'
# Create the output directory first: without it the first to_netcdf call fails
# and the statistics computed above would have to be recomputed.
os.makedirs(save_path, exist_ok=True)
per_lev_sub.to_netcdf(os.path.join(save_path, 'per_lev_sub.nc'))
per_lev_div.to_netcdf(os.path.join(save_path, 'per_lev_div.nc'))
per_col_sub.to_netcdf(os.path.join(save_path, 'per_col_sub.nc'))
per_col_div.to_netcdf(os.path.join(save_path, 'per_col_div.nc'))
per_lev_min_norm.to_netcdf(os.path.join(save_path, 'per_lev_min_norm.nc'))

In [None]:
# Completion marker: printed once every cell above has run.
print('finished')