# Create z-score files for CREDIT

In [5]:
import os
import yaml
import numpy as np
import xarray as xr

## File creation

### 6-hourly mean and std files

In [6]:
# Variable information comes from the data_preprocessing config file.
# Resolve it to an absolute path first, then parse the YAML into `conf`.
config_name = os.path.realpath('data_config_mlevel_6h.yml')
with open(config_name) as stream:  # text read mode is the default
    conf = yaml.safe_load(stream)

In [7]:
# Base directory that the input zarr store and the mean/std outputs are
# addressed relative to.
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_mlevel_1deg/'

# Number of vertical levels read for each upper-air variable.
# NOTE(review): 46 is kept from the original trailing comment; presumably
# an alternative level count -- confirm before switching.
N_levels = 18  # 46

In [8]:
# get variable names: the keys of the 'zscore' section of the config
# NOTE(review): the slice assumes the last three keys of that section are
# non-variable entries (save_loc and others) -- confirm against the YAML
# whenever the config layout changes.
varnames = list(conf['zscore'].keys())
varnames = varnames[:-3] # remove save_loc and others

# variables that have one mean/std file per level
varname_upper = ['specific_humidity',
                 'specific_total_water',
                 'temperature',
                 'u_component_of_wind',
                 'v_component_of_wind']

# Everything else is read from a single file per variable.
# An order-preserving filter is used instead of a set difference: set
# iteration order for strings can change between Python sessions, which
# made the variable order of the resulting datasets non-reproducible.
varname_surf = [name for name in varnames if name not in varname_upper]


# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
# Row 0 of each loaded array goes to MEAN_values, row 1 to STD_values.
MEAN_values = {}
STD_values = {}

for varname in varname_surf:
    save_name = conf['zscore']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], varname)
    mean_std = np.load(save_name)
    MEAN_values[varname] = mean_std[0]
    STD_values[varname] = mean_std[1]

for varname in varname_upper:

    # -------------------------------------------- #
    # allocate all levels, NaN-filled so an unwritten level is detectable
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        save_name = conf['zscore']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # -------------------------------------------- #
    # save (copies, so the dict entries do not alias the work buffer)
    MEAN_values[varname] = np.copy(mean_std_all_levels[0, :])
    STD_values[varname] = np.copy(mean_std_all_levels[1, :])

In [9]:
# Open one existing zarr store as a template dataset
# (the file name indicates the 1979 6-hourly subset).
example_store = base_dir + 'all_in_one/ERA5_mlevel_1deg_6h_subset_1979_conserve.zarr'
ds_example = xr.open_zarr(example_store)

In [10]:
# ------------------------------------------------------- #
# Pack the collected mean values into an xr.Dataset

# level coordinate, copied out of the example dataset
level = np.array(ds_example['level'])

# start with a dataset that carries only the level coordinate
ds_mean_6h = xr.Dataset(coords={"level": level})

for varname, data in MEAN_values.items():
    # every entry is named after its variable
    array_kwargs = {"name": varname}
    if np.ndim(data) == 1:
        # 1-D values are attached to the level dimension/coordinate;
        # anything else is stored without explicit dims or coords
        array_kwargs.update(dims=["level"], coords={"level": level})
    data_array = xr.DataArray(data, **array_kwargs)
    ds_mean_6h[varname] = data_array

In [11]:
# ------------------------------------------------------- #
# Pack the collected std values into an xr.Dataset

# reuses the `level` coordinate built for the mean dataset
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, data in STD_values.items():
    # The stored values are square-rooted before being written.
    # NOTE(review): presumably the .npy files hold variances (the cell that
    # loads them says "mean and variance") -- confirm.
    data = np.sqrt(data)
    if np.ndim(data) == 1:
        # 1-D values live on the level dimension
        data_array = xr.DataArray(data, dims=["level"], coords={"level": level}, name=varname)
    else:
        # anything else is stored without explicit dims or coords
        data_array = xr.DataArray(data, name=varname)
    ds_std_6h[varname] = data_array

In [12]:
# Set the land_sea_CI_mask entry of the mean dataset to the scalar 0.0.
# NOTE(review): the matching std assignment below is disabled, so this cell
# does not add a land_sea_CI_mask entry to ds_std_6h -- confirm that is intended.
ds_mean_6h['land_sea_CI_mask'] = 0.0
# ds_std_6h['land_sea_CI_mask'] = 1.0

In [13]:
# ------------------------------------------------------- #
# Save to netCDF: one file per dataset under base_dir
for dataset, file_name in (
    (ds_mean_6h, 'mean_std/mean_6h_1979_2019_conserve_1deg.nc'),
    (ds_std_6h, 'mean_std/std_6h_1979_2019_conserve_1deg.nc'),
):
    dataset.to_netcdf(base_dir + file_name)

In [14]:
# ------------------------------------------------------- #
# Compare with my old ones
# Re-open the std file written under base_dir and the file of the same name
# under the plevel directory, then print both arrays for every variable.
base_dir_plevel = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/'
STD_mlevel = xr.open_dataset(base_dir+'mean_std/std_6h_1979_2019_conserve_1deg.nc')
STD_plevel = xr.open_dataset(base_dir_plevel+'mean_std/std_6h_1979_2019_conserve_1deg.nc')

for varname in varnames:
    print('=============== {} ================='.format(varname))
    for label, std_ds in (('mlevel', STD_mlevel), ('plevel', STD_plevel)):
        try:
            print(np.array(std_ds[varname]))
        except KeyError:
            # Only a missing variable is tolerated (the two files need not
            # hold the same variables); any other error, or a keyboard
            # interrupt, now propagates instead of being silently dropped.
            print('[{}] variable not found, skipped'.format(label))

1330.8846696194373
1330.8846696194373
22.40483815378879
22.404838153788788
9597.442621657625
9597.442621657625
0.3491840944157281
0.34918409441572806
5.487526636552918
5.487526636552918
4.71086288032973
4.71086288032973
21.373759417572977
21.373759417572977
0.0006418363022945283
0.0006418363022945283
1604157.7576941664
1604157.7576941664
4214109.662542443
4214109.662542443
666310.6177924678
666310.6177924678
917596.9043234516
917596.9043234516
7716263.343614088
7716263.343614088
5830393.034174383
5830393.034174383
1048153.7804762697
1048153.7804762697
0.0017714241278640804
0.0017714241278640804
0.601523829454437
0.6015238294544372
[5.24473482e-07 3.61295863e-07 2.47725758e-07 3.44971787e-07
 2.97706450e-07 3.67206094e-07 5.38408906e-07 4.64842550e-06
 6.94580040e-05 4.16347676e-04 1.22804425e-03 2.20238795e-03
 3.47067279e-03 4.28970221e-03 5.22079719e-03 5.66026808e-03
 5.82966602e-03 5.94418469e-03]
[1.47448629e-07 2.18486793e-07 2.69306473e-07 3.18866754e-07
 3.39629482e-07 3.444085