### Fix norm file for Tom Beucler's data set
His data set has fewer variables than Gunnar Behrens' data set.
See: https://towardsdatascience.com/how-to-create-xarray-datasets-cf1859c95921

In [37]:
import pdb
import xarray as xr
import numpy as np

**norm & data files**

In [38]:
# Directory that contains both the data file and the norm file
dataPath = '/work/bd1179/b309172/data/SPCAM_tb_preproc'

# File names, resolved relative to dataPath
data_fn = '2021_09_02_TEST_For_Nando.nc'
norm_fn = '001_norm.nc'

# Open the data set and the normalization file it will be compared against
data_ds = xr.open_dataset(f'{dataPath}/{data_fn}')
norm_ds = xr.open_dataset(f'{dataPath}/{norm_fn}')


In [39]:
# Display the norm dataset opened from norm_fn, before any subsetting
norm_ds

In [40]:
# Display the data set opened from data_fn; its 'var_names' coordinate
# drives the subsetting of the norm file in the cells that follow
data_ds

**Get the indexes to extract data from the original norm file**

In [41]:
# Variable names carried by the data set
data_vars = list(data_ds.coords['var_names'].values)

# Positions along norm_ds 'var_names' whose name also occurs in the data set,
# kept in the order in which they appear in the norm file
norm_var_names = norm_ds.coords['var_names'].values
idx_coords_var_names = [
    pos
    for pos, name in enumerate(norm_var_names)
    if str(name) in data_vars
]

In [42]:
# Display the 'var_names' entries of the norm file that were selected above
norm_ds.coords['var_names'][idx_coords_var_names]

In [43]:
# Variable names carried by the data set
data_vars = list(data_ds.coords['var_names'].values)

# Positions along norm_ds 'var_names_single' whose name also occurs in the
# data set's 'var_names', kept in the order of the norm file
norm_single_names = norm_ds.coords['var_names_single'].values
idx_coords_var_names_single = [
    pos
    for pos, name in enumerate(norm_single_names)
    if str(name) in data_vars
]

In [44]:
# Display the 'var_names_single' entries of the norm file that were selected above
norm_ds.coords['var_names_single'][idx_coords_var_names_single]

In [45]:
# Report the shape of every norm variable before and after subsetting.
# 'std_by_var' is indexed with the 'var_names_single' positions; every other
# variable is indexed with the 'var_names' positions.
for i, iVar in enumerate(norm_ds):
    selection = (
        idx_coords_var_names_single
        if iVar == 'std_by_var'
        else idx_coords_var_names
    )
    print(f'Before: {i} {iVar} {norm_ds[iVar].shape}')
    print(f'After: {i} {iVar} {norm_ds[iVar][selection].shape}\n')

Before: 0 mean (522,)
After: 0 mean (159,)

Before: 1 std (522,)
After: 1 std (159,)

Before: 2 min (522,)
After: 2 min (159,)

Before: 3 max (522,)
After: 3 max (159,)

Before: 4 std_by_var (29,)
After: 4 std_by_var (14,)



**Create new norm file**

In [46]:
# Build the reduced norm dataset from the positions selected earlier.
#
# Plain arrays (.values) are placed in the (dims, data) tuples rather than
# DataArray objects: xarray treats a DataArray inside such a tuple as ambiguous
# (deprecated, and a TypeError in current releases). The resulting dataset is
# the same, since only the values were ever taken from the DataArray.

# define data with variable attributes
data_vars = {
    'mean': (['var_names'], norm_ds['mean'][idx_coords_var_names].values),
    'std': (['var_names'], norm_ds['std'][idx_coords_var_names].values),
    'min': (['var_names'], norm_ds['min'][idx_coords_var_names].values),
    'max': (['var_names'], norm_ds['max'][idx_coords_var_names].values),
    'std_by_var': (
        ['var_names_single'],
        norm_ds['std_by_var'][idx_coords_var_names_single].values,
    ),
}

# define coordinates (subset with the same positions as the data above)
coords = {
    'var_names': (
        ['var_names'],
        norm_ds['var_names'][idx_coords_var_names].values,
    ),
    'var_names_single': (
        ['var_names_single'],
        norm_ds['var_names_single'][idx_coords_var_names_single].values,
    ),
}

# define global attributes
attrs = {'author':'Fernando Iglesias-Suarez', 
         'email':'fernando.iglesias-suarez@dlr.de'}

# create dataset
new_norm = xr.Dataset(data_vars=data_vars, 
                      coords=coords, 
                      attrs=attrs)

In [47]:
# Display the reduced norm dataset built in the previous cell
new_norm

In [48]:
# Write the reduced norm dataset to NetCDF. The path is relative, so the file
# is created in the current working directory, not in dataPath.
new_norm.to_netcdf('001_norm_for_tb_dataset.nc')

**Check `new_norm` against the original norm file**

In [49]:
# Positions of each listed variable along 'var_names' in the ORIGINAL norm file
var_list = ['QBP', 'TBP', 'VBP', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']
idxs_list = [np.where(norm_ds.var_names == v)[0] for v in var_list]
var_idxs = np.concatenate(idxs_list)  # all positions as one flat array
print(idxs_list)

[array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), array([ 90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102,
       103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
       116, 117, 118, 119]), array([120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
       133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
       146, 147, 148, 149]), array([150]), array([151]), array([152]), array([153])]


In [50]:
# Positions of each listed variable along 'var_names' in the NEW norm dataset
var_list = ['QBP', 'TBP', 'VBP', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']
idxs_list = [np.where(new_norm.var_names == v)[0] for v in var_list]
var_idxs = np.concatenate(idxs_list)  # all positions as one flat array
print(idxs_list)

[array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
       47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]), array([60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
       77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89]), array([90]), array([91]), array([92]), array([93])]
