In [1]:
import numpy as np
import torch
from torch import nn  
import time 
import os  
import xarray as xr
import subprocess
import matplotlib.pyplot as plt
from matplotlib import colors
import cartopy.crs as ccrs

In [2]:
## import self defined functions
from sys import path 
# insert at 1, 0 is the script path (or '' in REPL)
# personal helper library directory (presumably where cg_funcs lives -- TODO confirm)
path.insert(1, '/tigress/cw55/local/python_lib')
from cg_funcs import global_mean_xarray
# project root; the work directory used by later cells is built from it
root = '/tigress/cw55/work/2022_radi_nn/NN_AM4'
# make <root>/work importable as well; must happen before the imports below
path.insert(1,  root+'/work')
# Training-script modules of the experiments analysed in this notebook.
# The "_EN" variants are currently disabled (kept here as commented-out imports).
# import work.lw_csaf_Li5Relu_EN.train_script01 as lwcsafen
import work.lw_csaf_Li5Relu_EY.train_script01 as lwcsafey 
# import work.lw_af_Li5Relu_EN.train_script01 as lwafen
import work.lw_af_Li5Relu_EY.train_script01 as lwafey 

In [3]:
from get_AM4_data_lw import get_AM4_data_lw
######################################################
# common functions to split the training and test data
from NNRTMC_lw_utils import  split_train_test_sample, \
draw_batches, data_std_normalization, print_key_results, return_exp_dir
    
from diag_utils import batch_index_sta_end, pred_NN_batch,\
create_6tiles_lw,regrid_6tile2latlon

In [4]:
if __name__ == '__main__':
    # Work on the first GPU (index 0) and keep its device string for later
    # model / tensor placement.
    torch.cuda.set_device(0)
    device = f'cuda:{torch.cuda.current_device()}'

    ######################################################
    # Experiments to evaluate. The matching "_EN" variants
    # ('lw_csaf_Li5Relu_EN', 'lw_af_Li5Relu_EN') are currently switched off.
    Exp_name = ['lw_csaf_Li5Relu_EY', 'lw_af_Li5Relu_EY']
    # training-script module providing the network class of each experiment
    Exp_name_model_dict = dict(zip(Exp_name, (lwcsafey, lwafey)))
    # sky-condition tag passed to the data reader for each experiment
    sky_cond = dict(zip(Exp_name, ('csaf', 'af')))
    work_dir = f'{root}/work/'

    # AM4 flux (output) and offline-input files. Only tile 1 is read here;
    # use range(1, 7) in both comprehensions to read all six tiles.
    out_filelist = [
        f'/scratch/gpfs/cw55/NNRTMC_data/AM4_v2/20000101.fluxes.tile{tile}.nc'
        for tile in range(1, 2)
    ]
    inp_filelist = [
        f'/scratch/gpfs/cw55/NNRTMC_data/AM4_v2/20000101.new_offline_input.tile{tile}.nc'
        for tile in range(1, 2)
    ]

    # ak / bk coefficients from the hybrid-sigma parameter file, each with a
    # leading length-1 axis added.
    hybrid_p_sigma_para = xr.open_dataset('/tigress/cw55/data/NNRTMC_dataset/AM4_pk_bk_202207.nc')
    A_k = hybrid_p_sigma_para.ak.values[None, :]
    B_k = hybrid_p_sigma_para.bk.values[None, :]

In [5]:
# small default font for all figures produced below
plt.rcParams.update({'font.size': '6'})

In [6]:
# Restrict the evaluation data to a single day (month 1, day 15).
# Wider alternative kept for reference: month_sel = None, day_sel = [15, 18, 21, 24, 27]
month_sel, day_sel = [1], [15]

In [7]:
%%time
# Per-experiment result containers, keyed by experiment name.
predi = {}      # NN predictions
error = {}      # prediction minus the un-normalised reference output
eng_err = {}    # second value returned by pred_NN_batch
NN_model = {}   # instantiated network objects
ds_regrid = {}  # not filled in this cell

# NOTE: the loop ends with an unconditional `break`, so only the first entry of
# Exp_name is processed. Later cells rely on the names bound here (mo, nor_para,
# model_state_dict, input_array, input_array_ori, output_array, ...) still
# holding the values from that single iteration.
for mo in Exp_name:
    ######################################################
    # load restart file
    run_num, exp_dir = return_exp_dir(work_dir, mo, create_dir=False)
    # the restart file that is read carries the index run_num-1
    PATH_last =  exp_dir+f'/restart.{run_num-1:02d}.pth'
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints
    restart_data = torch.load(PATH_last)  # load the saved restart data (nothing is trained here)
    print(f'load: {PATH_last}')
    # read normalization parameters and model parameters from the restart file
    nor_para = restart_data['nor_para']
    model_state_dict = restart_data['model_state_dict']
    # read data
    input_array_ori, output_array_ori, ds_coords = \
    get_AM4_data_lw(out_filelist, inp_filelist, condition=sky_cond[mo], 
                    month_sel = month_sel, day_sel = day_sel, return_coords=True) 
    # initialize model with the saved state; the input width is taken from the data
    NN_model[mo] = Exp_name_model_dict[mo].NNRTMC_NN(device, nor_para, A_k, B_k, input_array_ori.shape[1],model_state_dict)  
    
    # normalize data via saved nor_para in restart file
    # (nor_para is rebound to the value returned by data_std_normalization)
    nor_para, input_array, output_array   = data_std_normalization(input_array_ori, output_array_ori, nor_para)
    
    # run the NN on the selected dataset
    predi[mo], eng_err[mo] = pred_NN_batch(input_array, output_array, NN_model[mo], nor_para, device)
    # compared against the un-normalised outputs, so predi is presumably already
    # un-normalised by pred_NN_batch -- TODO confirm
    error[mo] = predi[mo] - output_array_ori
    # columns 3 onward are scaled by the number of seconds per day
    predi[mo][:,3:] = predi[mo][:,3:]*86400 # HR K/s >> K/day
    error[mo][:,3:] = error[mo][:,3:]*86400 # HR K/s >> K/day
    break

load: /tigress/cw55/work/2022_radi_nn/NN_AM4/work/lw_csaf_Li5Relu_EY/restart.04.pth
Data files:
['/scratch/gpfs/cw55/NNRTMC_data/AM4_v2/20000101.fluxes.tile1.nc'] ['/scratch/gpfs/cw55/NNRTMC_data/AM4_v2/20000101.new_offline_input.tile1.nc']
Data selection:
    Month: [1] 
    Day: [15] 
Reading data... 0 Done.
Total data size: 73728
CPU times: user 1.64 s, sys: 555 ms, total: 2.2 s
Wall time: 1.67 s


# process NN dict and save parameters

In [8]:
model_state_dict.keys()

dict_keys(['Res_stack.0.weight', 'Res_stack.0.bias', 'Res_stack.1.weight', 'Res_stack.1.bias', 'Res_stack.1.running_mean', 'Res_stack.1.running_var', 'Res_stack.1.num_batches_tracked', 'Res_stack.3.weight', 'Res_stack.3.bias', 'Res_stack.4.weight', 'Res_stack.4.bias', 'Res_stack.4.running_mean', 'Res_stack.4.running_var', 'Res_stack.4.num_batches_tracked', 'Res_stack.6.weight', 'Res_stack.6.bias', 'Res_stack.7.weight', 'Res_stack.7.bias', 'Res_stack.7.running_mean', 'Res_stack.7.running_var', 'Res_stack.7.num_batches_tracked', 'Res_stack.9.weight', 'Res_stack.9.bias', 'Res_stack.10.weight', 'Res_stack.10.bias', 'Res_stack.10.running_mean', 'Res_stack.10.running_var', 'Res_stack.10.num_batches_tracked', 'Res_stack.12.weight', 'Res_stack.12.bias'])

In [78]:

# Implementation specific to the Li5ReluBN architecture:
# (Linear -> BatchNorm -> ReLU) x 4, followed by a final Linear layer
def regroup_linear_BN_para(ori_NN_parameters, nor_para, num_hidden=None, bn_eps=1e-5):
    """Fold input/output normalisation and BatchNorm into plain linear layers.

    The network is taken to be ``(Linear -> BatchNorm -> ReLU) * num_hidden``
    followed by one final ``Linear`` layer. The result is a parameter list for
    an equivalent ``(Linear -> ReLU) * num_hidden -> Linear`` network that maps
    raw inputs directly to un-normalised outputs.

    Parameters
    ----------
    ori_NN_parameters : sequence of numpy arrays
        State-dict values in their stored order. Every hidden layer contributes
        seven entries: linear weight, linear bias, BN weight, BN bias,
        BN running mean, BN running variance and the (ignored) batch counter.
        The final linear layer contributes weight and bias.
    nor_para : mapping
        Must provide ``'input_scale'``, ``'input_offset'``, ``'output_scale'``
        and ``'output_offset'``. The folding assumes the convention
        ``x_norm = (x - input_offset) * input_scale`` and
        ``y = y_norm / output_scale + output_offset``.
    num_hidden : int, optional
        Number of ``Linear -> BatchNorm`` groups. By default it is derived from
        ``len(ori_NN_parameters)`` (four for the 30-entry Li5ReluBN state dict).
    bn_eps : float, optional
        Epsilon added to the running variance; 1e-5 is the value this function
        has always used (and the PyTorch BatchNorm default).

    Returns
    -------
    list of numpy arrays
        ``[W1, b1, W2, b2, ...]`` with weights of shape ``(n_out, n_in)``.

    Raises
    ------
    ValueError
        If the parameter list length does not fit the expected layout.
    """
    entries_per_hidden = 7  # Linear (w, b) + BatchNorm (w, b, mean, var, counter)
    n_entries = len(ori_NN_parameters)
    if num_hidden is None:
        num_hidden, leftover = divmod(n_entries - 2, entries_per_hidden)
        if n_entries < 2 or leftover:
            raise ValueError(
                f'cannot infer the layer layout from {n_entries} parameter arrays; '
                f'expected {entries_per_hidden}*num_hidden + 2'
            )
    elif num_hidden < 0 or n_entries < entries_per_hidden * num_hidden + 2:
        raise ValueError(
            f'{n_entries} parameter arrays are not enough for num_hidden={num_hidden}'
        )

    new_NN_parameters = []
    for layer in range(num_hidden + 1):
        para_ind = layer * entries_per_hidden
        weight = ori_NN_parameters[para_ind]
        bias = ori_NN_parameters[para_ind + 1]

        if layer == 0:
            # account for the input normalisation:
            # W @ ((x - offset) * scale) + b == (W * scale) @ x + (b - W @ (scale * offset))
            bias = bias - weight @ (nor_para['input_scale'] * nor_para['input_offset'])
            weight = nor_para['input_scale'] * weight

        if layer < num_hidden:
            # BatchNorm in inference mode: gamma * (z - mean) / sqrt(var + eps) + beta
            bn_weight = ori_NN_parameters[para_ind + 2]
            bn_bias = ori_NN_parameters[para_ind + 3]
            bn_mean = ori_NN_parameters[para_ind + 4]
            bn_var = ori_NN_parameters[para_ind + 5]
            gain = bn_weight / np.sqrt(bn_var + bn_eps)
            weight = gain[:, None] * weight
            bias = (bias - bn_mean) * gain + bn_bias
        else:
            # final layer: undo the output normalisation
            weight = weight / nor_para['output_scale'][:, None]
            bias = bias / nor_para['output_scale'] + nor_para['output_offset']

        new_NN_parameters.append(weight)
        new_NN_parameters.append(bias)
    return new_NN_parameters
# TODO: verify that predictions made with the regrouped parameters match the original NN

In [81]:
# Export every state-dict entry, in stored order, as a NumPy array.
# detach()/cpu() keep the conversion working when the checkpoint tensors
# live on the GPU or carry autograd history.
raw_parameters = [tensor.detach().cpu().numpy() for tensor in model_state_dict.values()]
# Fold normalisation and BatchNorm into plain (weight, bias) pairs.
a_list_of_parameters = regroup_linear_BN_para(raw_parameters, nor_para)

In [82]:
len(a_list_of_parameters)

10

In [83]:
a_list_of_parameters[-1]

array([ 3.2002768e+02,  4.0090723e+02,  2.6558441e+02, -9.8864832e-05,
       -4.2484757e-05, -3.0277746e-05, -2.1887121e-05, -1.6019969e-05,
       -1.1250479e-05, -7.3125589e-06, -3.3226536e-06, -9.9745807e-07,
       -9.5678615e-07, -2.9403518e-06, -6.3936695e-06, -1.1280928e-05,
       -1.6056405e-05, -1.8749690e-05, -2.0623025e-05, -2.1737691e-05,
       -2.2328793e-05, -2.1849477e-05, -2.1396041e-05, -2.1058808e-05,
       -2.1287702e-05, -2.1574351e-05, -2.1334796e-05, -2.1188729e-05,
       -2.1309002e-05, -2.2046828e-05, -2.2597989e-05, -2.3016191e-05,
       -2.3767143e-05, -2.4191400e-05, -2.2670951e-05, -1.2569994e-05],
      dtype=float32)

In [84]:
# a_list_of_parameters[i*2].shape[0]

In [85]:
# save FNN parameters
# The list must hold exactly one (weight, bias) pair per layer.
num_layer, unpaired = divmod(len(a_list_of_parameters), 2)
if unpaired:
    raise ValueError('num_layer must be integer')

ds_nn_save = xr.Dataset()
ds_nn_save['LN'] = num_layer  # number of linear layers
for i in range(num_layer):
    weight = a_list_of_parameters[i*2]
    bias = a_list_of_parameters[i*2+1]
    # layer i+1: weight on dims (x, y), bias on dim x, plus both weight sizes as scalars
    ds_nn_save[f'W{i+1}'] = ((f'x{i+1}', f'y{i+1}'), weight)
    ds_nn_save[f'B{i+1}'] = (f'x{i+1}', bias)
    ds_nn_save[f'size{i+1}0'] = weight.shape[0]
    ds_nn_save[f'size{i+1}1'] = weight.shape[1]
ds_nn_save.attrs['info'] = 'FNN parameters for RadNN AM4, 20230211'

In [99]:
ds_nn_save.to_netcdf('RadNN_para_ReLU_L5W256.nc')

In [87]:
ds_nn_save

# Prototype function for implementation

In [88]:
def Rad_NN_activation_function(x):
    """Scalar ReLU: give back ``x`` when it is strictly positive, otherwise a float32 zero."""
    return x if x > 0 else np.float32(0)
# Implementation specific to the Li5ReluBN architecture
def Rad_NN_pred(NN_parameters, input_X):
    """Evaluate a feed-forward ReLU network from a flat parameter list.

    Parameters
    ----------
    NN_parameters : sequence of numpy arrays
        ``[W1, b1, W2, b2, ..., Wn, bn]`` with ``Wk`` of shape ``(n_out, n_in)``
        and ``bk`` of shape ``(n_out,)``. Every layer except the last one is
        followed by ReLU. For the Li5ReluBN export this is 10 arrays
        (4 hidden layers + output layer).
    input_X : numpy array of shape (n_sample, n_feature)

    Returns
    -------
    numpy array of shape (n_sample, n_output)

    Raises
    ------
    ValueError
        If ``NN_parameters`` does not hold one (weight, bias) pair per layer.
    """
    n_entries = len(NN_parameters)
    if n_entries < 2 or n_entries % 2:
        raise ValueError('NN_parameters must hold one (weight, bias) pair per layer')
    n_hidden = n_entries // 2 - 1

    # work column-wise: one column per sample
    activation = input_X.T
    for layer in range(n_hidden):
        weight = NN_parameters[2 * layer]
        bias = NN_parameters[2 * layer + 1]
        # y = w @ x + b
        pre_activation = weight @ activation + bias[:, None]
        # ReLU on the whole array at once; a zero of the array's own dtype keeps
        # the dtype stable, and NaN maps to 0 exactly like the scalar activation.
        zero = pre_activation.dtype.type(0)
        activation = np.where(pre_activation > 0, pre_activation, zero)

    # final linear layer, no activation
    output_Y = NN_parameters[-2] @ activation + NN_parameters[-1][:, None]
    return output_Y.T

In [89]:
print(input_array.shape, output_array.shape)
# Keep the first 300 samples of the normalised inputs, normalised outputs and
# raw inputs for the PyTorch-vs-prototype comparison below.
input_X, output_Y, input_X_ori = (
    full_array[:300, :]
    for full_array in (input_array, output_array, input_array_ori)
)

(73728, 102) (73728, 36)


In [94]:
# Reference result: the PyTorch model evaluated on the normalised inputs.
# `mo` is the experiment name left over from the evaluation loop above.
input_X_on_device = torch.tensor(input_X).to(device)
NN_pred1 = NN_model[mo].predict(input_X_on_device).cpu().numpy()
# Prototype result: regrouped parameters applied to the raw (un-normalised)
# inputs; the normalised-input call Rad_NN_pred(a_list_of_parameters, input_X)
# is no longer used.
NN_pred2 = Rad_NN_pred(a_list_of_parameters, input_X_ori)

In [95]:
# Undo the output normalisation of the PyTorch prediction so that it can be
# compared with NN_pred2.
# NOTE: NN_pred1 is rebound from its own value, so running this cell again
# without first re-running the prediction cell rescales it a second time.
NN_pred1 = NN_pred1/nor_para['output_scale']+nor_para['output_offset']

In [96]:
NN_pred1[100]

array([ 3.3553534e+02,  4.3721405e+02,  2.9177805e+02, -1.2663462e-04,
       -5.0152579e-05, -3.3372165e-05, -2.5395046e-05, -1.9746803e-05,
       -1.3605272e-05, -5.8798028e-06, -6.7786505e-07, -2.5770155e-06,
        2.3961002e-07, -1.1050860e-06, -5.2782989e-06, -1.1935447e-05,
       -1.5862905e-05, -2.1303531e-05, -2.1934484e-05, -1.4854924e-05,
       -1.8882292e-05, -2.1659778e-05, -1.3793571e-05, -9.4141287e-06,
       -1.4135028e-05, -1.8763014e-05, -3.7484344e-05, -3.6534591e-05,
       -3.6698897e-05, -3.7677273e-05, -3.5703753e-05, -3.3471886e-05,
       -3.2343192e-05, -3.0788200e-05, -2.5603178e-05, -3.8061598e-06],
      dtype=float32)

In [97]:
NN_pred2[100]

array([ 3.3553516e+02,  4.3721347e+02,  2.9177780e+02, -1.2663445e-04,
       -5.0152761e-05, -3.3372060e-05, -2.5395038e-05, -1.9746763e-05,
       -1.3605304e-05, -5.8798541e-06, -6.7780093e-07, -2.5769887e-06,
        2.3962423e-07, -1.1051552e-06, -5.2782534e-06, -1.1935461e-05,
       -1.5862926e-05, -2.1303575e-05, -2.1934327e-05, -1.4855194e-05,
       -1.8881979e-05, -2.1660324e-05, -1.3793040e-05, -9.4145689e-06,
       -1.4134652e-05, -1.8763305e-05, -3.7483485e-05, -3.6535283e-05,
       -3.6698635e-05, -3.7677746e-05, -3.5703233e-05, -3.3471570e-05,
       -3.2344156e-05, -3.0788211e-05, -2.5604000e-05, -3.8093922e-06],
      dtype=float32)

In [98]:
abs(NN_pred1 - NN_pred2).mean(axis=0)/abs(NN_pred1 + NN_pred2).mean(axis=0)

array([3.97747726e-07, 4.10508960e-07, 2.84382963e-07, 4.17946239e-07,
       1.38945120e-06, 8.30368379e-07, 6.53234849e-07, 1.06811842e-06,
       1.51011875e-06, 3.08170070e-06, 7.40635232e-06, 1.01022324e-05,
       1.06333919e-05, 1.09047687e-05, 2.33664559e-06, 9.95295864e-07,
       1.71027261e-06, 1.00887235e-06, 3.98043767e-06, 6.28012003e-06,
       7.88769466e-06, 1.01769447e-05, 9.33329284e-06, 9.33354568e-06,
       9.41365397e-06, 1.22670199e-05, 2.61592977e-05, 2.79286451e-05,
       1.67236340e-05, 2.03629625e-05, 2.21717710e-05, 1.49308835e-05,
       4.62604366e-05, 8.54762129e-06, 3.28007263e-05, 4.79521223e-05],
      dtype=float32)