tgb - 8/6/2020 - Trying a new neural network predicting the percentile of the output distribution rather than the value itself. For now, we will try two architectures:  
1. Use tanh and assign a percentile based on the value  
2. Use softmax and assign a percentile based on the maximal probability  

Draws heavily from [https://github.com/tbeucler/CBRAIN-CAM/blob/master/notebooks/tbeucler_devlog/048_Ankitesh_BF_Notebook_For_David.ipynb]

# Imports

In [1]:
import sys
sys.path.insert(1,"/home1/07064/tg863631/anaconda3/envs/CbrainCustomLayer/lib/python3.6/site-packages") #work around for h5py
from cbrain.imports import *
from cbrain.cam_constants import *
from cbrain.utils import *
from cbrain.layers import *
from cbrain.data_generator import DataGenerator
import tensorflow as tf
from tensorflow import math as tfm
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow_probability as tfp
import xarray as xr
import numpy as np
from cbrain.model_diagnostics import ModelDiagnostics
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as imag
import scipy.integrate as sin
# import cartopy.crs as ccrs
import matplotlib.ticker as mticker
# from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pickle
# from climate_invariant import *
from tensorflow.keras import layers
import datetime
from climate_invariant_utils import *
import yaml

/oasis/scratch/comet/tbeucler/temp_project/CBRAIN-CAM/notebooks/tbeucler_devlog


In [2]:
# Load coordinates (just pick any file from the climate model run)
coor = xr.open_dataset("/oasis/scratch/comet/ankitesh/temp_project/data/sp8fbp_minus4k.cam2.h1.0000-01-01-00000.nc",\
                    decode_times=False)
lat = coor.lat; lon = coor.lon; lev = coor.lev;
coor.close();

# Define training directory
TRAINDIR = '/oasis/scratch/comet/ankitesh/temp_project/PrepData/CRHData/'
path = '/home/ankitesh/CBrain_project/CBRAIN-CAM/cbrain/'

# Load hyam and hybm to calculate pressure field in SPCAM
path_hyam = 'hyam_hybm.pkl'
hf = open(path+path_hyam,'rb')
hyam,hybm = pickle.load(hf)

# Scale dictionary to convert the loss to W/m2
scale_dict = load_pickle('/home/ankitesh/CBrain_project/CBRAIN-CAM/nn_config/scale_dicts/009_Wm2_scaling_2.pkl')

# Data generator

## Choose between real-geography and aquaplanet

In [3]:
path_aquaplanet = '/oasis/scratch/comet/ankitesh/temp_project/PrepData/'
path_realgeography = '/oasis/scratch/comet/ankitesh/temp_project/PrepData/geography/'

path = path_aquaplanet

## Data generator using RH

In [4]:
scale_dict_RH = load_pickle('/home/ankitesh/CBrain_project/CBRAIN-CAM/nn_config/scale_dicts/009_Wm2_scaling_2.pkl')
scale_dict_RH['RH'] = 0.01*L_S/G, # Arbitrary 0.1 factor as specific humidity is generally below 2%

in_vars_RH = ['RH','TBP','PS', 'SOLIN', 'SHFLX', 'LHFLX']
if path==path_realgeography: out_vars_RH = ['PTEQ','PTTEND','FSNT','FSNS','FLNT','FLNS']
elif path==path_aquaplanet: out_vars_RH = ['PHQ','TPHYSTND','FSNT', 'FSNS', 'FLNT', 'FLNS']

TRAINFILE_RH = 'CI_RH_M4K_NORM_train_shuffle.nc'
NORMFILE_RH = 'CI_RH_M4K_NORM_norm.nc'
VALIDFILE_RH = 'CI_RH_M4K_NORM_valid.nc'

train_gen_RH = DataGenerator(
    data_fn = path+TRAINFILE_RH,
    input_vars = in_vars_RH,
    output_vars = out_vars_RH,
    norm_fn = path+NORMFILE_RH,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict_RH,
    batch_size=1024,
    shuffle=True,
)

## Data generator using TNS

In [5]:
in_vars = ['QBP','TfromNS','PS', 'SOLIN', 'SHFLX', 'LHFLX']
if path==path_aquaplanet: out_vars = ['PHQ','TPHYSTND','FSNT', 'FSNS', 'FLNT', 'FLNS']
elif path==path_realgeography: out_vars = ['PTEQ','PTTEND','FSNT','FSNS','FLNT','FLNS']

TRAINFILE_TNS = 'CI_TNS_M4K_NORM_train_shuffle.nc'
NORMFILE_TNS = 'CI_TNS_M4K_NORM_norm.nc'
VALIDFILE_TNS = 'CI_TNS_M4K_NORM_valid.nc'

train_gen_TNS = DataGenerator(
    data_fn = path+TRAINFILE_TNS,
    input_vars = in_vars,
    output_vars = out_vars,
    norm_fn = path+NORMFILE_TNS,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    batch_size=1024,
    shuffle=True,
)

## Data generator combined

In [6]:
in_vars = ['QBP','TBP','PS', 'SOLIN', 'SHFLX', 'LHFLX']
if path==path_aquaplanet: out_vars = ['PHQ','TPHYSTND','FSNT', 'FSNS', 'FLNT', 'FLNS']
elif path==path_realgeography: out_vars = ['PTEQ','PTTEND','FSNT','FSNS','FLNT','FLNS']

In [7]:

TRAINFILE = 'CI_SP_M4K_train_shuffle.nc'
NORMFILE = 'CI_SP_M4K_NORM_norm.nc'
VALIDFILE = 'CI_SP_M4K_valid.nc'

train_gen = DataGeneratorClimInv(
    data_fn = path+TRAINFILE,
    input_vars = in_vars,
    output_vars = out_vars,
    norm_fn = path+NORMFILE,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    batch_size=1024,
    shuffle=True,
    lev=lev,
    hyam=hyam,hybm=hybm,
    inp_subRH=train_gen_RH.input_transform.sub, inp_divRH=train_gen_RH.input_transform.div,
    inp_subTNS=train_gen_TNS.input_transform.sub,inp_divTNS=train_gen_TNS.input_transform.div,
    rh_trans=False,t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False
)

valid_gen = DataGeneratorClimInv(
    data_fn = path+VALIDFILE,
    input_vars = in_vars,
    output_vars = out_vars,
    norm_fn = path+NORMFILE,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    batch_size=1024,
    shuffle=True,
    lev=lev,
    hyam=hyam,hybm=hybm,
    inp_subRH=train_gen_RH.input_transform.sub, inp_divRH=train_gen_RH.input_transform.div,
    inp_subTNS=train_gen_TNS.input_transform.sub,inp_divTNS=train_gen_TNS.input_transform.div,
    rh_trans=False,t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False
)

# Tanh model

## Model

Note that the *loss* has to be customized in order to evaluate the model on the (+4K) set

In [8]:
inp = Input(shape=(64,)) ## input after rh and tns transformation

densout = Dense(128, activation='linear')(inp)
densout = LeakyReLU(alpha=0.3)(densout)

for i in range (6):
    densout = Dense(128, activation='linear')(densout)
    densout = LeakyReLU(alpha=0.3)(densout)

Conv = Dense(60, activation='tanh')(densout)
Radflux = Dense(4, activation='linear')(densout) 

dense_out = concatenate([Conv,Radflux])
model = tf.keras.models.Model(inp, dense_out)

In [9]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64)]         0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 128)          8320        input_1[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 128)          0           dense[0][0]                      
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          16512       leaky_re_lu[0][0]                
______________________________________________________________________________________________

## Custom loss converting convective heating and moistening to proper units

In [10]:
path_perc = '/oasis/scratch/comet/tbeucler/temp_project/CBRAIN-CAM/notebooks/tbeucler_devlog/NPZ_DATA/2020_08_06_PERC.npz'
with np.load(path_perc) as data:
    perc = data['perc_m4K']

In [11]:
test = tf.data.Dataset.from_tensor_slices((perc))

### Develop in numpy

In [12]:
a = np.random.rand(8000,60)

In [13]:
b = np.floor(999*a).astype('int')

In [14]:
b.shape

(8000, 60)

In [15]:
perc.shape

(60, 1001)

In [16]:
c = np.zeros((8000,60))
for ibatch in range(8000):
    for i in range(60):
        c[ibatch,i] = perc[i,b[ibatch,i]+1]

In [17]:
c.shape

(8000, 60)

In [18]:
pflat = perc.flatten()

In [19]:
np.tile(1001*np.arange(0,60)+1,(8000,1)).shape

(8000, 60)

In [20]:
bplus = b+np.tile(1001*np.arange(0,60)+1,(8000,1))

In [21]:
bplus.shape

(8000, 60)

In [22]:
pflat[377]

0.0

In [23]:
cbis = pflat[bplus]

In [24]:
cbis.shape

(8000, 60)

In [25]:
resid = cbis-c

In [26]:
def tanh_to_perc(tanh_output,perc):
    """
    Converts output of tanh function to physical units using saved percentile array
    """
    St = np.shape(tanh_output)[0]
    Sp = np.shape(perc)[1]
    return perc.flatten()[np.floor((Sp-2)*tanh_output).astype('int')+np.tile(Sp*np.arange(0,60)+1,(St,1))]

### Convert to custom loss in tensorflow

In [29]:
class perc_to_Wm2(Layer):
    def __init__(self, perc, **kwargs):
        self.perc = perc
        self().__init__(**kwargs)
        
    def build(self,input_shape):
        super().build(input_shape)
        
    def get_config(self):
        config = {'perc':list(self.perc)}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
    def call(self,arrs):
        prior = arrs
        return tf.cast(tanh_to_perc(prior[:,:60],perc),tf.float32)
    
    def compute_output_shape(self,input_shape):
        return (input_shape[0][0])

In [113]:
class Perc_to_MSE_loss():
    """
    Similar to the MSE loss except it converts the percentile prediction to physical values in W/m2
    """
    def __init__(self,perc,name='perc_to_MSE_loss',**kwargs):
        self.perc = tf.convert_to_tensor(perc)
        self.conv = slice(0,60)
        self.radf = slice(60,64)
        self.__name__ = name
        super().__init__(**kwargs)
        
    def __call__(self,ytrue,ypred):
        
#         def tanh_to_perc(tanh_output,perc):
#             """
#             Converts output of tanh function to physical units using saved percentile array
#             """
#             St = np.shape(tanh_output)[0]
#             Sp = np.shape(perc)[1]
            
#             step0 = (Sp-2)*tanh_output
#             print(step0)
#             step01 = np.floor(step0)
#             step1 = step01.astype('int')
#             step2 = np.tile(Sp*np.arange(0,60)+1,(St,1))
            
            
#             return perc.flatten()[step1+step2]
        
#         a = tanh_to_perc(ypred[:,self.conv],perc)
#         print(a)
#         b = tf.concat([a,ypred[:,self.radf]])
        
        St = tf.shape(ypred[:,self.conv])[0]
        Sp = tf.shape(self.perc)[1]
        
        a = tf.cast((Sp-2),dtype=tf.float32)
        b = ypred[:,self.conv]
        c = tfm.floor(a*b)
        d = tf.cast(c,dtype=tf.int32)
        print(d)
        
        e = Sp*tf.range(0,60)+1
        print(e)
#         ebis = tf.cast(St,dtype=tf.int32)
#         print(St)
#         f = tf.constant([ebis,1],dtype=tf.int32)
#         g = tf.tile(e,f)
        
        ind = d+e
        print(ind)
        f = tf.reshape(self.perc,[60060,])
        
        i = tf.constant(0)
        c = lambda i: tf.less(i, 10)
        b = lambda i: (tf.add(i, 1), )
        r = tf.while_loop(c, b, [i])
        print(f)
        g = (f[i] for i in ind)
        print(g)
        print(i)
        h = ypred[:,self.radf]
        j = tf.concat([g,h])
        
        return (j-ytrue)**2

## Compile model

In [114]:
model.compile(tf.keras.optimizers.Adam(),loss=Perc_to_MSE_loss(perc=perc))

Tensor("loss_38/concatenate_loss/Cast_1:0", shape=(None, 60), dtype=int32)
Tensor("loss_38/concatenate_loss/add:0", shape=(60,), dtype=int32)
Tensor("loss_38/concatenate_loss/add_1:0", shape=(None, 60), dtype=int32)
Tensor("loss_38/concatenate_loss/Reshape:0", shape=(60060,), dtype=float64)
<generator object Perc_to_MSE_loss.__call__.<locals>.<genexpr> at 0x2b127f14f830>
59


TypeError: concat() missing 1 required positional argument: 'axis'

In [33]:
# Where to save the model
path_HDF5 = '/oasis/scratch/comet/tbeucler/temp_project/CBRAIN_models/'
save_name = 'Tanh_2020_08_06'

In [None]:
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint(path_HDF5+save_name+'.hdf5',save_best_only=True, monitor='val_loss', mode='min')