tgb - 8/6/2020 - Trying a new neural network predicting the percentile of the output distribution rather than the value itself. For now, we will try two architectures:  
1. Use tanh and assign a percentile based on the value  
2. Use softmax and assign a percentile based on the maximal probability  

Draws heavily from [048_Ankitesh_BF_Notebook_For_David.ipynb](https://github.com/tbeucler/CBRAIN-CAM/blob/master/notebooks/tbeucler_devlog/048_Ankitesh_BF_Notebook_For_David.ipynb).

# Imports

In [1]:
import sys
sys.path.insert(1,"/home1/07064/tg863631/anaconda3/envs/CbrainCustomLayer/lib/python3.6/site-packages") #work around for h5py
from cbrain.imports import *
from cbrain.cam_constants import *
from cbrain.utils import *
from cbrain.layers import *
from cbrain.data_generator import DataGenerator
import tensorflow as tf
from tensorflow import math as tfm
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow_probability as tfp
import xarray as xr
import numpy as np
from cbrain.model_diagnostics import ModelDiagnostics
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as imag
import scipy.integrate as sin
# import cartopy.crs as ccrs
import matplotlib.ticker as mticker
# from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pickle
# from climate_invariant import *
from tensorflow.keras import layers
import datetime
from climate_invariant_utils import *
import yaml

/oasis/scratch/comet/tbeucler/temp_project/CBRAIN-CAM/notebooks/tbeucler_devlog


In [2]:
# Load coordinates (just pick any file from the climate model run)
coor = xr.open_dataset(
    "/oasis/scratch/comet/ankitesh/temp_project/data/sp8fbp_minus4k.cam2.h1.0000-01-01-00000.nc",
    decode_times=False,
)
lat = coor.lat
lon = coor.lon
lev = coor.lev
coor.close()

# Define training directory
TRAINDIR = '/oasis/scratch/comet/ankitesh/temp_project/PrepData/CRHData/'
path = '/home/ankitesh/CBrain_project/CBRAIN-CAM/cbrain/'

# Load hyam and hybm to calculate pressure field in SPCAM
path_hyam = 'hyam_hybm.pkl'
# Context manager guarantees the pickle file handle is closed once the
# two arrays have been read (a bare open() would leave it open).
with open(path+path_hyam,'rb') as hf:
    hyam,hybm = pickle.load(hf)

# Scale dictionary to convert the loss to W/m2
scale_dict = load_pickle('/home/ankitesh/CBrain_project/CBRAIN-CAM/nn_config/scale_dicts/009_Wm2_scaling_2.pkl')

# Data generator

## Choose between real-geography and aquaplanet

In [4]:
# Root directories of the preprocessed data; the real-geography files sit in
# a "geography/" sub-directory of the aquaplanet root.
path_aquaplanet = '/oasis/scratch/comet/ankitesh/temp_project/PrepData/'
path_realgeography = path_aquaplanet + 'geography/'

# Dataset selected for the data generators
path = path_aquaplanet

## Data generator using RH

In [8]:
# Output scaling for the RH generator, starting from the W/m2 scale dictionary
scale_dict_RH = load_pickle('/home/ankitesh/CBrain_project/CBRAIN-CAM/nn_config/scale_dicts/009_Wm2_scaling_2.pkl')
# Must be a plain scalar: no trailing comma here, which would store a 1-tuple.
# NOTE(review): this factor was previously described as "0.1" while the code
# uses 0.01 -- confirm which value is intended.
scale_dict_RH['RH'] = 0.01*L_S/G # Arbitrary factor as specific humidity is generally below 2%

in_vars_RH = ['RH','TBP','PS', 'SOLIN', 'SHFLX', 'LHFLX']
# Output variable names depend on which dataset `path` points to
if path==path_realgeography:
    out_vars_RH = ['PTEQ','PTTEND','FSNT','FSNS','FLNT','FLNS']
elif path==path_aquaplanet:
    out_vars_RH = ['PHQ','TPHYSTND','FSNT', 'FSNS', 'FLNT', 'FLNS']

TRAINFILE_RH = 'CI_RH_M4K_NORM_train_shuffle.nc'
NORMFILE_RH = 'CI_RH_M4K_NORM_norm.nc'
VALIDFILE_RH = 'CI_RH_M4K_NORM_valid.nc'

train_gen_RH = DataGenerator(
    data_fn = path+TRAINFILE_RH,
    input_vars = in_vars_RH,
    output_vars = out_vars_RH,
    norm_fn = path+NORMFILE_RH,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict_RH,
    batch_size=1024,
    shuffle=True,
)

## Data generator using TNS

In [9]:
# Input variables for the TNS generator
in_vars = ['QBP', 'TfromNS', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']

# Output variable names depend on which dataset `path` points to
if path == path_aquaplanet:
    out_vars = ['PHQ', 'TPHYSTND', 'FSNT', 'FSNS', 'FLNT', 'FLNS']
elif path == path_realgeography:
    out_vars = ['PTEQ', 'PTTEND', 'FSNT', 'FSNS', 'FLNT', 'FLNS']

# File names, relative to `path`
TRAINFILE_TNS = 'CI_TNS_M4K_NORM_train_shuffle.nc'
NORMFILE_TNS = 'CI_TNS_M4K_NORM_norm.nc'
VALIDFILE_TNS = 'CI_TNS_M4K_NORM_valid.nc'

# Training generator built on the TNS training and normalization files
train_gen_TNS = DataGenerator(
    data_fn=path + TRAINFILE_TNS,
    norm_fn=path + NORMFILE_TNS,
    input_vars=in_vars,
    output_vars=out_vars,
    input_transform=('mean', 'maxrs'),
    output_transform=scale_dict,
    batch_size=1024,
    shuffle=True,
)

## Data generator combined

In [10]:
# Input variables for the combined generator
in_vars = ['QBP', 'TBP', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']

# Output variable names depend on which dataset `path` points to
if path == path_aquaplanet:
    out_vars = ['PHQ', 'TPHYSTND', 'FSNT', 'FSNS', 'FLNT', 'FLNS']
elif path == path_realgeography:
    out_vars = ['PTEQ', 'PTTEND', 'FSNT', 'FSNS', 'FLNT', 'FLNS']

In [11]:

# File names, relative to `path`
TRAINFILE = 'CI_SP_M4K_train_shuffle.nc'
NORMFILE = 'CI_SP_M4K_NORM_norm.nc'
VALIDFILE = 'CI_SP_M4K_valid.nc'

# Settings shared by the training and validation generators; the two
# generators differ only in the data file they read.
_clim_inv_kwargs = dict(
    input_vars=in_vars,
    output_vars=out_vars,
    norm_fn=path + NORMFILE,
    input_transform=('mean', 'maxrs'),
    output_transform=scale_dict,
    batch_size=1024,
    shuffle=True,
    lev=lev,
    hyam=hyam,
    hybm=hybm,
    # Normalization statistics taken from the RH and TNS generators
    inp_subRH=train_gen_RH.input_transform.sub,
    inp_divRH=train_gen_RH.input_transform.div,
    inp_subTNS=train_gen_TNS.input_transform.sub,
    inp_divTNS=train_gen_TNS.input_transform.div,
    # All optional transformations are switched off
    rh_trans=False,
    t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False,
)

train_gen = DataGeneratorClimInv(data_fn=path + TRAINFILE, **_clim_inv_kwargs)

valid_gen = DataGeneratorClimInv(data_fn=path + VALIDFILE, **_clim_inv_kwargs)

# Tanh model

## Model

Note that the *loss* has to be customized in order to evaluate the model on the (+4K) set

In [15]:
# Network input: 64 features per sample
inp = Input(shape=(64,)) ## input after rh and tns transformation

# First hidden block: linear Dense(128) followed by a LeakyReLU activation
densout = Dense(128, activation='linear')(inp)
densout = LeakyReLU(alpha=0.3)(densout)

# Six more identical hidden blocks (seven in total)
for i in range (6):
    densout = Dense(128, activation='linear')(densout)
    densout = LeakyReLU(alpha=0.3)(densout)

# Two output heads fed by the last hidden block:
#   Conv    -- 60 outputs passed through tanh
#   Radflux -- 4 outputs left linear
Conv = Dense(60, activation='tanh')(densout)
Radflux = Dense(4, activation='linear')(densout) 

# Concatenate the heads into a single 64-wide output and build the model
dense_out = concatenate([Conv,Radflux])
model = tf.keras.models.Model(inp, dense_out)

In [16]:
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 64)]         0                                            
__________________________________________________________________________________________________
dense_18 (Dense)                (None, 128)          8320        input_3[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 128)          0           dense_18[0][0]                   
__________________________________________________________________________________________________
dense_19 (Dense)                (None, 128)          16512       leaky_re_lu_14[0][0]             
____________________________________________________________________________________________

## Custom loss converting convective heating and moistening to proper units

In [43]:
# Location of the saved percentile archive (directory + file name)
path_perc = (
    '/oasis/scratch/comet/tbeucler/temp_project/CBRAIN-CAM/notebooks/tbeucler_devlog/NPZ_DATA/'
    '2020_08_06_PERC.npz'
)

# Read the 'perc_m4K' array; the archive is closed when the block exits
with np.load(path_perc) as data:
    perc = data['perc_m4K']

In [45]:
# Dataset whose elements are the rows of the percentile table
test = tf.data.Dataset.from_tensor_slices(perc)

### Develop in numpy

In [143]:
# Random stand-in for a batch of network outputs: 8000 samples x 60 outputs,
# uniformly drawn from [0, 1)
a = np.random.rand(8000,60)

In [144]:
# Turn each value of `a` into an integer bin index by scaling with 999 and
# rounding down
b = np.floor(a * 999).astype(int)

In [146]:
b.shape

(8000, 60)

In [72]:
perc.shape

(60, 1001)

In [147]:
# Reference (element-by-element) lookup used to check the vectorised version:
# for sample `ibatch` and output `i`, read row `i` of the percentile table at
# the column one to the right of the stored bin index.
c = np.zeros((8000, 60))
for ibatch, i in np.ndindex(*c.shape):
    c[ibatch, i] = perc[i, b[ibatch, i] + 1]

In [149]:
c.shape

(8000, 60)

In [150]:
# 1-D row-major copy of the percentile table, so that a (row, column) pair
# can be addressed with a single flat index
pflat = perc.flatten()

In [153]:
np.tile(1001*np.arange(0,60)+1,(8000,1)).shape

(8000, 60)

In [155]:
# Flat index into the flattened table: start of row i (1001*i), plus one,
# plus the bin index. The per-output offsets broadcast across all samples.
bplus = b + (1001*np.arange(0, 60) + 1)

In [156]:
bplus.shape

(8000, 60)

In [119]:
pflat[377]

0.0

In [159]:
# Vectorised lookup: gather the flattened table at the precomputed flat indices
cbis = pflat[bplus]

In [160]:
cbis.shape

(8000, 60)

In [161]:
# Element-wise difference between the vectorised and the loop-based lookups
resid = cbis-c

In [172]:
def tanh_to_perc(tanh_output,perc):
    """
    Converts output of tanh function to physical units using saved percentile array

    Parameters
    ----------
    tanh_output : array-like, shape (n_samples, n_outputs)
        Network outputs, one column per row of `perc`.
    perc : array-like, shape (n_outputs, n_percentiles)
        Saved percentile table.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_outputs)
        Entry [s, j] is perc[j, floor((n_percentiles-2)*tanh_output[s, j]) + 1].

    Notes
    -----
    The number of outputs is taken from `perc` rather than being fixed.
    NOTE(review): the lookup stays inside row j only for inputs in [0, 1);
    negative inputs produce indices that fall into a neighbouring row of the
    flattened table -- confirm the intended mapping for the (-1, 1) tanh range.
    """
    perc = np.asarray(perc)
    n_out, n_perc = perc.shape
    # Column index inside each row (shifted right by one)
    col = np.floor((n_perc-2)*np.asarray(tanh_output)).astype('int') + 1
    # Flat offset of the start of each row; broadcasts over the sample axis
    row_start = n_perc*np.arange(n_out)
    return perc.flatten()[col + row_start]

In [175]:
class perc_to_Wm2(Layer):
    """
    Keras layer that converts the leading network outputs to physical units
    by looking them up in a saved percentile table (see `tanh_to_perc`).

    Parameters
    ----------
    perc : array-like, shape (n_outputs, n_percentiles)
        Percentile table; stored on the layer as a NumPy array.
    **kwargs
        Forwarded to the base `Layer` constructor.

    NOTE(review): the lookup is done with NumPy, so this presumably only
    works when the layer is executed eagerly -- confirm before using it
    inside a compiled graph.
    """
    def __init__(self, perc, **kwargs):
        # Initialise the base Layer before attaching attributes
        super().__init__(**kwargs)
        # Keep an ndarray even when rebuilt from a config holding nested lists
        self.perc = np.asarray(perc)

    def build(self,input_shape):
        super().build(input_shape)

    def get_config(self):
        """Return the layer config, with the table as nested Python lists."""
        config = {'perc': self.perc.tolist()}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self,arrs):
        """Look up the first `n_outputs` columns of `arrs` in this layer's table."""
        prior = arrs
        n_out = self.perc.shape[0]
        # Use the table owned by this layer, not a module-level variable
        return tf.cast(tanh_to_perc(prior[:,:n_out],self.perc),tf.float32)

    def compute_output_shape(self,input_shape):
        """Output keeps the batch dimension and has one column per table row."""
        return (input_shape[0], self.perc.shape[0])