In [9]:
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow import keras
from tensorflow.keras.optimizers import RMSprop
from sklearn.utils import class_weight
from scipy.stats import pearsonr
tfd = tfp.distributions
tfpl = tfp.layers
def nll(y_true, y_pred):
    """Negative log-likelihood loss.

    Args:
        y_true: observed target values.
        y_pred: object exposing ``log_prob`` (the model output distribution).

    Returns:
        ``-y_pred.log_prob(y_true)``.
    """
    log_likelihood = y_pred.log_prob(y_true)
    return -log_likelihood
def ubrmse(ground,pred):
    """Return the unbiased RMSE between reference and predicted values.

    The mean error (bias) is removed from the residuals before the root mean
    square is taken, and the result is rounded to four decimals.

    Args:
        ground: reference values (anything supporting ``ground - pred`` and
            NumPy reductions).
        pred: predicted values, broadcast-compatible with ``ground``.

    Returns:
        ``sqrt(mean((residual - mean(residual))**2))`` rounded to 4 decimals.
    """
    residual = ground - pred
    bias = np.mean(residual)
    # Centre the residuals instead of evaluating sqrt(rmse**2 - bias**2):
    # both forms are algebraically equal, but the difference of squares can
    # round to a tiny negative number (-> NaN) when the error is almost pure
    # bias. The centred form is a mean of squares and can never go negative.
    centred_rms = np.sqrt(np.mean((residual - bias)**2))
    return round(centred_rms,4)

In [3]:
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager makes sure the file handle is closed after loading.
with open('Datasets/all_df_final.pkl','rb') as pkl_file:
    all_df = pickle.load(pkl_file)
# Untouched copy, taken before any column is rescaled below.
all_rf = all_df.copy()

# Column -> divisor applied in place to all_df.
# NOTE(review): the divisors presumably undo the integer scaling of the source
# products and bring the predictors to comparable ranges - confirm against the
# dataset documentation.
scale_divisors = {
    'NDVI_500': 10000,
    'NDVI_250': 10000,
    'LST_11': 5000,
    'precip': 1000,
    'sand': 100,
    'clay': 100,
    'ph': 100,
    'ET': 1000,
    'dem': 10000,
}
for column, divisor in scale_divisors.items():
    all_df[column] /= divisor
# LST_11 additionally gets a constant offset after its division.
all_df['LST_11'] -= 2.7315
# Keep the rescaled elevation to four decimals.
all_df['dem'] = all_df['dem'].round(4)

In [5]:
# Hold-out rows come from their own pickle; every row of all_df whose index
# label does not appear in that frame is used for training.
# NOTE(review): all_df was rescaled in the previous cell but test_df is used
# exactly as stored - confirm val_df.pkl already holds rescaled columns.
test_df = pd.read_pickle('Datasets/val_df.pkl')
cond = all_df.index.isin(test_df.index)  # True where the row belongs to the hold-out set
train_df = all_df.loc[~cond]

## TensorFlow Probability (TFP) model definitions

In [6]:
def prior_trainable(kernel_size, bias_size=0, dtype=None):
    """Build the prior for a variational layer.

    The prior is an independent Normal over ``kernel_size + bias_size`` values
    whose location comes from a ``VariableLayer`` and whose scale is fixed at 0.5.

    Args:
        kernel_size: number of kernel values.
        bias_size: number of bias values.
        dtype: dtype forwarded to the variable layer.

    Returns:
        A two-layer ``tf.keras.Sequential`` producing the prior distribution.
    """
    total = kernel_size + bias_size
    location = tfp.layers.VariableLayer(total, dtype=dtype, name='prior_var')
    to_distribution = tfp.layers.DistributionLambda(
        lambda loc: tfd.Independent(
            tfd.Normal(loc=loc, scale=.5),
            reinterpreted_batch_ndims=1),
        name='prior_lam')
    return tf.keras.Sequential([location, to_distribution])

def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
    """Build the mean-field posterior for a variational layer.

    A ``VariableLayer`` holds ``2 * (kernel_size + bias_size)`` values: the
    first half is used as the Normal locations, the second half is passed
    through softplus and used as the Normal scales.

    Args:
        kernel_size: number of kernel values.
        bias_size: number of bias values.
        dtype: dtype forwarded to the variable layer.

    Returns:
        A two-layer ``tf.keras.Sequential`` producing the posterior distribution.
    """
    size = kernel_size + bias_size
    raw_params = tfp.layers.VariableLayer(2 * size, dtype=dtype, name='post_var')
    # NOTE(review): the softplus output is used as the scale with no offset or
    # lower bound added - confirm it cannot collapse towards 0 during training.
    to_distribution = tfp.layers.DistributionLambda(
        lambda params: tfd.Independent(
            tfd.Normal(loc=params[..., :size],
                       scale=tf.nn.softplus(params[..., size:])),
            reinterpreted_batch_ndims=1),
        name='post_lam')
    return tf.keras.Sequential([raw_params, to_distribution])

In [7]:
def gen_model(inputs=7,train_len=100):
    """Build the variational (probabilistic) regression network.

    Layer stack: input -> sigmoid Dense with ``inputs`` units ->
    ``DenseVariational`` sized for a one-dimensional ``IndependentNormal`` ->
    ``IndependentNormal`` output distribution.

    Args:
        inputs: number of input features; also the width of the hidden layer.
        train_len: value used to scale the KL term (``kl_weight = 1/train_len``);
            callers in this notebook pass the number of training rows.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    layer_stack = [
        keras.layers.Input(shape=inputs, name='Input'),
        keras.layers.Dense(units=inputs, activation='sigmoid', name='Dense_Layer'),
        tfpl.DenseVariational(
            units=tfpl.IndependentNormal.params_size(1),
            make_prior_fn=prior_trainable,
            make_posterior_fn=posterior_mean_field,
            kl_weight=1/train_len,
            name='Dense_Variational'),
        tfpl.IndependentNormal(event_shape=(1,), name='Output_Normal'),
    ]
    return keras.Sequential(layer_stack)


def dense_model(inputs=7):
    """Build the deterministic-weight network with a Normal output head.

    Layer stack: input -> sigmoid Dense (``inputs`` units) -> sigmoid Dense
    (6 units) -> linear Dense (2 units) -> ``IndependentNormal`` with event
    shape ``(1,)``.

    Args:
        inputs: number of input features; also the width of the first hidden layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    layer_stack = [
        keras.layers.Input(shape=inputs, name='Input'),
        keras.layers.Dense(units=inputs, activation='sigmoid'),
        keras.layers.Dense(units=6, activation='sigmoid'),
        # Two linear outputs feed the distribution head (presumably its
        # location and scale parameters).
        keras.layers.Dense(units=2, activation=None),
        tfpl.IndependentNormal(event_shape=(1,), name='Output_Normal'),
    ]
    return keras.Sequential(layer_stack)
    


### Training

In [8]:
def create_weights(criteria,df):
    """Return one 'balanced' sample weight per row of ``df``.

    Every distinct value in ``criteria`` is treated as a class; class weights
    come from ``compute_class_weight('balanced', ...)`` and each row receives
    the weight of its class.

    Args:
        criteria: pandas Series of class labels, one per row of ``df``.
        df: DataFrame the weights belong to. Only its index is used; the
            frame is neither copied nor modified.

    Returns:
        Float Series named ``'weight'`` indexed like ``df``.
    """
    classes = criteria.unique()
    class_weights = class_weight.compute_class_weight('balanced',
                                                      classes=classes,
                                                      y=criteria)
    weight_by_class = dict(zip(classes, class_weights))
    # A single vectorised lookup replaces copying the whole frame and filling
    # a temporary column class by class.
    weights = criteria.map(weight_by_class).astype(float)
    if not weights.index.equals(df.index):
        # Keep the result aligned with df when criteria is ordered differently.
        weights = weights.reindex(df.index)
    return weights.rename('weight')

##### Dense Training

In [None]:
# Train the dense network once per weighting attribute (sample weights balanced
# over that attribute's values), then once more without sample weights.
variables = ['sand','clay','ph','dem','ET','NDVI_500','LST_11','precip','smap']
x = train_df.loc[:,variables]
y = train_df['in_situ']

val_x = test_df.loc[:,variables]
val_y = test_df.in_situ
histories = {}
save_dir = f'Models/new_train/Dense/'

# Halve the learning rate after 15 epochs without a val_loss improvement of at
# least 0.01, but never go below 0.0005.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
                                    monitor='val_loss',
                                    factor=0.5,
                                    patience=15,
                                    verbose=0,
                                    mode='auto',
                                    min_delta=0.01,#0.0005
                                    cooldown=0,
                                    min_lr=0.0005)

# Stop after 31 epochs without a val_loss improvement of at least 0.0005.
early_stopping = tf.keras.callbacks.EarlyStopping(
                                    monitor='val_loss',
                                    patience=31,
                                    min_delta=0.0005)#0.0002

# Options shared by every checkpoint in this cell: save the full model, and
# only when val_loss reaches a new minimum.
checkpoint_kwargs = dict(
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch',
)

for att in ['texture','sand','clay','koep','mcd12','ph']:
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
                '{0}{1}'.format(save_dir,att),
                **checkpoint_kwargs)
    callbacks = [checkpoint,reduce_lr,early_stopping]
    weights = create_weights(train_df[att],train_df)
    val_weights = create_weights(test_df[att],test_df)
    val_data = (val_x,val_y.values,val_weights)
    model = dense_model(inputs=x.shape[-1])
    model.compile(loss=nll, optimizer=keras.optimizers.Adam(learning_rate=0.1),weighted_metrics=[])
    history = model.fit(x=x,y=y,epochs=500,batch_size=8192*2,sample_weight=weights,validation_data=val_data,callbacks=callbacks)
    histories[att] = history

# Unweighted ("free") run.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
            '{}free_run'.format(save_dir),
            **checkpoint_kwargs)

callbacks = [checkpoint,reduce_lr,early_stopping]
val_data = (val_x,val_y.values)
# Build the dense architecture here as well: this cell saves under Dense/, and
# the variational model (gen_model) has its own free run in the Prob cell.
model = dense_model(inputs=x.shape[-1])
model.compile(loss=nll, optimizer=keras.optimizers.Adam(learning_rate=0.1))
history = model.fit(x=x,y=y,epochs=500,batch_size=8192*2,validation_data=val_data,callbacks=callbacks)

##### Prob Training

In [None]:
# Train the variational network once per weighting attribute (sample weights
# balanced over that attribute's values), then once more without weights.
variables = ['sand','clay','ph','dem','ET','NDVI_500','LST_11','precip','smap']
x = train_df.loc[:,variables]
y = train_df['in_situ']

val_x = test_df.loc[:,variables]
val_y = test_df['in_situ']
histories = {}
save_dir = f'Models/new_train/Prob/'

# Halve the learning rate after 15 stagnant epochs (improvement < 0.01 in
# val_loss), with 0.0005 as the floor.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=15,
    verbose=0,
    mode='auto',
    min_delta=0.01,
    cooldown=0,
    min_lr=0.0005,
)

# Give up after 31 epochs in which val_loss improved by less than 0.0005.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=31,
    min_delta=0.0005,
)

# Settings common to all checkpoints below: full-model saves, written only
# when val_loss hits a new minimum.
checkpoint_kwargs = dict(
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch',
)

for att in ['texture','sand','clay','koep','mcd12','ph']:
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        '{0}{1}'.format(save_dir,att), **checkpoint_kwargs)
    callbacks = [checkpoint,reduce_lr,early_stopping]
    weights = create_weights(train_df[att],train_df)
    val_weights = create_weights(test_df[att],test_df)
    val_data = (val_x,val_y.values,val_weights)
    model = gen_model(inputs=x.shape[-1],train_len=x.shape[0])
    model.compile(
        loss=nll,
        optimizer=keras.optimizers.Adam(learning_rate=0.1),
        weighted_metrics=[],
    )
    history = model.fit(
        x=x,
        y=y,
        epochs=500,
        batch_size=8192*2,
        sample_weight=weights,
        validation_data=val_data,
        callbacks=callbacks,
    )
    histories[att] = history

# Unweighted ("free") run of the same architecture.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '{}free_run'.format(save_dir), **checkpoint_kwargs)

callbacks = [checkpoint,reduce_lr,early_stopping]
val_data = (val_x,val_y.values)
model = gen_model(inputs=x.shape[-1],train_len=x.shape[0])
model.compile(loss=nll, optimizer=keras.optimizers.Adam(learning_rate=0.1))
history = model.fit(
    x=x,
    y=y,
    epochs=500,
    batch_size=8192*2,
    validation_data=val_data,
    callbacks=callbacks,
)

### WDL Training

In [12]:
from tensorflow.keras.experimental import LinearModel, WideDeepModel

# Size of the one-hot vector built for each categorical column in wide_inputs
# (the integer codes of a column must be smaller than its entry here).
cat_dict = {
    'texture': 13,
    'mcd12': 18,
    'koep': 32,
}

def wide_inputs(df,spatial=False):
    """Build the two input arrays of the wide-and-deep model.

    Args:
        df: DataFrame holding the continuous predictors and the categorical
            columns 'texture', 'mcd12' and 'koep'.
        spatial: when truthy use 'NDVI_250' as the NDVI column, otherwise
            'NDVI_500'.

    Returns:
        Tuple ``(wide, deep)`` of float arrays: ``wide`` holds the nine
        continuous columns; ``deep`` holds the same columns followed by the
        one-hot encoding of each categorical column (widths from ``cat_dict``).
    """
    ndvi_col = 'NDVI_250' if spatial else 'NDVI_500'
    continuous = df.loc[:,['sand','clay','ph','dem',ndvi_col,'LST_11','ET','precip','smap']]
    # Row i of the identity matrix is the one-hot vector of category code i.
    one_hot_blocks = [
        np.eye(cat_dict[name])[df[name].astype(int)]
        for name in ('texture','mcd12','koep')
    ]
    deep = np.concatenate([continuous, *one_hot_blocks],axis=1)
    return continuous.values.astype(float),deep.astype(float)

def initialize_models():
    """Create fresh wide and deep sub-models.

    Returns:
        Tuple ``(linear_model, dnn_model)``: a new ``LinearModel`` and a
        ``keras.Sequential`` of three sigmoid Dense layers with 128, 64 and 1
        units named 'dnn_1', 'dnn_2' and 'dnn_3'.
    """
    linear_model = LinearModel()
    layer_spec = [(128,'dnn_1'),(64,'dnn_2'),(1,'dnn_3')]
    dnn_layers = [
        keras.layers.Dense(units=width,activation='sigmoid',name=layer_name)
        for width,layer_name in layer_spec
    ]
    dnn_model = keras.Sequential(dnn_layers)
    return linear_model,dnn_model

In [None]:
# Train the wide-and-deep model once per weighting attribute (sample weights
# balanced over that attribute's values), then once more without weights.
variables = ['sand','clay','ph','dem','ET','NDVI_500','LST_11','precip','smap']
wide_x,dnn_x = wide_inputs(train_df)
# Despite the "_y" suffix these are the validation *inputs* (wide and deep).
wide_y,dnn_y = wide_inputs(test_df)
y = train_df.in_situ
val_y = test_df.in_situ
# validation_data is passed as a tuple, the form documented for Model.fit;
# a list is not guaranteed to be unpacked into (x, y[, sample_weight]).
val_data = ([wide_y,dnn_y],val_y)
save_dir = f'Models/new_train/WDL/'

# Halve the learning rate after 15 epochs without a val_loss improvement of at
# least 0.01, but never go below 0.0005.
# NOTE(review): this floor is above the last stage's learning rate (0.0001)
# used below - confirm that is intended.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
                                    monitor='val_loss',
                                    factor=0.5,
                                    patience=15,
                                    verbose=0,
                                    mode='auto',
                                    min_delta=0.01,#0.0005
                                    cooldown=0,
                                    min_lr=0.0005)

# Stop after 31 epochs without a val_loss improvement of at least 0.0005.
early_stopping = tf.keras.callbacks.EarlyStopping(
                                    monitor='val_loss',
                                    patience=31,
                                    min_delta=0.0005)#0.0002

# Options shared by every checkpoint in this cell: save the full model, and
# only when val_loss reaches a new minimum.
checkpoint_kwargs = dict(
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch',
)

for att in ['texture','sand','clay','koep','mcd12','ph']:
    linear_model,dnn_model = initialize_models()

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
                '{0}{1}'.format(save_dir,att),
                **checkpoint_kwargs)
    callbacks = [checkpoint,reduce_lr,early_stopping]
    weights = create_weights(train_df[att],train_df)
    val_weights = create_weights(test_df[att],test_df)
    val_data = ([wide_y,dnn_y],val_y,val_weights)
    model = WideDeepModel(linear_model,dnn_model)
    # Three consecutive fits on the same model object with decreasing Adam
    # learning rates; the same checkpoint callback instance is reused.
    for lr in [0.001,0.0005,0.0001]:
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),loss='mse',weighted_metrics=[])
        history = model.fit([wide_x,dnn_x],y,epochs=500,batch_size=8192*2,sample_weight=weights,validation_data=val_data,callbacks=callbacks)



# Unweighted ("free") run.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
            '{}free'.format(save_dir),
            **checkpoint_kwargs)

linear_model,dnn_model = initialize_models()
callbacks = [checkpoint,reduce_lr,early_stopping]
val_data = ([wide_y,dnn_y],val_y)
model = WideDeepModel(linear_model,dnn_model)
for lr in [0.001,0.0005,0.0001]:
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),loss='mse')
    history = model.fit([wide_x,dnn_x],y,epochs=500,batch_size=8192*2,validation_data=val_data,callbacks=callbacks)
