In [None]:
import pygeostat as gs
import pandas as pd
import numpy as np
import sys, os
from matplotlib import pyplot as plt
import IPython
from keras_visualizer import visualizer
import rmsp

In [None]:
#TensorFLow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, concatenate
from tensorflow.keras.layers import LSTM, SimpleRNN, Dropout, GRU, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam, Nadam, SGD
from tensorflow import keras

In [None]:
sys.path.insert(0, os.path.abspath('../Tools'))
from file_export import PickleExporter, FigureExporter

<h1 style='color:#1B127A;'>
Introduction
</h1>

This notebook is a template for creating and training MLP ANN models. The models are trained on the provided homotopic data and then estimate the first four moments of the conditional distribution of a missing variable at heterotopic data locations. The returned moments can be used to fit a Lambda distribution.

<h1 style='color:#1B127A;'>
Settings
</h1>

<h2 style='color:#5177F9;'>
Parameter cell
</h2>

A cell tagged as parameters.

In [None]:
# Directory that receives everything this notebook writes (model diagrams and
# the saved models). Later cells append file names by string concatenation,
# so keep the trailing slash.
outdir = "Output/MlForConditionalDistCaseStudy/Fe/"

# Input directory with pickled data; `ns_data_pkl` is the file loaded from it.
data_dir = "Output/Imputation_MLP/"
ns_data_pkl = "data_ns_Fe.pkl"
# Variable whose conditional moments are modelled (the ANN label) and the
# variables used as ANN inputs.
label_variable = "Fe"
feature_variables = ["Ni", "SiO2"]
# NOTE(review): `out_file` is not referenced by any cell visible in this
# notebook -- confirm whether it is still needed.
out_file = "data_out.pkl"

# Node counts of the central-moments network: `mlp2_nodes_1` is the width of
# the shared layer, and each per-moment branch uses `mlp2_nodes_2 * 2` nodes.
mlp2_nodes_1 = 16
mlp2_nodes_2 = 16

# Suffix appended to the names of the saved models.
file_suffix = ""

In [None]:
# Normalise the directory parameters so that they always end with a path
# separator. Every later cell builds file paths by plain string concatenation
# (e.g. `outdir + "model_first_moment"`), which silently yields a wrong path
# when `outdir` / `data_dir` is overridden without a trailing slash.
# `os.path.join(path, "")` only appends the separator when it is missing, so
# re-running this cell is harmless.
outdir = os.path.join(outdir, "")
data_dir = os.path.join(data_dir, "")

# Create the output directory.
gs.mkdir(outdir)

In [None]:
# pygeostat missing-data conventions: -999 is the null flag and -998 the
# trimming limit. NOTE(review): presumably values below `data.tmin` are read
# as missing -- confirm against the pygeostat `Parameters` documentation.
gs.Parameters['data.tmin'] = -998
gs.Parameters['data.null'] = -999

In [None]:
# Export helpers from the local `file_export` module, both bound to `outdir`.
# NOTE(review): neither object is used by the cells visible in this notebook;
# presumably kept for interactive use -- confirm before removing.
pickle_data = PickleExporter(outdir)
save_figure = FigureExporter(outdir)

In [None]:
# All modelled variables: the features first, the imputed label last.
variables = feature_variables + [label_variable]
# The label is the only variable that has to be imputed.
missing_variables = [label_variable]

<h1 style='color:#1B127A;'>
Load Data
</h1>

In [None]:
# NOTE(review): unpickling executes arbitrary code from the file; only load
# pickles produced by a trusted upstream notebook.
ns_data_path = data_dir + ns_data_pkl
data_ns = rmsp.from_pickle(ns_data_path)

# Preview of the first rows.
data_ns.head()

<h1 style='color:#1B127A;'>
MLP ANN Design
</h1>

<h2 style='color:#5177F9;'>
Helpers
</h2>

In [None]:
import wandb
from wandb.keras import WandbCallback

# Start a Weights & Biases run for this execution. The project name embeds the
# label variable, so every modelled variable gets its own project.
# NOTE(review): `entity` is hard-coded; running under another account requires
# changing it here.
wandb.init(
    project=f"NWT_ML_Conditional_Moments_Tuning_{label_variable}",
    entity="mosi",
    group="NWTCaseStudy",
    tags=["Tuning", 'MLP', 'ConditionalMoments'],
    save_code=False
)


# Shortcut to the run configuration; the hyper-parameters of both models are
# written to this object in the configuration cells further down.
config = wandb.config

<h3 style='color:#51AFF9;'>
call backs and plot functions
</h3>

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Progress-bar callback instance shared by both `fit` calls below. The tqdm
# callback is preferred; the TensorFlow Addons progress bar is the fallback.
# A missing `tqdm` / `tqdm.keras` raises ImportError rather than
# AttributeError, so both are caught -- otherwise the fallback is unreachable
# in exactly the situation it exists for.
# The class is imported under a private alias so the public name
# `TqdmCallback` is only ever bound to the callback *instance*.
try:
    from tqdm.keras import TqdmCallback as _TqdmKerasCallback
    TqdmCallback = _TqdmKerasCallback(verbose=0)
except (ImportError, AttributeError):
    import tensorflow_addons as tfa
    TqdmCallback = tfa.callbacks.TQDMProgressBar(show_epoch_progress=False)

def plot_model(history, ax=None, loss_dict=None):
    """
    Draw the training and validation loss curves of a fitted model.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute maps metric names to per-epoch
        values (in this notebook, the value returned by ``model.fit``).
    ax : matplotlib axes, optional
        Axes to draw on. A new 8 x 5 figure is created when omitted.
    loss_dict : dict, optional
        Maps the keys ``"loss"`` and ``"val_loss"`` to the metric names that
        are looked up in ``history.history``. Defaults to the metric names
        ``"loss"`` and ``"val_loss"`` themselves.
    """
    metric_names = (
        {"loss": "loss", "val_loss": "val_loss"} if loss_dict is None else loss_dict
    )
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(8, 5))

    epoch_metrics = history.history
    # (key into `metric_names`, line styling) for each curve, in draw order.
    curves = (
        ("loss", dict(label="Training Loss", c="r", lw=2)),
        ("val_loss", dict(label="Validation Loss", c="blue", ls='--', lw=3)),
    )
    for key, line_style in curves:
        ax.plot(epoch_metrics[metric_names[key]], **line_style)

    ax.grid()
    ax.legend(fontsize=14)

class LocalCallBacks:
    """
    Bundle of the Keras callbacks shared by the training runs in this notebook.

    Attributes
    ----------
    early_stop_clbk : EarlyStopping
        Early-stopping callback, created with ``restore_best_weights=True``.
    lr_plan : ReduceLROnPlateau
        Callback that scales the learning rate by ``lr_reduce_factor`` when
        the monitored quantity stops improving.
    """

    def __init__(
        self,
        early_stop_monitor="loss",
        min_delta=0.001,
        early_stop_patience=200,
        lr_reduce_monitor="loss",
        lr_reduce_factor=0.1,
        lr_reduce_patience=50,
        verbose=0,
        **kwargs
    ):
        """
        Parameters
        ----------
        early_stop_monitor : str
            Quantity monitored by the early-stopping callback.
        min_delta : float
            Minimum change counted as an improvement; shared by both callbacks.
        early_stop_patience : int
            Epochs without improvement before training is stopped.
        lr_reduce_monitor : str
            Quantity monitored by the learning-rate callback.
        lr_reduce_factor : float
            Factor applied to the learning rate on a plateau.
        lr_reduce_patience : int
            Epochs without improvement before the learning rate is reduced.
        verbose : int
            Verbosity forwarded to both callbacks (previously accepted but
            ignored; the default of 0 keeps the old silent behaviour).
        **kwargs
            Ignored. Accepted so a whole configuration mapping (which also
            holds unrelated keys such as the batch size) can be unpacked
            into the constructor.
        """

        # Early stop call back for keras
        self.early_stop_clbk = EarlyStopping(
            monitor=early_stop_monitor,
            min_delta=min_delta,
            patience=early_stop_patience,
            verbose=verbose,
            mode="auto",
            restore_best_weights=True,
        )

        # Adaptive learning rate call back for keras
        self.lr_plan = ReduceLROnPlateau(
            monitor=lr_reduce_monitor,
            factor=lr_reduce_factor,
            patience=lr_reduce_patience,
            verbose=verbose,
            mode="auto",
            min_delta=min_delta,
        )

<h2 style='color:#5177F9;'>
Main Configs
</h2>

In [None]:
# Width of the input layer of both networks.
n_features = len(feature_variables)

# `missing_variables` holds exactly one entry (the label), so take the first.
label_variable = missing_variables[0]

# Echo the set-up into the executed notebook (spelling of "variable" fixed).
print('Feature variable(s):', '\n'.join(feature_variables))

print(f'\nLabel variable(s): {label_variable}')

In [None]:
# Keep only the homotopic samples, i.e. rows where the identifier, every
# feature and the label are all present.
# NOTE(review): only NaN counts as missing here; confirm the pickled data does
# not still carry the -999 null flag.
training_columns = ['Hash', *feature_variables, label_variable]
data_selected = data_ns[training_columns].copy()
is_homotopic = data_selected.notna().all(axis=1)
data_train = data_selected[is_homotopic].reset_index(drop=True)

print(f'Total number of homotopic data: {len(data_train):g}')

<h2 style='color:#5177F9;'>
MLP for conditional mean
</h2>

A separate ANN model is trained for the conditional mean first. Its prediction is needed to compute the central moments, and keeping it separate makes it possible to control the output values of each model through the choice of activation function.

In [None]:
# Reset Keras' global state before the first model is built, so that a re-run
# of the notebook starts from a clean slate.
tf.keras.backend.clear_session()

<h3 style='color:#51AFF9;'>
Data Prep
</h3>

In [None]:
# The validation inputs are deliberately the training inputs themselves:
# over-fitting is desired.
train_X = test_X = data_train[feature_variables].values

In [None]:
# Label as a flat 1-D vector with one entry per training sample; validation
# reuses the very same array.
n_train_samples = len(data_train)
train_y = data_train[[label_variable]].values.reshape(n_train_samples)
test_y = train_y

<h3 style='color:#51AFF9;'>
Model design
</h3>

In [None]:
# Funnel-shaped MLP for the conditional mean: three ReLU hidden layers
# (64 -> 16 -> 4 nodes) followed by a single linear output node.
model_first_moment = Sequential(name="FirstMomentMLP")

hidden_layer_units = (64, 16, 4)
for layer_number, units in enumerate(hidden_layer_units, start=1):
    layer_options = dict(
        activation="relu",
        kernel_initializer="glorot_normal",
        name=f"Layer{layer_number}",
    )
    if layer_number == 1:
        # Only the first layer declares the input shape.
        layer_options["input_shape"] = (n_features,)
    model_first_moment.add(Dense(units, **layer_options))

model_first_moment.add(
    Dense(1, activation="linear", kernel_initializer="glorot_normal", name="output")
)

model_first_moment.summary()

<h3 style='color:#51AFF9;'>
Visualize the model
</h3>

In [None]:
# `visualizer` is given the file name without extension; the rendered PNG is
# then displayed inline as the cell output.
first_moment_diagram = outdir + 'model_first_moment'
visualizer(model_first_moment, filename=first_moment_diagram, format='png')
IPython.display.Image(first_moment_diagram + '.png')

<h3 style='color:#51AFF9;'>
Configurations
</h3>

The motivation for using a large batch size (the entire training data set) is the assumption that the training data set (sampled data) is representative of the reference population. By using the entire data set, we get a better representation of the aleatory uncertainty across feature values.

Often, using a large batch size results in a generalization gap, where the validation accuracy is degraded. This can be due to the lower number of weight updates, because there are fewer updates per epoch. It can be addressed by changing the training regime. For more information, see the following paper.

*Train longer, generalize better: closing the generalization gap in large batch training of neural networks*, Elad Hoffer et al.

In [None]:
# Hyper-parameters of the conditional-mean model, recorded on the W&B config.
# The batch size equals the number of training samples (full-batch training).
first_moment_settings = {
    "batch_size": len(train_X),
    "n_epochs": 5000,
    "initial_learning_rate": 0.01,
    "loss": 'mse',
    "early_stop_monitor": 'loss',
    "min_delta": 0.001,
    "early_stop_patience": 200,
    "lr_reduce_monitor": 'loss',
    "lr_reduce_factor": 0.1,
    "lr_reduce_patience": 50,
    "verbose": 0,
}
for setting_name, setting_value in first_moment_settings.items():
    setattr(config, setting_name, setting_value)

# The whole config is unpacked; keys the callbacks do not use are absorbed by
# the constructor's **kwargs.
local_clbs = LocalCallBacks(**config)

<h3 style='color:#51AFF9;'>
Compile the model
</h3>

In [None]:
# Nadam optimiser starting from the configured learning rate; MSE is tracked
# as a metric in addition to the configured loss.
first_moment_optimizer = Nadam(learning_rate=config.initial_learning_rate)
model_first_moment.compile(
    optimizer=first_moment_optimizer,
    loss=config.loss,
    metrics=[tf.keras.metrics.mean_squared_error],
)

<h3 style='color:#51AFF9;'>
Train the model
</h3>

In [None]:
# Early stopping, learning-rate schedule, W&B logging and the progress bar.
first_moment_callbacks = [
    local_clbs.early_stop_clbk,
    local_clbs.lr_plan,
    WandbCallback(),
    TqdmCallback,
]

# Keras' own console output is disabled; progress comes from the callback.
model_first_moment_history = model_first_moment.fit(
    x=train_X,
    y=train_y,
    validation_data=(test_X, test_y),
    batch_size=config.batch_size,
    epochs=config.n_epochs,
    callbacks=first_moment_callbacks,
    verbose=0,
)

<h3 style='color:#51AFF9;'>
Loss vs Epoch plot
</h3>

In [None]:
# Training vs. validation loss of the conditional-mean model, using the
# default metric names "loss" and "val_loss".
plot_model(model_first_moment_history)

<h2 style='color:#5177F9;'>
Set the conditional mean and the central moments (2nd, 3rd and 4th)
</h2>

In [None]:
# Conditional mean predicted by the first model, stored next to the label in
# the "<label>_m" column.
conditional_mean = model_first_moment.predict(data_train[feature_variables].values)
data_train[label_variable+'_m'] = conditional_mean

In [None]:
# Targets of the second network: the deviation of each sample from its
# predicted conditional mean, raised to the powers 2, 3 and 4.
# The deviation is computed once as a vectorised column operation. The
# previous row-wise `apply` indexed each row positionally (`x[0]`, `x[1]`) on
# a label-indexed Series, which is deprecated in pandas 2.x and far slower.
label_variables_dev = []
deviation_from_mean = data_train[label_variable] - data_train[label_variable + "_m"]
for power in (2, 3, 4):
    var = label_variable + f"deviation_{power}"
    label_variables_dev.append(var)
    data_train[var] = deviation_from_mean ** power

<h2 style='color:#5177F9;'>
MLP for 2nd, 3rd and 4th moments
</h2>

In [None]:
# Reset Keras' global state again before the central-moments network is built.
tf.keras.backend.clear_session()

<h3 style='color:#51AFF9;'>
Data prep
</h3>

Note: The features remain the same.

In [None]:
# Deviation targets as a (samples x moments) array; validation reuses the
# very same array.
n_samples, n_moments = len(data_train), len(label_variables_dev)
train_y_dev = data_train[label_variables_dev].values.reshape(n_samples, n_moments)
test_y_dev = train_y_dev

<h3 style='color:#51AFF9;'>
Model Design
</h3>

The network is built with the Keras functional API: one shared layer feeds a separate branch for each moment, so that the second, third and fourth central deviations/moments are estimated in parallel by a single model.

In [None]:
input_layer = keras.Input(shape=(len(feature_variables),), name='Input')

weight_initilizer = 'glorot_normal'
bias_initializer = 'Zeros'


def _dense(units, activation, name):
    """Dense layer using the initialisers shared by the whole network."""
    return Dense(
        units,
        activation=activation,
        kernel_initializer=weight_initilizer,
        bias_initializer=bias_initializer,
        name=name,
    )


# One layer shared by all three moments ...
layer_shared_1 = _dense(mlp2_nodes_1, 'relu', 'shared_layer')(input_layer)

# ... followed by one separation layer per moment.
separation_units = mlp2_nodes_2 * 2
layer_s_2 = _dense(separation_units, 'relu', 'separation_layer_2')(layer_shared_1)
layer_s_3 = _dense(separation_units, 'relu', 'separation_layer_3')(layer_shared_1)
layer_s_4 = _dense(separation_units, 'relu', 'separation_layer_4')(layer_shared_1)

# Output heads: ReLU keeps the even-order outputs non-negative, while the
# third-order output is linear and may take either sign.
out_layer_2 = _dense(1, 'relu', 'SecondMoment')(layer_s_2)
out_layer_3 = _dense(1, 'linear', 'ThirdMoment')(layer_s_3)
out_layer_4 = _dense(1, 'relu', 'FourthMoment')(layer_s_4)

model_central_moments = keras.Model(
    inputs=[input_layer],
    outputs=[out_layer_2, out_layer_3, out_layer_4],
    name='Three_Moments_Parallel',
)

In [None]:
# input_layer = keras.Input(shape=(len(feature_variables),), name='Input')

# weight_initilizer = 'glorot_normal' 
# bias_initializer = 'Zeros'


# layer_shared_1 = Dense(16, activation='relu',
#                   kernel_initializer=weight_initilizer,
#                   bias_initializer=bias_initializer,
#                   name='shared_layer')(input_layer)

# # Separation layers
# layer_s_2 = Dense(32, activation='relu',
#                   kernel_initializer=weight_initilizer,
#                   bias_initializer=bias_initializer,
#                   name='separation_layer_2')(layer_shared_1)

# layer_s_3 = Dense(32, activation='relu',
#                   kernel_initializer=weight_initilizer,
#                   bias_initializer=bias_initializer,
#                   name='separation_layer_3')(layer_shared_1)

# layer_s_4 = Dense(32, activation='relu',
#                   kernel_initializer=weight_initilizer,
#                   bias_initializer=bias_initializer,
#                   name='separation_layer_4')(layer_shared_1)


# out_layer_2 = Dense(1, activation='relu',
#                     kernel_initializer=weight_initilizer,
#                     bias_initializer=bias_initializer,
#                     name='SecondMoment')(layer_s_2)

# out_layer_3 = Dense(1, activation='linear',
#                     kernel_initializer=weight_initilizer,
#                     bias_initializer=bias_initializer,
#                     name='ThirdMoment')(layer_s_3)

# out_layer_4 = Dense(1, activation='relu',
#                     kernel_initializer=weight_initilizer,
#                     bias_initializer=bias_initializer,
#                     name='FourthMoment')(layer_s_4)

# model_central_moments = keras.Model(
#     inputs=[input_layer],
#     outputs=[out_layer_2, out_layer_3, out_layer_4], name='Three_Moments_Parallel'
# )

<h3 style='color:#51AFF9;'>
Model Summary and visualization
</h3>

In [None]:
model_central_moments.summary()

# Write the architecture diagram to the output directory; as the last
# expression of the cell, the call's return value is also shown inline.
keras.utils.plot_model(
    model_central_moments,
    to_file=outdir + "model_other_moments.png",
    show_shapes=True,
)

<h3 style='color:#51AFF9;'>
config
</h3>

In [None]:
# Hyper-parameters of the central-moments model. Several keys (`n_epochs`,
# `initial_learning_rate`, `early_stop_patience`) already hold different
# values from the conditional-mean model. W&B can reject changing an existing
# config value through plain attribute assignment, so the values are written
# with `config.update(..., allow_val_change=True)`, the documented way to
# overwrite them.
config.update(
    {
        "batch_size": len(train_X),
        "n_epochs": 10000,
        "initial_learning_rate": 0.1,
        "loss": 'mse',
        "early_stop_monitor": 'loss',
        "min_delta": 0.001,
        "early_stop_patience": 500,
        "lr_reduce_monitor": 'loss',
        "lr_reduce_factor": 0.1,
        "lr_reduce_patience": 50,
        "verbose": 0,
    },
    allow_val_change=True,
)

# Fresh callbacks for the second training run; keys the callbacks do not use
# are absorbed by the constructor's **kwargs.
local_clbs = LocalCallBacks(**config)

<h3 style='color:#51AFF9;'>
Compile the model
</h3>

In [None]:
# The same loss is applied to each of the three output heads.
central_moment_losses = [config.loss] * 3
central_moment_optimizer = Nadam(learning_rate=config.initial_learning_rate)

model_central_moments.compile(
    optimizer=central_moment_optimizer,
    loss=central_moment_losses,
)

<h3 style='color:#51AFF9;'>
Train the model
</h3>

In [None]:
# Output-layer names, in the column order of the deviation arrays
# (powers 2, 3 and 4). Each target is passed as a column vector keyed by the
# name of the output layer it belongs to.
moment_heads = ('SecondMoment', 'ThirdMoment', 'FourthMoment')

train_y_dict = {
    head: train_y_dev[:, column].reshape(-1, 1)
    for column, head in enumerate(moment_heads)
}

test_y_dict = {
    head: test_y_dev[:, column].reshape(-1, 1)
    for column, head in enumerate(moment_heads)
}

In [None]:
# Early stopping, learning-rate schedule, W&B logging and the progress bar.
central_moment_callbacks = [
    local_clbs.early_stop_clbk,
    local_clbs.lr_plan,
    WandbCallback(),
    TqdmCallback,
]

# Keras' own console output is disabled; progress comes from the callback.
model_central_moments_history = model_central_moments.fit(
    x=train_X,
    y=train_y_dict,
    validation_data=(test_X, test_y_dict),
    batch_size=config.batch_size,
    epochs=config.n_epochs,
    callbacks=central_moment_callbacks,
    verbose=0,
)

<h3 style='color:#51AFF9;'>
Loss vs Epoch plot
</h3>

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(15, 3))

# One panel per output head; Keras prefixes each head's loss with its name.
for ax, label in zip(axes, train_y_dict):
    head_losses = {"loss": f"{label}_loss", "val_loss": f"val_{label}_loss"}
    plot_model(model_central_moments_history, ax=ax, loss_dict=head_losses)
    ax.set_title(label)

<h1 style='color:#1B127A;'>
Export
</h1>

In [None]:
# Persist both trained models in the output directory; `file_suffix`
# distinguishes the runs of this template.
trained_models = (
    ("modelfirstmoment", model_first_moment),
    ("modelcentralmoments", model_central_moments),
)
for model_stem, trained_model in trained_models:
    trained_model.save(f"{outdir}{model_stem}{file_suffix}")