In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.callbacks import LambdaCallback, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam, Nadam

import rmsp
import sys
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from matplotlib import pyplot as plt
import numpy as np
import math
import shutil
import copy
import pygeostat as gs
from tqdm.notebook import trange
tf.__version__

%load_ext autoreload
%autoreload 2

In [None]:
# Activate the RMSP package for this kernel session
# (presumably license/session initialisation — confirm against the RMSP docs).
rmsp.activate()

In [None]:
# Put the sibling ../Tools directory at the front of the import path so the
# project-local modules imported below (presumably located there) resolve.
sys.path.insert(0, os.path.abspath('../Tools'))
from file_export import PickleExporter, FigureExporter

In [None]:
from lambda_distribution import *
from utility import get_lambdas_keras

# Introduction

This notebook contains a workflow to implement data imputation for the Northwest Territories data set. RMSP is used instead of pygeostat where applicable to simplify the code and improve performance.

Note: The TensorFlow version should be 2.0 or newer.


# Settings

In [None]:
# Root directory for this notebook's outputs; later cells build file names by
# plain string concatenation, so the trailing slash is required.
outdir = 'Output/LambdaDistributionMl/'
gs.mkdir(outdir)

In [None]:
# Exporters bound to the notebook output directory.
pickle_data = PickleExporter(outdir)
save_figure = FigureExporter(outdir)
# Separate exporter that writes figures into the journal-paper LaTeX project.
# NOTE(review): this relative path only resolves from the notebook's own directory.
save_figure_paper = FigureExporter(
    "../../JournalPapers/ImputationUsingLambdaDistAndMl/Latex/elsarticle-template/Figures"
)

In [None]:
# Weights & Biases experiment tracking for this training run.
import wandb
from wandb.keras import WandbCallback

# Start a run; project/entity/group/tags only control how the run is filed in W&B.
wandb.init(
    project="LambdaDistribution",
    entity="mosi",
    group="ClassIa",
    tags=["Characterization"],
)

# Handle to the run's config object; hyper-parameters are stored on it in a later cell.
config = wandb.config

# Using MLP to fit Lambda distribution

For the lambda distribution, finding the shape parameters, i.e. $\lambda_3$ and $\lambda_4$, given the skewness and kurtosis is very difficult. In this notebook, an MLP model is designed to provide a mapping between the empirical (skewness, kurtosis) pair and the shape parameters for a specific class of lambda distributions.

In [None]:
from keras_visualizer import visualizer 

## Configurations (wandb)

In [None]:
# Hyper-parameters are stored on the wandb config object so they are available
# to the callback, model and training cells below.

# Optimizer / objective
config.learning_rate = 0.001
config.loss = 'mse'

# Early stopping (consumed by the EarlyStopping callback)
config.early_stop_min_delta = 0.0001
config.early_stop_monitor = 'loss'
config.early_stop_patience = 300

# Learning-rate schedule (consumed by the ReduceLROnPlateau callback)
config.lr_plan_monitor = 'loss'
config.lr_plan_factor = 0.1
config.lr_plan_patience = 100
config.lr_plan_min_delta=0.0001

# Network / training loop
config.activation ='sigmoid'
config.epochs = 1000
config.batchsize = 3000

## Callbacks

In [None]:
# Early stopping: halt training when the monitored quantity has not improved by
# at least `min_delta` for `patience` epochs, then restore the best weights seen.
early_stop_clbk = tf.keras.callbacks.EarlyStopping(
    monitor=config.early_stop_monitor,
    min_delta=config.early_stop_min_delta,
    patience=config.early_stop_patience,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)

# Learning-rate plan: multiply the learning rate by `factor` whenever the
# monitored quantity plateaus for `patience` epochs.
lr_plan = ReduceLROnPlateau(
    monitor=config.lr_plan_monitor,
    factor=config.lr_plan_factor,
    patience=config.lr_plan_patience,
    verbose=0,
    mode='auto',
    min_delta=config.lr_plan_min_delta,
)

## Choosing Lambda Class

In [None]:
# Mesh size handed to the sample generator. Later plotting cells reshape the
# sampled columns to (n_mesh_lambda, n_mesh_lambda), so the sample is expected
# to contain n_mesh_lambda**2 rows.
n_mesh_lambda = 350
# Lambda-distribution class to sample; also used in the model and export names.
class_name = 'Ia'

sample_generator = SampleLambdaMoments(n_mesh=n_mesh_lambda)
# Later cells read the Skewness, Kurtosis, Lambda3 and Lambda4 columns of this frame.
data_lambda = sample_generator.get_sample(class_name=class_name)
data_lambda.head()

In [None]:
# n_mesh_lambda = 350
# class_name = 'Ib'
# sample_generator = SampleLambdaMoments(n_mesh=n_mesh_lambda)
# data_lambda_1b = sample_generator.get_sample(class_name=class_name)


# n_mesh_lambda = 350
# class_name = 'Ic'
# sample_generator = SampleLambdaMoments(n_mesh=n_mesh_lambda)
# data_lambda_1c = sample_generator.get_sample(class_name=class_name)

# data_lambda = pd.concat([data_lambda, data_lambda_1b, data_lambda_1c], ignore_index=True)
# data_lambda.head()

## Features and labels

In [None]:
# Inputs of the network (empirical moments) and its targets (shape parameters).
features = ['Skewness', 'Kurtosis']
labels = ['Lambda3', 'Lambda4']
n_features, n_labels = len(features), len(labels)

# Pull the columns out as 2-D numpy arrays: one row per sample,
# one column per feature / label.
trainX = data_lambda[features].values.reshape(-1, n_features)
trainY = data_lambda[labels].values.reshape(-1, n_labels)

<h2 style='color:#5177F9;'>
KDE for scatter plot of features and labels
</h2>

In [None]:
# Side-by-side scatter plots of the feature space (left) and the label space (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Plot a random 20% subsample only.
data_lambda_polt = data_lambda.sample(frac=0.2)

for ax, (x_col, y_col) in zip(axes, [("Skewness", "Kurtosis"), ("Lambda3", "Lambda4")]):
    gs.scatter_plot(data_lambda_polt[x_col], data_lambda_polt[y_col], ax=ax, stat_blk=False)

## Design the model

In [None]:
# MLP mapping (skewness, kurtosis) -> (lambda_3, lambda_4): three hidden layers
# that narrow 128 -> 64 -> 16, followed by a linear output layer so the
# regression targets are unconstrained.
model = Sequential(
    [
        Dense(128, input_shape=(n_features,), activation=config.activation, name='Dense1'),
        Dense(64, activation=config.activation, name='Dense2'),
        Dense(16, activation=config.activation, name='Dense3'),
        Dense(n_labels, activation="linear", name='Output'),
    ],
    name=f'MlpForLambda_{class_name}',
)

model.compile(loss=config.loss, optimizer=Adam(learning_rate=config.learning_rate))

model.summary()

## Model Visualization

In [None]:
import IPython
# Render the network diagram to <outdir>model.png, then display that image inline.
visualizer(model, format='png', filename=outdir+'model')
IPython.display.Image (outdir+'model.png')

## Train the model

In [None]:
# Fit on the full lambda sample (no validation split is passed); both the
# early-stopping and learning-rate callbacks monitor the metric named in the config.
training_callbacks = [early_stop_clbk, lr_plan, WandbCallback()]

history = model.fit(
    trainX,
    trainY,
    epochs=config.epochs,
    batch_size=config.batchsize,
    callbacks=training_callbacks,
    verbose=0,
)

## Performance check

### Scatter plots for each label

In [None]:
fig, main_ax = plt.subplots(1, 3, figsize=(18, 5))

# First panel: training loss per epoch.
ax = main_ax[0]
ax.plot(history.history['loss'], label='Loss')
ax.set(xlabel='epoch', ylabel='Loss')
ax.grid(which='major', axis='y', linestyle='--')

# Predictions on the training inputs; reused by the bivariate figures further down.
predictions = model.predict(trainX)

# Remaining panels: predicted vs. true value, one panel per label.
for i, label in enumerate(labels):
    ax = main_ax[i + 1]
    prediction, true_value = predictions[:, i].flatten(), trainY[:, i].flatten()
    gs.validation_plot(prediction, true_value, grid=True, ax=ax)
    ax.set(
        xlabel='Prediction',
        ylabel='True',
        title=f'{label}',
        xlim=[0, 0.5],
        ylim=[0, 0.5],
    )

### Bivariate check (MLP VS Training)

In [None]:
# Colormap shared by the bivariate scatter/contour figures in the following cells.
cmap = 'RdYlGn_r'

In [None]:
# Bivariate check in (lambda_3, lambda_4) space.
# Top row: training lambdas coloured by each moment.
# Bottom row: MLP-predicted lambdas coloured by the same moments.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

# The contour calls need 2-D grids; this assumes the sample holds exactly
# n_mesh_lambda**2 rows laid out on a regular mesh.
mesh_shape = (n_mesh_lambda, n_mesh_lambda)

# Options shared by all four scatter panels.
lambda_plot_kwargs = dict(
    xlim=[0, 0.5],
    ylim=[0, 0.5],
    stat_blk=False,
    xlabel=r'$\lambda_3$',
    ylabel=r'$\lambda_4$',
    cbar=True,
    cmap=cmap,
)

# Top row: training data.
for i, label in enumerate(features):
    gs.scatter_plot(
        data_lambda['Lambda3'],
        data_lambda['Lambda4'],
        c=data_lambda[label].values,
        title='Training {}'.format(label),
        ax=axes[i],
        **lambda_plot_kwargs,
    )
    # `linewidths` is the contour keyword for line width; `lw` is not accepted
    # by contour and was being ignored.
    axes[i].contour(
        data_lambda['Lambda3'].values.reshape(mesh_shape),
        data_lambda['Lambda4'].values.reshape(mesh_shape),
        data_lambda[label].values.reshape(mesh_shape),
        cmap=cmap,
        linewidths=0.5,
    )

# Bottom row: MLP predictions, coloured by the input moment that produced them.
for i, label in enumerate(features):
    gs.scatter_plot(
        predictions[:, 0],
        predictions[:, 1],
        c=trainX[:, i],
        title='MLP {}'.format(label),
        ax=axes[i + 2],
        **lambda_plot_kwargs,
    )
    axes[i + 2].contour(
        predictions[:, 0].reshape(mesh_shape),
        predictions[:, 1].reshape(mesh_shape),
        trainX[:, i].reshape(mesh_shape),
        cmap=cmap,
        linewidths=0.5,
    )

plt.tight_layout(h_pad=2.0, w_pad=3)

In [None]:
# Bivariate check in (skewness, kurtosis) space.
# Top row: training lambda_3 / lambda_4.
# Bottom row: the MLP predictions of the same quantities.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

# The contour calls need 2-D grids; this assumes the sample holds exactly
# n_mesh_lambda**2 rows laid out on a regular mesh.
mesh_shape = (n_mesh_lambda, n_mesh_lambda)

# Options shared by all four scatter panels.
moment_plot_kwargs = dict(
    stat_blk=False,
    xlabel=r"Skewness",
    ylabel=r"Kurtosis",
    cbar=True,
    cmap=cmap,
)

# (data column, LaTeX symbol name) for each shape parameter.
panels = [("Lambda3", "lambda_3"), ("Lambda4", "lambda_4")]

# Top row: training data.
for i, (column, label) in enumerate(panels):
    gs.scatter_plot(
        data_lambda["Skewness"],
        data_lambda["Kurtosis"],
        c=data_lambda[column].values,
        title=r"$Training\; \{}$".format(label),
        ax=axes[i],
        **moment_plot_kwargs,
    )
    # `linewidths` is the contour keyword for line width; `lw` is not accepted
    # by contour and was being ignored.
    axes[i].contour(
        data_lambda["Skewness"].values.reshape(mesh_shape),
        data_lambda["Kurtosis"].values.reshape(mesh_shape),
        data_lambda[column].values.reshape(mesh_shape),
        cmap=cmap,
        linewidths=0.5,
    )

# Bottom row: MLP predictions over the same inputs.
for i, (column, label) in enumerate(panels):
    gs.scatter_plot(
        trainX[:, 0],
        trainX[:, 1],
        c=predictions[:, i],
        title=r"$MLP\; \{}$".format(label),
        ax=axes[i + 2],
        **moment_plot_kwargs,
    )
    axes[i + 2].contour(
        trainX[:, 0].reshape(mesh_shape),
        trainX[:, 1].reshape(mesh_shape),
        predictions[:, i].reshape(mesh_shape),
        cmap=cmap,
        linewidths=0.5,
    )

plt.tight_layout(h_pad=2.0, w_pad=7)
save_figure_paper("LambdaMlpTraining.png")
save_figure_paper("LambdaMlpTraining.png")

# Tests

In [None]:
from utility import get_lambdas_keras
from lambda_distribution import GeneralizedLambdaDist

In [None]:
import unittest

class TestLambda(unittest.TestCase):
    
    def test_lambda_fit(self):
        input_set = [0,1,0,3]
        output_set = self.lambda_fit(input_set = input_set)
        assert np.isclose(np.array(input_set), np.array([output_set]), 0, atol=0.1).all()
        
    def lambda_fit(self, input_set):
        gld = GeneralizedLambdaDist(*get_lambdas_keras(*input_set,model))
        gld.dist_plot(n_sample=1000000)
        return gld.get_moments()

# Run the tests inside the notebook: argv=[''] keeps unittest from parsing the
# kernel's own command-line arguments, and exit=False stops it from calling
# sys.exit() when the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)

# Exports

In [None]:
# Persist the trained model under the output directory; the lambda class name
# is embedded in the path (e.g. Lambda_Ia_Keras).
# class_name = 'I'
model.save(outdir+f'Lambda_{class_name}_Keras')