# Training of the Neural Network

### In this notebook, the neural network from the paper is constructed and trained on a data subset 'Train_sub.csv'.

In [None]:
# Import
import pandas as pd 
import scipy.ndimage
import scipy.signal
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
import os
from sklearn.preprocessing import StandardScaler           
from sklearn.model_selection import train_test_split        
from sklearn.metrics import accuracy_score
import joblib as jb
from time import time
import glob
import skfmm
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
import tensorflow as tf
# Seed every random number generator the notebook relies on so that a fresh
# "Restart & Run All" starts from the same random state: NumPy's global
# generator and TensorFlow's global generator (used by Keras, e.g. for the
# initial layer weights).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

### Load data from the .csv file and check its structure. 'head_diff' contains the simulated head differences that we want to train the neural network to predict. These values are the target data, while the remaining columns are used as input features.

In [None]:
# Read the training subset and verify it has the column layout the rest of the
# notebook relies on: the target is taken from column 0 and the six input
# features from columns 1-6 (both selected by position in the split cell).
data_collection = pd.read_csv('Train_sub.csv')

MIN_COLUMNS = 7  # 1 target column + 6 feature columns
if data_collection.shape[1] < MIN_COLUMNS:
    # Fail early with a clear message instead of a shape error during training.
    raise ValueError(
        "'Train_sub.csv' must contain at least {} columns (target + 6 features), "
        "found {}".format(MIN_COLUMNS, data_collection.shape[1])
    )

print(data_collection)

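### Split the data into training, test, and validation sets, then standardize the features with scikit-learn's StandardScaler (fitted on the training set only). Note that the test set is used to monitor the loss during training, while the validation set is held out for the final evaluation.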

In [None]:
# Column 0 is used as the target; columns 1-6 are used as the input features.
features = data_collection.iloc[:, 1:7]
target = data_collection.iloc[:, 0]

# First hold out 20 % of all rows as the test set ...
X_train1, X_test, y_train1, y_test = train_test_split(
    features, target, test_size=0.2, random_state=1
)
# ... then split 10 % of the remaining rows off as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train1, y_train1, test_size=0.1, random_state=1
)

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to the training and the test features.
# X_val is left unscaled here; it is transformed in the prediction cell.
# NOTE: this cell overwrites X_train / X_test, so run it only once per split -
# re-running it would fit the scaler on data that is already scaled.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### The neural network is constructed using tensorflow.keras layers. The input has 6 features that are connected to 3 hidden layers with 75 neurons each. We add a probabilistic output layer with TensorFlow Probability that predicts two output values - the mean and the standard deviation of the output distribution. The optimizer is Adam, and we minimize the negative log-likelihood loss function.

In [None]:
from keras.utils import plot_model
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

import tensorflow_probability as tfp
tfd = tfp.distributions

# Loss: negative log-likelihood of the target y under the predicted distribution p_y.
negloglik = lambda y, p_y: -p_y.log_prob(y)

N_FEATURES = 6        # width of the input layer
HIDDEN_UNITS = 75     # neurons per hidden layer
L2_STRENGTH = 0.0001  # L2 factor for both the kernel and the activity penalty


def _regularized_relu_block(tensor):
    """Apply one ReLU Dense layer with L2 kernel and activity regularization."""
    layer = Dense(
        HIDDEN_UNITS,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(L2_STRENGTH),
        activity_regularizer=tf.keras.regularizers.l2(L2_STRENGTH),
    )
    return layer(tensor)


inputA = Input(shape=(N_FEATURES,), name='inputA')

# Three identical hidden layers stacked on top of the input.
interpB = _regularized_relu_block(inputA)
interp1B = _regularized_relu_block(interpB)
interp2B = _regularized_relu_block(interp1B)

# Two linear outputs per sample: the first slice becomes the mean, the second
# slice is passed through softplus (plus a 1e-3 offset) to give a positive scale.
output = Dense(1+1, activation='linear')(interp2B)
outputs =  tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t[..., :1],scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))(output)

model = Model(inputs=inputA, outputs=outputs)

# Adam with an exponentially decaying learning rate (factor 0.95 per 20000 steps).
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, decay_steps=20000, decay_rate=0.95
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(optimizer=optimizer, loss=negloglik)
model.summary()

In [None]:
# Train for 300 epochs in mini-batches of 256 samples. The scaled test split is
# passed as validation_data, so its loss is reported alongside the training
# loss; X_val is not used during training.
results = model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=300,
    validation_data=(X_test, y_test),
)

### Predict the head change for the validation data set and visualize the results compared to the target values.

In [None]:
import time

# Time the whole inference path: scaling the held-out features plus one
# forward pass. perf_counter() is a monotonic clock meant for measuring durations.
t1 = time.perf_counter()
# Store the scaled copy under its own name so that X_val itself stays unscaled;
# overwriting X_val would apply the scaler twice whenever this cell is re-run.
X_val_scaled = scaler.transform(X_val)
# The model output is a distribution object; take its mean and standard deviation.
y_hat = model(X_val_scaled)
mean = y_hat.mean()
stddev = y_hat.stddev()
t2 = time.perf_counter()
print('This took {} seconds'.format(round(t2-t1,2)))

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error

# Compare the predicted mean against the held-out target values.
y_testi = y_val
target_min, target_max = y_testi.min(), y_testi.max()

fig, ax = plt.subplots(figsize=(15,6))
ax.scatter(y_testi, mean, s=15)
# Dashed 1:1 reference line spanning the range of the target values.
ax.plot([target_min, target_max], [target_min, target_max], 'k--', lw=4)
ax.set_xlabel('Measured head change [m]')
ax.set_ylabel('Predicted head change [m]')
fig.savefig('Validation')

MSE = mean_squared_error(y_testi, mean)  # mean of the squared residuals
rmse = np.sqrt(MSE)                      # root mean square error
print("MSE: {}" .format(round((MSE), 4)))
print("RMSE: {}" .format(round(rmse, 4)))

### The network can be saved and loaded again for further use elsewhere

In [None]:
# Persist the trained network in HDF5 format.
model.save("Trained_network_sub.h5")
print("Saved model to disk")

# The network was trained on inputs transformed by `scaler`, so the fitted
# scaler is stored next to it: new inputs have to go through the same
# transformation before they are given to the reloaded network.
jb.dump(scaler, "Trained_network_sub_scaler.joblib")
print("Saved scaler to disk")

### Feel free to play around with the model and investigate the effects of more epochs, a different number of hidden layers, or a different number of neurons.