## Neural network training notebook
### Hazard level: 475 years

In [1]:

import numpy as np

import matplotlib
# matplotlib.use('TkAgg')
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, explained_variance_score,r2_score
import pickle
import time
import copy
import random

#%% Read and manipulate data


## Load model

In [9]:
# Load the saved model and scaler so we don't need to retrain every time.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load .pkl files that come from a trusted source.
# The `with` blocks make sure both file handles are closed after loading.
with open('model_fitted_all_475_imp1.pkl', 'rb') as model_file:
    mlp = pickle.load(model_file)
with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)



## Convert from scikit-learn to tensorflow keras

In [10]:
import tensorflow as tf
from tensorflow.python.ops.numpy_ops import np_config
# Switch tf.Tensor to NumPy-compatible behavior (extra NumPy-style methods,
# NumPy-style dtype promotion in operators, NumPy-style indexing).
# NOTE(review): presumably needed because later cells combine tensors with
# NumPy / scikit-learn code and Python built-ins -- confirm before removing.
np_config.enable_numpy_behavior()

In [11]:
# initialize model
tf.keras.backend.clear_session()

# Read the architecture from the fitted scikit-learn network instead of
# hard-coding it: mlp.coefs_[k] is the (n_in, n_out) weight matrix of layer k.
# For the model this notebook was written against, this should reproduce the
# previously hard-coded layout (41 inputs, 20 hidden layers of 150 units,
# 1 output).
n_inputs = mlp.coefs_[0].shape[0]
layer_widths = [coef.shape[1] for coef in mlp.coefs_]

tf_model = tf.keras.models.Sequential()

# replicate model architecture
# input layer
tf_model.add(tf.keras.Input(shape=(n_inputs,)))
# hidden layers
# NOTE(review): ReLU is kept from the original cell; this assumes the
# scikit-learn model was trained with activation='relu' -- confirm.
for width in layer_widths[:-1]:
    tf_model.add(tf.keras.layers.Dense(width, activation='relu'))
# final layer
# MLPRegressor uses the identity function on its output layer, so the replica
# must be linear here; a ReLU would clip negative predictions to 0 and zero
# out their gradients.
tf_model.add(tf.keras.layers.Dense(layer_widths[-1], activation='linear'))

# tf_model.layers

In [12]:
# assign weights from scikit-learn model to tf model
mlp_weights = mlp.coefs_
mlp_biases = mlp.intercepts_

# Pair each Keras layer with its scikit-learn parameters rather than relying
# on a hard-coded layer count; fail early if the two models do not line up.
if not (len(tf_model.layers) == len(mlp_weights) == len(mlp_biases)):
    raise ValueError(
        "Layer count mismatch: tf_model has %d layers, mlp has %d weight "
        "matrices and %d bias vectors"
        % (len(tf_model.layers), len(mlp_weights), len(mlp_biases))
    )

for layer, layer_weights, layer_bias in zip(tf_model.layers, mlp_weights, mlp_biases):
    # weights[0] is the Dense kernel; scikit-learn stores it in the same
    # (n_in, n_out) orientation, so no transpose is needed.
    layer.weights[0].assign(layer_weights)
    layer.bias.assign(layer_bias)

In [13]:
# test if tf model matches scikit-learn model
# create random input_data
# A locally seeded generator keeps this check reproducible across re-runs.
check_rng = np.random.default_rng(0)
random_combinations = np.round(check_rng.uniform(1, 3, 41), 1)
print(random_combinations)

# One row with 41 feature columns, scaled the same way as the training data.
input_data = pd.DataFrame(random_combinations).T
input_data_scaled = scaler.transform(input_data)

# predict using scikit-learn model
return_value_scikit = mlp.predict(input_data_scaled)
print(return_value_scikit[0])

# predict using tf model
return_value_tf = tf_model.predict(input_data_scaled)
print(return_value_tf[0][0])

# Fail loudly if the converted model diverges instead of relying on comparing
# the two printed numbers by eye. The tolerance leaves room for the Keras
# model computing in float32.
np.testing.assert_allclose(
    return_value_tf[0][0], return_value_scikit[0], rtol=1e-4, atol=1e-3
)

[2.6 2.4 1.9 2.  2.7 1.3 1.7 2.2 2.3 2.4 1.3 1.6 2.8 2.5 3.  1.2 2.8 2.
 2.1 2.3 1.6 2.8 1.2 2.9 2.4 1.7 2.3 3.  3.  1.9 1.2 2.5 1.5 2.4 2.4 2.2
 1.5 2.  1.8 1.1 1.3]
122.11584088543773
122.11585


## Define find_gradients

In [14]:
def find_gradients(input_data, input_label, input_target,lambd):
    """Return the element-wise sign of the loss gradient w.r.t. ``input_data``.

    The loss is the mean squared error between ``input_label`` and
    ``tf_model(input_data)`` plus ``lambd`` times the absolute deviation of
    ``input_data`` from ``input_target``.

    Parameters
    ----------
    input_data : tf.Tensor
        Model input to differentiate with respect to (it is watched by the tape).
    input_label
        Desired model output, passed as the "true" value to the MSE loss.
    input_target : tf.Tensor
        Reference point; deviation of ``input_data`` from it is penalised.
    lambd : float
        Weight of the absolute-deviation penalty.

    Returns
    -------
    tf.Tensor
        ``tf.sign`` of the gradient, same shape as ``input_data``.
    """
    mse = tf.keras.losses.MeanSquaredError()

    with tf.GradientTape() as tape:
        tape.watch(input_data)
        predicted = tf_model(input_data)
        deviation = tf.abs(input_target - input_data)
        # Reduce over axis 0 only, exactly like iterating the tensor with the
        # Python built-in sum().
        # NOTE(review): for a (1, n) input this leaves a length-n vector, so
        # `loss` is a vector and tape.gradient differentiates the sum of its
        # entries -- the MSE term is then effectively counted n times relative
        # to the penalty. Left unchanged because lambd values used with this
        # function were chosen under that weighting; confirm whether a full
        # scalar reduction was intended.
        l1_penalty = tf.reduce_sum(deviation, axis=0)
        loss = mse(input_label, predicted) + lambd * l1_penalty

    # Only the direction of the gradient is returned (sign per input feature);
    # the caller decides the step size.
    return tf.sign(tape.gradient(loss, input_data))

In [15]:
# Number of signed-gradient updates and the step size applied per update
# (the step is taken in the scaled feature space).
steps = 2000
eta = 0.001

# lambd = 10000
# Starting point of the search: every one of the 41 features set to 1.
input_data = np.ones((1,41))
# input_data = np.round(np.random.uniform(1,3,41),1)
# Model output the optimisation should reach.
input_label = 80

input_data = pd.DataFrame(input_data)
input_data_transform = scaler.transform(input_data)
input_data_transform = tf.convert_to_tensor(input_data_transform)

# Sanity check: inverting the scaling should print the all-ones start again.
input_data_unscaled = scaler.inverse_transform(input_data_transform)
print(input_data_unscaled)

prediction_original = tf_model(input_data_transform)
print(prediction_original)

# Reference point for the deviation penalty.
# Scale `input_target` itself: the previous code scaled `input_data` here,
# which only gave the right answer because both happened to be all ones and
# would silently move the target if the starting point were changed.
input_target = np.ones((1,41))
input_target_transform = scaler.transform(pd.DataFrame(input_target))
input_target_transform = tf.convert_to_tensor(input_target_transform)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
tf.Tensor([[171.64752]], shape=(1, 1), dtype=float32)


## Perform gradient descent

In [16]:
# Signed-gradient descent: take `steps` updates of size `eta`, each one moving
# every scaled feature against the sign of its loss gradient.
# Note: input_data_transform is overwritten in place, so re-running this cell
# continues from the current point rather than restarting from the initial one.
for _ in range(steps):
    derivative = find_gradients(
        input_data_transform,
        input_label,
        input_target_transform,
        lambd=2000,
    )
    input_data_transform = input_data_transform - eta * derivative

## Visualize results

In [17]:
# Model output at the optimised input (still in scaled feature space).
prediction_changed = tf_model(input_data_transform)
print(prediction_changed)

tf.Tensor([[81.64882]], shape=(1, 1), dtype=float32)


In [20]:
# Undo the scaling to read the optimised input in original feature units,
# and print row 0 rounded to one decimal.
input_data_changed = scaler.inverse_transform(input_data_transform)
print(np.round(input_data_changed[0],1))
# Plain alias: both names refer to the same object (no copy is made).
input_data_optimized = input_data_changed

[1.  1.  1.  1.  1.  1.6 1.2 2.7 1.  1.  2.5 1.  1.  1.  1.  2.5 1.  1.
 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
 1.  1.  1.  1.  1. ]


In [21]:
# Round trip: scale the unscaled optimum again to get back to model input space.
input_data_optimized_transformed = scaler.transform(input_data_optimized)

In [22]:
# Predict from the round-tripped input; this can be compared with the
# prediction_changed value printed earlier.
prediction_optimized = tf_model(input_data_optimized_transformed)
print(prediction_optimized)

tf.Tensor([[81.64882]], shape=(1, 1), dtype=float32)


In [23]:
# Row 0 of the unscaled optimum, rounded to one decimal place.
input_data_rounded = np.round(input_data_changed[0],1)

In [24]:
# Add a trailing axis so the values print as one column, one feature per line.
print(input_data_rounded[:,None])

[[1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1.6]
 [1.2]
 [2.7]
 [1. ]
 [1. ]
 [2.5]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [2.5]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]
 [1. ]]
