## Neural network training notebook
### Hazard level: 475 years

In [1]:
import numpy as np
import matplotlib
# matplotlib.use('TkAgg')
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, explained_variance_score,r2_score
import pickle
import time
import copy
import random

## Load model

In [2]:
#Load saved model so we don't need to train everytime
mlp = pickle.load(open(r'C:\Users\peter\OneDrive\Desktop\CS 230\4 NNv1\00 training data\model_fitted_all_imp1_3_120.pkl','rb'))
scaler = pickle.load(open(r'C:\Users\peter\OneDrive\Desktop\CS 230\4 NNv1\02 Optimization\scaler.pkl','rb'))



In [3]:
n_input_features = mlp.n_features_in_
n_layers = mlp.n_layers_
print(n_layers)
# uniform number of hidden units
n_hidden_units = len(mlp.coefs_[1][0])

5


## Convert from scikit-learn to tensorflow keras

In [4]:
import tensorflow as tf
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

In [5]:
# initialize model
tf.keras.backend.clear_session()
tf_model = tf.keras.models.Sequential()

# replicate model architecture
# input layer
tf_model.add(tf.keras.Input(shape=(n_input_features,)))
# hidden layers - exclude input and output layers
for i in range(n_layers-2):
    tf_model.add(tf.keras.layers.Dense(n_hidden_units, activation='relu'))
# final layer
tf_model.add(tf.keras.layers.Dense(1, activation='relu'))

In [6]:
# assign weights from scikit-learn model to tf model
mlp_weights = mlp.coefs_
mlp_biases = mlp.intercepts_

for i in range(n_layers-1):
    tf_model.layers[i].weights[0].assign(mlp_weights[i])
    tf_model.layers[i].bias.assign(mlp_biases[i])

In [7]:
# test if tf model matches scikit-learn model
# create random input_data
random_combinations = np.round(np.random.uniform(1,3,n_input_features),1)
# assign EDPS
random_combinations[0:7] = [0.0167496,0.0169762,0.016178,209.826,306.09,330.71,383.836]
print(random_combinations)

input_data = pd.DataFrame(random_combinations).T
input_data_scaled = scaler.transform(input_data)

# predict using scikit-learn model
return_value_scikit = mlp.predict(input_data_scaled)
print(return_value_scikit[0])

# predict using tf model
return_value_tf = tf_model.predict(input_data_scaled)
print(return_value_tf[0][0])

[1.67496e-02 1.69762e-02 1.61780e-02 2.09826e+02 3.06090e+02 3.30710e+02
 3.83836e+02 1.90000e+00 2.40000e+00 2.50000e+00 2.40000e+00 2.50000e+00
 2.10000e+00 2.20000e+00 1.70000e+00 2.80000e+00 1.60000e+00 1.50000e+00
 2.40000e+00 1.50000e+00 1.40000e+00 2.00000e+00 2.20000e+00 2.00000e+00
 2.40000e+00 2.60000e+00 2.90000e+00 2.40000e+00 1.50000e+00 2.40000e+00
 1.90000e+00 1.30000e+00 1.00000e+00 1.30000e+00 1.50000e+00 2.00000e+00
 1.40000e+00 1.60000e+00 1.60000e+00 2.50000e+00 2.70000e+00 2.90000e+00
 2.90000e+00 2.20000e+00 2.50000e+00 1.40000e+00 1.50000e+00 1.50000e+00]
4.7627763307984425
4.762776


## Define find_gradients

In [8]:
def find_gradients(input_data, input_label, input_target,lambd):
  with tf.GradientTape() as tape:
    tape.watch(input_data)
    prediction = tf_model(input_data)
    diff = input_target - input_data
    loss_object = tf.keras.losses.MeanSquaredError()
    loss = loss_object(input_label, prediction) + lambd*sum(abs(diff))

  # Get the gradients of the loss w.r.t to the input vector.
  gradient = tape.gradient(loss, input_data)
  # Get the sign of the gradients to create the perturbation
  signed_grad = tf.sign(gradient)
  return signed_grad

In [102]:
steps = 2000
eta = 0.001

# lambd = 10000
input_data_EDP = np.array([0.0167496,0.0169762,0.016178,209.826,306.09,330.71,383.836])
input_data_components = np.ones((1,41))[0]
input_data = np.concatenate((input_data_EDP,input_data_components),axis=0)
input_label = 4

input_data = pd.DataFrame(input_data).T
print(input_data)
input_data_transform = scaler.transform(input_data)
input_data_transform = tf.convert_to_tensor(input_data_transform)

input_data_unscaled = scaler.inverse_transform(input_data_transform)
print(input_data_unscaled)

prediction_original = tf_model(input_data_transform)
print(prediction_original)

input_target_transform = scaler.transform(input_data)
input_target_transform = tf.convert_to_tensor(input_target_transform)

        0         1         2        3       4       5        6    7    8   \
0  0.01675  0.016976  0.016178  209.826  306.09  330.71  383.836  1.0  1.0   

    9   ...   38   39   40   41   42   43   44   45   46   47  
0  1.0  ...  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  

[1 rows x 48 columns]
[[  0.0167496   0.0169762   0.016178  209.826     306.09      330.71
  383.836       1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.
    1.          1.          1.          1.          1.          1.       ]]
tf.Tensor([[28.637611]], shape=(1, 1), dtype=float32)


In [103]:
filter_array = np.ones((1,48))
filter_array[0,0:7] = 0
print(filter_array)

[[0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


## Perform gradient descent

In [104]:
# repeat the process to carry out gradient descent
for i in range(steps):
    derivative = find_gradients(input_data_transform,input_label,input_target_transform,lambd=500)
    # do not update first seven EDP values
    derivative = np.multiply(derivative,filter_array)
    # update input_data based on derivative
    input_data_transform = input_data_transform - eta*(derivative)

## Visualize results

In [105]:
prediction_changed = tf_model(input_data_transform)
print(prediction_changed)

tf.Tensor([[7.4060216]], shape=(1, 1), dtype=float32)


In [106]:
input_data_changed = scaler.inverse_transform(input_data_transform)
input_data_optimized = input_data_changed

In [107]:
input_data_optimized_transformed = scaler.transform(input_data_optimized)

In [108]:
prediction_optimized = tf_model(input_data_optimized_transformed)
print(prediction_optimized)

tf.Tensor([[7.4060216]], shape=(1, 1), dtype=float32)


In [109]:
input_data_rounded = np.round(input_data_changed[0],3)

In [110]:
np.set_printoptions(suppress=True)
print(input_data_rounded[:,None])

[[  0.017]
 [  0.017]
 [  0.016]
 [209.826]
 [306.09 ]
 [330.71 ]
 [383.836]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.008]
 [  1.683]
 [  1.988]
 [  1.   ]
 [  1.667]
 [  1.   ]
 [  1.183]
 [  1.212]
 [  1.111]
 [  1.179]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.325]
 [  1.   ]
 [  1.235]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]
 [  1.   ]]
