## Neural network training notebook
### Hazard level: 475 years

In [32]:

import numpy as np

import matplotlib
# matplotlib.use('TkAgg')
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, explained_variance_score,r2_score
import pickle
import time
import copy
import random

#%% Read and manipulate data


In [11]:
# Settings: fractions of the shuffled rows assigned to the training,
# validation and test sets.
pTrain = 0.8
pValidate = 0.1
pTest = 0.1

# Load data from file
df_random = pd.read_csv('475_rand_imp1.csv')
df_edge = pd.read_csv('475_edge_imp1.csv')

flag = 2 # 1 trained on all data, 2 on random data, and 3 on edge cases

# Select the rows used to fit the model. Every branch produces a fresh array
# (np.concatenate and copy=True both copy), so the in-place shuffle below can
# neither reorder the DataFrames themselves nor fail on a read-only view.
if flag == 1:
    data = np.concatenate((df_random.to_numpy(), df_edge.to_numpy()))
elif flag == 2:
    data = df_random.to_numpy(copy=True)
elif flag == 3:
    data = df_edge.to_numpy(copy=True)
else:
    raise ValueError(
        'flag must be 1 (all data), 2 (random data) or 3 (edge cases), got %r' % (flag,)
    )

# Definition of inputs and outputs
# NOTE(review): no random seed is set before this shuffle, so the
# train/validation/test split differs between runs -- consider seeding.
np.random.shuffle(data)

size_data = data.shape
n_features = size_data[1] - 1  # every column except the last is an input

# The last column is the label; all preceding columns are inputs.
input_data = data[:, :n_features]
output_data = data[:, -1]

edge_data = df_edge.to_numpy()
edge_data_input = edge_data[:, :n_features]
edge_data_label = edge_data[:, -1]

non_edge_data = df_random.to_numpy()
non_edge_data_input = non_edge_data[:, :n_features]
non_edge_data_label = non_edge_data[:, -1]

# Normal standardization of data: the scaler is fitted on every row of
# input_data (the data has not been split at this point).
scaler = StandardScaler()
scaler.fit(input_data)

StandardScaler()

In [12]:
# Persist the fitted scaler (its means and standard deviations) so the same
# transform can be applied to new data later.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Standardize the three input arrays with the fitted scaler.
# NOTE: the names are rebound to their scaled versions, so running this cell
# twice scales the data twice -- re-run the previous cell first.
input_data, edge_data_input, non_edge_data_input = (
    scaler.transform(raw_inputs)
    for raw_inputs in (input_data, edge_data_input, non_edge_data_input)
)

# Split into training, validation and testing sets.
# p1 and p2 are the row indices at which the data is cut.
# DATA MUST BE SHUFFLED
n_rows = size_data[0]
p1 = int(n_rows*pTrain) # end of the training rows
p2 = int(n_rows*(pTrain+pValidate)) # end of the validation rows; the rest is test

train_data, train_label = input_data[:p1,:], output_data[:p1]
valid_data, valid_label = input_data[p1:p2,:], output_data[p1:p2]
test_data, test_label = input_data[p2:,:], output_data[p2:]

## Train model

In [13]:
# Fit the regressor with scikit-learn, which operates directly on NumPy arrays.
# The hyperparameters below are the ones found beforehand:
# 20 hidden layers of 150 units each.
hidden_layers = (150,)*20
mlp = MLPRegressor(
    hidden_layer_sizes=hidden_layers,
    learning_rate_init=0.0003,
    alpha=0.0001,
)
mlp.fit(train_data, train_label)
print('Model Fitted')

# Save the fitted model for posterity.
with open('model_fitted_all_475_imp1.pkl', 'wb') as model_file:
    pickle.dump(mlp, model_file)

Model Fitted


In [21]:

# Load the saved model so we don't need to retrain every time.
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: unpickling can execute arbitrary code -- only load model files that
# were produced by this notebook / a trusted source.
with open('model_fitted_all_475_imp1.pkl', 'rb') as f:
    mlp = pickle.load(f)

# Values predicted by the neural network for each data set
nn_train_label = mlp.predict(train_data)
nn_test_label = mlp.predict(test_data)
nn_edge_label = mlp.predict(edge_data_input)
nn_non_edge_label = mlp.predict(non_edge_data_input)

# R2 score of the predictions against the true labels
r2_train = r2_score(train_label, nn_train_label)
r2_test = r2_score(test_label, nn_test_label)
r2_edge = r2_score(edge_data_label, nn_edge_label)
r2_non_edge = r2_score(non_edge_data_label, nn_non_edge_label)

# Print the accuracy values: one heading line followed by the score
for heading, score in (
    ('R2 train data', r2_train),
    ('R2 test data', r2_test),
    ('R2 edge data', r2_edge),
    ('R2 non-edge data', r2_non_edge),
):
    print(heading)
    print(score)


R2 train data
0.9991080242658484
R2 test data
0.9980699731801911
R2 edge data
0.8457787500025927
R2 non-edge data
0.9989229828410735


In [None]:

# Scatter of the network's predictions against the test labels, with the
# line y = x drawn from 0 to 40 as the "perfect prediction" reference.
fig, ax = plt.subplots()
ax.scatter(test_label, nn_test_label, s=4)
ax.plot([0, 40], [0, 40], color='orange')
ax.set_xlabel('Test data')
ax.set_ylabel('NN prediction')
ax.set_title('NN prediction versus test set: 475-year RP with impeding factors')
plt.show()

In [43]:
# Histogram (40 bins) of the label column.
# NOTE(review): the title says "training data" but output_data is plotted,
# not a training-only subset -- confirm which is intended.
fig, ax = plt.subplots()
ax.hist(output_data, bins=40)
ax.set_title('Histogram: $T_{FR,50}$ training data')
ax.grid(alpha=0.5)
plt.show()

In [None]:
# Perform sensitivity analysis: starting from an (unscaled) input vector of
# ones, vary one component at a time and record the network's prediction.
n_component = 41  # number of input components fed to the network
n_increases = 21  # values tried per component: 1.0, 1.1, ..., 3.0

t0 = time.time()

# sens_matrix[i, j] is the prediction when component i is set to 1 + 0.1*j
# and every other component stays at 1.
sens_matrix = np.zeros((n_component, n_increases))

# Example discussed while teaching how to use the code: a single evaluation
# with component 4 set to 2.5. The array is 2-D with shape (1, n_component),
# so the component must be addressed as [0, 4]; indexing with [4] alone
# selects a row that does not exist and raises IndexError.
input_to_evaluate = np.ones((1, n_component))
input_to_evaluate[0, 4] = 2.5
input_to_evaluate = scaler.transform(input_to_evaluate)
output_value = mlp.predict(input_to_evaluate)

for i in range(n_component):
    for j in range(n_increases):
        # Build the unscaled input, then apply the same scaling as in training.
        vector_input = np.ones((1, n_component))
        vector_input[0, i] = 1 + j*0.1
        vector_input = scaler.transform(vector_input)
        pred_val = mlp.predict(vector_input)
        sens_matrix[i, j] = pred_val[0]
t1 = time.time()

# Elapsed wall-clock time in seconds
print(t1-t0)

# Save the matrix both as a pickle and as a CSV file.
with open('sensitivity_matrix.pkl', 'wb') as f:
    pickle.dump(sens_matrix, f)

np.savetxt('sensitivity_matrix.csv', sens_matrix, delimiter=',')
