In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import max_error

import os

# Hide every GPU from CUDA so the notebook runs on CPU only. The variable is
# set before TensorFlow is imported so it is guaranteed to be in place by the
# time any CUDA initialization happens.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
keras = tf.keras

In [8]:
# Read the full dataset and discard the MixtureTemperature column before
# any modelling; the first rows are shown as a quick sanity check.
df = pd.read_csv(r'Boardwalk Full Dataset.csv').drop(columns=["MixtureTemperature"])
df.head()

Unnamed: 0,EthaneFlowRate,EthanePressure,EthaneTemperature,EthaneWaterContent,NaturalGasFlowrate,NaturalGasPressure,NaturalGasTemperature,NaturalGasWaterContent,HydrateMargin
0,20000,700,70,0.0,200,700,50,1.5,18.42
1,20000,700,70,0.0,200,700,50,3.0,5.267
2,20000,700,70,0.0,200,700,50,4.5,-2.776
3,20000,700,70,0.0,200,700,50,6.0,-8.613
4,20000,700,70,0.0,200,700,50,7.5,-13.22


In [7]:
# Print each column's dtype and non-null count to check for missing values.
# NOTE(review): the saved output of this cell still lists MixtureTemperature,
# which the load cell drops — the output appears to predate that drop; re-run
# the notebook top to bottom to refresh it.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212625 entries, 0 to 212624
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   EthaneFlowRate          212625 non-null  int64  
 1   EthanePressure          212625 non-null  int64  
 2   EthaneTemperature       212625 non-null  int64  
 3   EthaneWaterContent      212625 non-null  float64
 4   NaturalGasFlowrate      212625 non-null  int64  
 5   NaturalGasPressure      212625 non-null  int64  
 6   NaturalGasTemperature   212625 non-null  int64  
 7   NaturalGasWaterContent  212625 non-null  float64
 8   HydrateMargin           212625 non-null  float64
 9   MixtureTemperature      212625 non-null  float64
dtypes: float64(4), int64(6)
memory usage: 16.2 MB


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column,
# used to read off the value range of each feature and of HydrateMargin.
df.describe()

Unnamed: 0,EthaneFlowRate,EthanePressure,EthaneTemperature,EthaneWaterContent,NaturalGasFlowrate,NaturalGasPressure,NaturalGasTemperature,NaturalGasWaterContent,HydrateMargin
count,212625.0,212625.0,212625.0,212625.0,212625.0,212625.0,212625.0,212625.0,212625.0
mean,60000.0,950.0,75.0,5e-06,900.0,750.0,65.0,4.5,16.725918
std,28284.33776,204.124625,4.082493,4e-06,432.050396,40.824925,10.000024,2.121325,19.66733
min,20000.0,700.0,70.0,0.0,200.0,700.0,50.0,1.5,-38.37
25%,40000.0,700.0,70.0,0.0,500.0,700.0,55.0,3.0,3.005
50%,60000.0,950.0,75.0,5e-06,900.0,750.0,65.0,4.5,16.66
75%,80000.0,1200.0,80.0,1e-05,1300.0,800.0,75.0,6.0,30.42
max,100000.0,1200.0,80.0,1e-05,1600.0,800.0,80.0,7.5,73.22


In [11]:
# Split the frame into model inputs (X) and the regression target (y).
X = df.drop(columns=['HydrateMargin'])
y = df['HydrateMargin']  # the target is left in its original units (no scaling)

# Divisor applied to each input column to bring the features to similar
# magnitudes. Keyed by column name so a change in column order cannot
# silently apply the wrong divisor to a feature.
feature_scale_by_column = {
    'EthaneFlowRate': 10000,
    'EthanePressure': 100,
    'EthaneTemperature': 10,
    'EthaneWaterContent': 1/100000,
    'NaturalGasFlowrate': 100,
    'NaturalGasPressure': 100,
    'NaturalGasTemperature': 10,
    'NaturalGasWaterContent': 1,
}

# Divisors listed in X's actual column order; a KeyError here means X contains
# a feature for which no scale has been declared.
scaling_columns = [feature_scale_by_column[name] for name in X.columns]
X = X / scaling_columns

In [12]:
# Hold out 20% of the (already scaled) rows as the test set, then split 10% of
# the remaining rows off as a validation set. Both splits use the same seed.
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.2, random_state=69
)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.1, random_state=69
)

In [13]:
file_name_NN_reg = 'Hydrate Models/reduced NN 9.20.21.h5'
# Make sure the checkpoint directory exists before training starts writing to it.
os.makedirs(os.path.dirname(file_name_NN_reg), exist_ok=True)

# Start from a clean Keras state and fix the seeds for repeatable runs.
keras.backend.clear_session()
tf.random.set_seed(69)
np.random.seed(69)

# Small fully-connected regressor: 32 -> 12 -> 1 units with dropout after each
# hidden layer. Layer indices 0, 2 and 4 are the Dense layers.
NN_model = tf.keras.models.Sequential()
NN_model.add(keras.layers.Dense(32, activation='relu', input_shape=[X_train.shape[1]]))
NN_model.add(keras.layers.Dropout(0.1))
NN_model.add(keras.layers.Dense(12, activation='relu'))
NN_model.add(keras.layers.Dropout(0.2))
NN_model.add(keras.layers.Dense(1))

NN_model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])

# Only overwrite the checkpoint when validation loss improves. Without
# save_best_only the file is rewritten every epoch, so it would end up holding
# the last epoch's weights while EarlyStopping restores the best ones in memory.
model_checkpoint = keras.callbacks.ModelCheckpoint(
    file_name_NN_reg,
    monitor='val_loss',
    save_best_only=True,
)
# Stop after 100 epochs without validation-loss improvement and roll the
# in-memory model back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
)


In [14]:
# Train silently for up to 2000 epochs; the callbacks handle checkpointing and
# early stopping against the validation split.
NN_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=2000,
    verbose=0,
    callbacks=[model_checkpoint, early_stopping],
)

# Test-set predictions as a flat 1-D array.
nn_predictions = NN_model.predict(X_test).flatten()
# Optional explicit save of the in-memory model (left disabled):
#NN_model.save(file_name_NN_reg)


In [15]:
# Pull the kernel and bias out of each Dense layer. Layers 1 and 3 are the
# Dropout layers and hold no weights, so only indices 0, 2 and 4 are read.
first_layer_weights, first_layer_biases = NN_model.layers[0].get_weights()
second_layer_weights, second_layer_biases = NN_model.layers[2].get_weights()
output_layer_weights, output_layer_biases = NN_model.layers[4].get_weights()

In [16]:
# Show the shape of every extracted array, one per line, in layer order
# (weights then biases for each layer).
print(
    *map(
        np.shape,
        (
            first_layer_weights,
            first_layer_biases,
            second_layer_weights,
            second_layer_biases,
            output_layer_weights,
            output_layer_biases,
        ),
    ),
    sep="\n",
)

(8, 32)
(32,)
(32, 12)
(12,)
(12, 1)
(1,)


In [17]:
# Write each layer's weights and biases to its own CSV file in the working
# directory (weights first, then biases).
layer_parameter_files = {
    "L1 weights.csv": first_layer_weights,
    "L2 weights.csv": second_layer_weights,
    "Output weights.csv": output_layer_weights,
    "L1 Biases.csv": first_layer_biases,
    "L2 Biases.csv": second_layer_biases,
    "Output Biases.csv": output_layer_biases,
}
for csv_name, parameters in layer_parameter_files.items():
    pd.DataFrame(parameters).to_csv(csv_name)

In [19]:
# Functions For Inference

def relu(input):
    """Rectified linear unit for one scalar.

    Returns `input` unchanged when it is strictly positive, otherwise 0.
    """
    # The parameter name shadows the builtin `input`; it is kept so that any
    # caller passing it by keyword keeps working.
    return input if input > 0 else 0

def predict(w, b, X):
    """Evaluate one hidden layer by hand: ReLU(w.T . X + b).

    Parameters
    ----------
    w : weight matrix of the layer; its transpose is multiplied with X.
    b : bias vector added to the product.
    X : input vector for the layer.

    Returns
    -------
    list
        The activated outputs, one entry per element of the pre-activation.
    """
    pre_activation = np.dot(w.T, X) + b
    # relu works on scalars, so it is applied element by element.
    return list(map(relu, pre_activation))

def pred_output(w, b, X):
    """Evaluate a linear (no activation) layer by hand: w.T . X + b.

    Parameters
    ----------
    w : weight matrix of the layer; its transpose is multiplied with X.
    b : bias vector added to the product.
    X : input vector for the layer.

    Returns
    -------
    The raw layer output, with no activation applied.
    """
    return np.dot(w.T, X) + b


In [23]:
# Run the first test sample through the network by hand, using only the
# extracted weights and biases (dropout is not applied here).
input_array = X_test.iloc[:1].to_numpy().ravel()
layer1 = predict(w=first_layer_weights, b=first_layer_biases, X=input_array)
layer2 = predict(w=second_layer_weights, b=second_layer_biases, X=layer1)
layer3 = pred_output(w=output_layer_weights, b=output_layer_biases, X=layer2)

In [42]:
# Compare the hand-computed forward pass with Keras and with the true value
# for the first test sample. The target is never rescaled in this notebook
# (y is taken straight from the HydrateMargin column), so all three numbers
# are reported as-is; multiplying them by 10000 would misreport the margin.
print("Prediction Using Model From Weights + Biases: ", layer3[0])
print("Prediction Using Original Model: ", NN_model.predict(X_test[:1]).reshape(-1)[0])
print("Actual Hydrate Margin: ", y_test.values[0])

Prediction Using Model From Weights + Biases:  2.016095211118259
Prediction Using Original Model:  2.0160937
Actual Hydrate Margin:  3.1539999999999995
