In [11]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from typing import Tuple

from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from stesml.data_tools import get_scenario_index
from stesml.data_tools import get_train_and_test_index
from stesml.data_tools import load_data
from stesml.data_tools import get_train_data
from stesml.data_tools import get_test_data
from stesml.data_tools import get_train_and_test_data

from stesml.plot_tools import plot_test_results
from stesml.plot_tools import plot_average_error
from stesml.plot_tools import plot_progress_results

In [12]:
%load_ext tensorboard
import datetime
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


2022-06-29 23:11:57.740152: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2022-06-29 23:11:57.740166: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2022-06-29 23:11:57.740187: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [13]:
earlystopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=0,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
)

In [14]:
data_dir = "../data/Sulfur_Models/"

scenario_index = pd.DataFrame({"filepath": glob.glob(os.path.join(data_dir, "ML_*_*.csv"))})

In [15]:
target = 'Tavg'
scale = True

In [16]:
# Get the train and test index by randomly splitting up data (80-20 train-test split)
train_index, test_index = get_train_and_test_index(scenario_index)

In [17]:
# Get train and test data
X_train, y_train, X_test, y_test, scaler_x, scaler_y = get_train_and_test_data(scenario_index, train_index, test_index, target, scale)

In [18]:
# Get the model
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(3,)))
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.build()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                200       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 51        
Total params: 2,801
Trainable params: 2,801
Non-trainable params: 0
_________________________________________________________________


2022-06-29 23:12:18.632835: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
# Fit the model to training data
model.fit(x=X_train, 
          y=y_train,
          batch_size=300,
          epochs=10,
          validation_data=(X_test, y_test),
          callbacks=[earlystopping_callback])

2022-06-29 23:12:28.541459: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


<tensorflow.python.keras.callbacks.History at 0x7fe3bf6adf10>

In [20]:
# Get predictions for test data
y_hat = model.predict(X_test)

In [21]:
y_hat = scaler_y.inverse_transform(y_hat.reshape(-1,1)).reshape(1,-1)[0]
y_test = scaler_y.inverse_transform(y_test.reshape(-1,1)).reshape(1,-1)[0]

In [22]:
# Evaluate results
rmse = mean_squared_error(y_test, y_hat, squared=False)
print(rmse)

2.987462078402744


In [23]:
r2 = r2_score(y_test, y_hat)
print(r2)

0.9959118712308493


In [25]:
test_df = load_data(scenario_index, test_index)
test_df[target+"_hat"] = y_hat

In [27]:
plot_test_results(test_df, 'NN', target)

SyntaxError: positional argument follows keyword argument (1664183238.py, line 1)