In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

if os.getcwd().split("/")[-1] != "explain-python":
    os.chdir("..")

<h3>Data preprocessing</h3>

<h5>Import the dataset</h5>

In [22]:
dataset = pd.read_csv("parameter_estimation/datasets/lvelminmax_flow_pres.csv")

# X contains the matrix of features (LV_AA.flow.net) and y contains the dependent variable vector (LV.el_max_factor)
X = dataset.iloc[:, 0:2].values
y = dataset.iloc[:, 3:4].values

<h5>Splitting the dataset into a training and a test set</h5>

In [23]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

<h3>Building the artificial neural network (ANN)</h3>

In [27]:
ann = tf.keras.models.Sequential()
# define the input and first hidden layer
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# define the second hidden layer
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# define the output layer
ann.add(tf.keras.layers.Dense(units=1, activation=None))
# define a optimizer which updates the weights by stochastic gradient descent (adam) -> backpropagation
# define a loss function which is the mean squared error between the actual value and the predicted value
ann.compile(optimizer="adam", loss="mean_squared_error")

<h3>Training the ANN</h3>

In [28]:
# train the model on the training set
result = ann.fit(X_train, y_train, batch_size=32, epochs=5, verbose=1)
# calculate the mean loss on the training set
print(f"Mean loss of the training set: {np.mean(result.history['loss'])}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Mean loss of the training set: 0.044417452346533534


<h3>Evaluation the accuracy of the ANN</h3>

<h5>Mean squared error and r2 score of the model performance on the test set</h5>

In [30]:
from sklearn.metrics import mean_squared_error, r2_score

# predict the test set results
y_pred = ann.predict(X_test, verbose=0)
# calculate the mean squared error and the r2 score
mse = mean_squared_error(y_test, y_pred)
print(f"Model performance on the test set:")
print(f"- Mean Squared Error (MSE): {mse:.2f}")
r2 = r2_score(y_test, y_pred)
print(f"- R-squared (R2) Score: {r2:.2f}")

Model performance on the test set:
- Mean Squared Error (MSE): 0.00
- R-squared (R2) Score: -0.28


<h5>K-fold cross validation</h5>

In [10]:
from sklearn.model_selection import KFold

K = 5  # You can choose any suitable value for K
kf = KFold(n_splits=K, shuffle=True, random_state=42)
mse_scores = []  # To store Mean Squared Error (MSE) scores for each fold

for train_idx, val_idx in kf.split(X_train):
    X_train_kf, X_val_kf = X[train_idx], X[val_idx]
    y_train_kf, y_val_kf = y[train_idx], y[val_idx]

    # Train the model on the training data
    ann.fit(X_train_kf, y_train_kf, batch_size=32, epochs=100, verbose=0)

    # Make predictions on the validation set
    y_pred_kf = ann.predict(X_val_kf, verbose=0)

    # Calculate the Mean Squared Error (MSE) for this fold
    fold_mse = mean_squared_error(y_val_kf, y_pred_kf)
    mse_scores.append(fold_mse)

# Calculate the average MSE over all folds
average_mse = np.mean(mse_scores)
print(f"Average Mean Squared Error (MSE) across {K} folds: {average_mse:.2f}")

Average Mean Squared Error (MSE) across 5 folds: 0.00


<h3>Improving the ANN</h3>

<h5>Manual testing of the ANN</h5>

In [35]:
ann.predict([[0.5, 1.5]])



array([[0.74357593]], dtype=float32)

<h5>Saving the ANN</h5>

In [69]:
# ann.save("parameter_estimation/trained_models/new_model.keras")