In [None]:
import import_ipynb
from _1_Lab_Data import *
from _2_ANN_architecture import *
from _3_Global_Seawater_Data import *
from _plots import *
from _map_tools import *

In [None]:
# Data-preparation pipeline. None of the helpers called here are defined in this
# notebook; they are expected to come from the star-imported project notebooks
# in the first cell, so their internals are not visible from here.
df = read_xlsx()  # raw table — presumably the lab dataset read from a spreadsheet; TODO confirm
df_filt = filter_df(df)  # filtered table (the filter criteria live inside filter_df)
X, y = extract_Xy(df_filt)  # model inputs X and targets y taken from the filtered table
X_n = norm_input(X)  # inputs after norm_input — presumably normalised/scaled; TODO confirm

In [None]:
# Build a network using create_ANN's default arguments. `model` is rebound by
# the "Single run" cell and by every K-fold iteration further down, in both
# cases before fit_model is called, so this instance is never trained here.
model = create_ANN()

### Generating Tensorflow Session for Reproducible Results
To validate the model and determine the most suitable neural network architecture and hyperparameters, a tensorflow session was configured. This allows a fixed seed to be used in order to generate reproducible results.

In [None]:
import numpy as np
import random as rn
import tensorflow as tf
import os

def create_seed_session(seed=1):
    """
    Seed the random number generators and install a single-threaded,
    CPU-only TensorFlow session as the Keras backend session.

    Parameters
    ----------
    seed : int, optional
        Value passed to ``np.random.seed``, ``random.seed`` and
        ``tf.random.set_seed`` (default 1).

    Returns
    -------
    None
        The function works purely through side effects: it sets the
        ``PYTHONHASHSEED`` environment variable, seeds the three generators
        and registers a new ``tf.compat.v1.Session`` with Keras.
    """
    # NOTE(review): PYTHONHASHSEED is always '0' regardless of `seed`, and
    # assigning it at runtime presumably only affects child processes, not the
    # hash seed of the interpreter that is already running — confirm.
    os.environ['PYTHONHASHSEED'] = '0'

    # Same seed for NumPy, the stdlib `random` module and TensorFlow, in that order.
    for seed_fn in (np.random.seed, rn.seed, tf.random.set_seed):
        seed_fn(seed)

    # One intra-op and one inter-op thread, with the GPU device count set to 0.
    thread_count = 1
    config = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=thread_count,
        inter_op_parallelism_threads=thread_count,
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 0},
    )

    # Bind the session to the current default graph and hand it to Keras.
    backend_session = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(),
        config=config,
    )
    tf.compat.v1.keras.backend.set_session(backend_session)

### Single run

In [None]:
# Seed first: everything below that draws random numbers (the split via
# random_seed, and whatever create_ANN / fit_model draw internally) then starts
# from the same state each time this cell is run.
SEED = 41
create_seed_session(SEED)

# Partition X_n / y into training, validation and test subsets.
(X_train, X_val, X_test,
 y_train, y_val, y_test) = split_dataset(X_n, y, random_seed=SEED)

# Build the network with explicit hyperparameters and train it, using the
# validation subset as the second data set passed to fit_model.
model = create_ANN(hl_neurons=40, dropout_r=0.2, lr=0.001)
history = fit_model(
    X_train, y_train,
    X_val, y_val,
    model,
    patience=100,
    epochs=1000,
    batch_size=16,
)

plot_history(history)

# Run the trained model on the seawater data frames (df_temp, df_sal, df_doxy,
# df_pH are not defined in this notebook — presumably provided by the
# star-imported seawater-data module; TODO confirm) and compare the result
# with the field tests.
df_corrosion = global_model(model, df_temp, df_sal, df_doxy, df_pH)
field_results = compare_model(df_corrosion)
print("-----Percentage Difference [%]-----")
print(field_results.corrosion_perc)

# Two-panel model-output plots: train vs. validation, then test vs. the full set.
plot_NN_output_dual(
    model,
    X_train, y_train,
    X_val, y_val,
    "Training Set", "Validation Set",
)
plot_NN_output_dual(
    model,
    X_test, y_test,
    X_n, y,
    "Test Set", "All",
)
plot_corrosion(df_corrosion)

### K-Fold Cross Validation
K-fold cross validation is used to evaluate the model on different groups of the data to see if it performs well on data in general. The dataset is divided into k subsets and one subset is used for testing and the rest is kept for training. This procedure is iterated throughout the entire dataset, so that each subset is used for testing the model. 

In [None]:
from sklearn.model_selection import KFold

num_folds = 5

# Re-seed so the fold assignment and each training run are repeatable.
SEED = 47
create_seed_session(SEED)

# Shuffled K-fold splitter; random_state pins the shuffle order.
kf = KFold(n_splits=num_folds, shuffle=True, random_state=SEED)

# One entry per fold: the `corrosion_pred` attribute of that fold's comparison.
corrosion_pred = []

for fold, (train_index, val_index) in enumerate(kf.split(X_n)):
    print(f"Fold {fold+1}/{num_folds}")

    # A fresh network (default arguments) for every fold.
    model = create_ANN()

    # Select this fold's rows by position.
    X_train_fold = X_n[train_index]
    X_val_fold = X_n[val_index]
    y_train_fold = y[train_index]
    y_val_fold = y[val_index]

    history = fit_model(
        X_train_fold, y_train_fold,
        X_val_fold, y_val_fold,
        model,
        patience=100,
        epochs=1000,
        batch_size=16,
    )
    plot_history(history)
    plot_NN_output_dual(
        model,
        X_train_fold, y_train_fold,
        X_val_fold, y_val_fold,
        "Training Set", "Test Set",
    )

    # Run this fold's model on the seawater data and keep its predictions
    # for the comparison across folds.
    df_corrosion = global_model(model, df_temp, df_sal, df_doxy, df_pH)
    field_results = compare_model(df_corrosion)
    corrosion_pred.append(field_results.corrosion_pred)
    


#### Box Plots
The predicted corrosion rates at each of the field test locations can be compared to those from the literature. The box plots allow the variation between each fold iteration to be visualised and used as a way to investigate the robustness of the model

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def _to_scalar(value):
    """Unwrap a one-element NumPy array into a Python scalar; pass anything else through."""
    if isinstance(value, np.ndarray) and value.size == 1:
        return value.item()
    return value


# One row per fold; the columns are whatever labels the collected
# `corrosion_pred` entries carry (used below as field-test location keys).
df_box = pd.DataFrame(corrosion_pred)
fold_indices = [f"Fold {i+1}" for i in range(len(corrosion_pred))]
df_box.index = fold_indices

# Element-wise unwrap done column by column with Series.map. This replaces
# DataFrame.applymap, which is deprecated since pandas 2.1 (FutureWarning),
# while still working on older pandas versions that lack DataFrame.map.
df_box = df_box.apply(lambda col: col.map(_to_scalar))

# Look the field-test table up once instead of once per column; this assumes
# field_test_results() returns the same table on every call.
field_test = field_test_results()

fig, ax = plt.subplots(figsize=(10, 6))
x_offsets = np.arange(1, len(df_box.columns) + 1)
for i, column in enumerate(df_box.columns):
    # Box of the per-fold predictions for this column, drawn at x = i + 1.
    ax.boxplot(df_box[column], positions=[i + 1])

    # Overlay the field-test corrosion value for the same label.
    field_row = field_test.loc[column]
    corrosion_value = field_row.corrosion
    ax.scatter(i + 1, corrosion_value, marker='o', label=f'{field_row.name}')

ax.set_xlabel('Test Location')
ax.set_ylabel('Corrosion Rate [mm/year]')
ax.legend()
# Each boxplot call manages its own tick; set the final positions and labels
# explicitly once every column has been drawn.
ax.set_xticks(x_offsets)
ax.set_xticklabels(df_box.columns)
plt.show()