In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from keras.models import load_model

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr

# Set the default font family to Times New Roman, default tick label color to black, and default line plot style
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["xtick.color"] = "black"
plt.rcParams["ytick.color"] = "black"
plt.rcParams["axes.prop_cycle"] = plt.cycler("color", plt.cm.tab10.colors) + plt.cycler("marker", ["o", "s", "D", "v", "X", "P", ">", "<", "H", "d"])  # Set the default line plot style



# Function to create RNN dataset
def create_rnn_dataset(X_norm, y_scaled, lookback):
    X, y = [], []
    for i in range(len(X_norm)-lookback):
        X.append(X_norm[i:(i+lookback)])
        y.append(y_scaled[(i+lookback)])
    return np.array(X), np.array(y)


# Define a function to calculate performance metrics
def calculate_performance(observed, predicted):
    mse = mean_squared_error(observed, predicted)
    mae = mean_absolute_error(observed, predicted)
    rmse = math.sqrt(mse)
    r2 = r2_score(observed, predicted)
    
    # Return a dictionary with the metrics
    return float(mse), float(mae), float(rmse), float(r2)

In [2]:
# Set up GPU configuration
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print('GPU is being used')
    except RuntimeError as e:
        print(e)

GPU is being used


In [3]:
data_true = pd.read_csv(r"C:\Users\Shadman\Downloads\CW_TWL_Merged_Filled-Final_6VariableUpdated_v2.csv")
print(data_true.dtypes)
#drop the date column, all data are at 1hr interval. Timestamp has no additional values
data = data_true.drop(columns=['DateTime (GMT)']) 

DateTime (GMT)       object
HT (m)              float64
NTR (m)             float64
MSL (Pa)            float64
SST (°C)            float64
Air_Temp (°C)       float64
Wind Speed (m/s)    float64
TWL (m)             float64
dtype: object


In [4]:
# Load Fortmyers latest.csv dataset
X = data.iloc[:,:-1]  # All columns excepect the last column (target variable)
y = data.iloc[:, -1]  # The target variable

# Step 1: Normalize the data using MinMaxScaler
# Note: HT (m), NTR (m) can be scaled from -1 to 1 as the magnitude both has negative and positive values
# Simialarly Wind Speed (m/s) and Wind Gist (m/s) has been scaled from 0 to 1 because there are no negative values here.
# Likewise, the target varibale has been scaled from -1 to 1.
######################### HOWEVER #######################################
# As scaling HT and NTR from -1 to 1 preserves their positive and negative values, while Wind Speed and Wind Gust are appropriately scaled
# from 0 to 1 due to their non-negative nature. Similarly, scaling the target variable from -1 to 1 is beneficial if it has both positive
# a nd negative values. However, using different scaling ranges for features may cause some models, like neural networks, to focus
# disproportionately on certain variables, potentially slowing convergence. To mitigate this, consider experimenting with
# uniform scaling across all features for better consistency, especially in models that are sensitive to feature magnitudes.

########################## Therefore, as a start, all input varibales are scaled from -1 to 1 and all target varibales are scaled from -1 to 1.
########################## A future trail will be held to see if HT (m), NTR (m) can be scaled from 0 to 1, 
########################### and Wind Speed (m/s) and Wind Gist (m/s) scaled from 0 to 1 can help to improve the model.
########################### so as a start all input variables are scaled from -1 to 1. Including the windspeed and wind gust.

##### Therefore, speperate scaling algorithms are set for input and target varibales

# Step 1: Initialize MinMaxScaler for X (features) and set range to (-1, 1) 
# this can be later changed into 0 to 1
scaler_X = MinMaxScaler(feature_range=(-1, 1))
# scaler_X = MinMaxScaler(feature_range=(0, 1))
X_norm = scaler_X.fit_transform(X)

# Initialize MinMaxScaler for y (target) and set range to (-1, 1)
scaler_y = MinMaxScaler(feature_range=(-1, 1))

# Reshape y to a 2D array since MinMaxScaler expects a 2D input
y_reshaped = y.values.reshape(-1, 1)

# Fit and transform the target variable (y) to scale it between -1 and 1
y_scaled = scaler_y.fit_transform(y_reshaped)

# Inverse transform the scaled target variable back to the original scale
# This is done so that we can calcualte performance metrics
y_original = scaler_y.inverse_transform(y_scaled)



In [5]:
# Check for NaN values in scaled data
print("Check for NaN in X_norm after scaling:", np.isnan(X_norm).sum())
print("Check for NaN in y_scaled after scaling:", np.isnan(y_scaled).sum())
print("-----------------------------------------------------------")

# Print the minimum and maximum values of the scaled data
min_value_X = np.min(X_norm)
max_value_X = np.max(X_norm)
print("Minimum value of scaled data (X):", min_value_X)
print("Maximum value of scaled data (X):", max_value_X)
print("-----------------------------------------------------------")
min_value_y = np.min(y_scaled)
max_value_y = np.max(y_scaled)
print("Minimum value of scaled data (y):", min_value_y)
print("Maximum value of scaled data (y):", max_value_y)

Check for NaN in X_norm after scaling: 0
Check for NaN in y_scaled after scaling: 0
-----------------------------------------------------------
Minimum value of scaled data (X): -1.0
Maximum value of scaled data (X): 1.0
-----------------------------------------------------------
Minimum value of scaled data (y): -1.0
Maximum value of scaled data (y): 0.9999999999999998


In [6]:
print(X_norm.shape)
print(y_scaled.shape)

(61369, 6)
(61369, 1)


In [10]:
#Training data 
data_spilt=0.80
train_size = int(0.80 * len(X_norm))
print('Data Split(%): ', data_spilt*100)
print('---------------------------------')
# Split the preprocessed dataset into training and testing sets
X_train = X_norm[:train_size]
X_test = X_norm[train_size:]
y_train = y_scaled[:train_size]
y_test = y_scaled[train_size:]

# lookback samples. Initially starting at 4 hours, 1/3 of one single tide cycle. Will be updated later 3,4,5,6 .
lookback= 12

# Define the number of input features
num_input_features = X_norm.shape[1]

# Define the number of output fea.shape[1]tures
num_output_features = y_scaled.shape[1]

# Split the preprocessed dataset into training and testing sets
X_train, X_test = X_norm[:train_size], X_norm[train_size:]
y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

print("Number of samples in X_train:", X_train.shape[0])
print("Number of samples in X_test:", X_test.shape[0])
print("Number of samples in y_train:", y_train.shape[0])
print("Number of samples in y_test:", y_test.shape[0])


Data Split(%):  80.0
---------------------------------
Number of samples in X_train: 49095
Number of samples in X_test: 12274
Number of samples in y_train: 49095
Number of samples in y_test: 12274


In [11]:
### creating the rnn data set for timeseiries with lookback
X_train_rnn, y_train_rnn = create_rnn_dataset(X_train, y_train, lookback)
X_test_rnn, y_test_rnn = create_rnn_dataset(X_test, y_test, lookback)

print("Expected input data shape for test: (batch_size, timesteps, num_features)")
print("Shape of X_train_rnn:", X_train_rnn.shape)
print("There will be a total of 49089 samples. The first sample will be as following: ")

Expected input data shape for test: (batch_size, timesteps, num_features)
Shape of X_train_rnn: (49083, 12, 6)
There will be a total of 49089 samples. The first sample will be as following: 


Sample 1:

```python
[
    [HT_t1, NTR_t1, WindSpeed_t1, WindGust_t1],
    [HT_t2, NTR_t2, WindSpeed_t2, WindGust_t2],
    [HT_t3, NTR_t3, WindSpeed_t3, WindGust_t3],
    [HT_t4, NTR_t4, WindSpeed_t4, WindGust_t4],
    [HT_t5, NTR_t5, WindSpeed_t5, WindGust_t5],
    [HT_t6, NTR_t6, WindSpeed_t6, WindGust_t6],
]

Shape of X_train_rnn: (49089, 6, 4)
Samples: The model will process each of the 49,089 sequences individually to learn patterns from the historical data.
Timesteps: The model considers the past 6 time steps (lookback) for each sequence, learning how these past observations influence the next time step.
Features: The 4 features for each time step provide the model with a comprehensive view of the conditions at each point in time.


In [12]:
### Not understanding why you have to transpose here!!!! Again, the model wants this, why is that?
# X_train_rnn = X_train_rnn.transpose(0, 2, 1)
# X_test_rnn = X_test_rnn.transpose(0, 2, 1)
# print(X_train_rnn.shape, X_test_rnn.shape)

In [18]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the pre-trained BiLSTM model
base_model = load_model('G:/My Drive/Deflt3D FM Codes - Vtech/ML+DL_Project/DL-MidTerm/DL_FinalModels/BiLSTM_Model_Hypertuned.h5')

# Number of layers to freeze
num_layers_freeze = 3

# Freeze the first `num_layers_freeze` layers
for layer in base_model.layers[:num_layers_freeze]:
    layer.trainable = False

# Print model summary to verify
base_model.summary()

# Compile the model (no new layers are added)
base_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the transfer learning model (training the unfrozen layers of the original model)
history = base_model.fit(X_train_rnn, y_train_rnn, epochs=50, batch_size=256, validation_data=(X_test_rnn, y_test_rnn))

# Evaluate the model
test_loss, test_mae = base_model.evaluate(X_test_rnn, y_test_rnn)
print(f'Test Loss: {test_loss}, Test MAE: {test_mae}')

# Save the transfer learning model to an h5 file
base_model.save('models/hypertuned/TransferLearning_BiLSTM_Model.h5')


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 12, 960)          1870080   
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 12, 960)           0         
                                                                 
 bidirectional_1 (Bidirectio  (None, 12, 192)          811776    
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 12, 192)           0         
                                                                 
 bidirectional_2 (Bidirectio  (None, 896)              2297344   
 nal)                                                            
                                                        