# Import libraries

In [None]:
# Fix randomness and hide warnings
seed = 42  # single seed reused for every random generator seeded in this notebook

import os
# Environment variables are set here, before TensorFlow and matplotlib are imported in later cells.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up; setting it at
# runtime presumably only affects child processes - confirm this is sufficient.
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): ignoring the base `Warning` category silences every warning, which makes
# the FutureWarning filter above redundant and can hide real problems.
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
import pandas as pd

# Seed NumPy's legacy global random generator.
np.random.seed(seed)

import logging

import random
# Seed Python's built-in random module.
random.seed(seed)

In [None]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Reduce TensorFlow log output: autograph verbosity 0, loggers at ERROR level.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow's random generators with the notebook-wide seed.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

In [None]:
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler

# Load and process data

In [None]:
# Load the training series from CSV, then show its shape and first rows.
dataset = pd.read_csv('Data/train218.csv')
print(dataset.shape)
dataset.head()

In [None]:
# Column dtypes, non-null counts and memory usage of the training frame.
dataset.info()

In [None]:
# NOTE(review): the shape is already printed by the loading cell; this cell looks redundant.
print(dataset.shape)

In [None]:
def plot_time_series(dataset, rows):
    """Plot the first `rows` rows of `dataset` as overlaid lines on one figure.

    Args:
        dataset: pandas DataFrame; each row is drawn as one line via ``iloc``.
        rows: number of leading rows to draw. A value larger than the number of
            rows in `dataset` is clamped instead of raising ``IndexError``.
    """
    plt.figure(figsize=(17,5))
    # Clamp so that asking for more rows than exist plots everything available.
    for i in range(min(rows, len(dataset))):
        plt.plot(dataset.iloc[i])
    plt.title('Time Series')
    plt.show()

plot_time_series(dataset, 10)

In [None]:
# Split every row into model input (all but the last `labels_size` columns)
# and forecast target (the last `labels_size` columns).
labels_size = 9
X_train = dataset.iloc[:,:-labels_size]
y_train = dataset.iloc[:,-labels_size:]
print(X_train.shape, y_train.shape)

In [None]:
# Per-sample shapes handed to the model builders, plus shared training settings.
# The series are univariate and are fed to LSTM layers as (timesteps, 1). Building
# the shape explicitly keeps it correct whether this cell runs before or after the
# cell that appends the channel axis to X_train (taking X_train.shape[1:] from the
# 2-D frame would yield (timesteps,), which the LSTM-based builders reject).
input_shape = (X_train.shape[1], 1)
# Only output_shape[0] (the forecast horizon) is read by the builders.
output_shape = (y_train.shape[1],)
batch_size = 64
epochs = 200

In [None]:
# Sanity check of the shapes passed to the model builders.
print(input_shape)
print(output_shape)

Sequential Train-Test split and normalization

In [None]:
# Visualise how one row is split into input window and forecast target.
# `.T[0]` selects the column labelled 0 of the transposed frame, i.e. the row with
# index label 0 (assumes X_train / y_train are still DataFrames here - TODO confirm
# this cell is always run before the conversion to NumPy arrays).
plt.figure(figsize=(17,5))
plt.plot(X_train.T[0], label='Train')
plt.plot(y_train.T[0], label='Test')
plt.title('Train-Test Split')
plt.legend()
plt.show()

In [None]:
# Convert to NumPy and add a trailing channel axis: (samples, timesteps) -> (samples, timesteps, 1).
# Guarded on ndim so that re-running this cell does not keep appending axes.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
if X_train.ndim == 2:
    X_train = np.expand_dims(X_train, axis = -1)
if y_train.ndim == 2:
    y_train = np.expand_dims(y_train, axis = -1)
print(X_train.shape, y_train.shape)

## Categories

In [None]:
# Import the classes
# One CSV per split; the letter labels are encoded in the next cell.
classes_train = pd.read_csv('Data/classes_train218.csv')
classes_test = pd.read_csv('Data/classes_test218.csv')

In [None]:
from keras.utils import to_categorical

# Mapping from category letter to integer id.
classes_dict = {
    'A' : 0,
    'B' : 1,
    'C' : 2,
    'D' : 3,
    'E' : 4,
    'F' : 5
}


def _encode_classes(labels):
    """Map letter labels to one-hot vectors using `classes_dict`.

    Args:
        labels: DataFrame of category letters, or an array that was already
            encoded by a previous run of this cell.

    Returns:
        One-hot encoded NumPy array with ``len(classes_dict)`` classes.

    Raises:
        ValueError: if a label is not a key of `classes_dict`.
    """
    if not isinstance(labels, pd.DataFrame):
        # Already encoded (the cell was re-run): return unchanged so the cell is idempotent.
        return labels
    encoded = labels.apply(lambda column: column.map(classes_dict))
    if encoded.isna().any().any():
        # Fail with a clear message instead of a cryptic cast error inside to_categorical.
        raise ValueError(f"Found class labels outside {sorted(classes_dict)}")
    # Convert numerical values to one-hot encoded vectors
    return to_categorical(encoded.values, num_classes=len(classes_dict))


classes_train = _encode_classes(classes_train)
classes_test = _encode_classes(classes_test)

## Test data

In [None]:
# Load the held-out test series from CSV, then show its shape and first rows.
test_ds = pd.read_csv('Data/test218.csv')
print(test_ds.shape)
test_ds.head()

In [None]:
# Same input/target split as for the training data: the last `test_size` columns
# are the forecast target, everything before them is the model input.
test_size = 9
X_test = test_ds.iloc[:,:-test_size]
y_test = test_ds.iloc[:,-test_size:]
print(X_test.shape, y_test.shape)

In [None]:
# Convert to NumPy and add a trailing channel axis: (samples, timesteps) -> (samples, timesteps, 1).
# Guarded on ndim so that re-running this cell does not keep appending axes.
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)
if X_test.ndim == 2:
    X_test = np.expand_dims(X_test, axis = -1)
if y_test.ndim == 2:
    y_test = np.expand_dims(y_test, axis = -1)
print(X_test.shape, y_test.shape)

# Best models


# DA-RNN

In [None]:
def da_rnn(input_shape, output_shape, lstm_units=128, dense_units=64):
    """Build and compile a dual-stage-attention style recurrent forecaster.

    Args:
        input_shape: shape of one input series; ``input_shape[0]`` is used as the
            number of timesteps.
        output_shape: ``output_shape[0]`` is the number of values to predict.
        lstm_units: units of both the encoder and the decoder LSTM.
        dense_units: width of the Dense layer that produces the second-stage
            attention weights.

    Returns:
        A compiled ``tfk.Model`` (Adam optimizer, MSE loss).
    """
    n_steps = input_shape[0]
    n_outputs = output_shape[0]

    # --- Encoder ---
    series_in = tfkl.Input(shape=input_shape)
    encoded = tfkl.LSTM(units=lstm_units, return_sequences=True)(series_in)

    # --- Attention, stage 1: softmax weights, transposed, then applied to the encoder output ---
    weights_1 = tfkl.Dense(n_steps, activation='softmax')(encoded)
    weights_1 = tfkl.Permute((2, 1))(weights_1)
    context_1 = tfkl.Dot(axes=(2, 1))([weights_1, encoded])

    # --- Decoder: attention context joined with the repeated last encoder step ---
    last_step_tiled = tfkl.RepeatVector(n_steps)(encoded[:, -1, :])
    decoder_in = tfkl.Concatenate(axis=-1)([context_1, last_step_tiled])
    decoded = tfkl.LSTM(units=lstm_units, return_sequences=True)(decoder_in)

    # --- Attention, stage 2: same pattern on the decoder output ---
    weights_2 = tfkl.Dense(dense_units, activation='softmax')(decoded)
    weights_2 = tfkl.Permute((2, 1))(weights_2)
    context_2 = tfkl.Dot(axes=(2, 1))([weights_2, decoded])

    flat = tfkl.Flatten()(context_2)

    # --- Regression head (explicit linear activation) ---
    head = tfkl.Dense(n_outputs)(flat)
    head = tfkl.Activation('linear')(head)

    network = tfk.Model(inputs=series_in, outputs=head)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Build the DA-RNN with lstm_units=128 and dense_units=128, then print and draw its architecture.
model = da_rnn(input_shape, output_shape, 128, 128)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Train the model
# The trailing `.history` keeps only the per-epoch metrics dict of the History object returned by fit().
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,  # fraction of the training data used for validation
    callbacks = [
        # Stop once val_loss has not improved for 10 epochs and restore the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        # Scale the learning rate by 0.1 after 5 epochs without val_loss improvement.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=.1,
            patience=5,
            verbose=1
        ),
        # Write the model to disk whenever val_loss reaches a new best value.
        tfk.callbacks.ModelCheckpoint(
            filepath='Models/Model_DARNN',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        # TensorBoard logging, written once per epoch.
        tfk.callbacks.TensorBoard(
            log_dir='logs/Model_full',
            histogram_freq=0,
            write_graph=True,
            write_images=True,
            update_freq='epoch',
            profile_batch=2,
            embeddings_freq=0,
            embeddings_metadata=None
        )
    ]
).history

In [None]:
# Training vs validation loss; the dashed line marks the epoch with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])
fig, ax = plt.subplots(figsize=(17,4))
ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax.set_title('Mean Squared Error')
ax.legend()
ax.grid(alpha=.3)
plt.show()

In [None]:
# Predict the test set using the model
predictions = model.predict(X_test, verbose=0)

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Score every test series over the first 9 forecast steps. The slice has to be on
# the forecast axis (axis 1): slicing axis 0 would keep only the first 9 samples
# and ignore the rest of the test set.
y_true_flat = y_test[:, :9].flatten()
y_pred_flat = predictions[:, :9].flatten()

# Calculate and print Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Calculate and print Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# DA-RNN with Class information

In [None]:
def da_rnn_with_classes(input_shape, output_shape, lstm_units_encoder=64, lstm_units_decoder=64, dense_units=64, num_classes=6):
    """Build and compile a two-input DA-RNN style model (series + class vector).

    The class vector is repeated along the time axis, concatenated with the
    encoder LSTM output and passed through a second LSTM before the two
    attention stages.

    Args:
        input_shape: shape of one input series; ``input_shape[0]`` is used as the
            number of timesteps.
        output_shape: ``output_shape[0]`` is the number of values to predict.
        lstm_units_encoder: units of the first (encoder) LSTM.
        lstm_units_decoder: units of the class-aware LSTM and of the decoder LSTM.
        dense_units: accepted but not used by this builder.
        num_classes: length of the class input vector.

    Returns:
        A compiled ``tfk.Model`` (Adam optimizer, MSE loss) whose inputs are
        ``[series, classes]``.
    """
    n_steps = input_shape[0]
    n_outputs = output_shape[0]

    # --- Encoder ---
    series_in = tfkl.Input(shape=input_shape)
    encoded = tfkl.LSTM(units=lstm_units_encoder, return_sequences=True)(series_in)

    # --- Class branch: tile the class vector over time and fuse it with the encoding ---
    class_in = tfkl.Input(shape=(num_classes,))
    class_tiled = tfkl.RepeatVector(n_steps)(class_in)
    fused = tfkl.Concatenate(axis=-1)([encoded, class_tiled])
    encoded_cls = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(fused)

    # --- Attention, stage 1 ---
    weights_1 = tfkl.Dense(n_steps, activation='softmax')(encoded_cls)
    weights_1 = tfkl.Permute((2, 1))(weights_1)
    context_1 = tfkl.Dot(axes=(2, 1))([weights_1, encoded_cls])

    # --- Decoder: attention context joined with the repeated last encoder step ---
    last_step_tiled = tfkl.RepeatVector(n_steps)(encoded_cls[:, -1, :])
    decoder_in = tfkl.Concatenate(axis=-1)([context_1, last_step_tiled])
    decoded = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(decoder_in)

    # --- Attention, stage 2 ---
    weights_2 = tfkl.Dense(n_steps, activation='softmax')(decoded)
    weights_2 = tfkl.Permute((2, 1))(weights_2)
    context_2 = tfkl.Dot(axes=(2, 1))([weights_2, decoded])

    flat = tfkl.Flatten()(context_2)

    # --- Regression head (explicit linear activation) ---
    head = tfkl.Dense(n_outputs)(flat)
    head = tfkl.Activation('linear')(head)

    network = tfk.models.Model(inputs=[series_in, class_in], outputs=head)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Build the class-aware DA-RNN with 128 encoder and 128 decoder LSTM units,
# then print and draw its architecture.
model = da_rnn_with_classes(input_shape, output_shape, 128, 128)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Train the model
# Two inputs are fed: the series and the one-hot class vectors.
# The trailing `.history` keeps only the per-epoch metrics dict of the History object returned by fit().
history = model.fit(
    x = [X_train, classes_train],
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,  # fraction of the training data used for validation
    callbacks = [
        # Stop once val_loss has not improved for 10 epochs and restore the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        # Scale the learning rate by 0.1 after 5 epochs without val_loss improvement.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=.1,
            patience=5,
            verbose=1
        ),
        # Write the model to disk whenever val_loss reaches a new best value.
        tfk.callbacks.ModelCheckpoint(
            filepath='Models/Model_DARNN_Class',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
    ]
).history

In [None]:
# Training vs validation loss; the dashed line marks the epoch with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])
fig, ax = plt.subplots(figsize=(17,4))
ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax.set_title('Mean Squared Error')
ax.legend()
ax.grid(alpha=.3)
plt.show()

## Test evaluation

In [None]:
# Reload the model from the path written by the ModelCheckpoint callback during training.
model = tfk.models.load_model('Models/Model_DARNN_Class')

In [None]:
from sklearn.metrics import r2_score
# Predict the test set using the model
predictions = model.predict([X_test, classes_test], verbose=0)

# Replace NaN predictions with 0 (vectorised, in place).
predictions[np.isnan(predictions)] = 0

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Calculate and print Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Calculate and print Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# Calculate and print Root Mean Squared Error (RMSE)
root_mean_squared_error = tf.math.sqrt(mean_squared_error)
print(f"Root Mean Squared Error: {root_mean_squared_error}")

# Calculate and print Mean Absolute Percentage Error (MAPE)
mean_absolute_percentage_error = tfk.metrics.mean_absolute_percentage_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error}")

# Calculate and print Coefficient of Determination (R^2)
r2 = r2_score(y_test.flatten(), predictions.flatten())
print(f"Coefficient of Determination (R^2): {r2}")

# Squared residual per sample and forecast step. y_test carries a trailing channel
# axis, so it is reshaped to the predictions' shape before subtracting; otherwise
# NumPy would try to broadcast the two arrays against each other instead of
# pairing them element by element.
y_true_aligned = np.reshape(y_test, predictions.shape)
print('residuals: ', np.square(predictions - y_true_aligned))

# Calculate and print Adjusted Coefficient of Determination (Adj. R^2)
# NOTE(review): this uses the number of test series as n and the input window length
# as the number of predictors; the denominator is zero or negative when
# len(y_test) <= X_test.shape[1] + 1 - confirm this is the intended definition.
adjusted_r2 = 1 - (1-r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
print(f"Adjusted Coefficient of Determination (Adj. R^2): {adjusted_r2}")

# DA-RNN with Advanced Attention Mechanism (SE) and Classes

In [None]:
def se_block(in_block, ch, idx, ratio=16):
    """Squeeze-and-excitation block for sequence tensors.

    Args:
        in_block: input tensor; the callers in this notebook pass LSTM outputs
            produced with ``return_sequences=True``.
        ch: number of channels of `in_block` (units of the final Dense layer).
        idx: suffix appended to the layer names so that several blocks can
            coexist in one model.
        ratio: reduction factor of the bottleneck Dense layer.

    Returns:
        `in_block` multiplied by the learned per-channel gate.
    """
    # 1. Define the squeeze block (global pooling)
    squeeze = tfkl.GlobalAveragePooling1D(name='squeeze' + str(idx))(in_block)
    # 2. Define the excitation block (ch // ratio to compress the information).
    #    Dense expects an integer number of units, so use integer division and
    #    keep at least one unit when ch < ratio.
    bottleneck_units = max(ch // ratio, 1)
    excitation = tfkl.Dense(units = bottleneck_units, activation = 'relu', name='excitation_a' + str(idx))(squeeze)
    # 3. Expand the information and normalize with a sigmoid (give more weight to some channels)
    excitation = tfkl.Dense(units = ch, activation='sigmoid', name='excitation_b' + str(idx))(excitation)
    # 4. Multiplication layer.
    scaled_input = tfkl.multiply([in_block, excitation])

    return scaled_input

def da_rnn_se_classes(input_shape, output_shape, lstm_units_encoder=64, lstm_units_decoder=64, dense_units=64, num_classes=6):
    """Build and compile the class-aware DA-RNN variant that uses SE blocks as attention.

    Args:
        input_shape: shape of one input series; ``input_shape[0]`` is used as the
            number of timesteps.
        output_shape: ``output_shape[0]`` is the number of values to predict.
        lstm_units_encoder: units of the first (encoder) LSTM.
        lstm_units_decoder: units of the class-aware LSTM and of the decoder LSTM;
            also passed to `se_block` as the channel count.
        dense_units: accepted but not used by this builder.
        num_classes: length of the class input vector.

    Returns:
        A compiled ``tfk.Model`` (Adam optimizer, MSE loss) whose inputs are
        ``[series, classes]``.
    """
    n_steps = input_shape[0]
    n_outputs = output_shape[0]

    # --- Encoder ---
    series_in = tfkl.Input(shape=input_shape)
    encoded = tfkl.LSTM(units=lstm_units_encoder, return_sequences=True)(series_in)

    # --- Class branch: tile the class vector over time and fuse it with the encoding ---
    class_in = tfkl.Input(shape=(num_classes,))
    class_tiled = tfkl.RepeatVector(n_steps)(class_in)
    fused = tfkl.Concatenate(axis=-1)([encoded, class_tiled])
    encoded_cls = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(fused)

    # --- Attention, stage 1 (squeeze-and-excitation) ---
    attended_1 = se_block(encoded_cls, lstm_units_decoder, idx = 1)

    # --- Decoder: attended sequence joined with the repeated last encoder step ---
    last_step_tiled = tfkl.RepeatVector(n_steps)(encoded_cls[:, -1, :])
    decoder_in = tfkl.Concatenate(axis=-1)([attended_1, last_step_tiled])
    decoded = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(decoder_in)

    # --- Attention, stage 2 (squeeze-and-excitation) ---
    attended_2 = se_block(decoded, lstm_units_decoder, idx = 2)

    flat = tfkl.Flatten()(attended_2)

    # --- Regression head (explicit linear activation) ---
    head = tfkl.Dense(n_outputs)(flat)
    head = tfkl.Activation('linear')(head)

    network = tfk.models.Model(inputs=[series_in, class_in], outputs=head)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Build the SE variant: 128 encoder units, 64 decoder units; the last positional
# argument is dense_units, which the builder accepts but does not use.
model = da_rnn_se_classes(input_shape, output_shape, 128, 64, 64)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Train the model
# Two inputs are fed: the series and the one-hot class vectors.
# The trailing `.history` keeps only the per-epoch metrics dict of the History object returned by fit().
history = model.fit(
    x = [X_train, classes_train],
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,  # fraction of the training data used for validation
    callbacks = [
        # Stop once val_loss has not improved for 10 epochs and restore the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        # Scale the learning rate by 0.1 after 5 epochs without val_loss improvement.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=.1,
            patience=5,
            verbose=1
        ),
        # Write the model to disk whenever val_loss reaches a new best value.
        tfk.callbacks.ModelCheckpoint(
            filepath='Models/SubmissionModel_49',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
    ]
).history

In [None]:
# Save the in-memory model to the same path the ModelCheckpoint callback wrote to.
# NOTE(review): this overwrites the best-val_loss checkpoint. If training ran through
# all epochs without early stopping, the in-memory weights may be the last-epoch
# weights rather than the best ones - confirm the overwrite is intended.
model.save('Models/SubmissionModel_49')

In [None]:
# Training vs validation loss; the dashed line marks the epoch with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])
fig, ax = plt.subplots(figsize=(17,4))
ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax.set_title('Mean Squared Error')
ax.legend()
ax.grid(alpha=.3)
plt.show()

In [None]:
# Reload the SE model from disk before evaluating it on the test set.
model = tfk.models.load_model('Models/SubmissionModel_49')

In [None]:
from sklearn.metrics import r2_score
# Predict the test set using the model
predictions = model.predict([X_test, classes_test], verbose=0)

# Zero out any NaN predictions in place.
predictions[np.isnan(predictions)] = 0

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Flattened views shared by all metrics below.
y_true_flat = y_test.flatten()
y_pred_flat = predictions.flatten()

# Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# Root Mean Squared Error (RMSE)
root_mean_squared_error = tf.math.sqrt(mean_squared_error)
print(f"Root Mean Squared Error: {root_mean_squared_error}")

# Mean Absolute Percentage Error (MAPE)
mean_absolute_percentage_error = tfk.metrics.mean_absolute_percentage_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error}")

# Coefficient of Determination (R^2)
r2 = r2_score(y_true_flat, y_pred_flat)
print(f"Coefficient of Determination (R^2): {r2}")

# Adjusted Coefficient of Determination (Adj. R^2), with the number of test series
# as sample count and the input window length as predictor count.
n_samples = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2 = 1 - (1-r2)*(n_samples-1)/(n_samples-n_predictors-1)
print(f"Adjusted Coefficient of Determination (Adj. R^2): {adjusted_r2}")

# DA-RNN with Advanced Attention Mechanism (CBAM) and Classes

In [None]:
def CBAM_block(in_block, ch, ratio=16):
    """Convolutional Block Attention Module (channel, then spatial attention) for sequences.

    Args:
        in_block: input tensor; the callers in this notebook pass tensors laid
            out as (batch, steps, channels).
        ch: number of channels of `in_block`.
        ratio: reduction factor of the shared bottleneck Dense layer.

    Returns:
        `in_block` rescaled first by a per-channel gate and then by a per-step gate.
    """
    # 1. Channel attention block: average- and max-pool over axis 1.
    avg_pool = tf.reduce_mean(in_block, axis=1, keepdims=True)
    max_pool = tf.reduce_max(in_block, axis=1, keepdims=True)

    # Shared bottleneck layer applied to both pooled descriptors (at least one unit).
    dense1 = tfkl.Dense(units=max(ch // ratio, 1), activation='relu')
    avg_reduced = dense1(avg_pool)
    max_reduced = dense1(max_pool)

    # Linear output layer: the sigmoid is applied exactly once, after the two
    # branches are summed. A sigmoid here as well would confine the channel gate
    # to roughly (0.5, 0.88), so no channel could be suppressed or fully kept.
    dense2 = tfkl.Dense(units=ch)
    avg_attention = dense2(avg_reduced)
    max_attention = dense2(max_reduced)

    x = tf.add(avg_attention, max_attention)
    x = tf.nn.sigmoid(x)
    x = tf.multiply(in_block, x)

    # 2. Spatial attention block: pool over the channel axis, then a 1-D convolution
    #    produces one sigmoid gate per step.
    y_mean = tf.reduce_mean(x, axis=-1, keepdims=True)
    y_max = tf.reduce_max(x, axis=-1, keepdims=True)

    y = tf.concat([y_mean, y_max], axis=-1)
    y = tfkl.Conv1D(filters=1, kernel_size=5, padding='same', activation='sigmoid')(y)
    y = tf.multiply(x, y)

    return y


def da_rnn_CBAM_classes(input_shape, output_shape, lstm_units_encoder=64, lstm_units_decoder=64, dense_units=64, num_classes=6):
    """Build and compile the class-aware DA-RNN variant that uses CBAM blocks as attention.

    Args:
        input_shape: shape of one input series; ``input_shape[0]`` is used as the
            number of timesteps.
        output_shape: ``output_shape[0]`` is the number of values to predict.
        lstm_units_encoder: units of the first (encoder) LSTM.
        lstm_units_decoder: units of the class-aware LSTM and of the decoder LSTM;
            also passed to `CBAM_block` as the channel count.
        dense_units: accepted but not used by this builder.
        num_classes: length of the class input vector.

    Returns:
        A compiled ``tfk.Model`` (Adam optimizer, MSE loss) whose inputs are
        ``[series, classes]``.
    """
    n_steps = input_shape[0]
    n_outputs = output_shape[0]

    # --- Encoder ---
    series_in = tfkl.Input(shape=input_shape)
    encoded = tfkl.LSTM(units=lstm_units_encoder, return_sequences=True)(series_in)

    # --- Class branch: tile the class vector over time and fuse it with the encoding ---
    class_in = tfkl.Input(shape=(num_classes,))
    class_tiled = tfkl.RepeatVector(n_steps)(class_in)
    fused = tfkl.Concatenate(axis=-1)([encoded, class_tiled])
    encoded_cls = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(fused)

    # --- Attention, stage 1 (CBAM), batch-normalised before entering the decoder ---
    attended_1 = CBAM_block(encoded_cls, lstm_units_decoder)
    attended_1_bn = tfkl.BatchNormalization()(attended_1)

    # --- Decoder: normalised attention output joined with the repeated last encoder step ---
    last_step_tiled = tfkl.RepeatVector(n_steps)(encoded_cls[:, -1, :])
    decoder_in = tfkl.Concatenate(axis=-1)([attended_1_bn, last_step_tiled])
    decoded = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(decoder_in)

    # --- Attention, stage 2 (CBAM) ---
    attended_2 = CBAM_block(decoded, lstm_units_decoder)

    flat = tfkl.Flatten()(attended_2)

    # --- Regression head (explicit linear activation) ---
    head = tfkl.Dense(n_outputs)(flat)
    head = tfkl.Activation('linear')(head)

    network = tfk.models.Model(inputs=[series_in, class_in], outputs=head)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Build the CBAM variant, then print and draw its architecture.
model = da_rnn_CBAM_classes(input_shape, output_shape, 128, 64, 64) # encoder units=128, decoder units=64, dense_units=64 (not used by the builder)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Train the model
# This run uses a fixed 25 epochs rather than the notebook-wide `epochs` setting.
# The trailing `.history` keeps only the per-epoch metrics dict of the History object returned by fit().
history = model.fit(
    x = [X_train, classes_train],
    y = y_train,
    batch_size = batch_size,
    epochs = 25,
    validation_split=.1,  # fraction of the training data used for validation
    callbacks = [
        # Stop once val_loss has not improved for 3 epochs and restore the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        ),
        # NOTE(review): the plateau patience (5) is larger than the early-stopping
        # patience (3), so training will usually stop before the learning rate is
        # ever reduced - confirm this is intended.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=.1,
            patience=5,
            verbose=1
        ),
        # Write the model to disk whenever val_loss reaches a new best value.
        tfk.callbacks.ModelCheckpoint(
            filepath='Models/SubmissionModel_46',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
    ]
).history

In [None]:
# Training vs validation loss; the dashed line marks the epoch with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])
fig, ax = plt.subplots(figsize=(17,4))
ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax.set_title('Mean Squared Error')
ax.legend()
ax.grid(alpha=.3)
plt.show()

In [None]:
# Reload the CBAM model from the path written by the ModelCheckpoint callback.
model = tfk.models.load_model('Models/SubmissionModel_46')

## Test Evaluation

In [None]:
from sklearn.metrics import r2_score
# Predict the test set using the model
predictions = model.predict([X_test, classes_test], verbose=0)

# Zero out any NaN predictions in place.
predictions[np.isnan(predictions)] = 0

# Keep the first 9 forecast steps and add a trailing axis so the array lines up with y_test.
predictions = np.expand_dims(predictions[:, :9], axis = -1)

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Targets restricted to the same 9 steps, plus flattened views for the Keras metrics.
y_true_head = y_test[:, :9]
y_true_head_flat = y_true_head.flatten()
y_pred_flat = predictions.flatten()

# Mean Squared Error (MSE)
mean_squared_error = np.mean(np.square(y_true_head-predictions))
print(f"Mean Squared Error: {mean_squared_error}")

# Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_true_head_flat, y_pred_flat).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# Root Mean Squared Error (RMSE)
root_mean_squared_error = tf.math.sqrt(mean_squared_error)
print(f"Root Mean Squared Error: {root_mean_squared_error}")

# Mean Absolute Percentage Error (MAPE)
mean_absolute_percentage_error = tfk.metrics.mean_absolute_percentage_error(y_true_head_flat, y_pred_flat).numpy()
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error}")

# Coefficient of Determination (R^2), computed against the full flattened targets.
r2 = r2_score(y_test.flatten(), y_pred_flat)
print(f"Coefficient of Determination (R^2): {r2}")

# Adjusted Coefficient of Determination (Adj. R^2), with the number of test series
# as sample count and the input window length as predictor count.
n_samples = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2 = 1 - (1-r2)*(n_samples-1)/(n_samples-n_predictors-1)
print(f"Adjusted Coefficient of Determination (Adj. R^2): {adjusted_r2}")

# Hypertuning weighted output between two best models, using our test set as validation

In [None]:
# Load the two saved models whose outputs are blended below
# (model1: the CBAM checkpoint path, model2: the SE checkpoint path).
model1 = tfk.models.load_model('Models/SubmissionModel_46')
model2 = tfk.models.load_model('Models/SubmissionModel_49')

In [None]:
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import backend as K



# Function to calculate predictions with given alpha value
def calculate_predictions(alpha, out1, out2, classes_test):
    """Blend two models' predictions with a class-dependent weight.

    For samples whose one-hot class vector has a 1 at index 0 or index 4, the
    result is ``alpha * out1 + (1 - alpha) * out2``; for every other sample the
    weights are swapped.

    Args:
        alpha: blending weight.
        out1: predictions of the first model, one row per sample.
        out2: predictions of the second model, same shape as `out1`.
        classes_test: one-hot class matrix, one row per sample.

    Returns:
        A new array with the blended predictions. `out1` and `out2` are left
        untouched, so the same raw predictions can be reused for several alphas.
    """
    out1 = np.asarray(out1)
    out2 = np.asarray(out2)
    classes = np.asarray(classes_test)

    # True where the first model receives weight `alpha`.
    uses_alpha_for_out1 = (classes[:, 0] == 1) | (classes[:, 4] == 1)

    def _per_sample(weight_if_true, weight_if_false, target):
        """Per-sample weights reshaped to broadcast over target's trailing axes."""
        weights = np.where(uses_alpha_for_out1[:len(target)], weight_if_true, weight_if_false)
        return weights.reshape((-1,) + (1,) * (target.ndim - 1))

    blended = (out1 * _per_sample(alpha, 1 - alpha, out1)
               + out2 * _per_sample(1 - alpha, alpha, out2))

    # Keep the floating dtype of the inputs (e.g. float32 model outputs).
    result_dtype = np.result_type(out1, out2)
    if np.issubdtype(result_dtype, np.floating):
        blended = blended.astype(result_dtype, copy=False)
    return blended

# Hyperparameter grid for alpha
param_grid = {'alpha': np.linspace(0, 1, 100)}

# Initialize best values
best_alpha = None
best_mse = float('inf')
mse_values = []  # To store MSE values for each alpha

# The raw model outputs do not depend on alpha, so predict once instead of
# re-running both networks for each of the 100 grid points.
out1 = model1.predict([X_test, classes_test])
out2 = model2.predict([X_test, classes_test])

# Loop through the hyperparameter grid
for params in ParameterGrid(param_grid):
    alpha = params['alpha']
    print("\n" + "+"*50)
    print(f'Alpha = {alpha}')

    # Calculate predictions. Copies are passed so the cached raw outputs stay
    # intact even if calculate_predictions scales its arguments in place.
    predictions = calculate_predictions(alpha, out1.copy(), out2.copy(), classes_test)

    # Calculate mean squared error
    mse = mean_squared_error(y_test.flatten(), predictions.flatten())
    mse_values.append(mse)

    improved = mse < best_mse
    improvement_status = "Improved!" if improved else "Not Improved"

    # Update best values if necessary
    if improved:
        best_mse = mse
        best_alpha = alpha

    # Print details
    print(f'Mean Squared Error: {mse} ({improvement_status})')

# Print the best hyperparameters and corresponding MSE
print("\n" + "+"*50)
print("Best Hyperparameters:")
print(f'  Alpha: {best_alpha}')
print(f'  Mean Squared Error: {best_mse}')
print("+"*50)


# Plot the MSE values as a function of alpha
plt.figure(figsize=(10, 6))
plt.plot(param_grid['alpha'], mse_values, marker='o')
plt.title('Mean Squared Error as a function of Alpha')
plt.xlabel('Alpha')
plt.ylabel('Mean Squared Error')
plt.grid(True)
plt.show()


# (Tentative) All Tested Models

## ResNet + LSTM

In [None]:
def CBAM_block(in_block, ch, ratio=16):
    """Convolutional Block Attention Module (channel, then spatial attention) for sequences.

    Args:
        in_block: input tensor; the callers in this notebook pass tensors laid
            out as (batch, steps, channels).
        ch: number of channels of `in_block`.
        ratio: reduction factor of the shared bottleneck Dense layer.

    Returns:
        `in_block` rescaled first by a per-channel gate and then by a per-step gate.
    """
    # 1. Channel attention block: average- and max-pool over axis 1.
    avg_pool = tf.reduce_mean(in_block, axis=1, keepdims=True)
    max_pool = tf.reduce_max(in_block, axis=1, keepdims=True)

    # Shared bottleneck layer applied to both pooled descriptors (at least one unit).
    dense1 = tfkl.Dense(units=max(ch // ratio, 1), activation='relu')
    avg_reduced = dense1(avg_pool)
    max_reduced = dense1(max_pool)

    # Linear output layer: the sigmoid is applied exactly once, after the two
    # branches are summed. A sigmoid here as well would confine the channel gate
    # to roughly (0.5, 0.88), so no channel could be suppressed or fully kept.
    dense2 = tfkl.Dense(units=ch)
    avg_attention = dense2(avg_reduced)
    max_attention = dense2(max_reduced)

    x = tf.add(avg_attention, max_attention)
    x = tf.nn.sigmoid(x)
    x = tf.multiply(in_block, x)

    # 2. Spatial attention block: pool over the channel axis, then a 1-D convolution
    #    produces one sigmoid gate per step.
    y_mean = tf.reduce_mean(x, axis=-1, keepdims=True)
    y_max = tf.reduce_max(x, axis=-1, keepdims=True)

    y = tf.concat([y_mean, y_max], axis=-1)
    y = tfkl.Conv1D(filters=1, kernel_size=5, padding='same', activation='sigmoid')(y)
    y = tf.multiply(x, y)

    return y

def ResBs_CBAM_ConvLSTM(block_input, num_filters):
    """Residual block: strided Conv1D -> LSTM -> Conv1D -> CBAM, added to a projected shortcut.

    Args:
        block_input: input sequence tensor.
        num_filters: number of filters / LSTM units used throughout the block.

    Returns:
        The ReLU-activated sum of the cropped shortcut and the CBAM output.
    """
    # Shortcut branch: 1x1 convolution to `num_filters` channels, then batch norm.
    block_input_short = tfkl.Conv1D(num_filters, kernel_size=1, padding='same')(block_input)
    block_input_short = tfkl.BatchNormalization()(block_input_short)

    # Main branch: the stride-2 convolution shortens the sequence.
    conv1 = tfkl.Conv1D(filters=num_filters, kernel_size=5, strides=2, padding='same')(block_input)
    norm1 = tfkl.BatchNormalization()(conv1)
    relu1 = tfkl.Activation('relu')(norm1)

    lstm = tfkl.LSTM(units=num_filters, return_sequences=True)(relu1)

    conv2 = tfkl.Conv1D(num_filters, kernel_size=7, padding='same')(lstm)
    norm2 = tfkl.BatchNormalization()(conv2)

    CBAM_output = CBAM_block(norm2, num_filters, ratio=16)
    # Second 1x1 convolution on the shortcut branch.
    block_input_short = tfkl.Conv1D(num_filters, kernel_size=1, padding='same')(block_input_short)

    # Adjust the following line to ensure compatibility with input shapes
    # NOTE(review): the shortcut is not strided, so it is cropped at the end to the
    # length of the main branch. The sum therefore pairs the first steps of the input
    # with the whole downsampled sequence - confirm this temporal misalignment is
    # acceptable (a stride-2 shortcut convolution would keep the two branches aligned).
    res_output = tfkl.Add()([tfkl.Cropping1D(cropping=(0, block_input_short.shape[1] - CBAM_output.shape[1]))(block_input_short), CBAM_output])
    relu2 = tfkl.Activation('relu')(res_output)

    return relu2

def ResBs_CBAM_IdentityLSTM(block_input, num_filters):
    """Residual block: Conv1D -> BatchNorm -> ReLU -> CBAM, added to the unmodified input.

    Despite its name, this block contains no LSTM layer.

    Args:
        block_input: input sequence tensor; it is added to the CBAM output, so its
            channel count presumably has to equal `num_filters` - TODO confirm.
        num_filters: number of convolution filters.

    Returns:
        The ReLU-activated sum of the (cropped) input and the CBAM output.
    """

    conv1 = tfkl.Conv1D(filters=num_filters, kernel_size=7, padding='same')(block_input)
    norm1 = tfkl.BatchNormalization()(conv1)
    relu1 = tfkl.Activation('relu')(norm1)
    CBAM_output = CBAM_block(relu1, num_filters, ratio=16)


    # Adjust the following line to ensure compatibility with input shapes
    # The crop amount is the length difference between the input and the CBAM output.
    res_output = tfkl.Add()([tfkl.Cropping1D(cropping=(0, block_input.shape[1] - CBAM_output.shape[1]))(block_input), CBAM_output])
    relu2 = tfkl.Activation('relu')(res_output)

    return relu2

def resnet_CBAM_LSTM_time_series(input_shape, output_shape, N):
    """Build and compile a ResNet-style forecaster made of CBAM residual blocks.

    Args:
        input_shape: shape of one input series.
        output_shape: ``output_shape[0]`` is the number of values to predict.
        N: depth setting; ``int((N - 2) / 2)`` pairs of (ConvLSTM, Identity)
            residual blocks are stacked.

    Returns:
        A compiled ``tfk.Model`` (Adam with learning rate 1e-3, MSE loss).
    """
    # Input shape for time series data
    ts_input = tfkl.Input(shape=input_shape, name='time_series_input')

    # Stem: wide convolution, batch norm, ReLU, then max pooling halves the length.
    ResNet = tfkl.Conv1D(filters=64, kernel_size=15, padding='same')(ts_input)
    ResNet = tfkl.BatchNormalization()(ResNet)
    ResNet = tfkl.Activation('relu')(ResNet)
    ResNet = tfkl.MaxPooling1D(pool_size=2, strides=2)(ResNet)

    filters = 64
    M = int((N - 2) / 2)
    for i in range(M):
        # The filter count doubles with every pair of residual blocks.
        filters = filters * 2
        ResNet = ResBs_CBAM_ConvLSTM(ResNet, filters)
        ResNet = ResBs_CBAM_IdentityLSTM(ResNet, filters)

    ResNet = tfkl.GlobalAveragePooling1D(name='gap_layer')(ResNet)

    # Output layer for time series forecasting (output_shape[0] values per series)
    output = tfkl.Dense(output_shape[0], activation='linear', name='output')(ResNet)

    model = tfk.Model(inputs=ts_input, outputs=output)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')

    return model

In [None]:
# Build the ResNet variant with N=4 (one ConvLSTM + Identity block pair),
# then print and draw its architecture.
model = resnet_CBAM_LSTM_time_series(input_shape, output_shape, N = 4)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

Hyperparameter tuning (grid search)

In [None]:
!pip install scikeras

In [None]:
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from tensorflow.keras import layers as tfkl
from tensorflow.keras.models import Sequential
from scikeras.wrappers import KerasRegressor

# Define the hyperparameters grid
# Only arguments accepted by resnet_CBAM_LSTM_time_series are listed; `ch` is not
# one of its parameters, so it is not searched here.
param_grid = {
    'N': [4, 6, 8],
    'input_shape': [input_shape],
    'output_shape': [output_shape]
}

# Create the GridSearchCV object
# The wrapper gets its own name so that `model` keeps referring to the Keras model
# built earlier, which the training cell below relies on.
regressor = KerasRegressor(build_fn=resnet_CBAM_LSTM_time_series, N=4, input_shape = input_shape, output_shape = output_shape, epochs=10, batch_size=32, verbose=0)
grid = GridSearchCV(estimator=regressor, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3, verbose = 3)

# Flatten the targets to (num_samples, num_outputs) for the scikit-learn API
y_train_reshaped = y_train.reshape(y_train.shape[0], -1)


# Fit the model to the data
grid_result = grid.fit(X_train, y_train_reshaped)

# Print the best parameters and the corresponding score (negative MSE), then every grid point
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))



In [None]:
# Train the model
# NOTE(review): `model` must be the Keras model here; the grid-search cell above
# reassigns `model` to a scikeras wrapper, whose fit() result presumably has no
# `.history` attribute - confirm the execution order.
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,  # fraction of the training data used for validation
    callbacks = [
        # Stop once val_loss has not improved for 10 epochs and restore the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        # Scale the learning rate by 0.1 after 5 epochs without val_loss improvement.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=.1,
            patience=5,
            verbose=1
        ),
        # Write the model to disk whenever val_loss reaches a new best value.
        tfk.callbacks.ModelCheckpoint(
            filepath='Models/Model_RESNET_LSTM',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
    ]
).history

In [None]:
# Training vs validation loss; the dashed line marks the epoch with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])
fig_loss, ax_loss = plt.subplots(figsize=(17,4))
ax_loss.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
ax_loss.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
ax_loss.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax_loss.set_title('Mean Squared Error')
ax_loss.legend()
ax_loss.grid(alpha=.3)
plt.show()

# Learning-rate schedule recorded in the training history, with the same best-epoch marker.
fig_lr, ax_lr = plt.subplots(figsize=(18,3))
ax_lr.plot(history['lr'], label='Learning Rate', alpha=.8, color='#ff7f0e')
ax_lr.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
ax_lr.legend()
ax_lr.grid(alpha=.3)
plt.show()

In [None]:
# Save the in-memory model to the same path the ModelCheckpoint callback wrote to, then reload it.
# NOTE(review): the save overwrites the best-val_loss checkpoint with the current
# in-memory weights - confirm the overwrite is intended.
model.save('Models/Model_RESNET_LSTM')
model = tfk.models.load_model('Models/Model_RESNET_LSTM')

### Test evaluation

In [None]:
# Predict the test set using the model
predictions = model.predict(X_test, verbose=0)

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Score every test series over the first 9 forecast steps. The slice has to be on
# the forecast axis (axis 1): slicing axis 0 would keep only the first 9 samples
# and ignore the rest of the test set.
y_true_flat = y_test[:, :9].flatten()
y_pred_flat = predictions[:, :9].flatten()

# Calculate and print Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Calculate and print Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

In [None]:
# Plot one test series: its input window, the true continuation and the forecast.
# A single sample is indexed explicitly (transposing the 3-D arrays would draw every
# series at once), and the target / forecast are placed after the input window
# on the x axis instead of starting at 0.
sample_idx = 0
input_window = np.asarray(X_test)[sample_idx].reshape(-1)
true_future = np.asarray(y_test)[sample_idx].reshape(-1)
predicted_future = np.asarray(predictions)[sample_idx].reshape(-1)

input_steps = np.arange(len(input_window))
true_steps = np.arange(len(input_window), len(input_window) + len(true_future))
predicted_steps = np.arange(len(input_window), len(input_window) + len(predicted_future))

plt.figure(figsize=(17,5))
plt.plot(input_steps, input_window, label='Train')
plt.plot(true_steps, true_future, label='Test')
plt.plot(predicted_steps, predicted_future, label='Predicted')
plt.title('Test Split')
plt.legend()
plt.show()

## ResNet + Squeeze & Excitation


In [None]:
# Squeeze and excitation module
def se_block(in_block, ch, ratio=16):
    """Squeeze-and-Excitation block: rescale the channels of `in_block`.

    Args:
        in_block: input tensor; it is pooled with GlobalAveragePooling1D and then
            multiplied by the learned per-channel gates.
        ch: number of units of the gating (sigmoid) layer; callers pass the number
            of filters of the enclosing block.
        ratio: reduction factor of the bottleneck Dense layer.

    Returns:
        `in_block` multiplied by the sigmoid gates.
    """
    # Dense needs an integer unit count: `ch / ratio` is a float and falls below 1
    # when ch < ratio, so use an integer bottleneck width of at least 1.
    bottleneck_units = max(1, int(ch // ratio))

    # 1. Squeeze: global average pooling
    squeeze = tfkl.GlobalAveragePooling1D()(in_block)
    # 2. Excitation: compress the pooled descriptor to the bottleneck width
    excitation = tfkl.Dense(units = bottleneck_units, activation = 'relu')(squeeze)
    # 3. Expand back to `ch` units and squash to (0, 1) with a sigmoid
    excitation = tfkl.Dense(units = ch, activation='sigmoid')(excitation)
    # 4. Scale the input by the gates
    scaled_input = tfkl.multiply([in_block, excitation])

    return scaled_input

# ResNet with Squeeze and excitation block model definition
## ADD the SQ block

def ResBs_SE_Conv(block_input, num_filters):
    """Residual block with a convolutional (projection) shortcut and an SE stage.

    Both the shortcut and the first convolution of the main path use kernel_size=7,
    strides=2 and 'valid' padding, so the two branches have matching shapes when added.

    Args:
        block_input: input tensor of the block.
        num_filters: number of filters of every convolution in the block.

    Returns:
        The ReLU-activated sum of the shortcut and the SE-scaled main path.
    """
    # Shortcut branch: strided convolution + batch normalization
    shortcut = tfkl.Conv1D(num_filters, kernel_size=7, strides=2, padding='valid')(block_input)
    shortcut = tfkl.BatchNormalization()(shortcut)

    # Main branch, first stage: strided conv -> BN -> ReLU -> dropout
    x = tfkl.Conv1D(filters=num_filters, kernel_size=7, strides=2, padding='valid')(block_input)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.Dropout(0.2)(x)

    # Main branch, second stage: 'same' padding keeps the length equal to the shortcut's
    x = tfkl.Conv1D(num_filters, kernel_size=7, padding='same')(x)
    x = tfkl.BatchNormalization()(x)

    # Squeeze-and-excitation after the second convolution (channels == num_filters)
    x = se_block(x, ch=num_filters)

    # Residual sum followed by the final activation
    merged = tfkl.Add()([shortcut, x])
    return tfkl.Activation('relu')(merged)

def ResBs_SE_Identity(block_input, num_filters):
    """Residual block with an identity shortcut and an SE stage.

    The input is added unchanged to the main path, so `block_input` must already
    have `num_filters` channels for the sum to be valid.

    Args:
        block_input: input tensor of the block.
        num_filters: number of filters of both convolutions.

    Returns:
        The ReLU-activated sum of the input and the SE-scaled main path.
    """
    # First stage: conv -> BN -> ReLU -> dropout
    x = tfkl.Conv1D(filters=num_filters, kernel_size=7, padding='same')(block_input)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.Dropout(0.2)(x)

    # Second stage: 'same' padding keeps the shape aligned with the input
    x = tfkl.Conv1D(num_filters, kernel_size=7, padding='same')(x)
    x = tfkl.BatchNormalization()(x)

    # Squeeze-and-excitation after the second convolution (channels == num_filters)
    x = se_block(x, ch=num_filters)

    # Identity residual connection and final activation
    merged = tfkl.Add()([block_input, x])
    return tfkl.Activation('relu')(merged)

# model integrating deep + wide
def resnet_SE_deep_wide(input_shape, output_shape, N=8):
    """Build and compile a 1D ResNet with squeeze-and-excitation blocks.

    Architecture: stem (Conv1D 64 -> BN -> ReLU -> MaxPooling), two identity blocks
    with 64 filters, then int((N - 2) / 2) pairs of (Conv block, Identity block)
    whose filter count doubles at every pair, global average pooling and a linear
    Dense head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        output_shape: shape of the target; only `output_shape[0]` is used, as the
            number of units of the output layer.
        N: controls the depth; int((N - 2) / 2) Conv+Identity pairs are stacked
            after the two initial identity blocks.

    Returns:
        A compiled `tfk.Model` (Adam, learning rate 1e-3, MSE loss).
    """
    signal_in = tfkl.Input(shape=input_shape, name='ecg_signal')

    # Stem
    x = tfkl.Conv1D(filters=64, kernel_size=15, padding='same')(signal_in)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.MaxPooling1D(pool_size=2, strides=2)(x)

    # The first two residual blocks are identity blocks at 64 filters
    x = ResBs_SE_Identity(x, 64)
    x = ResBs_SE_Identity(x, 64)

    # Remaining blocks come in (Conv, Identity) pairs; filters double at each pair
    filters = 64
    n_pairs = int((N - 2) / 2)
    for _pair in range(n_pairs):
        filters *= 2
        x = ResBs_SE_Conv(x, filters)
        x = ResBs_SE_Identity(x, filters)

    x = tfkl.GlobalAveragePooling1D(name='gap_layer')(x)

    # Linear forecasting head with output_shape[0] units
    output = tfkl.Dense(output_shape[0], activation='linear', name='output')(x)

    model = tfk.Model(inputs=signal_in, outputs=output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')

    return model

In [None]:
# Build the SE-ResNet defined in this section. The original call targeted
# `resnet_SE_deep`, while the builder defined in this section is `resnet_SE_deep_wide`.
model = resnet_SE_deep_wide(input_shape, output_shape, N = 6)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Callbacks, all monitoring the validation loss
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
lr_scheduler = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=.1,
    patience=5,
    verbose=1
)
# NOTE(review): other training cells in this notebook checkpoint to this same path,
# so each run overwrites the previous model's checkpoint — confirm this is intended.
checkpoint = tfk.callbacks.ModelCheckpoint(
    filepath='Models/Model_RESNET_LSTM',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Train the model, holding out 10% of the training data for validation;
# only the per-epoch metrics dictionary is kept.
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,
    callbacks = [early_stopping, lr_scheduler, checkpoint]
).history

In [None]:
# Index of the epoch with the lowest validation loss
best_epoch = np.argmin(history['val_loss'])

# Figure 1: training vs. validation loss, with the best epoch marked
loss_fig, loss_ax = plt.subplots(figsize=(17,4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error')
loss_ax.legend()
loss_ax.grid(alpha=.3)
plt.show()

# Figure 2: learning-rate schedule recorded in the history under 'lr'
lr_fig, lr_ax = plt.subplots(figsize=(18,3))
lr_ax.plot(history['lr'], label='Learning Rate', alpha=.8, color='#ff7f0e')
lr_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
lr_ax.legend()
lr_ax.grid(alpha=.3)
plt.show()

## ResNet + Squeeze & Excitation + LSTM

In [None]:
# Squeeze and excitation module
def se_block(in_block, ch, ratio=16):
    """Squeeze-and-Excitation block: rescale the channels of `in_block`.

    Args:
        in_block: input tensor; it is pooled with GlobalAveragePooling1D and then
            multiplied by the learned per-channel gates.
        ch: number of units of the gating (sigmoid) layer; callers pass the number
            of filters of the enclosing block.
        ratio: reduction factor of the bottleneck Dense layer.

    Returns:
        `in_block` multiplied by the sigmoid gates.
    """
    # Dense needs an integer unit count: `ch / ratio` is a float and falls below 1
    # when ch < ratio, so use an integer bottleneck width of at least 1.
    bottleneck_units = max(1, int(ch // ratio))

    # 1. Squeeze: global average pooling
    squeeze = tfkl.GlobalAveragePooling1D()(in_block)
    # 2. Excitation: compress the pooled descriptor to the bottleneck width
    excitation = tfkl.Dense(units = bottleneck_units, activation = 'relu')(squeeze)
    # 3. Expand back to `ch` units and squash to (0, 1) with a sigmoid
    excitation = tfkl.Dense(units = ch, activation='sigmoid')(excitation)
    # 4. Scale the input by the gates
    scaled_input = tfkl.multiply([in_block, excitation])

    return scaled_input

# ResNet with Squeeze and excitation block model definition
## ADD the SQ block

def ResBs_SE_LSTM(block_input, num_filters):
    """Residual SE block whose first stage is an LSTM instead of a convolution.

    The shortcut is a 1x1 convolution that brings the input to `num_filters`
    channels; no striding is applied anywhere in the block.

    Args:
        block_input: input tensor of the block.
        num_filters: number of LSTM units and of filters of every convolution.

    Returns:
        The ReLU-activated sum of the shortcut and the SE-scaled main path.
    """
    # Shortcut branch: 1x1 convolution for channel matching + batch normalization
    shortcut = tfkl.Conv1D(num_filters, kernel_size=1, padding='same')(block_input)
    shortcut = tfkl.BatchNormalization()(shortcut)

    # Main branch, first stage: sequence-returning LSTM, 1x1 conv, BN, ReLU, dropout
    x = tfkl.LSTM(units=num_filters, return_sequences=True)(block_input)
    x = tfkl.Conv1D(num_filters, kernel_size=1, padding='same')(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.Dropout(0.2)(x)

    # Main branch, second stage: convolution + batch normalization
    x = tfkl.Conv1D(num_filters, kernel_size=7, padding='same')(x)
    x = tfkl.BatchNormalization()(x)

    # Squeeze-and-excitation after the second convolution (channels == num_filters)
    x = se_block(x, ch=num_filters)

    # Residual sum followed by the final activation
    merged = tfkl.Add()([shortcut, x])
    return tfkl.Activation('relu')(merged)

# Modify your resnet_SE_deep_wide function to incorporate the LSTM-based block

def resnet_SE_deep_LSTM(input_shape, output_shape, N=8):
    """Build and compile the SE-ResNet variant that uses LSTM-based residual blocks.

    Architecture: stem (Conv1D 64 -> BN -> ReLU -> MaxPooling), two identity blocks
    with 64 filters, then int((N - 2) / 2) pairs of (LSTM block, Identity block)
    whose filter count doubles at every pair, global average pooling and a linear
    Dense head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        output_shape: shape of the target; only `output_shape[0]` is used, as the
            number of units of the output layer.
        N: controls the depth; int((N - 2) / 2) LSTM+Identity pairs are stacked
            after the two initial identity blocks.

    Returns:
        A compiled `tfk.Model` (Adam, learning rate 1e-3, MSE loss).
    """
    input_layer = tfkl.Input(shape=input_shape, name='ecg_signal')

    # Stem
    x = tfkl.Conv1D(filters=64, kernel_size=15, padding='same')(input_layer)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.MaxPooling1D(pool_size=2, strides=2)(x)

    # The first two residual blocks are identity blocks at 64 filters
    x = ResBs_SE_Identity(x, 64)
    x = ResBs_SE_Identity(x, 64)

    # Remaining blocks come in (LSTM, Identity) pairs; filters double at each pair
    filters = 64
    n_pairs = int((N - 2) / 2)
    for _pair in range(n_pairs):
        filters *= 2
        x = ResBs_SE_LSTM(x, filters)
        x = ResBs_SE_Identity(x, filters)

    x = tfkl.GlobalAveragePooling1D(name='gap_layer')(x)

    # Linear forecasting head with output_shape[0] units
    output_layer = tfkl.Dense(output_shape[0], activation='linear', name='output')(x)

    model = tfk.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')

    return model


In [None]:
# Build the LSTM variant defined in this section. The original call targeted
# `resnet_SE_deep`, so the LSTM-based architecture was never actually instantiated.
model = resnet_SE_deep_LSTM(input_shape, output_shape, N = 6)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Callbacks, all monitoring the validation loss
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
lr_scheduler = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=.1,
    patience=5,
    verbose=1
)
checkpoint = tfk.callbacks.ModelCheckpoint(
    filepath='Models/Model_RESNET_LSTM',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Train the model, holding out 10% of the training data for validation;
# only the per-epoch metrics dictionary is kept.
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,
    callbacks = [early_stopping, lr_scheduler, checkpoint]
).history

In [None]:
# Index of the epoch with the lowest validation loss
best_epoch = np.argmin(history['val_loss'])

# Figure 1: training vs. validation loss, with the best epoch marked
loss_fig, loss_ax = plt.subplots(figsize=(17,4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error')
loss_ax.legend()
loss_ax.grid(alpha=.3)
plt.show()

# Figure 2: learning-rate schedule recorded in the history under 'lr'
lr_fig, lr_ax = plt.subplots(figsize=(18,3))
lr_ax.plot(history['lr'], label='Learning Rate', alpha=.8, color='#ff7f0e')
lr_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
lr_ax.legend()
lr_ax.grid(alpha=.3)
plt.show()

In [None]:
# Run the model on the test inputs
predictions = model.predict(X_test, verbose=0)

# Report the shape of the prediction array
print(f"Predictions shape: {predictions.shape}")

# NOTE(review): `[:9, ]` keeps only the first nine entries along axis 0 of each array
# before flattening — confirm that evaluating on this subset is intended.
y_true_flat = y_test[:9, ].flatten()
y_pred_flat = predictions[:9, ].flatten()

# Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_true_flat, y_pred_flat).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

## (Tentative) Transfer Learning From Best Model

In [None]:
# Load the previously saved model that serves as the pretrained base
# for the transfer-learning experiments below
best_model = tfk.models.load_model('Models/SubmissionModel_46')

In [None]:
# Freeze the layers of the loaded model so that only layers added afterwards are trained
for pretrained_layer in best_model.layers:
    pretrained_layer.trainable = False


### First model (no dense)

In [None]:
# Truncate the pretrained model at its second-to-last layer
# (assumes best_model exposes `.inputs` and `.layers`, i.e. a Sequential or Functional model)
backbone_features = best_model.layers[-2].output
modified_best_model = tfk.Model(inputs=best_model.inputs, outputs=backbone_features)

# New linear head with output_shape[0] units on top of the truncated model
new_head = tfkl.Dense(output_shape[0], activation='linear')
modified_best_model_output = new_head(modified_best_model.output)

# Assemble the final model from the original inputs to the new head
final_model_1 = tfk.Model(inputs=modified_best_model.inputs, outputs=modified_best_model_output)

# Compile with Adam and MSE loss
final_model_1.compile(optimizer='adam', loss='mse')

### Second Model (with Dense)

In [None]:
# Truncate the pretrained model at its second-to-last layer
backbone_features = best_model.layers[-2].output
modified_best_model = tfk.Model(inputs=best_model.inputs, outputs=backbone_features)

# Intermediate Dense layer with 64 ReLU units between the truncated model and the output
hidden_head = tfkl.Dense(64, activation='relu')
dense_layer = hidden_head(modified_best_model.output)

# Linear output layer with output_shape[0] units
output_head = tfkl.Dense(output_shape[0], activation='linear')
final_output = output_head(dense_layer)

# Assemble the final model from the original inputs to the new output
final_model_2 = tfk.Model(inputs=modified_best_model.inputs, outputs=final_output)

# Compile with Adam and MSE loss
final_model_2.compile(optimizer='adam', loss='mse')

In [None]:
# Print the layer table of the first transfer-learning model and render its graph
# with tensor shapes (nested models expanded)
final_model_1.summary()
tfk.utils.plot_model(final_model_1, expand_nested=True, show_shapes=True)

In [None]:
# Callbacks, all monitoring the validation loss
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)
# NOTE(review): this patience (5) is larger than the EarlyStopping patience (3), so
# training stops before the learning rate can ever be reduced — confirm the intended values.
lr_scheduler = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=.1,
    patience=5,
    verbose=1
)
checkpoint = tfk.callbacks.ModelCheckpoint(
    filepath='Models/Model_full_transfer_learning',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Train the transfer-learning model on the two inputs (series and classes),
# holding out 10% of the training data for validation; only the metrics dict is kept.
history = final_model_1.fit(
    x = [X_train, classes_train],
    y = y_train,
    batch_size = batch_size,
    epochs = 15,
    validation_split=.1,
    callbacks = [early_stopping, lr_scheduler, checkpoint]
).history

In [None]:
# Index of the epoch with the lowest validation loss
best_epoch = np.argmin(history['val_loss'])

# Training vs. validation loss, with the best epoch marked
loss_fig, loss_ax = plt.subplots(figsize=(17,4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error')
loss_ax.legend()
loss_ax.grid(alpha=.3)
plt.show()

In [None]:
from sklearn.metrics import r2_score

# Predict the test set using the model
predictions = final_model_1.predict([X_test, classes_test], verbose=0)

# Replace NaN predictions with 0 in a single vectorized assignment
# (same in-place effect as the previous element-by-element loop)
predictions[np.isnan(predictions)] = 0

# Add a trailing axis so predictions can be subtracted from y_test element-wise
predictions = np.expand_dims(predictions, axis = -1)

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Calculate and print Mean Squared Error (MSE)
mean_squared_error = np.mean(np.square(y_test-predictions))
print(f"Mean Squared Error: {mean_squared_error}")

# Calculate and print Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# Calculate and print Root Mean Squared Error (RMSE).
# np.sqrt keeps the result a plain NumPy scalar, so it prints as a number like the
# other metrics instead of as a tensor representation.
root_mean_squared_error = np.sqrt(mean_squared_error)
print(f"Root Mean Squared Error: {root_mean_squared_error}")

# Calculate and print Mean Absolute Percentage Error (MAPE)
mean_absolute_percentage_error = tfk.metrics.mean_absolute_percentage_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error}")

# Calculate and print Coefficient of Determination (R^2)
r2 = r2_score(y_test.flatten(), predictions.flatten())
print(f"Coefficient of Determination (R^2): {r2}")

# Calculate and print Adjusted Coefficient of Determination (Adj. R^2)
# NOTE(review): uses len(y_test) as the sample count and X_test.shape[1] as the number
# of predictors, while R^2 above is computed on the flattened arrays — confirm this choice.
adjusted_r2 = 1 - (1-r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
print(f"Adjusted Coefficient of Determination (Adj. R^2): {adjusted_r2}")

# Autocorrelation Model

In [None]:
def autocorrelation(X, lags):
    """Compute the autocorrelation of every series in `X` at the requested lags.

    For each series the value at lag l > 0 is
    sum(centered[l:] * centered[:-l]) / (n - l) / var, where `centered` is the
    series minus its mean, `n` its length and `var` its variance.

    Args:
        X: iterable of array-likes, one per series.
        lags: iterable of integer lags. Any order and any values are accepted;
            results are stored by position, not by lag value.

    Returns:
        A list with one 1D array per series; entry i holds the autocorrelation at
        `lags[i]`. Lags <= 0 yield 1.0 (self correlation). A constant series
        (zero variance) or a lag with no overlapping samples (lag >= n) yields 0.0.
    """
    lags = list(lags)
    autocorrelations = []

    for data in X:
        series = np.asarray(data, dtype=float)
        n = len(series)
        var = np.var(series)
        centered = series - np.mean(series)

        acorr = np.zeros(len(lags))

        # Index by position so that lags need not be 0..k-1
        for pos, lag in enumerate(lags):
            if lag <= 0:
                acorr[pos] = 1.0  # Self correlation
            elif var == 0 or lag >= n:
                # Undefined correlation: avoid a division by zero / NaN feature
                acorr[pos] = 0.0
            else:
                acorr[pos] = np.sum(centered[lag:] * centered[:-lag]) / (n - lag) / var

        autocorrelations.append(acorr)

    return autocorrelations
# Lags 0..9 to evaluate (despite its name, lag_max holds the whole range of lags)
lag_max = range(10)

# One autocorrelation vector per training series, with a trailing singleton axis added
autocorr_features_train = np.expand_dims(autocorrelation(X_train, lag_max), axis = -1)

In [None]:
# Display the shape of the autocorrelation feature array built above
autocorr_features_train.shape

In [None]:
def da_rnn_with_autocorr(input_shape, output_shape, lstm_units_encoder=64, lstm_units_decoder=64, dense_units=64, autocorr_features=None):
    """Build and compile an LSTM encoder/decoder with two attention stages and an
    optional autocorrelation input branch.

    Args:
        input_shape: shape of one input sequence; `input_shape[0]` is used as the
            sequence length for the attention layers and the repeat vectors.
        output_shape: shape of the target; only `output_shape[0]` is used, as the
            number of output units.
        lstm_units_encoder: units of the first encoder LSTM.
        lstm_units_decoder: units of the decoder LSTM (and of the second encoder
            LSTM when the autocorrelation branch is enabled).
        dense_units: units of the autocorrelation Dense layer and of the Dense
            layer applied after the second attention stage.
        autocorr_features: length of the autocorrelation vector; when None the
            model has a single input and no autocorrelation branch.

    Returns:
        A compiled `tfk.Model` (Adam, MSE loss) taking `[sequence, autocorr]`
        inputs when `autocorr_features` is given, otherwise only the sequence.
    """
    n_steps = input_shape[0]
    use_autocorr = autocorr_features is not None

    # Encoder
    encoder_inputs = tfkl.Input(shape=input_shape)
    encoded = tfkl.LSTM(units=lstm_units_encoder, return_sequences=True)(encoder_inputs)

    # Autocorrelation branch: embed the vector, tile it over time, fuse with the
    # encoder sequence and re-encode the fused sequence
    if use_autocorr:
        autocorr_inputs = tfkl.Input(shape=(autocorr_features,))
        autocorr_embedding = tfkl.Dense(units=dense_units, activation='relu')(autocorr_inputs)
        autocorr_tiled = tfkl.RepeatVector(n_steps)(autocorr_embedding)
        fused = tfkl.Concatenate(axis=-1)([encoded, autocorr_tiled])
        encoded = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(fused)

    # First attention stage over the encoder sequence
    weights_1 = tfkl.Dense(n_steps, activation='softmax')(encoded)
    weights_1 = tfkl.Permute((2, 1))(weights_1)
    context_1 = tfkl.Dot(axes=(2, 1))([weights_1, encoded])

    # Decoder: attention context concatenated with the tiled last encoder step
    tiled_last_step = tfkl.RepeatVector(n_steps)(encoded[:, -1, :])
    decoder_inputs = tfkl.Concatenate(axis=-1)([context_1, tiled_last_step])
    decoded = tfkl.LSTM(units=lstm_units_decoder, return_sequences=True)(decoder_inputs)

    # Second attention stage over the decoder sequence
    weights_2 = tfkl.Dense(n_steps, activation='softmax')(decoded)
    weights_2 = tfkl.Permute((2, 1))(weights_2)
    context_2 = tfkl.Dot(axes=(2, 1))([weights_2, decoded])

    # Dense layer with dropout, then flatten for the output head
    hidden = tfkl.Dense(dense_units, activation='relu')(context_2)
    hidden = tfkl.Dropout(0.1)(hidden)
    flat = tfkl.Flatten()(hidden)

    # Linear output layer with output_shape[0] units
    output = tfkl.Dense(output_shape[0])(flat)
    output = tfkl.Activation('linear')(output)

    model_inputs = [encoder_inputs, autocorr_inputs] if use_autocorr else encoder_inputs
    model = tfk.Model(inputs=model_inputs, outputs=output)

    model.compile(optimizer='adam', loss='mse')

    return model

In [None]:
# Build the attention model with the autocorrelation branch enabled; the branch input
# length is taken from axis 1 of the training autocorrelation features
model = da_rnn_with_autocorr(
    input_shape,
    output_shape,
    lstm_units_encoder=64,
    lstm_units_decoder=64,
    dense_units=64,
    autocorr_features=autocorr_features_train.shape[1],
)
model.summary()
tfk.utils.plot_model(model, expand_nested=True, show_shapes=True)

In [None]:
# Callbacks, all monitoring the validation loss
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
lr_scheduler = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=.1,
    patience=5,
    verbose=1
)
checkpoint = tfk.callbacks.ModelCheckpoint(
    filepath='Models/Model_autocorrelation',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Train on the two inputs (series and autocorrelation features), holding out 10% of
# the training data for validation; only the per-epoch metrics dictionary is kept.
history = model.fit(
    x = [X_train, autocorr_features_train],
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,
    callbacks = [early_stopping, lr_scheduler, checkpoint]
).history

In [None]:
# Index of the epoch with the lowest validation loss
best_epoch = np.argmin(history['val_loss'])

# Training vs. validation loss, with the best epoch marked
loss_fig, loss_ax = plt.subplots(figsize=(17,4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error')
loss_ax.legend()
loss_ax.grid(alpha=.3)
plt.show()

In [None]:
# Reload the model from the path written by the ModelCheckpoint callback during training
model = tfk.models.load_model('Models/Model_autocorrelation')

In [None]:
from sklearn.metrics import r2_score

# Autocorrelation features for the test series, built the same way as the training
# ones. The original cell passed `autocorr_features`, a name that is not defined in
# the visible notebook.
autocorr_features_test = np.expand_dims(autocorrelation(X_test, lag_max), axis = -1)

# Predict the test set using the model
predictions = model.predict([X_test, autocorr_features_test], verbose=0)

# Print the shape of the predictions
print(f"Predictions shape: {predictions.shape}")

# Calculate and print Mean Squared Error (MSE)
mean_squared_error = tfk.metrics.mean_squared_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Squared Error: {mean_squared_error}")

# Calculate and print Mean Absolute Error (MAE)
mean_absolute_error = tfk.metrics.mean_absolute_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Error: {mean_absolute_error}")

# Calculate and print Root Mean Squared Error (RMSE).
# np.sqrt keeps the result a plain NumPy scalar, so it prints as a number like the
# other metrics instead of as a tensor representation.
root_mean_squared_error = np.sqrt(mean_squared_error)
print(f"Root Mean Squared Error: {root_mean_squared_error}")

# Calculate and print Mean Absolute Percentage Error (MAPE)
mean_absolute_percentage_error = tfk.metrics.mean_absolute_percentage_error(y_test.flatten(), predictions.flatten()).numpy()
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error}")

# Calculate and print Coefficient of Determination (R^2)
r2 = r2_score(y_test.flatten(), predictions.flatten())
print(f"Coefficient of Determination (R^2): {r2}")

# Calculate and print Adjusted Coefficient of Determination (Adj. R^2)
# NOTE(review): uses len(y_test) as the sample count and X_test.shape[1] as the number
# of predictors, while R^2 above is computed on the flattened arrays — confirm this choice.
adjusted_r2 = 1 - (1-r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
print(f"Adjusted Coefficient of Determination (Adj. R^2): {adjusted_r2}")