Training Script

In [None]:
# Msc Computing - University of Sunderland - 2023-2024
# Alexandru Sandor
# bi52eb

# Training Script
# This file contains the code for training the models used in the dissertation thesis

In [None]:
# Numpy library helps with numerical operation
import numpy as np

# Pandas library is used for data manipulation
import pandas as pd

# A sklearn MinMaxScaler for normalization of our dataset
from sklearn.preprocessing import MinMaxScaler

# A very popular library for plotting graphs
import matplotlib.pyplot as plt

# Using the TensorFlow library for machine learning
import tensorflow as tf

# The L2 regularizer helps to avoid overfitting
from tensorflow.keras.regularizers import l2

# Importing Sequential model from Keras
from tensorflow.keras.models import Sequential

# Importing the LSTM, Dense, Dropout, Conv1D, MaxPooling1D and Flatten layers from Keras
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten

# Importing evaluation metrics from sklearn, these are used to measure the performance of the models
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

In [None]:
# Training Datasets Explained
# Short Shot
# -------- Spread: 10 Days
# --- Granularity: 1 Minute
# -- Observations: 15831
#
# Long Shot
# -------- Spread: 1 Year
# --- Granularity: 1 Day
# -- Observations: 368

In [None]:
# Load the dataset (CSV format). The study uses two different datasets; the one
# used for this run is selected through file_path.
# NOTE(review): an earlier version of this note named the files BTC-USD_6M and
# BTC-USD_24H, which does not match the file name below - confirm which naming is current.

# Dataset file to train on
file_path = 'BTC-USD_10D_Training.csv'

# Parse the CSV file into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Convert the 'Date' column (strings in the raw CSV) to datetime and use it as the index.
#
# The guard makes this cell safe to re-run: once 'Date' has become the index it is no
# longer a column, and an unconditional df['Date'] lookup would raise KeyError.
# A new DataFrame is bound to df instead of mutating the loaded one in place.
if 'Date' in df.columns:
    df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

In [None]:
# Pull the 'Close' column out of the DataFrame as a plain numpy array
close_prices = df['Close'].to_numpy()

In [None]:
# Normalising the 'Close' prices into the [0, 1] range

# MinMaxScaler works on 2-D input, so the 1-D price array is reshaped into a single column.
# NOTE(review): the scaler is fitted on the whole series, which includes the rows that
# later end up in the test split - consider fitting on the training portion only.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(close_prices.reshape(-1, 1))

# Apply the fitted scaling to the 'Close' prices
scaled_close_prices = scaler.transform(close_prices.reshape(-1, 1))

In [None]:
# Builds the sliding-window sequences (and their labels) used to train the LSTM model
def create_sequences(data, seq_length):
    """Slice `data` into overlapping windows and the value that follows each window.

    Args:
        data: indexable, sliceable series of observations (list or numpy array).
        seq_length: number of consecutive observations in each window.

    Returns:
        Tuple (sequences, labels) of numpy arrays. sequences[k] is
        data[k:k + seq_length] and labels[k] is data[k + seq_length].
        Both arrays are empty when data has no more than seq_length items.
    """
    # One window per start position that still leaves a following element as label
    window_count = len(data) - seq_length

    windows = [data[start:start + seq_length] for start in range(window_count)]
    targets = [data[start + seq_length] for start in range(window_count)]

    return np.array(windows), np.array(targets)

In [None]:
# Window length: the model is given the previous 60 observations to predict the next one
seq_length = 60

# Build the input windows (X) and the value following each window (y)
X, y = create_sequences(data=scaled_close_prices, seq_length=seq_length)

In [None]:
# Splitting the data into training and testing sets (chronological, no shuffling)

# Fraction of the sequences used for training (80%)
split_ratio = 0.8

# Number of sequences in the training set
train_size = int(split_ratio * len(X))

# First train_size sequences and labels form the training set
X_train, y_train = X[:train_size], y[:train_size]

# The remaining sequences and labels form the test set
X_test, y_test = X[train_size:], y[train_size:]

In [None]:
# Model Architectural Components
#
# Sequential Model - This means that the model is built using stack of layers in a sequential manner
#
# LSTM Layer - RNN type used for sequential predictions
# # Arguments:
# # # No. of neurons in the layer, which will determine the dimension of the output
# # # return_sequences - if set to True, the layer returns the full sequence of outputs (one per time step) for each input sequence; this is required only when the next layer expects a sequence as input
# # # input_shape=(seq_length, 1) - shape of the input data ( seq_length - length of the input sequence,  1 - there is just one feature in each time step - in this case, the 'Close' price )
#
# Dropout Layer - a method for preventing overfitting by randomly setting a fraction of the inputs to zero at each update during training
#
# Dense - fully connected layer where each input is connected to each output with a learnable weight
#
# Final Dense Layer - this  layer with 1 unit is producing the final prediction ( for regression tasks, such as predicting the next 'Close' price, this layer outputs a single continuous value )

LSTM - Model Config_01



In [None]:
# # Model Arch 01
# model = Sequential([
#     LSTM(50, return_sequences=False, input_shape=(seq_length, 1)),
#     # LSTM layer with 50 units; return_sequences=False, so only the output of the last time step is passed on
#     # Input shape is (seq_length, 1) where seq_length is the length of each sequence and 1 is the number of features (in this case, only 'Close' price)
#     Dense(1)
# ])

LSTM - Model Config_02

In [None]:
# # Model Arch 02
# model = Sequential([
#     LSTM(20, return_sequences=True, input_shape=(seq_length, 1)),
#     # LSTM layer with 20 units; return_sequences=True, so it outputs the full sequence for the next LSTM layer
#     # Input shape is (seq_length, 1) where seq_length is the length of each sequence and 1 is the number of features (in this case, only 'Close' price)

#     Dropout(0.4),
#     # Dropout layer to prevent overfitting by randomly setting 40% of input units to 0 at each update during training

#     LSTM(20, return_sequences=False),
#     # LSTM layer with 20 units, does not return sequences (return_sequences=False), used for final sequence processing

#     Dropout(0.4),
#     # Dropout layer to prevent overfitting

#     Dense(25),
#     # Fully connected Dense layer with 25 units

#     Dense(1)
#     # Output layer with 1 unit for regression task (predicting the next 'Close' price)
# ])

LSTM - Model Config_03 -




In [None]:
# # Model Arch 03
# model = Sequential([
#     LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
#     # LSTM layer with 50 units; return_sequences=True, so it outputs the full sequence for the next LSTM layer
#     # Input shape is (seq_length, 1) where seq_length is the length of each sequence and 1 is the number of features (in this case, only 'Close' price)

#     Dropout(0.2),
#     # Dropout layer to prevent overfitting by randomly setting 20% of input units to 0 at each update during training

#     LSTM(50, return_sequences=False),
#     # LSTM layer with 50 units, does not return sequences (return_sequences=False), used for final sequence processing

#     Dropout(0.2),
#     # Dropout layer to prevent overfitting

#     Dense(25),
#     # Fully connected Dense layer with 25 units

#     Dense(1)
#     # Output layer with 1 unit for regression task (predicting the next 'Close' price)
# ])

LSTM - Model Config_04

In [None]:
# Model Arch 04 (Conv1D + stacked LSTM)
#
# This is the configuration left enabled: every model definition in this notebook was
# commented out, so `model` was undefined for the summary/compile/fit cells below and a
# fresh "Restart & Run All" failed with NameError. Config 4 is chosen because the plot
# title further down reads 'LSTM - Config 4'. To train another configuration, comment
# this cell out again and uncomment exactly one of the other "Model Arch" cells.
model = Sequential([
    # 1-D convolution over the time axis: 64 filters, kernel size 3, ReLU activation.
    # Input shape is (seq_length, 1): seq_length time steps with one feature ('Close').
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(seq_length, 1)),

    # Max pooling with a window of 2 time steps
    MaxPooling1D(pool_size=2),

    # First LSTM layer, 50 units, returns the full sequence for the next LSTM layer;
    # L2 penalty (0.001) on the kernel weights
    LSTM(50, return_sequences=True, kernel_regularizer=l2(0.001)),

    # Dropout of 20% of the units to reduce overfitting
    Dropout(0.2),

    # Second LSTM layer, 50 units, returns only the last output
    LSTM(50, return_sequences=False, kernel_regularizer=l2(0.001)),

    # Dropout of 20% of the units to reduce overfitting
    Dropout(0.2),

    # Fully connected layer with 25 units and L2 penalty (0.001)
    Dense(25, kernel_regularizer=l2(0.001)),

    # Output layer: a single value (the next normalised 'Close' price)
    Dense(1)
])

LSTM - Model Config_05 -

In [None]:
# # Model Arch 05
# model = Sequential([
#     LSTM(20, return_sequences=False, input_shape=(seq_length, 1), kernel_regularizer=l2(0.01)),
#     Dropout(0.4),
#     Dense(1)
# ])

LSTM - Model Config_06 -

In [None]:
# # Model Arch 06
# model = Sequential([
#     LSTM(75, return_sequences=True, input_shape=(seq_length, 1), kernel_regularizer=l2(0.1)),
#     # LSTM layer with 75 units; return_sequences=True, so it outputs the full sequence (one output per time step)
#     # Input shape is (seq_length, 1) where seq_length is the length of each sequence and 1 is the number of features (in this case, only 'Close' price)

#     Dropout(0.5),
#     # Dropout layer to prevent overfitting by randomly setting 50% of input units to 0 at each update during training

#     # LSTM(75, return_sequences=False, kernel_regularizer=l2(0.1)),
#     # # LSTM layer with 75 units, does not return sequences (return_sequences=False), used for final sequence processing

#     # Dropout(0.3),
#     # # Dropout layer to prevent overfitting

#     Dense(25),
#     # Fully connected Dense layer with 25 units

#     Dense(1)
#     # Output layer with 1 unit for regression task (predicting the next 'Close' price)
# ])

In [None]:
# Print the structure of the model.
# `model` must have been created by running one of the "Model Arch" cells above.
# model.summary() prints the table itself and returns None, so it is called directly:
# wrapping it in print() would add a stray "None" line after the summary.
model.summary()

In [None]:
# Compile the model with the Adam optimizer (learning rate 0.0001) and
# Mean Squared Error as the loss function.

# Mean Squared Error (MSE):
# # the average of the squared differences between predicted and actual target values
# # the lower the MSE, the closer the model's predictions are to the actual values

model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
)

In [None]:
# Train the model:
# # epochs     - number of full passes over the training data (5)
# # batch_size - number of samples per gradient update (32)
# The test split is passed as validation data, so its loss is reported after every
# epoch for monitoring.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# Evaluate the trained model (loss = MSE on the normalised values)

# Loss on the training split
train_loss = model.evaluate(x=X_train, y=y_train, verbose=0)

# Loss on the test split
test_loss = model.evaluate(x=X_test, y=y_test, verbose=0)

# Report both losses side by side
print(f'Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


In [None]:
# Making predictions with the trained model

# Raw model outputs on the training data (still in the normalised 0-1 space)
acc_train_pred = model.predict(X_train)

# Raw model outputs on the test data (still in the normalised 0-1 space)
acc_test_pred = model.predict(X_test)

# Map the predictions back to actual 'Close' prices
train_predictions = scaler.inverse_transform(acc_train_pred)
test_predictions = scaler.inverse_transform(acc_test_pred)

# Map the labels back to actual 'Close' prices.
# Despite the "_scaled" suffix, these arrays hold the inverse-transformed values,
# i.e. prices in the original units.
y_train_scaled = scaler.inverse_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Evaluation metrics

# Mean Absolute Error between the actual and the predicted test prices
# (both inverse-transformed, so the error is in price units)
mae = mean_absolute_error(y_true=y_test_scaled, y_pred=test_predictions)

# Report the Mean Absolute Error
print(f'Mean Absolute Error: {mae:.4f}')

# Function for computing the accuracy at a given tolerance level (e.g. 5%),
# where y_true are the actual values and y_pred are the predicted values
def calculate_accuracy(y_true, y_pred, tolerance):
    """Return the percentage of predictions within `tolerance` percent of the actual value.

    A prediction counts as accurate when
    |y_true - y_pred| / |y_true| * 100 <= tolerance.

    Args:
        y_true: actual values (array-like). Flattened to 1-D before comparing.
        y_pred: predicted values (array-like) with the same number of elements.
        tolerance: maximum allowed absolute percentage error, in percent.

    Returns:
        Share of accurate predictions as a percentage between 0 and 100,
        or NaN when there are no samples.

    Raises:
        ValueError: if y_true and y_pred hold a different number of elements.
    """
    # Flatten both inputs so that an (n,) array and an (n, 1) array are compared
    # element by element instead of being broadcast into an (n, n) matrix.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size != predicted.size:
        raise ValueError(
            f'y_true and y_pred must have the same number of elements '
            f'({actual.size} != {predicted.size})'
        )

    # No samples: the accuracy is undefined
    if actual.size == 0:
        return float('nan')

    # Division-free form of the percentage-error test. It avoids divide-by-zero
    # warnings when an actual value is 0 (as happens for the minimum of a
    # min-max normalised series); such a sample is accurate only if predicted exactly.
    within_tolerance = np.abs(actual - predicted) * 100 <= tolerance * np.abs(actual)

    # Accuracy is the percentage of predictions within the tolerance
    accuracy = np.mean(within_tolerance) * 100

    return accuracy

# The tolerance is evaluated on actual 'Close' prices (the inverse-transformed labels
# and predictions), not on the 0-1 normalised values. A percentage error in normalised
# space depends on the min/max of the dataset rather than on the price itself, and is
# undefined where the normalised actual value is 0 (the series minimum).
accuracy_test = calculate_accuracy(y_test_scaled, test_predictions, 5)
print(f'Accuracy on test within 5% tolerance: {accuracy_test:.2f}%')

accuracy_train = calculate_accuracy(y_train_scaled, train_predictions, 5)
print(f'Accuracy on train within 5% tolerance: {accuracy_train:.2f}%')


In [None]:
# Preparing data for plotting

# The first seq_length rows have no prediction (they only feed the first window),
# so the rows behind the test predictions start at train_size + seq_length.
split_row = train_size + seq_length

# Selecting training data for plotting
train = df.iloc[:split_row]

# Selecting validation (test) data for plotting.
# An explicit copy is taken so that adding a column below modifies an independent
# DataFrame instead of a slice of df (avoids pandas' SettingWithCopyWarning).
valid = df.iloc[split_row:].copy()

# Adding predicted 'Close' prices to the validation DataFrame.
# The predictions come as an (n, 1) array; flatten to 1-D for a single column.
valid['Predictions'] = np.ravel(test_predictions)

# Indices for training predictions
train_indices = df.index[seq_length:split_row]

# Indices for validation predictions
valid_indices = df.index[split_row:]

In [None]:
# Plot the actual prices together with the training and test predictions
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('LSTM - Config 4')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price USD ($)')

# Actual 'Close' prices over the whole period
ax.plot(df['Close'], label='Actual Prices')

# Predictions on the training split (dashed)
ax.plot(train_indices, train_predictions, label='Train Predictions', linestyle='--')

# Predictions on the test split (dashed)
ax.plot(valid_indices, valid['Predictions'], label='Test Predictions', linestyle='--')

# Legend in the lower right corner
ax.legend(['Actual Prices', 'Train Predictions', 'Test Predictions'], loc='lower right')

# Render the figure
plt.show()

In [None]:
# Persist the trained model to disk (HDF5 file, as indicated by the .h5 extension)
model.save(filepath='lstm_model.h5')