Testing Script

In [None]:
# Msc Computing - University of Sunderland - 2023-2024
# Alexandru Sandor
# bi52eb

# Testing Script
# This file contains the code for testing the models used in the dissertation thesis

In [None]:
# NumPy library helps with numerical operations
import numpy as np

# Pandas library is used for data manipulation
import pandas as pd

# A very popular library for plotting graphs
import matplotlib.pyplot as plt

# Importing load_model from Keras to restore the saved (already trained) model
from tensorflow.keras.models import load_model

# A sklearn MinMaxScaler for normalization of our dataset
from sklearn.preprocessing import MinMaxScaler

# Importing evaluation metrics from sklearn, these are used to measure the performance of the models
from sklearn.metrics import mean_absolute_error

In [None]:
# Restore the trained LSTM model from its saved file
model_path = 'models/lstm_model_07.h5'
model = load_model(model_path)

In [None]:
# Read the test dataset from its CSV file into a DataFrame
csv_path = 'data/BTC-USD_3M_test_04.csv'
df = pd.read_csv(csv_path)

In [None]:
# Parse the 'Date' column as datetimes and use it as the DataFrame index.
# The guard keeps this cell safe to re-run: once 'Date' has become the index it
# is no longer a column, and indexing df['Date'] again would raise a KeyError.
if 'Date' in df.columns:
    df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

In [None]:
# Hold out the most recent rows so the forecast can be compared with real prices.
# NOTE: this cell rebinds df to a shorter frame, so re-running it on its own trims
# further rows - re-run from the CSV-loading cell instead.
forecast_horizon = 20  # number of held-out rows at the end of the dataset

# One extra leading row is kept in df_tail: it is also the last row that remains
# in df, which anchors the held-out segment to the end of the history.
df_tail = df.tail(forecast_horizon + 1)
df = df.iloc[:-forecast_horizon]

In [None]:
# Pull out the 'Close' prices as a single-column 2-D array of shape (n_rows, 1)
close_prices = np.reshape(df['Close'].values, (-1, 1))

In [None]:
# Scale the closing prices into the [0, 1] range.
# NOTE(review): the scaler is fitted on this test data; confirm that this is
# compatible with the scaling the model saw in the training script.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_prices)
scaled_data = scaler.transform(close_prices)

In [None]:
# Prepare sequences for the LSTM model - the same windowing as in the Training script
def create_sequences(data, seq_length):
    """Split a series into sliding input windows and their next-step targets.

    Parameters:
        data: sliceable sequence of values (e.g. a NumPy array of scaled prices).
        seq_length: number of consecutive elements in every input window.

    Returns:
        A tuple (xs, ys) of NumPy arrays:
        xs - the input windows, each of length seq_length;
        ys - for every window, the element of data that directly follows it.
        Both are empty when data has no more than seq_length elements.
    """
    window_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(window_count)]
    targets = [data[start + seq_length] for start in range(window_count)]
    return np.array(windows), np.array(targets)

In [None]:
# Window size: how many consecutive time steps the LSTM receives per sample
sequence_length = 60

# Build the sliding-window inputs (X) and their next-step targets (y)
X, y = create_sequences(scaled_data, sequence_length)

# Shape the inputs as (samples, time steps, features) for the LSTM.
# E.g. (100, 10, 1) basically means 100 sequences, each of length 10, with 1 feature per time step.
X = X.reshape((X.shape[0], X.shape[1], 1))

In [None]:
# Forecast the held-out period one step at a time, feeding each prediction back in.

# One forecast per held-out row; df_tail's first row is also the last row of df,
# so it is not counted.
n_future = len(df_tail) - 1

# Seed with the most recent `sequence_length` known values. X[-1] must not be used
# here: create_sequences pairs X[-1] with the final known value as its target, so
# that window stops one step early and the first forecast would merely re-predict
# the last known close, shifting every forecast one step back relative to the
# held-out prices it is later compared with.
current_sequence = scaled_data[-sequence_length:].reshape(sequence_length, 1)

predictions = []
for _ in range(n_future):
    model_input = current_sequence.reshape(1, sequence_length, 1)
    # verbose=0 keeps the per-call progress bar out of the cell output
    next_value = model.predict(model_input, verbose=0)[0, 0]
    predictions.append(next_value)

    # Slide the window: drop the oldest value and append the new prediction
    current_sequence = np.append(current_sequence[1:], [[next_value]], axis=0)

# Inverse transform the scaled predictions to get actual prices
predicted_prices = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

In [None]:
# Calculate the MAE between the held-out prices and the forecast

# df_tail's first row is skipped so the actual prices line up with the predictions
held_out_close = df_tail['Close'].values
actual_prices = held_out_close[1:]
mae = mean_absolute_error(y_true=actual_prices, y_pred=predicted_prices)
print(f"Mean Absolute Error (MAE): {mae:.2f}")

In [None]:
# Calculate and compare the overall trend of the real and the predicted prices

def _trend_direction(change):
    """Classify a net price change as "upward", "downward" or "no change"."""
    if change > 0:
        return "upward"
    if change < 0:
        return "downward"
    return "no change"


# Net change between the first and the last point of each series.
# predicted_prices is a column vector, so it is flattened first; otherwise the
# trend would be a one-element array and print as "[value]" instead of a number.
predicted_flat = np.ravel(predicted_prices)
real_trend = actual_prices[-1] - actual_prices[0]
predicted_trend = predicted_flat[-1] - predicted_flat[0]

# Compute trend direction
real_trend_direction = _trend_direction(real_trend)
predicted_trend_direction = _trend_direction(predicted_trend)

print(f'Real trend value: {real_trend}')
print(f'Real trend direction: {real_trend_direction}')
print(f'Predicted trend value: {predicted_trend}')
print(f'Predicted trend direction: {predicted_trend_direction}')

In [None]:
# Plot the historical prices, the held-out actual prices and the forecast
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df.index, df['Close'], label='Historical Close Prices', color='blue')

# Held-out prices: df_tail's first row duplicates the last historical row, so skip it
held_out = df_tail['Close'].iloc[1:]
ax.plot(held_out.index, held_out, label='Actual Close Prices', color='green')

# Draw the forecast on the same dates as the held-out prices it is compared with,
# so nothing has to be hard-coded when the number of predicted units changes
ax.plot(held_out.index, np.ravel(predicted_prices), label='Predicted Close Prices', linestyle='--', color='red')

ax.set_title('Model_07_Testing_Dataset_04')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
ax.grid(True)
plt.show()