In [None]:
# Msc Computing - University of Sunderland - 2023-2024
# Alexandru Sandor
# bi52eb

# Backtesting Script
# This file contains the code for backtesting the models used in the dissertation thesis

In [None]:
# Numpy library helps with numerical operation
import numpy as np

# Pandas library is used for data manipulation
import pandas as pd

# A very popular library for plotting graphs
import matplotlib.pyplot as plt

# Using the TensorFlow library for machine learning
import tensorflow as tf

# Importing the model-loading function from Keras
from tensorflow.keras.models import load_model

# A sklearn MinMaxScaler for normalization of our dataset
from sklearn.preprocessing import MinMaxScaler

import os
import sys

In [None]:
# This is a set of functions which enable and disable output printing. They helped structure the prints during backtesting (otherwise every step is printed, which was not necessary).

# Bookkeeping for the stdout redirection. `setdefault` keeps any state left over from a
# previous run of this cell, so the enablePrint() call at the bottom can still undo a
# redirection that was active when the cell was re-executed.
globals().setdefault('_saved_stdout', None)    # stream that was active before blocking
globals().setdefault('_devnull_stream', None)  # open os.devnull handle while blocked


# Disable
def blockPrint():
    """Silence print() by pointing sys.stdout at os.devnull.

    The stream that is active at call time is remembered so enablePrint() can put
    exactly that stream back (sys.__stdout__ is not necessarily the stream the
    current front end displays). Calling this while printing is already blocked
    does nothing, so no additional file handle is opened.
    """
    global _saved_stdout, _devnull_stream
    if _devnull_stream is not None:
        return
    _saved_stdout = sys.stdout
    _devnull_stream = open(os.devnull, 'w')
    sys.stdout = _devnull_stream


# Restore
def enablePrint():
    """Undo blockPrint(): restore the remembered stream and close the devnull handle.

    Does nothing when printing is not currently blocked.
    """
    global _saved_stdout, _devnull_stream
    if _devnull_stream is None:
        return
    sys.stdout = _saved_stdout
    _devnull_stream.close()
    _saved_stdout = None
    _devnull_stream = None


# Enable printing
enablePrint()

In [None]:
# Restore the previously trained LSTM network from disk
model = load_model('models/lstm_model_07.h5')

# Read the price history and index it by the parsed 'Date' column
# (the original note states the training and testing scripts do the same)
df = pd.read_csv('data/BTC-USD_1Y_Testing_04.csv')
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

# Closing prices as a single-column 2-D array, the layout the scaler works on
close_prices = df['Close'].to_numpy().reshape(-1, 1)

# Min-max scale the closing prices into [0, 1]; the scaler is fitted on this file's prices
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(close_prices).transform(close_prices)

In [None]:
# Prepare sequences for LSTM model - the exact same func like in the testing script
def create_sequences(data, seq_length):
    """Split a series into sliding input windows and their next-step targets.

    Window ``k`` is ``data[k:k + seq_length]`` and its target is
    ``data[k + seq_length]``.

    Parameters
    ----------
    data : array-like
        Observations ordered along the first axis.
    seq_length : int
        Number of consecutive observations per input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Windows of shape ``(n, seq_length, *data.shape[1:])`` and targets of shape
        ``(n, *data.shape[1:])`` with ``n = max(len(data) - seq_length, 0)``.
        When no full window fits, the arrays are empty but keep these trailing
        dimensions instead of collapsing to shape ``(0,)``.

    Raises
    ------
    ValueError
        If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    values = np.asarray(data)
    n_windows = max(len(values) - seq_length, 0)

    # Row k of the index matrix holds the positions k, k+1, ..., k+seq_length-1
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]

    # Integer-array indexing copies; the targets are copied explicitly so neither
    # result aliases the caller's data
    xs = values[window_index]
    ys = values[seq_length:seq_length + n_windows].copy()
    return xs, ys

# Number of past observations fed to the LSTM per sample
sequence_length = 60

# Build the (window, next value) pairs from the scaled series
X, y = create_sequences(scaled_data, sequence_length)

# Report the dimensions produced by the windowing step
print(f"Shape of X before reshaping: {X.shape}")
print(f"Shape of y: {y.shape}")

# X must be 3-dimensional and its second axis must equal sequence_length;
# only then is it reshaped to (samples, sequence_length, 1). Otherwise a message
# is printed and X is left untouched.
if X.ndim != 3 or X.shape[1] != sequence_length:
    print("Error: X does not have the expected number of dimensions or sequence length.")
else:
    X = X.reshape(X.shape[0], X.shape[1], 1)

# Debugging: report the shape once more after the reshape step
print(f"Shape of X after reshaping: {X.shape}")

# Portfolio state: starting cash, and the cash currently available to trade with
initial_capital = 50000
capital = initial_capital

# Amount of BTC currently held (none at the start) and the price paid at the most
# recent purchase, which the backtesting loop compares against before selling
btc_held = 0
last_bought_price = 0

# Thresholds on the predicted relative price change that separate a strong trend
# from a weak one. Settings noted by the author:
#   Short-Shot: up & down = 0.0015
#   Long-Shot:  up = 0.05, down = 0.005   (the values used here)
upward_threshold, downward_threshold = 0.05, -0.005

# Executed trades, stored by the backtesting loop as (observation index, price) tuples
buy_signals, sell_signals = [], []

# Price at the most recent trade (buy or sell) and price of the very first purchase;
# both are used when the revenues are evaluated
last_action_price = first_purchase_price = 0

# True until the first purchase has been recorded in first_purchase_price
register_first_purchase = True

In [None]:
# The actual backtesting algorithm, which works as follows:
# - Iterate over the scaled data from index `sequence_length` up to (but excluding)
#   len(scaled_data) - (forecast_horizon + 1), advancing `step_size` observations at a time.
# - At each iteration, take the preceding `sequence_length` scaled values as the model input
#   and the inverse-transformed value at the current index as the current price.
# - Predict the next `forecast_horizon` values autoregressively: every prediction is appended
#   to the input window and the oldest value is dropped.
# - Compute the relative difference between the mean predicted price and the current price.
# - If that change is above the upward threshold, convert all capital to BTC (if any is left).
# - If it is below the downward threshold, sell all BTC, but only when BTC is held and the
#   current price is above the last purchase price.
# - Otherwise hold. The decision is printed at every step.
print('Start Backtesting...')

# Number of future values predicted per decision point, and the stride between decision points
forecast_horizon = 60
step_size = 10

# The last forecast_horizon + 1 observations are excluded as starting points
for i in range(sequence_length, len(scaled_data) - (forecast_horizon + 1), step_size):

    # Model input of shape (1, sequence_length, 1): the observations i - sequence_length .. i - 1
    # NOTE(review): the window ends one step before index i, so the first predicted value
    # lines up with index i itself - confirm this offset is intended.
    current_sequence = scaled_data[i - sequence_length:i].reshape(1, sequence_length, 1)

    # Current price in the original scale
    current_price = scaler.inverse_transform(scaled_data[i].reshape(-1, 1))[0, 0]

    # Scaled predictions for the next forecast_horizon steps
    predicted_scaled = []
    for _ in range(forecast_horizon):

        # verbose=0 turns off Keras' per-call progress output, so stdout does not have to be
        # redirected (and cannot be left redirected if predict() raises)
        next_value = float(model.predict(current_sequence, verbose=0)[0, 0])
        predicted_scaled.append(next_value)

        # The prediction is already in scaled units, so it is fed back as-is: drop the oldest
        # value of the window and append the new one
        current_sequence = np.append(current_sequence[:, 1:, :], [[[next_value]]], axis=1)

    # Convert all predictions back to prices in a single call
    future_prices = scaler.inverse_transform(
        np.array(predicted_scaled).reshape(-1, 1)
    ).ravel().tolist()

    # Relative change between the mean predicted price and the current price
    predicted_change_percentage = (np.mean(future_prices) - current_price) / current_price

    # Strong upward trend detected, decide to buy
    if predicted_change_percentage > upward_threshold:
        print('Strong Buy signal with a predicted increase percentage of: ' + str(predicted_change_percentage))
        if capital > 0:

            # Convert the whole capital to BTC
            btc_held += capital / current_price
            capital = 0

            # Record the price and the transaction
            last_bought_price = current_price
            buy_signals.append((i, current_price))
            print('BTC bought at: ' + str(last_bought_price))
            print('BTC_held: ' + str(btc_held))
            last_action_price = current_price

            # If it is the first purchase, record the first purchase price
            if register_first_purchase:
                first_purchase_price = current_price
                register_first_purchase = False

    # Strong downward trend detected, decide to sell
    elif predicted_change_percentage < downward_threshold:
        print('Strong Sell signal with a predicted decrease percentage of: ' + str(predicted_change_percentage))

        # Only sell when BTC is held and the sale is above the last purchase price
        if btc_held > 0 and current_price > last_bought_price:

            # Convert BTC to capital
            capital += btc_held * current_price
            btc_held = 0

            # Record the price and the transaction
            last_bought_price = 0
            sell_signals.append((i, current_price))
            last_action_price = current_price
            print('BTC sold at: ' + str(current_price))
            print('Capital: ' + str(capital))

    # Weak trend detected, hold position
    else:
        print('Weak Sell/Buy signal - Holding')

    print('----------------------------------------')

In [None]:
# Calculate the final portfolio value of the ML based strategy.
# Holdings are valued at the price of the last executed trade (last_action_price),
# not at the last close in the file; final_price is kept for reference only.
final_price = df['Close'].values[-1]
final_portfolio_value = capital + btc_held * last_action_price #final_price

# Calculate the final portfolio value of the Buy&Hold strategy: buy at the strategy's
# first purchase price and value the position at the strategy's last trade price.
if first_purchase_price > 0:
    initial_btc = initial_capital / first_purchase_price
    buy_and_hold_value = initial_btc * last_action_price
else:
    # The strategy never bought, so first_purchase_price is still 0 and the division
    # above would raise ZeroDivisionError. There is no entry price to benchmark against.
    print('No purchase was made during backtesting - Buy&Hold benchmark set to the initial capital.')
    initial_btc = 0
    buy_and_hold_value = initial_capital

# Print revenues
print(first_purchase_price)
print(last_action_price)
print(f"Final portfolio value (trading strategy): ${final_portfolio_value:.2f}")
print(f"Final portfolio value (buy and hold): ${buy_and_hold_value:.2f}")

# Plot the closing prices with the executed trades on top
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df.index, df['Close'], label='Close Prices', color='blue', alpha=0.3)

# Each recorded signal is an (observation index, price) pair; map the index to its date
buy_dates = [df.index[pos] for pos, _ in buy_signals]
buy_prices = [paid for _, paid in buy_signals]
sell_dates = [df.index[pos] for pos, _ in sell_signals]
sell_prices = [received for _, received in sell_signals]

# Mark the buys (green, pointing up) and sells (red, pointing down) to analyze the decisions
ax.scatter(buy_dates, buy_prices, color='green', label='Buy Signals', marker='^', alpha=1)
ax.scatter(sell_dates, sell_prices, color='red', label='Sell Signals', marker='v', alpha=1)

ax.set_title('Long-Shot_Backtesting')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
ax.grid(True)
plt.show()