4.1 Data Collection and Preprocessing

4.1.1 Loading the Data

In [3]:
#IMPORTING THE PYTHON LIBRARIES, UPLOADING THE REQUIRED DATASET AND PERFORMING BASIC OPERATIONS ON THE DATA SET  
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Specify the file path of the CSV file
file_path = r"C:\Users\user\Desktop\Pro1/BTC-USD.csv"

# Load the dataframe from the CSV file
df = pd.read_csv(file_path, index_col='Date', parse_dates=True)

# Display the loaded dataframe
df



Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,2.105680e+07
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,3.448320e+07
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,3.791970e+07
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,3.686360e+07
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,2.658010e+07
...,...,...,...,...,...,...
2024-07-24,65927.859375,67113.984375,65146.996094,65372.132813,65372.132813,2.747094e+10
2024-07-25,65375.875000,66112.421875,63473.472656,65777.226563,65777.226563,3.831576e+10
2024-07-26,,,,,,
2024-07-27,,,,,,


4.1.2 Data Cleaning

In [21]:
#Remove any missing values
df.dropna(inplace=True)
print(df.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3600 entries, 2014-09-18 to 2024-07-28
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Open        3600 non-null   float64
 1   High        3600 non-null   float64
 2   Low         3600 non-null   float64
 3   Close       3600 non-null   float64
 4   Adj Close   3600 non-null   float64
 5   Volume      3600 non-null   float64
 6   Return      3600 non-null   float64
 7   Log Return  3600 non-null   float64
dtypes: float64(8)
memory usage: 253.1 KB
None


4.1.3. Feature Engineering


In [22]:
#Creating Additional features
df['Return'] = df['Adj Close'].pct_change()
df['Log Return'] = np.log1p(df['Return'])
df.dropna(inplace=True)  # Remove rows with NaN values generated by pct_change and log1p
print(df.head())

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-09-19  424.102997  427.834991  384.532013  394.795990  394.795990   
2014-09-20  394.673004  423.295990  389.882996  408.903992  408.903992   
2014-09-21  408.084991  412.425995  393.181000  398.821014  398.821014   
2014-09-22  399.100006  406.915985  397.130005  402.152008  402.152008   
2014-09-23  402.092010  441.557007  396.196991  435.790985  435.790985   

                Volume    Return  Log Return  
Date                                          
2014-09-19  37919700.0 -0.069843   -0.072402  
2014-09-20  36863600.0  0.035735    0.035111  
2014-09-21  26580100.0 -0.024659   -0.024968  
2014-09-22  24127600.0  0.008352    0.008317  
2014-09-23  45099500.0  0.083647    0.080333  


4.1.4 Normalizing  the data

In [20]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[['Open', 'High', 'Low', 'Close', 'Volume', 'Log Return']])

# Creating a DataFrame with the scaled data
scaled_data_df = pd.DataFrame(scaled_data, index=df.index, columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Log Return'])
print(scaled_data_df.head())


                Open      High       Low     Close    Volume  Log Return
Date                                                                    
2014-09-18  0.003840  0.003333  0.003395  0.003379  0.000081    0.565467
2014-09-19  0.003391  0.002939  0.002993  0.002972  0.000091    0.568717
2014-09-20  0.002987  0.002877  0.003069  0.003166  0.000088    0.724566
2014-09-21  0.003171  0.002729  0.003115  0.003027  0.000059    0.637476
2014-09-22  0.003048  0.002654  0.003170  0.003073  0.000052    0.685726


4.2.Model Development
Developing the LSTM, CNN, and Transformer models using TensorFlow and Keras

4.2.1 Preparing the Data for Model Training

In [23]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length, 3])  # Predicting the 'Close' price
    return np.array(X), np.array(y)

seq_length = 60
X, y = create_sequences(scaled_data_df.values, seq_length)

# Spliting  the data into training and testing sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2832, 60, 6) (708, 60, 6) (2832,) (708,)


4.2.2 LSTM Model 

In [24]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

#Building the Model
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

# Compile the model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history_lstm = lstm_model.fit(X_train, y_train, batch_size=64, epochs=50, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 193ms/step - loss: 0.0182 - val_loss: 0.0019
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 131ms/step - loss: 0.0021 - val_loss: 0.0013
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 161ms/step - loss: 0.0018 - val_loss: 0.0022
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 138ms/step - loss: 0.0016 - val_loss: 0.0010
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 169ms/step - loss: 0.0013 - val_loss: 9.7761e-04
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 152ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 128ms/step - loss: 0.0011 - val_loss: 9.2340e-04
Epoch 8/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 148ms/step - loss: 0.0012 - val_loss: 0.0011
Epoch 9/50
[1m45/45[0m [32m

4.2.3 CNN Model

In [26]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten

# Build the CNN model
cnn_model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(seq_length, X_train.shape[2])),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1)
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history_cnn = cnn_model.fit(X_train, y_train, batch_size=64, epochs=50, validation_data=(X_test, y_test))


Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 0.0235 - val_loss: 0.0044
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0017 - val_loss: 0.0036
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 8.8441e-04 - val_loss: 0.0020
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 8.3372e-04 - val_loss: 0.0027
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 6.9505e-04 - val_loss: 0.0017
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 7.3126e-04 - val_loss: 0.0021
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 4.7692e-04 - val_loss: 0.0014
Epoch 8/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 4.1307e-04 - val_loss: 0.0017
Epoch 9/50
[1m45/45[0m

4.2.4 Transfomer Model

In [34]:
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Transformer model components
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(seq_length, num_features, embed_dim, num_heads, ff_dim, num_blocks):
    inputs = Input(shape=(seq_length, num_features))
    x = Dense(embed_dim)(inputs)
    for _ in range(num_blocks):
        x = TransformerBlock(embed_dim, num_heads, ff_dim)(x, training=True)  # Pass training=True
    x = Flatten()(x)
    x = Dense(50, activation='relu')(x)
    outputs = Dense(1)(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model




In [36]:
# Build the Transformer model
transformer_model = build_transformer_model(seq_length, X_train.shape[2], embed_dim=64, num_heads=4, ff_dim=128, num_blocks=2)
# Compile the model
transformer_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
transformer_model.fit(X_train, y_train, batch_size=64, epochs=50, validation_data=(X_test, y_test))


Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 387ms/step - loss: 5.5864 - val_loss: 0.0054
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 347ms/step - loss: 0.0168 - val_loss: 0.0104
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 343ms/step - loss: 0.0171 - val_loss: 0.0168
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 374ms/step - loss: 0.0142 - val_loss: 0.0279
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 369ms/step - loss: 0.0159 - val_loss: 0.0114
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 411ms/step - loss: 0.0126 - val_loss: 0.0178
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 367ms/step - loss: 0.0132 - val_loss: 0.0060
Epoch 8/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 350ms/step - loss: 0.0093 - val_loss: 0.0031
Epoch 9/50
[1m45/45[0m [32m━━

<keras.src.callbacks.history.History at 0x27d7a0b7fd0>

4.3 Model Evaluation
Evaluating the performance of the models using Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and R-Squared (R²) metrics.

In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluate_model(model, X_test, y_test):
    predictions = model.predict(X_test)
    mae = mean_absolute_error(y_test, predictions)
    rmse = mean_squared_error(y_test, predictions, squared=False)
    r2 = r2_score(y_test, predictions)
    return mae, rmse, r2

# Evaluate LSTM model
lstm_mae, lstm_rmse, lstm_r2 = evaluate_model(lstm_model, X_test, y_test)

# Evaluate CNN model
cnn_mae, cnn_rmse, cnn_r2 = evaluate_model(cnn_model, X_test, y_test)

# Evaluate Transformer model
transformer_mae, transformer_rmse, transformer_r2 = evaluate_model(transformer_model, X_test, y_test)

print(f"LSTM - MAE: {lstm_mae}, RMSE: {lstm_rmse}, R²: {lstm_r2}")
print(f"CNN - MAE: {cnn_mae}, RMSE: {cnn_rmse}, R²: {cnn_r2}")
print(f"Transformer - MAE: {transformer_mae}, RMSE: {transformer_rmse}, R²: {transformer_r2}")


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 82ms/step
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 111ms/step
LSTM - MAE: 0.026940508434325224, RMSE: 0.03472354218625569, R²: 0.9784545552367052
CNN - MAE: 0.015644803785108293, RMSE: 0.022554076760470745, R²: 0.9909101330004121
Transformer - MAE: 0.02732815136333005, RMSE: 0.032084627480957334, R²: 0.981604930924768


4.4 Trading Strategy Development
Based on the predictions from the best-performing model, Developing a trading strategy and evaluate its performance

In [41]:
# Generate trading signals
predictions = transformer_model.predict(X_test)
signals = np.where(predictions > 0, 1, -1)  # Buy if prediction is positive, sell if negative

# Implement a simple trading strategy
initial_balance = 10000  # Starting with $10,000
balance = initial_balance
positions = []
for i in range(len(signals)):
    if signals[i] == 1:
        positions.append(y_test[i])
    elif signals[i] == -1 and positions:
        balance += positions.pop() - y_test[i]

print(f"Final balance: ${balance}")


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 66ms/step
Final balance: $10000


5.4 Trading Strategy Performance

In [42]:
def generate_trading_signals(predictions, actual_prices):
    signals = []
    for i in range(1, len(predictions)):
        if predictions[i] > actual_prices[i-1]:
            signals.append('Buy')
        else:
            signals.append('Sell')
    return signals

def backtest_strategy(predictions, actual_prices, initial_balance=10000):
    balance = initial_balance
    position = 0  # Number of BTC held
    for i in range(1, len(predictions)):
        if predictions[i] > actual_prices[i-1] and position == 0:  # Buy signal
            position = balance / actual_prices[i-1]
            balance = 0
        elif predictions[i] < actual_prices[i-1] and position > 0:  # Sell signal
            balance = position * actual_prices[i-1]
            position = 0
    final_balance = balance + (position * actual_prices[-1])  # Close any open position
    return final_balance

# Generating trading signals using the best-performing model
best_model = cnn_model  #CNN model performed best
predictions = best_model.predict(X_test)
trading_signals = generate_trading_signals(predictions, y_test)

#Backtesting the strategy using the CNN model
initial_balance = 10000
final_balance = backtest_strategy(predictions, y_test, initial_balance)

print(f"Initial Balance: ${initial_balance}")
print(f"Final Balance: ${final_balance}")
print(f"Profit/Loss: ${final_balance - initial_balance}")


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Initial Balance: $10000
Final Balance: $13963.076313200256
Profit/Loss: $3963.0763132002558


5.1.1 LSTM Model Performance

In [43]:
# Evaluate LSTM model
lstm_mae, lstm_rmse, lstm_r2 = evaluate_model(lstm_model, X_test, y_test)

print(f"LSTM Model Performance:")
print(f"Mean Absolute Error (MAE): {lstm_mae}")
print(f"Root Mean Squared Error (RMSE): {lstm_rmse}")
print(f"R-Squared (R²): {lstm_r2}")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
LSTM Model Performance:
Mean Absolute Error (MAE): 0.026940508434325224
Root Mean Squared Error (RMSE): 0.03472354218625569
R-Squared (R²): 0.9784545552367052


5.1.2 CNN Model Performance

In [44]:
# Evaluate CNN model
cnn_mae, cnn_rmse, cnn_r2 = evaluate_model(cnn_model, X_test, y_test)

print(f"CNN Model Performance:")
print(f"Mean Absolute Error (MAE): {cnn_mae}")
print(f"Root Mean Squared Error (RMSE): {cnn_rmse}")
print(f"R-Squared (R²): {cnn_r2}")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
CNN Model Performance:
Mean Absolute Error (MAE): 0.015644803785108293
Root Mean Squared Error (RMSE): 0.022554076760470745
R-Squared (R²): 0.9909101330004121


5.1.3 Transformer Model Performance

In [45]:
# Evaluate Transformer model
transformer_mae, transformer_rmse, transformer_r2 = evaluate_model(transformer_model, X_test, y_test)

print(f"Transformer Model Performance:")
print(f"Mean Absolute Error (MAE): {transformer_mae}")
print(f"Root Mean Squared Error (RMSE): {transformer_rmse}")
print(f"R-Squared (R²): {transformer_r2}")


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step
Transformer Model Performance:
Mean Absolute Error (MAE): 0.02732815136333005
Root Mean Squared Error (RMSE): 0.032084627480957334
R-Squared (R²): 0.981604930924768
