In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib
import os
import datetime
import random as python_random

In [2]:
# Define the file path and read the data
crypto_file_path = 'cryptos/'
btc_usdt = crypto_file_path + 'BTC-USDT.parquet'
btc_df = pd.read_parquet(btc_usdt)

# Drop unnecessary columns
btc_df = btc_df.drop(['quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore', 'close_time'], axis=1)
btc_df.columns = ['open_time', 'Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'MACDSignal', 'MACDHist', 'RSI', 'log_returns']

# Convert timestamps from milliseconds to datetime
def convert_timestamp(ts):
    return datetime.datetime.fromtimestamp(ts / 1000)

btc_df['open_time'] = btc_df['open_time'].apply(convert_timestamp)
btc_df.set_index('open_time', inplace=True)

# Print the filtered DataFrame
print(btc_df)

                         Open      High       Low     Close     Volume  \
open_time                                                                
2017-08-17 13:31:00   4261.48   4261.48   4261.48   4261.48   0.000000   
2017-08-17 13:32:00   4280.56   4280.56   4280.56   4280.56   0.261074   
2017-08-17 13:33:00   4261.48   4261.48   4261.48   4261.48   0.012008   
2017-08-17 13:34:00   4261.48   4261.48   4261.48   4261.48   0.140796   
2017-08-17 13:35:00   4261.48   4261.48   4261.48   4261.48   0.000000   
...                       ...       ...       ...       ...        ...   
2024-03-27 05:37:00  69830.00  69860.90  69673.14  69673.15  62.333630   
2024-03-27 05:38:00  69673.15  69830.00  69673.14  69829.99  25.101190   
2024-03-27 05:39:00  69829.99  69859.99  69760.00  69779.99  21.345860   
2024-03-27 05:40:00  69780.00  69783.99  69728.00  69747.98  25.073170   
2024-03-27 05:41:00  69747.99  69866.55  69747.98  69862.78  39.976380   

                           MACD  MACD

In [3]:

scaler = MinMaxScaler(feature_range=(0,1))

btc_scaled = scaler.fit_transform(btc_df[['Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'MACDSignal', 'MACDHist', 'RSI', 'log_returns']])

#create test
print(btc_scaled)


[[0.02017716 0.02017675 0.02038337 ...        nan        nan 0.50954237]
 [0.0204461  0.02044568 0.02065261 ...        nan        nan 0.53985015]
 [0.02017716 0.02017675 0.02038337 ...        nan        nan 0.47923458]
 ...
 [0.94438608 0.94478963 0.9446472  ... 0.44801263 0.32614727 0.50468288]
 [0.94368146 0.94371841 0.94419564 ... 0.45063162 0.31324815 0.50642949]
 [0.94323027 0.9448821  0.94447758 ... 0.46219216 0.40430062 0.52069968]]


In [4]:
# Filter the DataFrame based on the date range for training dataset
train_start_date = datetime.datetime.strptime('2023-01-01', '%Y-%m-%d') 
train_end_date = datetime.datetime.strptime('2023-02-01', '%Y-%m-%d')  

train_df = btc_df[(btc_df.index >= train_start_date) & (btc_df.index <= train_end_date)]

#create a 'Close' scalar too
close_np = train_df['Close'].values
close_scaler = MinMaxScaler(feature_range=(0, 1))
close_prices_scaled = close_scaler.fit_transform(close_np.reshape(-1, 1))



# Filter the DataFrame based on the date range for testing dataset

test_start_date = datetime.datetime.strptime('2023-02-01', '%Y-%m-%d') 
test_end_date = datetime.datetime.strptime('2023-03-01', '%Y-%m-%d')  

test_df = btc_df[(btc_df.index >= test_start_date) & (btc_df.index <= test_end_date)]

#print(train_df.head())

#print(test_df.head())

In [5]:
def create_sequences(data, sequence_length, prediction_length):
    xs, ys = [], []
    for i in range(len(data) - sequence_length - prediction_length + 1):
        x = data.iloc[i:(i + sequence_length)].values  # Assuming multiple features
        y = data.iloc[i + sequence_length + prediction_length - 1]['Close']  # Target feature
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

sequence_length = 60
X, y = create_sequences(train_df, sequence_length, 5) #past 60 minutes to predict the next 5

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
print(X.shape[1], X.shape[2])

lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Model Summary
lstm_model.summary()

60 10


  super().__init__(**kwargs)


In [7]:
lstm_model.fit(X,y, epochs=5, batch_size=64)

Epoch 1/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - loss: 407333504.0000
Epoch 2/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 24ms/step - loss: 406630400.0000
Epoch 3/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 24ms/step - loss: 405194368.0000
Epoch 4/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 403869184.0000
Epoch 5/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 401374048.0000


<keras.src.callbacks.history.History at 0x161000be320>

In [8]:
from backtesting import Backtest, Strategy
from backtesting.lib import crossover

N_TRAIN = 60

N_TRAIN = 60

class LSTMStrategy(Strategy):
    def init(self):
        self.n_lookahead = 5  # minutes
        self.sequence_length = 60
        self.prediction_length = 5
        self.train_size = 300  # Adjust based on your dataset size

        print("Initializing and training the LSTM model...")
        # Load or define your model here
        self.model = lstm_model

        # Train the model with initial data
        df = self.data.df.iloc[:self.train_size]
        X, y = create_sequences(df, self.sequence_length, self.prediction_length)
        self.model.fit(X, np.array(y), epochs=5, batch_size=64)
        print("Model training completed.")

    def predict_next(self):
        print("Preparing data for prediction...")
        if len(self.data) < self.sequence_length + self.prediction_length:
            print("Not enough data to predict.")
            return np.nan
        
        df = self.data.df.iloc[-(self.sequence_length + self.prediction_length):]
        X, _ = create_sequences(df, self.sequence_length, self.prediction_length)
        predicted_price_scaled = self.model.predict(X[-1].reshape(1, self.sequence_length, -1))
        print(f"Predicted scaled price: {predicted_price_scaled.flatten()[0]}")
        return predicted_price_scaled.flatten()[0]

    def next(self):
        predicted_price = self.predict_next()
        current_price = self.data.Close[-1]
        current_index = self.data.index
        print(f"Current close price: {current_price} at index {current_index}, Predicted price: {predicted_price}")

        if not self.position:
            if predicted_price > current_price:
                print("Predicted price is higher than current. Buying...")
                self.buy()
            elif predicted_price < current_price:
                print("Predicted price is lower than current. Selling...")
                self.sell()
        else:
            # Check if it's time to exit
            if self.position.is_long and predicted_price < current_price:
                print("Closing long position...")
                self.position.close()
            elif self.position.is_short and predicted_price > current_price:
                print("Closing short position...")
                self.position.close()

# Test the strategy with your data
bt = Backtest(test_df, LSTMStrategy, commission=.0002, cash=100000000, exclusive_orders=True)
stats = bt.run()
print(stats)
bt.plot()
            

  from .autonotebook import tqdm as notebook_tqdm


Initializing and training the LSTM model...
Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 526412000.0000
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 526304832.0000
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 526386208.0000
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 526453600.0000
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 526406784.0000
Model training completed.
Preparing data for prediction...
Not enough data to predict.
Current close price: 22961.68, Predicted price: nan
Preparing data for prediction...
Not enough data to predict.
Current close price: 22993.27, Predicted price: nan
Preparing data for prediction...
Not enough data to predict.
Current close price: 22952.98, Predicted price: nan
Preparing data for prediction...
Not enough data to predict.
Curre

KeyboardInterrupt: 