In [68]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib
import os
import datetime
import random as python_random

In [69]:
# Define the file path and read the data
crypto_file_path = 'cryptos/'
btc_usdt = crypto_file_path + 'BTC-USDT.parquet'
btc_df = pd.read_parquet(btc_usdt)

# Drop unnecessary columns
btc_df = btc_df.drop(['quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'], axis=1)
btc_df.columns = ['open_time', 'Open', 'High', 'Low', 'Close', 'Volume', 'close_time', 'MACD', 'MACDSignal', 'MACDHist', 'RSI', 'log_returns']
# Convert timestamps from milliseconds to datetime
def convert_timestamp(ts):
    return datetime.datetime.fromtimestamp(ts / 1000)

btc_df['open_time'] = btc_df['open_time'].apply(convert_timestamp)
btc_df['close_time'] = btc_df['close_time'].apply(convert_timestamp)

# Define your date range
start_date = datetime.datetime.strptime('2023-01-01', '%Y-%m-%d')  # Replace 'YYYY-MM-DD' with your start date
end_date = datetime.datetime.strptime('2023-02-01', '%Y-%m-%d')    # Replace 'YYYY-MM-DD' with your end date

# Filter the DataFrame based on the date range
btc_df = btc_df[(btc_df['open_time'] >= start_date) & (btc_df['open_time'] <= end_date)]

# Print the filtered DataFrame
print(btc_df)

                  open_time      Open      High       Low     Close  \
2817298 2023-01-01 00:00:00  16584.73  16588.32  16584.00  16588.05   
2817299 2023-01-01 00:01:00  16587.63  16595.58  16584.54  16588.35   
2817300 2023-01-01 00:02:00  16588.35  16594.00  16588.35  16592.45   
2817301 2023-01-01 00:03:00  16592.45  16604.11  16591.71  16598.30   
2817302 2023-01-01 00:04:00  16598.30  16606.84  16592.15  16593.40   
...                     ...       ...       ...       ...       ...   
2861934 2023-01-31 23:56:00  22905.02  22910.00  22899.02  22907.75   
2861935 2023-01-31 23:57:00  22910.00  22911.32  22899.49  22901.74   
2861936 2023-01-31 23:58:00  22901.74  22903.02  22890.20  22900.41   
2861937 2023-01-31 23:59:00  22900.66  22901.84  22884.86  22897.54   
2861938 2023-02-01 00:00:00  22897.54  22991.90  22896.21  22975.06   

             Volume              close_time      MACD  MACDSignal  MACDHist  \
2817298    84.19388 2023-01-01 00:00:59.999  3.289800    3.219435  0

In [70]:
scaler = MinMaxScaler(feature_range=(0, 1))

test_df = btc_df

test_df['open_time'] = pd.to_datetime(test_df['open_time'])
test_df.set_index('open_time', inplace=True)

test_df.drop(['close_time', 'log_returns'], axis=1, inplace=True)

test_df_scaled = scaler.fit_transform(test_df[['Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'MACDSignal', 'MACDHist', 'RSI']])


In [71]:

#store close time and open time
close_time = btc_df['close_time'].copy()
open_time = btc_df['open_time'].copy()

btc_df = btc_df.drop(['close_time', 'open_time'], axis = 1)


btc_scaled = scaler.fit_transform(btc_df[['Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'MACDSignal', 'MACDHist', 'RSI']])

print(btc_scaled)


KeyError: 'close_time'

In [None]:
def create_sequences(data, sequence_length, prediction_length):
    xs = []
    ys = []
    for i in range(len(data) - sequence_length - prediction_length + 1):
        x = data[i:(i + sequence_length)]
        y = data[i + sequence_length + prediction_length - 1, 3]  # Assuming 'Close' at index 3
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

sequence_length = 60
X, y = create_sequences(btc_scaled, sequence_length, 5) #past 60 minutes to predict the next 5


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Model Summary
model.summary()

  super().__init__(**kwargs)


In [None]:
model.fit(X,y, epochs=5, batch_size=64)

Epoch 1/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - loss: 0.0152
Epoch 2/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 0.0020
Epoch 3/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 21ms/step - loss: 0.0015
Epoch 4/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 20ms/step - loss: 0.0013
Epoch 5/5
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 20ms/step - loss: 0.0011


<keras.src.callbacks.history.History at 0x168affd2890>

In [47]:
from backtesting import Backtest, Strategy
from backtesting.lib import crossover

class LSTMStrategy(Strategy):
    def init(self):
        self.n_lookahead = 5 #predict next X minutes
        self.data_predicted = self.I(self.predict_next, self.data.Close)

    def predict_next(self, data):
        if len(data) < sequence_length:
            return np.nan
        
        # Extract the last 'sequence_length' data points to form the input sequence
        # Ensure it includes all features
        last_sequence = data[-sequence_length:]  # This should extract the last sequence_length rows
        print("Shape of the last sequence: ", last_sequence.shape)
        # Ensure last_sequence is properly reshaped for scaling
        # It should be a 2D array of shape (sequence_length, number of features)
        last_sequence = last_sequence.reshape(-1, 9)  # Adjust the 9 to match the number of features

        # Scale the sequence
        last_sequence_scaled = scaler.transform(last_sequence)

        # Reshape for the LSTM input (1, sequence_length, number of features)
        last_sequence_scaled = last_sequence_scaled.reshape(1, sequence_length, 9)

        # Predict the next price
        predicted_price = model.predict(last_sequence_scaled)

        # Inverse scale the predicted price if needed
        # Ensure you're inverse scaling the correct shape or value
        predicted_price = scaler.inverse_transform(predicted_price.reshape(-1, 1))[0, 0]

        return predicted_price
    
    def next(self):
        if len(self.data) < sequence_length:
            return
        
        #if predicted is higher than the current close,
        if self.data_predicted[-1] > self.data.Close[-1]:
            self.buy()
        elif self.data_predicted[-1] < self.data.Close[-1]:
            self.sell()

        for trade in self.trades:
            if self.data.index[-1] - trade.entry_time >= pd.Timedelta(minutes=self.n_lookahead):
                trade.close()

bt = Backtest(test_df, LSTMStrategy, commission=.0002, cash=10000, exclusive_orders=True)
stats = bt.run()
bt.plot()
            

Shape of the last sequence:  (60,)


  bt = Backtest(test_df, LSTMStrategy, commission=.0002, cash=10000, exclusive_orders=True)


RuntimeError: Indicator "predict_n…(C)" errored with exception: cannot reshape array of size 60 into shape (9)