In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.callbacks import EarlyStopping

2024-03-15 13:47:07.478811: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-15 13:47:07.480858: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-15 13:47:07.501880: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-15 13:47:07.501892: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-15 13:47:07.502444: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
def calculate_moving_averages(df, window_sizes):
    for window_size in window_sizes:
        column_name = f'MA_{window_size}'
        df[column_name] = df['close'].rolling(window=window_size).mean()
    return df

In [3]:
df = pd.read_csv('bnb_historical_data.csv')

In [4]:
window_sizes = [7, 25, 99, 150]
df = calculate_moving_averages(df, window_sizes)

In [5]:
features = df[['MA_7', 'MA_25', 'MA_99', 'MA_150' ,'close']].dropna()
target = features['close'].values.reshape(-1, 1)
features = features.values

# Normalize features using Min-Max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
features_scaled = scaler.fit_transform(features)

In [6]:
features_scaled

array([[0.28770732, 0.28649775, 0.28619765, 0.28658882, 0.29124236],
       [0.28770732, 0.28647033, 0.28618375, 0.28655662, 0.29124236],
       [0.28770732, 0.28644291, 0.28616986, 0.28652442, 0.29124236],
       ...,
       [0.72946341, 0.73064056, 0.72242448, 0.72208092, 0.73183978],
       [0.72936585, 0.73061314, 0.72259817, 0.72211772, 0.73183978],
       [0.72995122, 0.73050346, 0.72277881, 0.72215451, 0.73251867]])

In [7]:
def create_sequences(data, target, sequence_length):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i + sequence_length]
        label = target[i + sequence_length]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

In [8]:
sequence_length = 20  # Adjust as needed
X, y = create_sequences(features_scaled, target, sequence_length)

In [9]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
model = Sequential()
model.add(LSTM(units=100, activation='relu', input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Second LSTM layer
model.add(LSTM(units=150, activation='relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Third LSTM layer
model.add(LSTM(units=200, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Dense layer
model.add(Dense(units=1, activation='linear'))

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [11]:
model.fit(X, y, epochs=50, batch_size=32, validation_split=0.1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f9889fa08e0>

In [12]:
model.save("trained_model.keras")

In [13]:
df_24 = pd.read_csv('bnb_historical_data_24.csv')

combined_data = pd.concat([df, df_24], axis=0, ignore_index=True)
combined_data = combined_data.drop_duplicates(subset='timestamp')

In [14]:
window_sizes = [7, 25, 99, 150]
combined_data = calculate_moving_averages(combined_data, window_sizes)

In [15]:
features_24 = combined_data[['MA_7', 'MA_25', 'MA_99', 'MA_150' ,'close']].dropna()
target_24 = features_24['close'].values.reshape(-1, 1)
ploting = combined_data.dropna()
features_24 = features_24.values

features_scaled_24 = scaler.fit_transform(features_24)

In [16]:
sequence_length = 20  # Adjust as needed
X_24, y_24 = create_sequences(features_scaled_24, target_24, sequence_length)

In [17]:
loss_24 = model.evaluate(X_24, y_24)
print(f'Mean Squared Error on Test Set: {loss_24}')

Mean Squared Error on Test Set: nan


In [18]:
predictions = model.predict(X_24)
predictions_df = pd.DataFrame(predictions, columns=['predictions'], index=ploting.index[20:])
ploting['predictions'] = predictions_df['predictions']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ploting['predictions'] = predictions_df['predictions']


In [19]:
ploting['timestamp'] = pd.to_datetime(ploting['timestamp']) 
desired_months = [1, 2, 3]  # January, February, and March
filtered_df = ploting[ploting['timestamp'].dt.month.isin(desired_months)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ploting['timestamp'] = pd.to_datetime(ploting['timestamp'])


In [20]:
filtered_df.head(100)

Unnamed: 0,index_get,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,MA_7,MA_25,MA_99,MA_150,predictions
149,149,2023-01-01 02:29:00,245.6,245.7,245.6,245.6,14.667,1672540199999,3602.4533,26,2.381,585.0117,0,245.628571,245.640,245.746465,245.900667,
150,150,2023-01-01 02:30:00,245.7,245.7,245.6,245.6,36.024,1672540259999,8850.2972,46,28.028,6886.4796,0,245.628571,245.636,245.744444,245.896000,
151,151,2023-01-01 02:31:00,245.7,245.7,245.5,245.6,131.485,1672540319999,32290.1783,93,47.906,11765.8213,0,245.628571,245.632,245.742424,245.891333,
152,152,2023-01-01 02:32:00,245.6,245.7,245.5,245.6,68.648,1672540379999,16859.3951,67,60.975,14975.6186,0,245.628571,245.632,245.739394,245.887333,
153,153,2023-01-01 02:33:00,245.7,245.8,245.6,245.8,149.291,1672540439999,36687.2963,64,149.230,36672.3147,0,245.657143,245.636,245.737374,245.886000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,244,2023-01-01 04:04:00,245.2,245.3,245.1,245.1,77.485,1672545899999,18995.4326,68,23.791,5833.5976,0,245.257143,245.272,245.372727,245.476667,
245,245,2023-01-01 04:05:00,245.1,245.2,245.1,245.1,109.263,1672545959999,26780.5882,51,2.269,556.3588,0,245.228571,245.268,245.367677,245.473333,
246,246,2023-01-01 04:06:00,245.1,245.2,245.1,245.2,25.128,1672546019999,6159.7923,34,9.195,2254.6140,0,245.214286,245.268,245.362626,245.470000,
247,247,2023-01-01 04:07:00,245.2,245.2,245.0,245.0,154.438,1672546079999,37845.6339,65,18.458,4524.7649,0,245.171429,245.256,245.355556,245.466667,


In [21]:
import plotly.graph_objects as go

candlestick_trace  = go.Candlestick(x=filtered_df['timestamp'],
                open=filtered_df['open'],
                high=filtered_df['high'],
                low=filtered_df['low'],
                close=filtered_df['close'])

symbol = 'BNBUSDT'

trace_ma_7 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_7'], mode='lines', name='MA(7)', line=dict(color='green', dash='dash'))
trace_ma_25 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_25'], mode='lines', name='MA(25)', line=dict(color='red', dash='dash'))
trace_ma_99 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_99'], mode='lines', name='MA(99)', line=dict(color='purple', dash='dash'))

pred = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['predictions'], mode='lines', name='predicted-price', line=dict(color='blue'))



fig = go.Figure(data=[candlestick_trace, trace_ma_7, trace_ma_25, trace_ma_99, pred])

fig.update_layout(title=f'Historical Price Data for {symbol}',
                  xaxis_title='Timestamp',
                  yaxis_title='Price (USDT)',
                  xaxis_rangeslider_visible=False)

# Show the interactive plot in a browser
fig.write_html('plot.html')