In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.callbacks import EarlyStopping

In [45]:
def calculate_moving_averages(df, window_sizes):
    for window_size in window_sizes:
        column_name = f'MA_{window_size}'
        df[column_name] = df['close'].rolling(window=window_size).mean()
    return df

In [3]:
df = pd.read_csv('bnb_historical_data.csv')

In [4]:
window_sizes = [7, 25, 99]
df = calculate_moving_averages(df, window_sizes)

In [5]:
features = df[['MA_7', 'MA_25', 'MA_99', 'open', 'high', 'low', 'close', 'volume']].dropna()
target = features['close'].values.reshape(-1, 1)
features = features.values

# Normalize features using Min-Max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
features_scaled = scaler.fit_transform(features)

In [6]:
features_scaled

array([[2.87707317e-01, 2.87457497e-01, 2.88031848e-01, ...,
        2.94117647e-01, 2.91242363e-01, 1.37694443e-03],
       [2.87512195e-01, 2.87375233e-01, 2.87983214e-01, ...,
        2.94117647e-01, 2.91242363e-01, 8.64287955e-04],
       [2.87609756e-01, 2.87320390e-01, 2.87941528e-01, ...,
        2.94117647e-01, 2.91921249e-01, 5.89021604e-04],
       ...,
       [7.29463415e-01, 7.30640562e-01, 7.22424478e-01, ...,
        7.32251521e-01, 7.31839783e-01, 4.01286874e-03],
       [7.29365854e-01, 7.30613140e-01, 7.22598171e-01, ...,
        7.32927654e-01, 7.31839783e-01, 9.71408071e-03],
       [7.29951220e-01, 7.30503455e-01, 7.22778812e-01, ...,
        7.32927654e-01, 7.32518669e-01, 8.31035114e-03]])

In [7]:
def create_sequences(data, target, sequence_length):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i + sequence_length]
        label = target[i + sequence_length]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

In [8]:
sequence_length = 20  # Adjust as needed
X, y = create_sequences(features_scaled, target, sequence_length)

In [9]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [47]:
model = Sequential()
model.add(LSTM(units=100, activation='relu', input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Second LSTM layer
model.add(LSTM(units=150, activation='relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Third LSTM layer
model.add(LSTM(units=200, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

# Dense layer
model.add(Dense(units=1, activation='linear'))

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [48]:
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.1)

Epoch 1/10
[1m14791/14791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m886s[0m 60ms/step - loss: 9524.6748 - val_loss: 30.5525
Epoch 2/10
[1m14791/14791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m900s[0m 61ms/step - loss: 87.6441 - val_loss: 60.9337
Epoch 3/10
[1m14791/14791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m873s[0m 59ms/step - loss: 91.8630 - val_loss: 50.8669
Epoch 4/10
[1m 5795/14791[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m33:51[0m 226ms/step - loss: 121.1502

KeyboardInterrupt: 

In [20]:
model.save("trained_model.keras")

In [14]:
df_24 = pd.read_csv('bnb_historical_data_24.csv')

combined_data = pd.concat([df, df_24], axis=0, ignore_index=True)

In [15]:
window_sizes = [7, 25, 99]
combined_data = calculate_moving_averages(combined_data, window_sizes)

In [39]:
features_24 = combined_data[['MA_7', 'MA_25', 'MA_99', 'open', 'high', 'low', 'close', 'volume']].dropna()
target_24 = features_24['close'].values.reshape(-1, 1)
ploting = combined_data.dropna()
features_24 = features_24.values

scaler_24 = MinMaxScaler(feature_range=(0, 1))
features_scaled_24 = scaler.fit_transform(features_24)

In [17]:
sequence_length = 20  # Adjust as needed
X_24, y_24 = create_sequences(features_scaled_24, target_24, sequence_length)

In [18]:
loss_24 = model.evaluate(X_24, y_24)
print(f'Mean Squared Error on Test Set: {loss_24}')

2024-03-12 14:15:55.450792: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 389684480 exceeds 10% of free system memory.


[1m19028/19028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 6ms/step - loss: 338.5935
Mean Squared Error on Test Set: 303.1624755859375


In [40]:
predictions = model.predict(X_24)
predictions_df = pd.DataFrame(predictions, columns=['predictions'], index=ploting.index[20:])
ploting.loc[ploting.index[20:], 'predictions'] = predictions_df['predictions']

[1m19028/19028[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 6ms/step




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [41]:
ploting['timestamp'] = pd.to_datetime(ploting['timestamp']) 
desired_months = [1, 2, 3]  # January, February, and March
filtered_df = ploting[ploting['timestamp'].dt.month.isin(desired_months)]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [42]:
filtered_df.head(100)

Unnamed: 0,index_get,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,MA_7,MA_25,MA_99,predictions
98,98,2023-01-01 01:38:00,245.5,245.6,245.5,245.6,87.649,1672537139999,21518.6660,58,8.365,2054.4440,0,245.628571,245.780,246.013131,
99,99,2023-01-01 01:39:00,245.6,245.6,245.5,245.6,55.016,1672537199999,13509.9350,35,35.070,8613.1920,0,245.600000,245.768,246.006061,
100,100,2023-01-01 01:40:00,245.6,245.7,245.5,245.7,37.494,1672537259999,9208.0832,51,29.940,7353.5337,0,245.614286,245.760,246.000000,
101,101,2023-01-01 01:41:00,245.7,245.7,245.6,245.7,25.756,1672537319999,6327.2786,30,16.050,3943.4850,0,245.628571,245.756,245.994949,
102,102,2023-01-01 01:42:00,245.7,245.7,245.6,245.6,10.568,1672537379999,2595.9883,29,4.875,1197.7875,0,245.628571,245.744,245.990909,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,193,2023-01-01 03:13:00,245.5,245.5,245.4,245.5,122.038,1672542839999,29958.0273,42,90.049,22107.0295,0,245.342857,245.312,245.575758,241.713486
194,194,2023-01-01 03:14:00,245.5,245.6,245.4,245.5,257.749,1672542899999,63276.1192,107,78.194,19196.9667,0,245.385714,245.320,245.574747,241.713196
195,195,2023-01-01 03:15:00,245.5,245.5,245.4,245.4,8.188,1672542959999,2009.7087,41,3.735,916.9425,0,245.414286,245.320,245.571717,241.702515
196,196,2023-01-01 03:16:00,245.4,245.5,245.4,245.5,30.490,1672543019999,7483.3672,23,11.212,2752.5460,0,245.428571,245.328,245.571717,241.690125


In [43]:
import plotly.graph_objects as go

candlestick_trace  = go.Candlestick(x=filtered_df['timestamp'],
                open=filtered_df['open'],
                high=filtered_df['high'],
                low=filtered_df['low'],
                close=filtered_df['close'])

symbol = 'BNBUSDT'

trace_ma_7 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_7'], mode='lines', name='MA(7)', line=dict(color='green', dash='dash'))
trace_ma_25 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_25'], mode='lines', name='MA(25)', line=dict(color='red', dash='dash'))
trace_ma_99 = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['MA_99'], mode='lines', name='MA(99)', line=dict(color='purple', dash='dash'))

pred = go.Scatter(x=filtered_df['timestamp'], y=filtered_df['predictions'], mode='lines', name='predicted-price', line=dict(color='blue'))



fig = go.Figure(data=[candlestick_trace, trace_ma_7, trace_ma_25, trace_ma_99, pred])

fig.update_layout(title=f'Historical Price Data for {symbol}',
                  xaxis_title='Timestamp',
                  yaxis_title='Price (USDT)',
                  xaxis_rangeslider_visible=False)

# Show the interactive plot in a browser
fig.write_html('plot.html')