In [1]:
from binance.client import Client
import pandas as pd
import time
from prophet import Prophet
import os
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [70]:
# Binance credentials come from the environment so no secret is stored in the notebook;
# a missing variable raises KeyError here rather than failing later inside a request.
api = os.environ["API_KEY"]
secret = os.environ["SECRET_KEY"]

# Client shared by the download helper below; the REST base URL is set explicitly on the instance.
mainnet = Client(api, secret)
mainnet.API_URL = "https://api.binance.com/api"

In [None]:
def get_kline_data(symbol, lasthowmany, interval, client=None, page_limit=500):
    """Download kline (candlestick) rows page by page, walking backwards in time.

    Starting from the current time, up to ``lasthowmany`` pages of at most
    ``page_limit`` rows are requested. After each page the ``endTime`` cursor is
    moved to one millisecond before the oldest open time received, so the
    boundary candle is not requested again on the next page.

    Args:
        symbol: Trading pair passed through to ``get_klines`` (e.g. "BTCUSDT").
        lasthowmany: Maximum number of pages to request.
        interval: Kline interval passed through to ``get_klines`` (e.g. "1d").
        client: Object exposing ``get_klines(symbol=, limit=, endTime=, interval=)``.
            Defaults to the notebook-level ``mainnet`` client.
        page_limit: Number of rows requested per page (default 500).

    Returns:
        A list of rows exactly as returned by ``get_klines``, ordered
        newest-first. Element 0 of each row is used as the open time in
        milliseconds. The list is empty when nothing is returned.
    """
    if client is None:
        # Resolved at call time so the function can be defined before the client cell runs.
        client = mainnet

    end_time = int(time.time() * 1000)
    rows = []
    for _ in range(lasthowmany):
        page = client.get_klines(
            symbol=symbol,
            limit=page_limit,
            endTime=end_time,
            interval=interval,
        )
        if not page:
            # History exhausted exactly on a page boundary: nothing left to index into.
            break
        # Pages arrive oldest-first; step the cursor just before the oldest candle seen.
        end_time = int(page[0][0]) - 1
        rows.extend(reversed(page))
        if len(page) < page_limit:
            # A short page means there is no older data to fetch.
            break
    return rows

In [None]:
# Labels applied, in order, to the twelve fields of each downloaded kline row.
columns = [
    "open_time",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "close_time",
    "quote_asset_volume",
    "number_of_trades",
    "taker_buy_base_asset_volume",
    "taker_buy_quote_asset_volume",
    "ignore",
]

# Download daily BTCUSDT candles (at most 280 pages) and keep only the timestamp and OHLCV columns.
daily_data = pd.DataFrame(get_kline_data("BTCUSDT", 280, "1d"), columns=columns)
daily_data = daily_data[["open_time", "open", "high", "low", "close", "volume"]]

In [83]:
# Order the candles by ascending open_time, renumber the rows from 0, and convert
# open_time from epoch milliseconds to pandas datetimes.
sorted_values = (
    daily_data
    .sort_values(by="open_time")
    .reset_index(drop=True)
    .assign(open_time=lambda frame: pd.to_datetime(frame["open_time"], unit="ms"))
)

In [2]:
# The frame built above keeps only open_time/open/high/low/close/volume, so a strict
# drop of "close_time" raises KeyError. errors="ignore" removes the column when it is
# present and is a no-op otherwise, so the cell runs in either case.
daily_data = sorted_values.drop(columns=["close_time"], errors="ignore")
# Persist the daily candles (the row index is written as the first CSV column).
daily_data.to_csv("daily_bitcoin_price.csv")

NameError: name 'sorted_values' is not defined

In [3]:
# Load the stored candle history, order it by ascending open_time, renumber the rows,
# and keep only the timestamp and OHLC columns.
coin_data = (
    pd.read_csv("../Data/Bitcoin_data.csv")
    .sort_values(by="open_time")
    .reset_index(drop=True)
    .loc[:, ["open_time", "open", "high", "low", "close"]]
)

In [4]:
from ta.momentum import RSIIndicator
from ta.trend import MACD, EMAIndicator
from ta.volatility import AverageTrueRange
# Technical indicators computed from the price columns; each result is stored as a new
# coin_data column. The indicator objects keep their notebook-level names.
close_prices = coin_data["close"]

# RSI over a 14-row window.
rsi = RSIIndicator(close=close_prices, window=14)
coin_data["rsi"] = rsi.rsi()

# MACD line and its signal line (library default windows).
macd = MACD(close=close_prices)
coin_data["macd"] = macd.macd()
coin_data["macd_signal"] = macd.macd_signal()

# 20-row exponential moving average of the close.
ema = EMAIndicator(close=close_prices, window=20)
coin_data["ema_20"] = ema.ema_indicator()

# Average true range over a 14-row window, from high / low / close.
atr = AverageTrueRange(
    high=coin_data["high"],
    low=coin_data["low"],
    close=close_prices,
    window=14,
)
coin_data["atr"] = atr.average_true_range()

In [6]:
# Sanity check: number of rows currently in coin_data (shown as the cell output).
len(coin_data)

69001

In [5]:
# Convert open_time from epoch milliseconds to pandas datetimes; the calendar features
# built later read it through the .dt accessor.
coin_data["open_time"]=pd.to_datetime(coin_data['open_time'],unit="ms")
# No equivalent conversion is done for close_time: that column is not among the ones
# kept when coin_data is loaded in this notebook.


In [6]:
# Supervised pairs: row i of coin_data_x (every row except the last) is matched with
# the close of row i + 1, re-indexed from 0 so the two line up positionally.
coin_data_x = coin_data.iloc[:-1]
coin_data_y = coin_data["close"].iloc[1:].reset_index(drop=True)

In [7]:
# Remove the timestamp of the input (previous) rows and attach the timestamp of the row
# being predicted (the expected output) instead; any missing values are filled with 0.
coin_x_data = coin_data_x.drop(columns=["open_time"])
target_time = coin_data["open_time"].iloc[1:].reset_index(drop=True)
lstm_data = pd.concat([target_time, coin_x_data], axis=1).fillna(0)

# Feature engineering: calendar features derived from the target timestamp.
stamp = lstm_data["open_time"].dt
lstm_data["weekday"] = stamp.day_of_week
lstm_data["day_of_year"] = stamp.day_of_year
lstm_data["hour"] = stamp.hour
lstm_data["month_end"] = stamp.is_month_end.astype(np.float32)
lstm_data["month_start"] = stamp.is_month_start.astype(np.float32)
lstm_data["quarter_start"] = stamp.is_quarter_start.astype(np.float32)
lstm_data["quarter_end"] = stamp.is_quarter_end.astype(np.float32)
lstm_data["year"] = stamp.year
lstm_data["month"] = stamp.month
# 1.0 for timestamps after 2023, 0.0 otherwise.
lstm_data["is_recent"] = (stamp.year > 2023).astype(np.float64)

# Preparing datasets for scaling: a one-column target frame and the inputs without the timestamp.
Y = coin_data_y.to_frame()
X = lstm_data.drop(columns=["open_time"])

In [14]:
#scaling
from sklearn.preprocessing import MinMaxScaler,StandardScaler
# Separate min-max scalers for the target and the inputs; both are fitted on the full
# frames and the scaled arrays are kept under new names.
outscaler = MinMaxScaler()
inpscaler = MinMaxScaler()
Y_scaled = outscaler.fit(Y).transform(Y)
X_scaled = inpscaler.fit(X).transform(X)

In [43]:
# Reshape into consecutive, non-overlapping windows of 15 time steps:
# X becomes (windows, 15, features) and Y becomes (windows, 15).
# The row count must be a multiple of 15, otherwise reshape raises ValueError.
X = X_scaled.reshape(X_scaled.shape[0] // 15, 15, X_scaled.shape[1])
Y = Y_scaled.reshape(Y_scaled.shape[0] // 15, 15)

In [44]:

# Recency channel: one extra feature that rises linearly from 0 at the first step of a
# window to 1 at its last step, appended to every window of X
# (X is expected to be 3-D: windows x steps x features, since the append is along axis 2).
# The sizes are read from X itself rather than hard-coded (previously 15 steps and
# 4600 windows), so the cell keeps working when the dataset length or window size changes.
# NOTE: X is rebound under the same name, so running this cell twice appends the channel twice.
n_windows, n_steps = X.shape[0], X.shape[1]
importance = np.linspace(0, 1, n_steps).reshape(1, n_steps, 1)
x = np.repeat(importance, repeats=n_windows, axis=0)
X = np.append(X, x, axis=2)

In [45]:
from tensorflow.keras.layers import Bidirectional,Input,Dense,Dropout,LayerNormalization,LSTM #type:ignore
from tensorflow.keras.models import Model #type:ignore
from tensorflow.keras.callbacks import EarlyStopping,LearningRateScheduler #type:ignore
from tensorflow.keras.regularizers import l2 #type:ignore
# Network: windows of 15 steps x 20 features -> LSTM(64, relu, L2 kernel penalty)
# -> layer normalisation -> dropout -> dense layer emitting 15 values per window.
# The 20 features presumably are the scaled columns plus the appended ramp channel — confirm against X.shape.
input_layer = Input(shape=(15, 20), name="input")
lstm = LSTM(units=64, activation="relu", kernel_regularizer=l2(0.001))(input_layer)
norm = LayerNormalization()(lstm)
drop = Dropout(rate=0.2)(norm)
output = Dense(units=15)(drop)
model = Model(inputs=input_layer, outputs=output)

In [47]:
import math
def schedule(epoch,lr):
    """Return the learning rate to use for ``epoch`` given the current rate ``lr``.

    The rate is returned unchanged while ``epoch`` is below 10; from epoch 10
    onward it is multiplied by ``exp(-0.001)`` on every call.
    """
    return lr if epoch < 10 else lr * math.exp(-0.001)
# Callbacks: the learning-rate schedule defined above, and early stopping on val_loss
# (10 epochs of patience, best weights restored).
sched = LearningRateScheduler(schedule, verbose=0)
stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# One sample weight per window, rising linearly from 0.5 (first window) to 0.8 (last window).
weight_pre = np.linspace(0.5, 0.8, len(X))

model.compile(optimizer="adam", loss="mae", metrics=["mae"])
# The returned History object is left as the cell's last expression, as before.
model.fit(
    x=X,
    y=Y,
    validation_split=0.2,
    sample_weight=weight_pre,
    epochs=30,
    batch_size=32,
    callbacks=[stop, sched],
)

Epoch 1/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.3442 - mae: 0.5079 - val_loss: 0.1739 - val_mae: 0.2029 - learning_rate: 0.0010
Epoch 2/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0932 - mae: 0.1260 - val_loss: 0.0910 - val_mae: 0.1027 - learning_rate: 0.0010
Epoch 3/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0417 - mae: 0.0496 - val_loss: 0.0768 - val_mae: 0.0882 - learning_rate: 0.0010
Epoch 4/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0268 - mae: 0.0296 - val_loss: 0.0941 - val_mae: 0.1126 - learning_rate: 0.0010
Epoch 5/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0214 - mae: 0.0235 - val_loss: 0.0387 - val_mae: 0.0426 - learning_rate: 0.0010
Epoch 6/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0186 - mae: 0.0206 -

<keras.src.callbacks.history.History at 0x7ae338674770>

Statistics Based Training

In [51]:
from prophet import Prophet
# Two independent Prophet models with default settings: one for the daily series, one for the hourly series.
model_daily, model_hourly = Prophet(), Prophet()

In [162]:
# Reload the saved daily candles and keep only the timestamp and the closing price.
daily_data = pd.read_csv("daily_bitcoin_price.csv").loc[:, ["open_time", "close"]]

In [163]:
# Rename to the ds (timestamp) / y (value) column names used for the frame passed to the Prophet fit below.
data=daily_data.rename(columns={"open_time":"ds","close":"y"})

In [164]:
# Fit the daily Prophet model on the ds / y frame; the fitted model is the cell output.
model_daily.fit(data)

11:57:02 - cmdstanpy - INFO - Chain [1] start processing
11:57:03 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x722626c50c20>

In [49]:
# Inputs for the regressor-based Prophet model: the timestamp becomes ds, the close of the
# input row becomes previous_close (so it cannot clash with the target), missing values become 0.
X_data = (
    coin_data_x
    .rename(columns={"open_time": "ds", "close": "previous_close"})
    .fillna(0)
)
# Independent copy of the next-row close used as the target.
Y_data = coin_data_y.copy()

In [52]:
# Register every column after the first one (ds) as an extra regressor of the hourly model.
for col in X_data.columns[1:]:
    model_hourly.add_regressor(col)

# Training frame: the regressors side by side with the target, which is named y.
hourly_data = pd.concat([X_data, Y_data.rename("y")], axis=1)

In [53]:
# Fit the hourly Prophet model on ds, the registered regressors and y; the fitted model is the cell output.
model_hourly.fit(hourly_data)

16:25:32 - cmdstanpy - INFO - Chain [1] start processing
16:26:24 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7ae3303d4cb0>

Saving Models

In [59]:
import tf2onnx
import tensorflow as tf
# Export the Keras network to ONNX (opset 13). The signature mirrors the model input:
# any batch size, 15 steps, 20 features, float32, tensor name "input".
spec = (tf.TensorSpec(shape=[None, 15, 20], dtype=tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)

# Write the serialized graph next to the other saved models.
with open("../models/lstm_model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())


I0000 00:00:1751542032.735157   56980 devices.cc:76] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
I0000 00:00:1751542032.737568   56980 single_machine.cc:374] Starting new session
I0000 00:00:1751542032.871238   56980 devices.cc:76] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
I0000 00:00:1751542032.871414   56980 single_machine.cc:374] Starting new session
I0000 00:00:1751542032.942850   56980 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled
rewriter <function rewrite_constant_fold at 0x7ae3014b5760>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.
rewriter <function rewrite_constant_fold at 0x7ae3014b5760>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.
rewriter <function r

In [60]:
import joblib
# Persist both fitted scalers so inputs can be scaled and predictions un-scaled outside this notebook.
# NOTE(review): the first file name is spelled "ouput_scaler.pkl" (missing "t"); it is left
# unchanged because whatever loads it presumably uses the same spelling — confirm before renaming.
joblib.dump(outscaler,"ouput_scaler.pkl")
joblib.dump(inpscaler,"input_scaler.pkl")

['input_scaler.pkl']

In [None]:
import pickle
# Pickle both fitted Prophet models, one file per model.
for pickle_path, fitted_model in (
    ("daily_stat.pkl", model_daily),
    ("hourly_stat.pkl", model_hourly),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(fitted_model, f)

In [21]:
# Generating synthetic stand-in data, because Binance is not available on Hugging Face
# Spaces and CoinGecko does not provide some of the required features.
# For every coin_data column from position 7 onward, a separate Prophet model is fitted on
# that column and its forecast for the next 1440 hourly steps (60 days) is stored under the
# column's own name.
forecast_columns = {}
for col in coin_data.columns[7:]:
    # Named feature_model rather than `random`, which would shadow the stdlib module name.
    feature_model = Prophet()
    feature_model.fit(
        coin_data[[col, "open_time"]].rename(columns={col: "y", "open_time": "ds"})
    )
    # Keep only the 1440 rows appended after the training history.
    ds = feature_model.make_future_dataframe(1440, freq="h")[-1440:]
    pred = feature_model.predict(ds)
    # The synthetic value is the midpoint of the forecast's lower and upper bounds.
    forecast_columns[col] = (pred["yhat_lower"] + pred["yhat_upper"]) / 2

# Build the frame once from the named columns; concatenating the unnamed Series inside the
# loop left every column with the same label (0) and re-copied the frame on each iteration.
random_data = pd.DataFrame(forecast_columns)

18:35:18 - cmdstanpy - INFO - Chain [1] start processing
18:36:11 - cmdstanpy - INFO - Chain [1] done processing
18:36:16 - cmdstanpy - INFO - Chain [1] start processing
18:37:09 - cmdstanpy - INFO - Chain [1] done processing
18:37:13 - cmdstanpy - INFO - Chain [1] start processing
18:37:52 - cmdstanpy - INFO - Chain [1] done processing
18:37:57 - cmdstanpy - INFO - Chain [1] start processing
18:38:46 - cmdstanpy - INFO - Chain [1] done processing


In [24]:
# Independent copy of the generated features (presumably a backup before relabelling — confirm it is still needed).
rand=random_data.copy()

In [30]:
# Rebuild the frame from the raw values so each column is labelled with the matching
# coin_data column name (columns from position 7 onward, the same slice used to generate them).
random_un_avilable_features=pd.DataFrame(random_data.values,columns=coin_data.columns[7:])

In [32]:
# Save the synthetic features; index=False keeps the row index out of the file.
random_un_avilable_features.to_csv("../Data/random_un_avilable_features.csv",index=False)

01:39:17 - cmdstanpy - INFO - Chain [1] start processing


In [3]:
# Reload the saved synthetic features to check the file; the bare name on the last line displays the frame.
# NOTE(review): `data` is also the name used for the daily Prophet training frame earlier in this notebook.
data=pd.read_csv("../Data/random_un_avilable_features.csv")
data

Unnamed: 0,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,9.222408e+07,179326.623882,505.339246,4.612218e+07
1,9.161722e+07,170215.196710,399.538443,4.486107e+07
2,8.667917e+07,169245.322554,284.626219,3.893690e+07
3,8.200503e+07,168767.963208,276.567090,3.873445e+07
4,7.888336e+07,169221.859637,251.364054,3.782706e+07
...,...,...,...,...
1435,1.217626e+08,265735.208522,895.537788,6.134585e+07
1436,1.225006e+08,267925.656345,969.173176,6.160272e+07
1437,1.159318e+08,254553.750171,785.421935,5.463344e+07
1438,1.036725e+08,240812.404071,529.320428,4.877405e+07


In [12]:
# Fit a stand-alone Prophet model on the number_of_trades series (ds = open_time, y = number_of_trades).
# NOTE(review): `model` is also the name bound to the Keras network earlier in this notebook;
# after this cell runs it refers to this Prophet object, so the ONNX export cell cannot be re-run afterwards.
# NOTE(review): coin_data as loaded earlier in this notebook keeps only open_time/open/high/low/close,
# so number_of_trades must come from a different load of the data — verify before relying on this cell.
model=Prophet()
data=coin_data[['open_time','number_of_trades']].rename(columns={"open_time":"ds","number_of_trades":"y"})
model.fit(data)

10:13:16 - cmdstanpy - INFO - Chain [1] start processing
10:14:20 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7115fbc7d610>

In [19]:
# Preview the last 1440 rows of the frame extended by 1440 hourly periods.
model.make_future_dataframe(1440, freq="h").tail(1440)

Unnamed: 0,ds
68863,2025-06-30 19:00:00
68864,2025-06-30 20:00:00
68865,2025-06-30 21:00:00
68866,2025-06-30 22:00:00
68867,2025-06-30 23:00:00
...,...
70298,2025-08-29 14:00:00
70299,2025-08-29 15:00:00
70300,2025-08-29 16:00:00
70301,2025-08-29 17:00:00
