In [48]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [49]:
df = pd.read_csv('bnb-usd.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-11-09 00:00:00+00:00,2.05314,2.17423,1.89394,1.99077,19192200,0.0,0.0
1,2017-11-10 00:00:00+00:00,2.00773,2.06947,1.64478,1.79684,11155000,0.0,0.0
2,2017-11-11 00:00:00+00:00,1.78628,1.91775,1.61429,1.67047,8178150,0.0,0.0
3,2017-11-12 00:00:00+00:00,1.66889,1.6728,1.46256,1.51969,15298700,0.0,0.0
4,2017-11-13 00:00:00+00:00,1.52601,1.73502,1.5176,1.68662,12238800,0.0,0.0


In [50]:
df.tail(20)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
2379,2024-05-15 00:00:00+00:00,566.707031,587.421509,561.127258,582.074341,1895100260,0.0,0.0
2380,2024-05-16 00:00:00+00:00,582.100159,586.43634,565.762329,569.190247,1911862000,0.0,0.0
2381,2024-05-17 00:00:00+00:00,569.190247,584.666016,568.836731,581.178345,1557134929,0.0,0.0
2382,2024-05-18 00:00:00+00:00,581.178345,581.987183,575.018555,580.48114,1358737176,0.0,0.0
2383,2024-05-19 00:00:00+00:00,580.481567,581.876709,572.369629,574.631653,1298887094,0.0,0.0
2384,2024-05-20 00:00:00+00:00,574.631653,601.563843,569.52356,600.616333,2145457267,0.0,0.0
2385,2024-05-21 00:00:00+00:00,600.601074,629.861084,593.932739,618.420471,3564986916,0.0,0.0
2386,2024-05-22 00:00:00+00:00,618.420471,625.566284,610.546021,614.671875,2138536549,0.0,0.0
2387,2024-05-23 00:00:00+00:00,614.671875,616.524475,581.954102,598.670837,2631566566,0.0,0.0
2388,2024-05-24 00:00:00+00:00,598.670837,604.31488,588.893982,600.225891,1778282198,0.0,0.0


In [51]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [52]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [53]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       09-11-2017
1       10-11-2017
2       11-11-2017
3       12-11-2017
4       13-11-2017
           ...    
2394    30-05-2024
2395    31-05-2024
2396    01-06-2024
2397    02-06-2024
2398    03-06-2024
Name: formatted_time, Length: 2399, dtype: object

In [54]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [55]:
df

Unnamed: 0,Close,formatted_time
0,1.990770,09-11-2017
1,1.796840,10-11-2017
2,1.670470,11-11-2017
3,1.519690,12-11-2017
4,1.686620,13-11-2017
...,...,...
2394,595.026794,30-05-2024
2395,593.522339,31-05-2024
2396,601.304199,01-06-2024
2397,603.026611,02-06-2024


In [63]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [65]:
df.fillna(0, inplace = True)

In [66]:
new_df = df.copy()

In [69]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [72]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [73]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 

In [74]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [75]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [76]:
y_pred_rf = best_rf.predict(X_test)

array([269.49240028, 311.05429993,  12.64619291, 298.4439624 ,
       310.89083801,  10.72428505, 412.00617722, 243.44063812,
       389.84866547,  30.44300324, 234.2395401 ,  10.3982472 ,
       218.28848427, 537.41770142, 205.43152451, 284.17972931,
        13.84207033, 342.04828964, 389.93812042, 377.9700679 ,
        23.14603919, 212.69941467,  23.31333476, 213.46875542,
       294.06224319,  23.93632927, 362.31590652, 302.53931015,
       377.01509354, 239.29625816, 290.72716553,  30.05919868,
       323.50424469, 140.77760376, 129.40625023, 309.81329086,
        14.88815508,   9.25063257,  18.3057187 ,  17.24304904,
         6.34145166, 400.10805725,  14.7970037 ,  31.41898348,
         9.10864037,  30.49130333, 312.54101807, 210.76271629,
       307.26790787,  28.20147214,  27.0582642 , 286.27447098,
         6.10386303, 305.73369812, 285.66168945, 407.17788986,
       563.58656128, 468.07298508, 301.45068344, 422.41139725,
       316.1171698 ,  17.51718543, 329.84935867,  15.44

In [77]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 8.496377661131993, 'MSE': 72.18843336058275, 'MAE': 3.1754283514395354, 'R2': 0.9978122599131659}


In [79]:
data = df [len(df) - 15:]

In [None]:
for i in range(2):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.fit_transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[0.163528, 0.0, 0.0, 0.0, 1.0, 0.0, sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[629.37156097]
[645.69927185]
[652.99732025]
[655.68986511]
[653.96038605]
[655.63752747]
[655.63752747]
[655.63752747]
[655.63752747]
[655.63752747]


In [81]:
import pickle 
with open('bnb.pkl', 'wb') as f:
    pickle.dump(best_rf, f)