In [73]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [74]:
df = pd.read_csv('eth.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-11-09 00:00:00+00:00,308.644989,329.451996,307.056,320.884003,893249984,0.0,0.0
1,2017-11-10 00:00:00+00:00,320.67099,324.717987,294.541992,299.252991,885985984,0.0,0.0
2,2017-11-11 00:00:00+00:00,298.585999,319.453003,298.191986,314.681,842300992,0.0,0.0
3,2017-11-12 00:00:00+00:00,314.690002,319.153015,298.513,307.90799,1613479936,0.0,0.0
4,2017-11-13 00:00:00+00:00,307.024994,328.415009,307.024994,316.716003,1041889984,0.0,0.0


In [75]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [76]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [77]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       09-11-2017
1       10-11-2017
2       11-11-2017
3       12-11-2017
4       13-11-2017
           ...    
2394    30-05-2024
2395    31-05-2024
2396    01-06-2024
2397    02-06-2024
2398    03-06-2024
Name: formatted_time, Length: 2399, dtype: object

In [78]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [79]:
df

Unnamed: 0,Close,formatted_time
0,320.884003,09-11-2017
1,299.252991,10-11-2017
2,314.681000,11-11-2017
3,307.907990,12-11-2017
4,316.716003,13-11-2017
...,...,...
2394,3746.849609,30-05-2024
2395,3760.026611,31-05-2024
2396,3813.198975,01-06-2024
2397,3780.895996,02-06-2024


In [80]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [81]:
df

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,320.884003,09-11-2017,,,
1,299.252991,10-11-2017,,,
2,314.681000,11-11-2017,,,
3,307.907990,12-11-2017,,,
4,316.716003,13-11-2017,311.888397,311.888397,
...,...,...,...,...,...
2394,3746.849609,30-05-2024,3813.641357,3775.709597,63.639676
2395,3760.026611,31-05-2024,3800.467187,3770.481935,64.095183
2396,3813.198975,01-06-2024,3784.705615,3784.720948,65.948936
2397,3780.895996,02-06-2024,3772.833545,3783.445964,63.794067


In [82]:
df.fillna(0, inplace = True)

In [83]:
new_df = df.copy()

In [84]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [86]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 


In [87]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [88]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [89]:
y_pred_rf = best_rf.predict(X_test)
y_pred_rf

array([1847.41031067, 1857.37495789,  152.28909279, 1815.73710388,
       1815.88602417,  522.4699176 , 3330.83200439, 1248.22285156,
       2771.22526611,  381.17614868, 1198.41035767,  860.12414795,
       1924.00988159, 3871.75264404, 1647.06398926, 1298.07250671,
        762.27072418, 1583.59577393, 3167.6836853 , 2664.84549683,
        436.95987198, 1645.38859985,  410.5825592 , 1653.9924115 ,
       2041.56462585,  174.14739822, 2355.9504187 , 1957.60639832,
       2648.36733398, 1986.99441772, 1296.70941101,  585.6500769 ,
       1884.64019165, 1816.52532104, 1779.07080017, 1617.91027893,
        495.34073044,  121.58123158,  172.44749939,  239.43913452,
        112.59218342, 2822.3640332 ,  145.45992237,  250.18576042,
        984.02093262,  599.63036987, 1809.15323792, 1629.62622314,
       2278.55307739,  485.45177032,  398.96509537, 1776.98801758,
        158.41786972, 1560.81018127, 1583.75875732, 3089.72588013,
       3174.96751709, 3716.76389038, 2179.59756104, 3103.25613

In [90]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 39.03389540254175, 'MSE': 1523.64499029657, 'MAE': 20.617778199195868, 'R2': 0.9988293064174865}


In [91]:
len(df)

2399

In [92]:
data = df [len(df) - 15:]

In [93]:
for i in range(10):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.fit_transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[0.163528, 0.0, 0.0, 0.0, 1.0, 0.0, sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[3795.97700928]
[3748.82550659]
[3753.5623938]
[3748.77512329]
[3733.36637573]
[3624.76443604]
[3546.46490112]
[3503.48641846]
[3419.83947998]
[3373.82316528]


In [94]:
data

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,3663.855469,20-05-2024,3179.579443,3256.483191,70.029124
1,3789.312744,21-05-2024,3348.415771,3434.093042,72.515353
2,3737.217773,22-05-2024,3477.035596,3535.134619,69.921548
3,3776.927246,23-05-2024,3607.83125,3615.732162,70.779529
4,3726.93457,24-05-2024,3738.849561,3652.799631,68.144123
5,3749.236572,25-05-2024,3755.925781,3684.945278,68.703947
6,3825.897461,26-05-2024,3763.242725,3731.929339,70.615536
7,3892.006836,27-05-2024,3794.200537,3785.288505,72.192909
8,3840.256348,28-05-2024,3806.866357,3803.611119,69.067365
9,3763.196533,29-05-2024,3814.11875,3790.13959,64.583517


In [95]:

# Evaluate the model
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 39.03389540254175, 'MSE': 1523.64499029657, 'MAE': 20.617778199195868, 'R2': 0.9988293064174865}


In [96]:
import pickle 
with open('eth.pkl', 'wb') as f:
    pickle.dump(best_rf, f)