In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('eth.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-11-09 00:00:00+00:00,308.644989,329.451996,307.056,320.884003,893249984,0.0,0.0
1,2017-11-10 00:00:00+00:00,320.67099,324.717987,294.541992,299.252991,885985984,0.0,0.0
2,2017-11-11 00:00:00+00:00,298.585999,319.453003,298.191986,314.681,842300992,0.0,0.0
3,2017-11-12 00:00:00+00:00,314.690002,319.153015,298.513,307.90799,1613479936,0.0,0.0
4,2017-11-13 00:00:00+00:00,307.024994,328.415009,307.024994,316.716003,1041889984,0.0,0.0


In [3]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [4]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [5]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       09-11-2017
1       10-11-2017
2       11-11-2017
3       12-11-2017
4       13-11-2017
           ...    
2394    30-05-2024
2395    31-05-2024
2396    01-06-2024
2397    02-06-2024
2398    03-06-2024
Name: formatted_time, Length: 2399, dtype: object

In [6]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [7]:
df

Unnamed: 0,Close,formatted_time
0,320.884003,09-11-2017
1,299.252991,10-11-2017
2,314.681000,11-11-2017
3,307.907990,12-11-2017
4,316.716003,13-11-2017
...,...,...
2394,3746.849609,30-05-2024
2395,3760.026611,31-05-2024
2396,3813.198975,01-06-2024
2397,3780.895996,02-06-2024


In [8]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [9]:
df

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,320.884003,09-11-2017,,,
1,299.252991,10-11-2017,,,
2,314.681000,11-11-2017,,,
3,307.907990,12-11-2017,,,
4,316.716003,13-11-2017,311.888397,311.888397,
...,...,...,...,...,...
2394,3746.849609,30-05-2024,3813.641357,3775.709597,63.639676
2395,3760.026611,31-05-2024,3800.467187,3770.481935,64.095183
2396,3813.198975,01-06-2024,3784.705615,3784.720948,65.948936
2397,3780.895996,02-06-2024,3772.833545,3783.445964,63.794067


In [10]:
df.fillna(0, inplace = True)

In [11]:
new_df = df.copy()

In [12]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 


In [15]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [16]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [17]:
y_pred_rf = best_rf.predict(X_test)
y_pred_rf

array([1848.95935059, 1856.93417187,  151.6846402 , 1815.56671582,
       1814.63967627,  522.61327527, 3340.64464551, 1248.64795557,
       2772.12482617,  379.73183142, 1204.17930762,  860.50242822,
       1923.87245654, 3882.10933008, 1645.33104248, 1297.5604126 ,
        758.38700342, 1582.24215967, 3179.7906543 , 2663.69564746,
        437.31808728, 1645.45568066,  410.60821655, 1655.69257568,
       2051.43686621,  174.00035944, 2358.06777344, 1947.92557275,
       2650.64395703, 1982.29393799, 1297.75075781,  586.17220276,
       1886.92792578, 1818.63443164, 1784.99555811, 1622.48233789,
        497.13436243,  121.85354068,  172.4674682 ,  239.33018463,
        112.98613483, 2820.07376758,  145.03871686,  249.72438275,
        991.42150439,  599.86834937, 1809.18273145, 1626.69343115,
       2278.83952344,  485.79714111,  398.29562561, 1777.28229297,
        159.04983685, 1568.97185596, 1584.90258252, 3091.33189453,
       3176.52744531, 3730.84432715, 2176.52702246, 3115.62482

In [18]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 39.301994776427776, 'MSE': 1544.646793406356, 'MAE': 20.820493477058417, 'R2': 0.9988131696689142}


In [19]:
len(df)

2399

In [20]:
data = df [len(df) - 15:]

In [21]:
for i in range(10):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.fit_transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[0.163528, 0.0, 0.0, 0.0, 1.0, 0.0, sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[3793.67809766]
[3756.19758496]
[3755.40884766]
[3756.21886035]
[3751.01545801]
[3736.75675]
[3610.11922168]
[3531.47546484]
[3478.52690039]
[3374.94311816]


In [22]:
data

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,3663.855469,20-05-2024,3179.579443,3256.483191,70.029124
1,3789.312744,21-05-2024,3348.415771,3434.093042,72.515353
2,3737.217773,22-05-2024,3477.035596,3535.134619,69.921548
3,3776.927246,23-05-2024,3607.83125,3615.732162,70.779529
4,3726.93457,24-05-2024,3738.849561,3652.799631,68.144123
5,3749.236572,25-05-2024,3755.925781,3684.945278,68.703947
6,3825.897461,26-05-2024,3763.242725,3731.929339,70.615536
7,3892.006836,27-05-2024,3794.200537,3785.288505,72.192909
8,3840.256348,28-05-2024,3806.866357,3803.611119,69.067365
9,3763.196533,29-05-2024,3814.11875,3790.13959,64.583517


In [23]:

# Evaluate the model
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 39.301994776427776, 'MSE': 1544.646793406356, 'MAE': 20.820493477058417, 'R2': 0.9988131696689142}


In [24]:
import pickle 
with open('eth.pkl', 'wb') as f:
    pickle.dump(best_rf, f)