In [130]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [131]:
df = pd.read_csv('sol-usd.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2020-04-10 00:00:00+00:00,0.832005,1.313487,0.694187,0.951054,87364276,0.0,0.0
1,2020-04-11 00:00:00+00:00,0.951054,1.049073,0.76502,0.776819,43862444,0.0,0.0
2,2020-04-12 00:00:00+00:00,0.785448,0.95667,0.762426,0.882507,38736897,0.0,0.0
3,2020-04-13 00:00:00+00:00,0.89076,0.891603,0.773976,0.777832,18211285,0.0,0.0
4,2020-04-14 00:00:00+00:00,0.777832,0.796472,0.628169,0.661925,16747614,0.0,0.0


In [133]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [134]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [135]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       10-04-2020
1       11-04-2020
2       12-04-2020
3       13-04-2020
4       14-04-2020
           ...    
1511    30-05-2024
1512    31-05-2024
1513    01-06-2024
1514    02-06-2024
1515    03-06-2024
Name: formatted_time, Length: 1516, dtype: object

In [136]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [137]:
df

Unnamed: 0,Close,formatted_time
0,0.951054,10-04-2020
1,0.776819,11-04-2020
2,0.882507,12-04-2020
3,0.777832,13-04-2020
4,0.661925,14-04-2020
...,...,...
1511,166.976532,30-05-2024
1512,165.637711,31-05-2024
1513,166.030670,01-06-2024
1514,163.249908,02-06-2024


In [138]:
# df['Symbol'].unique()

In [139]:
# df[df['Symbol'] == 'BTC']['Close'].plot()

In [140]:
df.describe()

Unnamed: 0,Close
count,1516.0
mean,54.675174
std,59.301607
min,0.515273
25%,14.102368
50%,30.028152
75%,91.004284
max,258.934326


In [141]:
# for i in df['Symbol'].unique():
#     print(len(df[df['Symbol'] == i]))

In [142]:
# state = pd.get_dummies(df['Symbol'])
# state

In [143]:
# df = df.drop('Symbol', axis = 1) 
# df = pd.concat([df, state], axis = 1)
# df.head()

In [144]:
# df = df.replace({True : 1, False : 0})

In [145]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [146]:
df

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,0.951054,10-04-2020,,,
1,0.776819,11-04-2020,,,
2,0.882507,12-04-2020,,,
3,0.777832,13-04-2020,,,
4,0.661925,14-04-2020,0.810027,0.810027,
...,...,...,...,...,...
1511,166.976532,30-05-2024,167.409421,168.110910,53.379252
1512,165.637711,31-05-2024,167.866782,167.286510,52.208216
1513,166.030670,01-06-2024,167.073688,166.867897,52.537340
1514,163.249908,02-06-2024,165.993500,165.661901,49.917586


In [147]:
df.fillna(0, inplace = True)

In [148]:
new_df = df.copy()

In [151]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [154]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [155]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import LSTM, GRU, Dense
from keras.optimizers import Adam


In [156]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [157]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [158]:
y_pred_rf = best_rf.predict(X_test)
y_pred_rf

array([  0.57390012,   3.10914197, 138.85765129,  29.19924201,
        40.54552206,  20.99622374, 219.60334717,   2.22004571,
       137.33739914, 195.00672516,  21.10510779,   0.5809155 ,
       137.98721664,   2.52606811,  22.33598825,  39.7568088 ,
        38.94082041, 202.5685437 , 148.58533028,  33.07185707,
       141.42956902,   1.46167936,  23.38518939,   4.34290123,
        37.47501076,  28.25751584,  31.72033666,   2.08769228,
        14.88080362, 102.14166   ,   3.00857259,  87.6211348 ,
       211.53337387,  28.98130469,  33.52138174,  71.2382074 ,
        32.21959783,   0.72913628,  17.13740417,  20.25027003,
        40.42920876,  29.67391893,  65.06448854,  56.38004311,
       137.14202927,  39.40036524,  45.82538683,  14.20094518,
        23.30419487,  88.82233776,   0.64798233,  23.90531423,
        11.43866443, 160.86871384,  26.35294452, 163.18950996,
        28.56193191,  33.67925066,  31.46534193,  59.45087013,
        13.8728061 ,   3.19902606,  56.1550819 ,  21.77

In [159]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 2.710983771455888, 'MSE': 7.349433009097191, 'MAE': 1.3191343742628627, 'R2': 0.9977642176126416}


In [160]:
len(df)

1516

In [161]:
data = df [len(df) - 15:]

In [162]:
for i in range(2):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[ sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[154.29010338]
[151.79022186]
[148.06703491]
[143.0415255]
[139.77252148]
[140.09543472]
[138.72189674]
[136.04753796]
[135.2646035]
[135.0139444]


In [164]:

# Evaluate the model
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 2.710983771455888, 'MSE': 7.349433009097191, 'MAE': 1.3191343742628627, 'R2': 0.9977642176126416}


In [165]:
import pickle
with open('sol.pkl', 'wb') as f:
    pickle.dump(best_rf, f)