In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('usdt.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-11-09 00:00:00+00:00,1.01087,1.01327,0.996515,1.00818,358188000,0.0,0.0
1,2017-11-10 00:00:00+00:00,1.0065,1.02423,0.995486,1.00601,756446016,0.0,0.0
2,2017-11-11 00:00:00+00:00,1.00598,1.02621,0.995799,1.00899,746227968,0.0,0.0
3,2017-11-12 00:00:00+00:00,1.00602,1.10591,0.967601,1.01247,1466060032,0.0,0.0
4,2017-11-13 00:00:00+00:00,1.00448,1.02929,0.975103,1.00935,767884032,0.0,0.0


In [3]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [4]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [5]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       09-11-2017
1       10-11-2017
2       11-11-2017
3       12-11-2017
4       13-11-2017
           ...    
2394    30-05-2024
2395    31-05-2024
2396    01-06-2024
2397    02-06-2024
2398    03-06-2024
Name: formatted_time, Length: 2399, dtype: object

In [6]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [7]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [8]:
df

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,1.008180,09-11-2017,,,
1,1.006010,10-11-2017,,,
2,1.008990,11-11-2017,,,
3,1.012470,12-11-2017,,,
4,1.009350,13-11-2017,1.009000,1.009000,
...,...,...,...,...,...
2394,0.999658,30-05-2024,0.999212,0.999314,49.520516
2395,0.998995,31-05-2024,0.999106,0.999208,42.810368
2396,0.999292,01-06-2024,0.999122,0.999236,46.319632
2397,0.999403,02-06-2024,0.999222,0.999291,47.613214


In [9]:
df.fillna(0, inplace = True)

In [10]:
new_df = df.copy()

In [11]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [12]:
X

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       ...,
       [ 0.99912181,  0.99923575, 46.31963211],
       [ 0.999222  ,  0.9992915 , 47.61321412],
       [ 0.9993829 ,  0.99938316, 49.54223662]])

In [13]:
Y

array([1.00818002, 1.00601006, 1.00899005, ..., 0.99929202, 0.999403  ,
       0.9995665 ])

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 

In [16]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [17]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [18]:
y_pred_rf = best_rf.predict(X_test) 

In [19]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 0.0018105270953652846, 'MSE': 3.2780083630518547e-06, 'MAE': 0.0007401400460965145, 'R2': 0.8831087046134205}


In [20]:
len(df)

2399

In [21]:
data = df [len(df) - 15:]

In [22]:
for i in range(2):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.fit_transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[0.163528, 0.0, 0.0, 0.0, 1.0, 0.0, sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[0.99883563]
[0.99887218]


In [23]:

# Evaluate the model
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 0.0018105270953652846, 'MSE': 3.2780083630518547e-06, 'MAE': 0.0007401400460965145, 'R2': 0.8831087046134205}


In [24]:
import pickle 
with open('usdt.pkl', 'wb') as f:
    pickle.dump(best_rf, f)