In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('usdt-usd.csv')
df.head(5) 

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2017-11-09 00:00:00+00:00,1.01087,1.01327,0.996515,1.00818,358188000,0.0,0.0
1,2017-11-10 00:00:00+00:00,1.0065,1.02423,0.995486,1.00601,756446016,0.0,0.0
2,2017-11-11 00:00:00+00:00,1.00598,1.02621,0.995799,1.00899,746227968,0.0,0.0
3,2017-11-12 00:00:00+00:00,1.00602,1.10591,0.967601,1.01247,1466060032,0.0,0.0
4,2017-11-13 00:00:00+00:00,1.00448,1.02929,0.975103,1.00935,767884032,0.0,0.0


In [4]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [5]:
df.drop([ 'Open', 'High', 'Low',  'Dividends', 'Stock Splits', 'Volume'], inplace = True, axis = 1)

In [6]:
df['time'] = pd.to_datetime(df['Date'])
df['formatted_time'] = df['time'].dt.strftime('%d-%m-%Y')
df['formatted_time']

0       09-11-2017
1       10-11-2017
2       11-11-2017
3       12-11-2017
4       13-11-2017
           ...    
2394    30-05-2024
2395    31-05-2024
2396    01-06-2024
2397    02-06-2024
2398    03-06-2024
Name: formatted_time, Length: 2399, dtype: object

In [7]:
df.drop([ 'Date', 'time'], inplace = True, axis = 1)

In [16]:
import pandas_ta as ta
 
df['SMA'] = ta.sma(df['Close'], length=5) 
df['EMA'] = ta.ema(df['Close'], length=5) 
df['RSI'] = ta.rsi(df['Close'], length=14) 

In [17]:
df

Unnamed: 0,Close,formatted_time,SMA,EMA,RSI
0,1.008180,09-11-2017,,,
1,1.006010,10-11-2017,,,
2,1.008990,11-11-2017,,,
3,1.012470,12-11-2017,,,
4,1.009350,13-11-2017,1.009000,1.009000,
...,...,...,...,...,...
2394,0.999658,30-05-2024,0.999212,0.999314,49.520516
2395,0.998995,31-05-2024,0.999106,0.999208,42.810368
2396,0.999292,01-06-2024,0.999122,0.999236,46.319632
2397,0.999403,02-06-2024,0.999222,0.999291,47.613214


In [18]:
df.fillna(0, inplace = True)

In [19]:
new_df = df.copy()

In [22]:
X = new_df.drop(['Close', 'formatted_time'], axis = 1).values
Y = new_df['Close'].values

In [23]:
X

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       ...,
       [ 0.99912181,  0.99923575, 46.31963211],
       [ 0.999222  ,  0.9992915 , 47.61321412],
       [ 0.9993829 ,  0.99938316, 49.54223662]])

In [24]:
Y

array([1.00818002, 1.00601006, 1.00899005, ..., 0.99929202, 0.999403  ,
       0.9995665 ])

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 

In [27]:
rf = RandomForestRegressor()
 
param_grid = {
    'n_estimators': [50, 100, 150, 200, 250, 300]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3)
grid_search_rf.fit(X_train, y_train)


In [28]:
best_rf = grid_search_rf.best_estimator_
best_rf

In [29]:
y_pred_rf = best_rf.predict(X_test) 

array([1.00002969, 1.00029692, 1.0087777 , 0.99936268, 1.00021462,
       0.99304206, 1.00075706, 1.00006255, 1.00257181, 1.00089973,
       0.99955984, 0.99994746, 1.00015144, 1.00064947, 0.99969567,
       1.00004595, 0.99774578, 1.0000965 , 1.00007714, 1.00048133,
       1.00318127, 1.00034458, 0.99987211, 1.00046503, 1.00035392,
       1.00410022, 1.00050278, 0.99887017, 1.00043564, 1.00043405,
       1.00007338, 1.00110895, 1.00047521, 1.0004165 , 1.00005393,
       1.00031046, 1.00662822, 1.00102749, 1.00420766, 1.00057992,
       1.00867693, 1.00043382, 1.00334628, 1.00651283, 1.00555926,
       1.00002448, 0.99991211, 0.99964011, 1.00078371, 1.00037329,
       1.00111693, 0.99929687, 1.01884593, 1.00032878, 1.00011474,
       1.00072169, 1.00066651, 0.99992976, 1.00025898, 1.00036898,
       1.00011141, 1.00195609, 1.00003456, 1.00350591, 1.00010136,
       1.00213385, 0.99954163, 1.02461253, 0.99851723, 1.00018466,
       1.00017189, 1.00253362, 1.00019195, 1.0004362 , 1.00388

In [30]:
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 0.001800782635511388, 'MSE': 3.2428181003593407e-06, 'MAE': 0.0007334840500520364, 'R2': 0.8843635627271114}


In [31]:
len(df)

2399

In [32]:
data = df [len(df) - 15:]

In [34]:
for i in range(2):
    sma = ta.sma(data['Close'], length=5).iloc[-1]
    ema = ta.ema(data['Close'], length=5).iloc[-1]
    rsi = ta.rsi(data['Close'], length=14).iloc[-1]
    # sma_5, ema_5, rsi_14 = scaler.fit_transform(np.array([sma, ema, rsi]).reshape(-1, 1))
    # new_close = best_rf.predict([[0.163528, 0.0, 0.0, 0.0, 1.0, 0.0, sma_5[0], ema_5[0], rsi_14[0]]])
    new_close = best_rf.predict([[sma, ema, rsi]])
    print(new_close)
    new_row = pd.DataFrame({"Close": new_close, "formatted_time" : "0", "SMA" : sma, "EMA" : ema, "RSI" : rsi})
    data = pd.concat([data, new_row], ignore_index= True)
    

[0.99882265]
[0.99888768]
[0.9988445]
[0.99870411]
[0.9986585]
[0.99872825]
[0.99868193]
[0.99866508]
[0.99861337]
[0.99858481]


In [None]:

# Evaluate the model
rf_metrics = {
    'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'R2': r2_score(y_test, y_pred_rf)
}
print(rf_metrics)


{'RMSE': 39.27151662421437, 'MSE': 1542.2520179659455, 'MAE': 20.68864892191357, 'R2': 0.9988150096961236}


In [38]:
import pickle 
with open('usdt.pkl', 'wb') as f:
    pickle.dump(best_rf, f)