In [None]:
import pandas as pd
from preprocessing import Preprocessing
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import pandas_ta as ta

In [2]:
# Number of most-recent rows held back from preprocessing/training (split off with iloc below).
n_rows_to_test = 12
# Look-ahead, in rows, of the forward return that becomes the target.
n_periods_to_predict = 12
# True -> classes are split at the median of Target; False -> split at a return of zero.
equal_division = True

In [3]:
# Load the candle data; 'Open Time' and 'Close Time' are parsed as datetimes.
# NOTE(review): the path is relative to the notebook's working directory.
crypto_df = pd.read_csv("BTTCUSDT.csv", parse_dates=['Open Time' , 'Close Time'])

In [4]:
# Moving-average windows (in rows) computed on the close price.
mas_values = [10, 20, 30, 50, 100, 200]
# For every window add the SMA, EMA and WMA columns in that order; the column
# layout matters because later cells select features by position.
moving_averages = (("SMA", ta.sma), ("EMA", ta.ema), ("WMA", ta.wma))
for ma in mas_values:
    for prefix, average in moving_averages:
        crypto_df[f"{prefix}{ma}"] = average(crypto_df["Close"], ma)

In [5]:
# Additional pandas_ta indicator columns. Any parameter not passed explicitly
# (e.g. the window length of Ao, VWMA, ATR and CCI) is left at the library default.
# Columns are appended in this order, which later positional slicing depends on.
crypto_df['Ao'] = ta.ao(high=crypto_df['High'] , low=crypto_df['Low'])
crypto_df['Bop'] = ta.bop(open_=crypto_df['Open'] , high=crypto_df['High'] , low=crypto_df['Low'] , close=crypto_df['Close'])
crypto_df['VWMA'] = ta.vwma(crypto_df['Close'] , volume=crypto_df['Volume'])
crypto_df['ATR'] = ta.atr(high=crypto_df['High'] , low=crypto_df['Low'] , close=crypto_df['Close'])
# Same indicator as ATR, but with an explicit window of 4 rows.
crypto_df['ATR_4'] = ta.atr(high=crypto_df['High'] , low=crypto_df['Low'] , close=crypto_df['Close'] , length=4)  
crypto_df['Momentum'] = ta.mom(close=crypto_df['Close'], length=14)
crypto_df['CCI'] = ta.cci(high=crypto_df['High'], low=crypto_df['Low'], close=crypto_df['Close'])

In [6]:
# Forward percentage return: close n_periods_to_predict rows ahead versus the current close.
# The last n_periods_to_predict rows have no future close and therefore become NaN.
# NOTE(review): the column is called '24hReturn', but the sample rows show 4-hour bars,
# so a 12-row look-ahead would span 48 hours -- confirm the intended horizon.
crypto_df['24hReturn'] = ((crypto_df['Close'].shift(-n_periods_to_predict) - crypto_df['Close'] ) / crypto_df['Close']) * 100

In [7]:
# Lag, in rows, between Target and its lagged copy p1_Target.
n_to_shift_Target = 1
crypto_df['24hReturn'] = crypto_df['24hReturn'].round(decimals=2)
# p1_Target at row t is the forward return that was computed at row t - n_to_shift_Target.
# NOTE(review): that return looks n_periods_to_predict rows ahead, so with a lag of 1 it
# still depends on closes after row t. During training/validation this feature may leak
# future information (at prediction time it is replaced by the model's own output) --
# confirm this is intended.
crypto_df['p1_Target'] = crypto_df['24hReturn'].shift(n_to_shift_Target)
crypto_df['Target'] = crypto_df['24hReturn']
# Target is added last, so it is the final column and p1_Target the one before it.
crypto_df = crypto_df.drop(columns='24hReturn').copy()
# Hold out the last n_rows_to_test rows; everything before them goes on to preprocessing.
crypto_df_testing = crypto_df.iloc[-n_rows_to_test :].copy().reset_index(drop=True)
crypto_df_processed = crypto_df.iloc[ : -n_rows_to_test].copy().reset_index(drop=True)
crypto_df_processed.head(2)

Unnamed: 0,Symbol,Open Time,Close Time,Open,Close,Low,High,Volume,SMA10,EMA10,...,WMA200,Ao,Bop,VWMA,ATR,ATR_4,Momentum,CCI,p1_Target,Target
0,BTTCUSDT,2022-01-25 04:00:00,2022-01-25 07:59:00,2e-06,2e-06,2e-06,3e-06,5202012000000.0,,,...,,,0.009524,,,,,,,1.53
1,BTTCUSDT,2022-01-25 08:00:00,2022-01-25 11:59:00,2e-06,2e-06,2e-06,2e-06,2396618000000.0,,,...,,,0.291667,,,,,,1.53,11.33


In [8]:
# Run the project's Preprocessing helper on the training portion only.
# NOTE(review): what pre_processing does (NaN handling, scaling, column changes) is not
# visible in this notebook, and the hold-out frame crypto_df_testing is never passed
# through it -- confirm that train and hold-out features end up comparable.
pre = Preprocessing()
df_processed  = pre.pre_processing(crypto_df_processed)

In [9]:
# Inspect the last three rows of the preprocessed frame (close price and raw target).
df_processed[['Close' , 'Target']].tail(3)

Unnamed: 0,Close,Target
5958,1e-06,-3.51
5959,1e-06,0.89
5960,1e-06,-1.74


In [10]:
def classifiying_Targets(df, equal_divide):
    """Encode the ``Target`` and ``p1_Target`` columns of ``df`` as classes 0 / 1.

    Both columns are encoded with one shared threshold and with exactly the
    rule used by ``apply_division``, so rows encoded later with the returned
    threshold are labelled the same way as the rows encoded here.  Values at
    or below the threshold become 0, values above it become 1, and missing
    values stay missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``Target`` and ``p1_Target`` columns.  The two
        columns are overwritten in place.
    equal_divide : bool
        If true, the threshold is the median of ``Target`` (two quantile
        bins).  Otherwise the threshold is 0.

    Returns
    -------
    tuple
        ``(df, threshhold, bin_edges)`` where ``df`` is the same (mutated)
        frame, ``threshhold`` is the cut point that was applied, and
        ``bin_edges`` holds the two quantile intervals of ``Target`` when
        ``equal_divide`` is true, else 0.

    Raises
    ------
    ValueError
        If ``equal_divide`` is true but ``Target`` cannot be separated into
        two quantile bins (for example when more than half of the values
        equal the minimum or the maximum).
    """
    num_of_classes = 2
    threshhold = 0
    bin_edges = 0
    if equal_divide:
        # The intervals are only reported back to the caller; the labels
        # themselves come from the shared threshold rule below.
        bin_edges = pd.qcut(df['Target'], q=num_of_classes, duplicates='drop').cat.categories
        if len(bin_edges) < num_of_classes:
            raise ValueError(
                "Target cannot be split into two quantile bins: bin edges are not unique"
            )
        # Exact median, i.e. the edge between the two quantile bins, taken
        # before the rounding that is applied to the interval labels.
        threshhold = df['Target'].quantile(0.5)
    df = apply_division(df, threshhold)
    return df, threshhold, bin_edges


def _encode_return(value, mid_value):
    """Map one return to 0 (<= mid_value) or 1 (> mid_value); missing values pass through."""
    if pd.isna(value):
        return value
    return 0 if value <= mid_value else 1


def apply_division(df , mid_value ):
    """Encode ``p1_Target`` and ``Target`` of ``df`` in place around ``mid_value``.

    Values at or below ``mid_value`` become 0, larger values become 1 and
    missing values are left untouched.  Returns the same frame.
    """
    for column in ('p1_Target', 'Target'):
        df[column] = df[column].apply(_encode_return, args=(mid_value,))
    return df


In [11]:
# Encode Target / p1_Target of the training frame as classes 0 / 1.
# mid_value is kept so the hold-out rows can be encoded with the same cut point later.
df_processed , mid_value , bin_edges = classifiying_Targets(df_processed ,equal_divide=equal_division )

In [12]:
# Class balance of the encoded training target.
df_processed['Target'].value_counts()

Target
0    3752
1    2209
Name: count, dtype: int64

In [13]:
# Encoded target next to its lagged copy.
df_processed[['Target' , 'p1_Target']]

Unnamed: 0,Target,p1_Target
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
...,...,...
5956,1,0
5957,1,1
5958,0,1
5959,1,0


In [14]:
# Features: every column from position 3 up to, but excluding, the last one. Label: Target.
# NOTE(review): this relies on column order -- it assumes the first three columns are
# non-features and that Target is the last column (making p1_Target the final feature).
# The name X_scaled suggests pre_processing scaled the data -- confirm.
X_scaled = df_processed.iloc[ : , 3 : -1] 
y = df_processed['Target']

In [15]:
def time_series_train_split(X_scaled , y, train_fraction=0.95):
    """Split features and labels chronologically, without shuffling.

    The first ``int(len(X_scaled) * train_fraction)`` rows form the training
    part and the remaining rows the test part.  The split size is derived
    from ``X_scaled`` itself, so the function does not depend on any
    notebook-level variable.

    Parameters
    ----------
    X_scaled : pandas.DataFrame or sliceable sequence
        Feature rows in time order.
    y : pandas.Series or sliceable sequence
        Labels aligned row by row with ``X_scaled``.
    train_fraction : float, default 0.95
        Share of the rows assigned to the training part; must lie strictly
        between 0 and 1.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is not strictly between 0 and 1.
    """
    if not 0 < train_fraction < 1:
        raise ValueError("train_fraction must be strictly between 0 and 1")
    train_size = int(len(X_scaled) * train_fraction)
    # Use positional indexing for pandas objects; plain sequences/arrays are sliced directly.
    X_rows = X_scaled.iloc if hasattr(X_scaled, 'iloc') else X_scaled
    y_rows = y.iloc if hasattr(y, 'iloc') else y
    X_train, X_test = X_rows[:train_size], X_rows[train_size:]
    y_train, y_test = y_rows[:train_size], y_rows[train_size:]
    return X_train , X_test, y_train , y_test

In [16]:
# Chronological split of the training frame into a fitting part and a validation part.
X_train,X_test,y_train,y_test = time_series_train_split(X_scaled , y )

In [17]:
def models_search(X_train , y_train):
    """Fit one random forest per split criterion and return the fitted models.

    The forests differ only in ``criterion`` ('entropy' first, then 'gini');
    all other hyper-parameters are shared.  The returned list keeps that order.
    """
    shared_params = dict(
        n_estimators=100,
        random_state=21,
        bootstrap=False,
        max_features=20,
        max_depth=20,
        n_jobs=-1,
    )
    # fit() returns the estimator itself, so the fitted model can be collected directly.
    return [
        RandomForestClassifier(criterion=split_rule, **shared_params).fit(X_train, y_train)
        for split_rule in ('entropy', 'gini')
    ]
def best_accuarcy_model(X_train  , X_test , y_train , y_test):
    """Fit the candidate models and return the one with the highest accuracy on ``X_test``.

    Candidates come from ``models_search(X_train, y_train)``.  On ties the
    earlier candidate wins.  A candidate is always returned when at least one
    exists, even if every candidate scores an accuracy of 0; ``None`` is
    returned only when there are no candidates at all.

    Note that the model is chosen using ``X_test`` / ``y_test``, so a score
    reported afterwards on the same data is an optimistic estimate.
    """
    models = models_search(X_train , y_train)
    best_model = None
    # Accuracy is never negative, so -1 guarantees the first candidate is accepted.
    best_score = -1.0
    for model in models:
        acc = accuracy_score(y_test , model.predict(X_test))
        if acc > best_score:
            best_score = acc
            best_model = model
    return best_model

In [18]:
# Pick the best candidate forest, then report its accuracy and confusion matrix on X_test.
# NOTE(review): the model was selected on this same split, so the score is optimistic.
model = best_accuarcy_model(X_train , X_test , y_train , y_test)
y_pred = model.predict(X_test)
acc = accuracy_score(y_test , y_pred)
# normalize='true' scales each row (true class) of the matrix to sum to 1.
cm= np.round(confusion_matrix(y_test , y_pred , labels=[0 , 1 ] , normalize='true') , decimals=2)
print(cm)
print(acc)
print(model)

[[0.87 0.13]
 [0.2  0.8 ]]
0.8394648829431438
RandomForestClassifier(bootstrap=False, criterion='entropy', max_depth=20,
                       max_features=20, n_jobs=-1, random_state=21)


In [19]:
# Depth actually reached by each tree of the selected forest.
max_depths = [estimator.tree_.max_depth for estimator in model.estimators_]
max_depth_reached = max(max_depths)

print(f"The maximum depth reached by any tree in the Random Forest is: {max_depth_reached}")

The maximum depth reached by any tree in the Random Forest is: 20


In [20]:
# Shallowest tree; if this equals the maximum, every tree reached the same depth.
min(max_depths)

20

In [21]:
# Number of rows in the validation part of the chronological split.
y_test.shape

(299,)

In [22]:
# When the hold-out window differs from the prediction horizon, keep only its first
# n_rows_to_test - n_periods_to_predict rows.
# NOTE(review): presumably these are the rows whose forward return is already known.
# If n_rows_to_test < n_periods_to_predict the slice bound is negative and rows are
# trimmed from the end instead -- confirm that case is never used.
if n_rows_to_test != n_periods_to_predict:
  crypto_df_testing = crypto_df_testing.iloc[ : n_rows_to_test - n_periods_to_predict].copy()

In [23]:
# Encode the hold-out targets with the threshold obtained from the training frame.
crypto_df_testing = apply_division(crypto_df_testing , mid_value=mid_value )

In [24]:
# Last column is taken as the ground truth; columns 3..-1 as features, using the same
# positions as the training slice.
# NOTE(review): the hold-out frame did not go through pre_processing, so this assumes its
# column layout and value scale match df_processed -- confirm.
actual_values = crypto_df_testing.iloc[:, -1].copy() 
X_testing_values = crypto_df_testing.iloc[:, 3:-1].copy()

In [25]:
# Predicted class for each hold-out row, in row order.
li = []
# Only referenced by the commented-out probability code below.
probs = []

# Predict the hold-out rows one at a time so each prediction can be fed forward
# as a feature of a later row.
for i in range(X_testing_values.shape[0]):
    # iloc[[i], :] keeps the row as a one-row frame, which is what predict() is given.
    one = model.predict(X_testing_values.iloc[[i], :])
    one = one.round(decimals=2)
    li.append(one[0]) 
    # prob = model.predict_proba(X_testing_values.iloc[[i], :])
    # probs.append(prob[0]) 
    # Write the predicted class into the last feature column of the row that lies
    # n_to_shift_Target rows ahead, if such a row exists.
    # NOTE(review): assumes the last feature column is p1_Target -- confirm column order.
    if i + n_to_shift_Target < X_testing_values.shape[0]:
        X_testing_values.iloc[i + n_to_shift_Target, -1] = one[0]

# probs_df = pd.DataFrame(probs, columns=[f"Prob_Class_{i}" for i in range(prob.shape[1])])
# Collect actual and predicted classes, labelled with the bar's day-of-month and hour.
pred_df = pd.DataFrame({
    "Actual": actual_values,
    "Pred": li
})
pred_df['Time'] = crypto_df_testing['Open Time'].dt.strftime('%d-%H')

In [26]:
# Hold-out accuracy is only computed when the hold-out window differs from the horizon.
# With equal settings the saved table below shows every Actual as NaN, so there is
# nothing to score against.
if n_rows_to_test != n_periods_to_predict:
  print(accuracy_score(pred_df['Actual'] , pred_df['Pred']))
print(pred_df[['Time' ,'Actual' , 'Pred']])
# Validation-split metrics computed earlier, repeated here for reference.
print(acc)
print(cm)
print("Tested_values" , len(y_test))
print(model)

     Time  Actual  Pred
0   16-20     NaN     0
1   17-00     NaN     0
2   17-04     NaN     0
3   17-08     NaN     1
4   17-12     NaN     1
5   17-16     NaN     1
6   17-20     NaN     1
7   18-00     NaN     1
8   18-04     NaN     1
9   18-08     NaN     1
10  18-12     NaN     1
11  18-16     NaN     1
0.8394648829431438
[[0.87 0.13]
 [0.2  0.8 ]]
Tested_values 299
RandomForestClassifier(bootstrap=False, criterion='entropy', max_depth=20,
                       max_features=20, n_jobs=-1, random_state=21)


In [27]:
# Class balance of the encoded training target (same check as earlier in the notebook).
df_processed['Target'].value_counts()

Target
0    3752
1    2209
Name: count, dtype: int64