In [10]:
import datetime
import itertools
import os

import numpy as np
import pandas as pd
import pandas_ta as ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

tqdm.pandas()


In [11]:
def setupDF(file, data_dir='../Datasets/'):
  """Load one CSV export into a DataFrame indexed by its ``date`` column.

  Parameters
  ----------
  file : str
      Name of the CSV file, resolved relative to ``data_dir``.
  data_dir : str, optional
      Directory holding the CSV files. Defaults to ``'../Datasets/'``, the
      location that used to be hard-coded here.

  Returns
  -------
  pandas.DataFrame
      The file contents with every row containing a missing value removed
      and the ``unix``, ``Volume USDT``, ``tradecount`` and ``symbol``
      columns dropped.

  Raises
  ------
  KeyError
      If any of the four dropped columns is absent from the file.
  """
  path = os.path.join(data_dir, file)

  # header=1: column names are read from the second line of the file; the
  # first line is skipped.
  # `infer_datetime_format` is intentionally not passed: it is deprecated
  # since pandas 2.0 and only ever affected parsing speed, not the result.
  raw = pd.read_csv(path, header=1, parse_dates=True, index_col='date')

  # Chained, non-mutating form instead of `inplace=True`.
  return (
      raw
      .dropna(how='any')
      .drop(columns=['unix', 'Volume USDT', 'tradecount', 'symbol'])
  )

In [12]:
def createY(df):
  """Reverse the row order and attach a next-row up/down label ``y``.

  A row is "up" (1) when its ``close`` is strictly greater than its
  ``open``, otherwise 0. After the rows are reversed, each row receives the
  up/down flag of the row that follows it, so ``y`` is the target to predict
  from the current row. The final row has no successor and is removed.

  NOTE(review): presumably the input is ordered newest-first, so that the
  reversal yields chronological order - confirm against the data files.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain ``open`` and ``close`` columns. It is not modified.

  Returns
  -------
  pandas.DataFrame
      A new frame with the rows of ``df`` in reverse order, minus the last
      one, plus a float ``y`` column holding 0.0 / 1.0.
  """
  # Work on a reversed copy so the caller's frame is left untouched and the
  # column assignment below does not target a slice of another frame.
  flipped = df.iloc[::-1].copy()

  # `close > open` is the same condition as `close - open > 0`, without the
  # temporary 'diff' column or a Python-level apply.
  went_up = (flipped['close'] > flipped['open']).astype(int)

  # shift(-1) pulls the following row's flag onto the current row; the last
  # row becomes NaN, which also turns the column into floats.
  flipped['y'] = went_up.shift(-1)

  # Drop that unlabeled last row; copy so callers can freely mutate the result.
  return flipped.iloc[:-1].copy()

In [13]:
def applyStrat(df):
  """Run the notebook's pandas_ta strategy on ``df`` and drop incomplete rows.

  The strategy requests RSI (lengths 7 and 14), SMA (lengths 7 and 25),
  MACD (fast 12, slow 26) and OBV.

  ``df`` is modified in place and the same object is returned. The
  indicator results are presumably appended to ``df`` as new columns by
  ``df.ta.strategy`` (its return value is not used here) - confirm against
  the pandas_ta documentation.
  """
  # Same indicators, same order: two RSI windows, two SMA windows, MACD, OBV.
  windowed = [
      {'kind': kind, 'length': length}
      for kind, length in (('rsi', 7), ('rsi', 14), ('sma', 7), ('sma', 25))
  ]
  indicators = windowed + [
      {'kind': 'macd', 'fast': 12, 'slow': 26},
      {'kind': 'obv'},
  ]

  strategy = ta.Strategy(
      name='Strategie',
      description='MACD, RSI',
      ta=indicators,
  )
  df.ta.strategy(strategy)

  # Remove every row that still has a missing value in any column.
  df.dropna(how='any', inplace=True)
  return df

In [14]:
def getReadyForTraning(file):
  """Build the feature matrix and label vector for one data file.

  Runs the file through ``setupDF`` -> ``createY`` -> ``applyStrat`` and
  splits the result into features and target.

  Returns
  -------
  tuple
      ``(X, y)`` where ``X`` is every column except ``y`` and ``y`` is the
      ``y`` column of the prepared frame.
  """
  loaded = setupDF(file)
  labeled = createY(loaded)
  prepared = applyStrat(labeled)

  target = prepared['y']
  features = prepared.drop(columns='y')
  return features, target

In [27]:
def modelTrainerTree(X,y):
  """Grid-search a random forest on a train split and score it on the rest.

  Parameters
  ----------
  X, y
      Features and labels, forwarded to ``train_test_split``.

  Returns
  -------
  tuple
      ``(best_params, test_score, best_estimator)``: the winning
      hyper-parameter combination, the refit best estimator's ``score`` on
      the held-out split, and that estimator itself.
  """
  # NOTE(review): train_test_split shuffles rows by default. If the rows are
  # an ordered time series, a chronological split may be more appropriate -
  # confirm this is intended.
  split = train_test_split(X, y, random_state=42)
  X_train, X_test, y_train, y_test = split

  # 4 x 3 x 4 = 48 candidate combinations.
  search_space = dict(
      max_depth=[2, 5, 8, 12],
      min_samples_leaf=[1, 3, 5],
      n_estimators=[64, 128, 256, 512],
  )

  search = GridSearchCV(
      RandomForestClassifier(random_state=42),
      param_grid=search_space,
      n_jobs=-1,
      verbose=1,
  )
  search.fit(X_train, y_train)

  winner = search.best_estimator_
  held_out_score = winner.score(X_test, y_test)
  return search.best_params_, held_out_score, winner


In [28]:
def getNamesOfDataFile(data_dir='../Datasets/'):
  """List the entries of the dataset directory, excluding ``.DS_Store``.

  Parameters
  ----------
  data_dir : str, optional
      Directory to list. Defaults to ``'../Datasets/'`` - the same relative
      location ``setupDF`` reads from - instead of a machine-specific
      absolute path.

  Returns
  -------
  list of str
      Entry names (not full paths), sorted alphabetically so that repeated
      runs process the files in the same order.

  Raises
  ------
  FileNotFoundError
      If ``data_dir`` does not exist.
  """
  # os.listdir returns names in arbitrary order; sort for reproducibility.
  return sorted(name for name in os.listdir(data_dir) if name != '.DS_Store')


In [29]:
# Train one grid-searched model per data file and collect a summary record
# (file name, fitted model, best hyper-parameters, held-out score) for each.
models = []
for f in tqdm(getNamesOfDataFile()):
  X, y = getReadyForTraning(f)
  bestParams, score, model = modelTrainerTree(X, y)

  record = {}
  record['file'] = f
  record['model'] = model
  record['bestParams'] = bestParams
  record['testScore'] = score
  models.append(record)


  0%|          | 0/6 [00:00<?, ?it/s]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


 17%|█▋        | 1/6 [02:45<13:45, 165.17s/it]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


 33%|███▎      | 2/6 [05:43<11:32, 173.17s/it]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


 50%|█████     | 3/6 [08:47<08:53, 177.88s/it]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


 67%|██████▋   | 4/6 [11:55<06:04, 182.08s/it]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


 83%|████████▎ | 5/6 [14:58<03:02, 182.22s/it]

Fitting 5 folds for each of 48 candidates, totalling 240 fits


100%|██████████| 6/6 [18:05<00:00, 180.92s/it]


In [30]:
# Tabulate the records collected in `models`: one row per data file, with
# columns file / model / bestParams / testScore.
pd.DataFrame(models)

Unnamed: 0,file,model,bestParams,testScore
0,Binance_ADAUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=5, max_featu...","{'max_depth': 5, 'min_samples_leaf': 1, 'n_est...",0.5322
1,Binance_ETHUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=8, max_featu...","{'max_depth': 8, 'min_samples_leaf': 3, 'n_est...",0.523583
2,Binance_BNBUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=8, max_featu...","{'max_depth': 8, 'min_samples_leaf': 5, 'n_est...",0.506803
3,Binance_LTCUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=8, max_featu...","{'max_depth': 8, 'min_samples_leaf': 3, 'n_est...",0.540363
4,Binance_XRPUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=2, max_featu...","{'max_depth': 2, 'min_samples_leaf': 1, 'n_est...",0.536054
5,Binance_BTCUSDT_1h.csv,"(DecisionTreeClassifier(max_depth=2, max_featu...","{'max_depth': 2, 'min_samples_leaf': 3, 'n_est...",0.532426
