In [98]:
import numpy as np
import pandas as pd
import pandas_ta as ta
from tqdm import tqdm
tqdm.pandas()
import datetime
import itertools

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [89]:
def setupDF(file):
  fileName = '../Datasets/'+ file
  df = pd.read_csv(fileName, header=1, parse_dates=True, infer_datetime_format=True, index_col='date')
  df.dropna(inplace=True, how='any')
  df.drop(columns=['unix', 'Volume USDT', 'tradecount','symbol'], inplace=True)
  
  return df

In [90]:
def createY(df):
  df['diff'] = df['close'] - df['open']
  df['y'] = df['diff'].apply(lambda x : 1 if x>0 else 0)
  df = df[::-1]
  df['y'] = df['y'].shift(-1)
  testValue = df.iloc[-50:]
  df = df[:-1]
  return df.drop(columns=['diff'])

In [91]:
def applyStrat(df):
  strat = ta.Strategy(name='Strategie',
                     description='MACD, RSI',
                     ta=[
                      {'kind':'rsi', 'length':7},
                      {'kind':'rsi', 'length':14},
                      {'kind':'sma', 'length':7},
                      {'kind':'sma', 'length':25},
                      {'kind':'macd', 'fast':12 , 'slow':26},
                      {'kind':'obv'},
                     ])
  
  df.ta.strategy(strat)
  df.dropna(inplace=True, how='any')
  return df

In [116]:
def getReadyForTraning(file):
  df = applyStrat(createY(setupDF(file)))
  X = df.drop(columns='y')
  y = df['y']
  return X,y

In [155]:
def modelTrainer(X,y):
  X_train, X_test, y_train, y_test = train_test_split(X,y)
  
  rfc = RandomForestClassifier(random_state=42)
  
  param_grid = { 
        'max_depth' : [2,5,8,12],
        'min_samples_leaf' : [1,3,5],
        'n_estimators' : [64, 128, 256, 512]
    } 
  
  CV_rfc = RandomizedSearchCV(rfc, param_distributions=param_grid, n_jobs=-1, n_iter=20)
  CV_rfc.fit(X_train,y_train)
  
  return CV_rfc.best_params_, CV_rfc.best_estimator_.score(X_test,y_test), CV_rfc.best_estimator_


In [156]:
def getNamesOfDataFile():
  from os import listdir
  return [f for f in listdir('/Users/sam/Worspace/python/Datasets') if f != '.DS_Store']


In [157]:
# models = []
for f in getNamesOfDataFile():
  l = []
  print(f)
  for i in range(3):
    X, y = getReadyForTraning(f)
    bestParams, score, model = modelTrainer(X,y)
    l.append(score)
    # models.append({'file':f, 'model':model, 'bestParams':bestParams})
    print(str(score)+ ' - ' + str(bestParams))
  print(str(np.mean(l)))

Binance_ADAUSDT_1h.csv
0.5240362811791384 - {'n_estimators': 64, 'min_samples_leaf': 1, 'max_depth': 2}
0.5301587301587302 - {'n_estimators': 512, 'min_samples_leaf': 1, 'max_depth': 5}
0.5158730158730159 - {'n_estimators': 512, 'min_samples_leaf': 3, 'max_depth': 5}
0.5233560090702948
Binance_ETHUSDT_1h.csv
0.5335600907029479 - {'n_estimators': 64, 'min_samples_leaf': 1, 'max_depth': 2}
0.5294784580498866 - {'n_estimators': 256, 'min_samples_leaf': 1, 'max_depth': 5}
0.5340136054421769 - {'n_estimators': 256, 'min_samples_leaf': 5, 'max_depth': 8}
0.5323507180650038
Binance_BNBUSDT_1h.csv
0.509297052154195 - {'n_estimators': 64, 'min_samples_leaf': 5, 'max_depth': 5}
0.51859410430839 - {'n_estimators': 512, 'min_samples_leaf': 1, 'max_depth': 5}
0.5176870748299319 - {'n_estimators': 128, 'min_samples_leaf': 1, 'max_depth': 2}
0.5151927437641723
Binance_LTCUSDT_1h.csv
0.5435374149659864 - {'n_estimators': 256, 'min_samples_leaf': 5, 'max_depth': 5}
0.5356009070294785 - {'n_estimators':

0.5299319727891156

In [None]:
#{'n_estimators': 128, 'min_samples_leaf': 3, 'max_depth': 2}
#{'n_estimators': 64, 'min_samples_leaf': 3, 'max_depth': 2} 0.5299319727891156
# {'n_estimators': 128, 'min_samples_leaf': 5, 'max_depth': 2} 0.5299319727891156


['Binance_ADAUSDT_1h.csv',
 'Binance_ETHUSDT_1h.csv',
 'Binance_BNBUSDT_1h.csv',
 'Binance_LTCUSDT_1h.csv',
 'Binance_XRPUSDT_1h.csv',
 'Binance_BTCUSDT_1h.csv']