In [29]:
import numpy as np
import pandas as pd
import pandas_ta as ta
from tqdm import tqdm
tqdm.pandas()
import datetime
import itertools
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression


In [2]:
def setupDF(file):
  fileName = 'Datasets/'+ file
  df = pd.read_csv(fileName, header=1, parse_dates=True, infer_datetime_format=True, index_col='date')
  df.dropna(inplace=True, how='any')
  df.drop(columns=['unix', 'Volume USDT', 'tradecount','symbol'], inplace=True)
  
  return df

In [3]:
def createY(df):
  df['diff'] = df['close'] - df['open']
  df['y'] = df['diff'].apply(lambda x : 1 if x>0 else 0)
  df = df[::-1]
  df['y'] = df['y'].shift(-1)
  testValue = df.iloc[-50:]
  df = df[:-1]
  return df.drop(columns=['diff'])

In [4]:
def applyStrat(df):
  strat = ta.Strategy(name='Strategie',
                     description='MACD, RSI',
                     ta=[
                      {'kind':'rsi', 'length':7},
                      {'kind':'rsi', 'length':14},
                      {'kind':'sma', 'length':7},
                      {'kind':'sma', 'length':25},
                      {'kind':'macd', 'fast':12 , 'slow':26},
                      {'kind':'obv'},
                     ])
  
  df.ta.strategy(strat)
  df.dropna(inplace=True, how='any')
  return df

In [5]:
def getReadyForTraning(file):
  df = applyStrat(createY(setupDF(file)))
  
  return df

In [14]:
def modelTrainerLinear(X,y):
  X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42)
  lr = LogisticRegression(random_state=42, solver='newton-cg', max_iter=10000)
  lr.fit(X_train,y_train)
  
  return lr, lr.score(X_test,y_test)


In [2]:
def getNamesOfDataFile():
  from os import listdir
  return [f for f in listdir('Datasets') if f != '.DS_Store']


In [30]:
def saveModel(name, model):
  filename='Models/'+name+'_model.pkl'
  with open(filename, 'wb') as file:
    pickle.dump(model, file)

In [31]:
models = []

for f in getNamesOfDataFile():
  df = getReadyForTraning(f)
  y = df['y']

  # l=[]
  # a = list(df.columns)
  # b = [x for x in a if 'y' not in x]
  # for i in range(2,4):
  #   l.append(list(itertools.combinations(b,i)))
  # cols = [item for sublist in l for item in sublist]
  # for col in cols: 
  #   X = df[list(col)]
  #  scores = []
  # for i in range(5):
  
  X = df.drop(columns='y')   
    
  model, score = modelTrainerLinear(X,y)
  name = f.split('_')[1]
  saveModel(name, model)




In [26]:
pd.DataFrame(models).sort_values('testScoreMean', ascending=False).drop_duplicates(subset=['file'])

Unnamed: 0,file,model,testScoreMean,cols
1629,Binance_XRPUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.544218,"(high, RSI_7, SMA_25)"
1372,Binance_LTCUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.544218,"(Volume LTC, RSI_7, RSI_14)"
2100,Binance_BTCUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.542857,"(Volume BTC, RSI_7, RSI_14)"
519,Binance_ETHUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.537188,"(high, close, RSI_7)"
284,Binance_ADAUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.532653,"(Volume ADA, RSI_7, MACDh_12_26_9)"
903,Binance_BNBUSDT_1h.csv,"LogisticRegression(max_iter=10000, random_stat...",0.529252,"(high, RSI_7, MACDh_12_26_9)"


In [25]:
np.mean(pd.DataFrame(models).sort_values('testScoreMean', ascending=False).drop_duplicates(subset=['file'])['testScoreMean'])

0.5383975812547241

NameError: name 'model' is not defined

In [14]:
def save34Days():
  for f in getNamesOfDataFile():
    fileName = 'Datasets/'+ f
    df = pd.read_csv(fileName, header=1, parse_dates=True, infer_datetime_format=True,index_col='date')
    df.dropna(inplace=True, how='any')
    df.drop(columns=['unix', 'Volume USDT', 'tradecount','symbol'], inplace=True)
    last48Hours = df[-48:]
  
    last48Hours.to_pickle('OldData/last48'+f.split('_')[1]+'.pkl')

In [15]:
save34Days()