In [1]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import datetime
from tqdm import tqdm
tqdm.pandas()

import csv

In [94]:
df  = pd.read_csv('../Datasets/Binance_BTCUSDT_d.csv', header=1, usecols=['unix','open','high','low','close', 'Volume BTC'], nrows=1680)
df.dropna(inplace=True, how='any')

Transform unix into date readable

In [95]:
df['date']=pd.to_datetime(df['unix'],unit='ms')
df.drop(columns='unix', inplace=True)


In [96]:
df.set_index('date', inplace=True)

Define my first strategy

In [97]:
strat1 = ta.Strategy(name='Strategie 1',
                     description='MACD, RSI, EMA12, EMA26, Stochastic Oscillator',
                     ta=[
                       {'kind':'macd', 'fast':12 , 'slow':26},
                       {'kind':'ema', 'length':12},
                       {'kind':'ema', 'length':26},
                       {'kind':'stoch'},
                       {'kind':'rsi'},
                       
                     ])

Apply the strategy

In [98]:
df.ta.strategy(strat1)


Remove first value because of nan

In [99]:
df = df.iloc[33:]

In [100]:
df['diff'] = df['close'] - df['open']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['diff'] = df['close'] - df['open']


In [101]:
def simplifier(val):
  if(val > 50):
    return 2
  elif(val<-50):
    return 0
  return 1
df['y'] = df['diff'].progress_apply(simplifier)

100%|██████████| 1647/1647 [00:00<00:00, 588266.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['y'] = df['diff'].progress_apply(simplifier)


# Machine Learning

In [102]:
from sklearn.model_selection import train_test_split

In [103]:
df = df[::-1]
df['y']=df['y'].shift(-1)

In [104]:
df = df[:-1]
X = df.drop(columns=['diff','y'])
y = df['y']

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [106]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train, y_train)
svc.score(X_test, y_test)

0.5696969696969697

In [107]:
from sklearn.model_selection import GridSearchCV

In [110]:
param_grid = {'C': [1, 10], 'gamma': [0.1,0.01],'kernel': ['rbf', 'poly',]}

In [111]:
#https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html?highlight=mlp#sklearn.neural_network.MLPClassifierß
svc = SVC()
clf = GridSearchCV(svc, param_grid, n_jobs=-1, verbose=100, cv=2)
clf.fit(X_train, y_train)

Fitting 2 folds for each of 8 candidates, totalling 16 fits
[CV 1/2; 1/8] START C=1, gamma=0.1, kernel=rbf..................................
[CV 2/2; 2/8] START C=1, gamma=0.1, kernel=poly.................................
[CV 2/2; 1/8] START C=1, gamma=0.1, kernel=rbf..................................
[CV 1/2; 2/8] START C=1, gamma=0.1, kernel=poly.................................
[CV 1/2; 1/8] END ...C=1, gamma=0.1, kernel=rbf;, score=0.428 total time=   0.1s
[CV 1/2; 5/8] START C=10, gamma=0.1, kernel=rbf.................................
[CV 1/2; 3/8] START C=1, gamma=0.01, kernel=rbf.................................
[CV 2/2; 1/8] END ...C=1, gamma=0.1, kernel=rbf;, score=0.430 total time=   0.1s
[CV 2/2; 5/8] START C=10, gamma=0.1, kernel=rbf.................................
[CV 2/2; 3/8] START C=1, gamma=0.01, kernel=rbf.................................
[CV 1/2; 5/8] END ..C=10, gamma=0.1, kernel=rbf;, score=0.428 total time=   0.1s
[CV 1/2; 6/8] START C=10, gamma=0.1, kernel=poly.

KeyboardInterrupt: 

In [None]:
print(clf.score(X_test, y_test))
print(clf.best_params_)
print(clf.best_estimator_)
print(clf.best_score_)

0.9878640776699029
{'C': 1, 'kernel': 'linear'}
SVC(C=1, kernel='linear')
0.9667996307427631


In [164]:
# {'C': 1, 'kernel': 'linear'}
# SVC(C=1, kernel='linear')

In [169]:
svc = SVC(C=1,kernel='linear',probability=True).fit(X_train,y_train)
svc.score(X_test,y_test)


0.9878640776699029

In [170]:
svc.predict_proba(X_test)

array([[2.44186112e-14, 2.93023329e-14, 1.00000000e+00],
       [9.99999850e-01, 1.00000015e-07, 5.00000075e-08],
       [1.69629951e-03, 5.84068595e-06, 9.98297860e-01],
       ...,
       [3.12653247e-03, 1.96556844e-05, 9.96853812e-01],
       [2.26720378e-09, 2.72048595e-09, 9.99999995e-01],
       [9.99999850e-01, 1.00000015e-07, 5.00000075e-08]])

In [173]:
import pickle

In [175]:
#Save model
filename='./model/svc_fitted.pkl'
with open(filename, 'wb') as file:
  pickle.dump(svc, file)
  

In [176]:
#Load model
with open(filename, 'rb') as file:
    model = pickle.load(file)


In [179]:
del svc
del model
