In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import joblib as jb
from binance.client import Client

In [69]:
#Import and split data 
data = pd.read_csv('1HVHLV.csv', sep = ',')
X = data.drop('Final', axis = 1)
y = data['Final']
X

Unnamed: 0,V1,HLV1,V2,HLV2,V3,HLV3,V4,HLV4,V5,HLV5,...,V8,HLV8,V9,HLV9,V10,HLV10,V11,HLV11,V12,HLV12
0,110.410765,254.107649,12.247645,36.167997,-7.344125,23.995845,-4.982206,12.432432,-0.578822,7.532107,...,1.374443,8.730159,9.747160,23.299126,1.368948,18.132507,2.141680,13.623693,17.903226,27.402597
1,12.247645,36.167997,-7.344125,23.995845,-4.982206,12.432432,-0.578822,7.532107,-17.294521,22.254576,...,9.747160,23.299126,1.368948,18.132507,2.141680,13.623693,17.903226,27.402597,-5.170999,12.860113
2,-7.344125,23.995845,-4.982206,12.432432,-0.578822,7.532107,-17.294521,22.254576,11.516156,18.487744,...,1.368948,18.132507,2.141680,13.623693,17.903226,27.402597,-5.170999,12.860113,-5.968858,14.815990
3,-4.982206,12.432432,-0.578822,7.532107,-17.294521,22.254576,11.516156,18.487744,1.374443,8.730159,...,2.141680,13.623693,17.903226,27.402597,-5.170999,12.860113,-5.968858,14.815990,5.427783,16.016758
4,-0.578822,7.532107,-17.294521,22.254576,11.516156,18.487744,1.374443,8.730159,9.747160,23.299126,...,17.903226,27.402597,-5.170999,12.860113,-5.968858,14.815990,5.427783,16.016758,7.068063,12.031849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3398,0.000000,0.709220,-0.985915,1.139601,0.426743,0.712251,-0.425532,0.712251,-0.142248,0.427350,...,-0.282486,0.567376,0.424929,1.136364,2.401130,4.943503,1.103448,3.064067,0.136426,1.655172
3399,-0.985915,1.139601,0.426743,0.712251,-0.425532,0.712251,-0.142248,0.427350,0.711238,1.424501,...,0.424929,1.136364,2.401130,4.943503,1.103448,3.064067,0.136426,1.655172,-1.089918,1.241379
3400,0.426743,0.712251,-0.425532,0.712251,-0.142248,0.427350,0.711238,1.424501,0.000000,0.424328,...,2.401130,4.943503,1.103448,3.064067,0.136426,1.655172,-1.089918,1.241379,-0.826446,1.536313
3401,-0.425532,0.712251,-0.142248,0.427350,0.711238,1.424501,0.000000,0.424328,-0.282486,0.567376,...,1.103448,3.064067,0.136426,1.655172,-1.089918,1.241379,-0.826446,1.536313,0.416089,0.835655


In [56]:
#Prepare data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
X_train

array([[ 0.21033721, -0.15509184, -0.18234929, ..., -0.25536786,
         0.17870814, -0.41698117],
       [ 0.16478207, -0.37732318, -0.22409254, ..., -0.60821415,
        -0.07606494, -0.51320562],
       [-0.78715643,  0.23625314, -0.24402962, ..., -0.12571266,
        -0.6224693 ,  0.47631914],
       ...,
       [-0.6712532 ,  0.12824024, -0.69143551, ..., -0.4444605 ,
         0.31925609, -0.43657134],
       [ 0.23147279, -0.2536386 , -0.31077069, ..., -0.45396477,
        -0.03046471, -0.4433493 ],
       [-0.03424825, -0.39530237, -0.13768985, ..., -0.51168126,
        -0.18443735, -0.54290858]])

In [13]:
mlpc = MLPClassifier(hidden_layer_sizes = (12,12), max_iter = 2000)
param_grid = { 'activation' : ['identity', 'logistic', 'tanh', 'relu'],
                'solver': ['lbfgs', 'sgd', 'adam'],
                'learning_rate' : ['constant', 'invscaling', 'adaptive']}
    
grid = GridSearchCV(mlpc, param_grid, cv = 10, verbose = 3,n_jobs=-1)
grid.fit(X_train,y_train)
print(grid.best_params_)
grid_predictions = grid.predict(X_test) 
print(classification_report(y_test, grid_predictions, zero_division = 0)) 
print(accuracy_score(y_test, grid_predictions))

              precision    recall  f1-score   support

       False       0.98      0.90      0.94       446
        True       0.90      0.98      0.94       403

    accuracy                           0.94       849
   macro avg       0.94      0.94      0.94       849
weighted avg       0.94      0.94      0.94       849

0.9375736160188457


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15)
clf = svm.SVC(C = 1, gamma = 'auto', kernel = 'sigmoid')
# param_grid2 = {'C': [0.1, 1, 10, 100],  
#               'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
#               'gamma':['scale', 'auto'],
#               'kernel': ['linear','sigmoid','rbf','poly']}     
# grid2 = GridSearchCV(clf, param_grid2, verbose = 3,n_jobs=-1)
# grid2.fit(X_train,y_train)
clf.fit(X_train,y_train)
# print(grid2.best_params_)
# grid_predictions2 = grid2.predict(X_test) 
clf_predictions = clf.predict(X_test)
print(classification_report(y_test, clf_predictions, zero_division = 0)) 
print(accuracy_score(y_test, clf_predictions))
# print(classification_report(y_test, grid_predictions2)) 
# print(accuracy_score(y_test, grid_predictions2))

              precision    recall  f1-score   support

       False       0.55      1.00      0.71       464
        True       0.00      0.00      0.00       385

    accuracy                           0.55       849
   macro avg       0.27      0.50      0.35       849
weighted avg       0.30      0.55      0.39       849

0.5465253239104829


In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15)
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)
ai = BernoulliNB(alpha=1.0, binarize =0.0, class_prior = None, fit_prior = True)
ai.fit(X_train,y_train)
pred_ai = ai.predict(X_test) 
print(classification_report(y_test, pred_ai, zero_division = 0))
print(confusion_matrix(y_test, pred_ai))
score = accuracy_score(y_test, pred_ai) 
score

              precision    recall  f1-score   support

       False       0.56      1.00      0.72       477
        True       0.00      0.00      0.00       372

    accuracy                           0.56       849
   macro avg       0.28      0.50      0.36       849
weighted avg       0.32      0.56      0.40       849

[[477   0]
 [372   0]]


0.5618374558303887

In [64]:
bestscore = 0
score = 0
#for i in range(15):
ai = MLPClassifier(hidden_layer_sizes = (250,250,250,250,250,250), max_iter = 2500, activation = 'tanh', solver = 'adam', learning_rate = 'constant', alpha = 0.0001)
ai.fit(X_train,y_train)
pred_ai = ai.predict(X_test)   
score = accuracy_score(y_test, pred_ai) 
# pred_ai2 = ai.predict(X2_test)
# score2 = accuracy_score(y2_test, pred_ai2)
#     #print(f'Score #{i}: {score}')
#     #if bestscore < score:       
#       #  bestscore = score
#       #  best = ai
#        # print(f'New best score: {bestscore}!')
print(classification_report(y_test, pred_ai, zero_division = 0)) 
print(score)
# print()
# print(classification_report(y2_test, pred_ai2, zero_division = 0))
# score2
      

Fitting 10 folds for each of 36 candidates, totalling 360 fits
{'activation': 'identity', 'learning_rate': 'adaptive', 'solver': 'sgd'}
              precision    recall  f1-score   support

       False       0.57      0.32      0.41       255
        True       0.53      0.76      0.62       256

    accuracy                           0.54       511
   macro avg       0.55      0.54      0.52       511
weighted avg       0.55      0.54      0.52       511

0.5401174168297456


In [25]:
MLPClassifier(hidden_layer_sizes = (200,200,200), max_iter = 2500, activation = 'tanh', solver = 'adam', learning_rate = 'constant', alpha = 0.0001)
svm.SVC(C = 1, gamma = 'auto', kernel = 'rbf')
Pipeline(steps=[('linearsvc',LinearSVC(C=5.0,class_weight = None, dual = False,fit_intercept = True,intercept_scaling = 1, loss ='squared_hinge',
                 max_iter = 1000, multi_class = 'ovr', penalty = 'l2', verbose = 0))])
BernoulliNB(alpha=1.0, binarize =0.0, class_prior = None, fit_prior = True)
KNeighborsClassifier(algorithm = 'auto', leaf_size = 30, metric = 'minkowski', metric_params = None, n_jobs = 1, n_neighbors = 5, p=2, weights = 'uniform')

KNeighborsClassifier(n_jobs=1)

In [67]:
best_ai = grid
pred_bmlpc = best_ai.predict(X_test)
print(classification_report(y_test, pred_bmlpc))
print(confusion_matrix(y_test, pred_bmlpc))
score = accuracy_score(y_test, pred_bmlpc)
score
#jb.dump(best_mlpc,'btc.joblib')

              precision    recall  f1-score   support

       False       0.57      0.32      0.41       255
        True       0.53      0.76      0.62       256

    accuracy                           0.54       511
   macro avg       0.55      0.54      0.52       511
weighted avg       0.55      0.54      0.52       511

[[ 81 174]
 [ 61 195]]


0.5401174168297456

In [70]:
jb.dump(best_ai,'PricePredictorAI.joblib')

['PricePredictorAI.joblib']

In [None]:
jb.dump(bsc,'svcscaler.bin')