In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn. metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.svm import NuSVC
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
# Read the semicolon-delimited dataset.
df = pd.read_csv("df_m3_lower.csv", sep=";")

# Columns fed to the models. An earlier version of this list also contained the
# raw volume columns (total_volume, buy_volume, sell_volume); they are left out here.
features = [
    'buy_ratio', 'sell_ratio',
    'volume_mean', 'buy_mean', 'sell_mean',
    'volatility',
    'rsi2', 'rsi1', 'rsi0',
    'std2', 'std1', 'std0',
    'len',
]
# Target column to predict.
label = 'is_lower'

# Plain NumPy arrays: feature matrix and target vector.
X = df[features].values
y = df[label].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

print('Training cases: %d\nTest cases: %d' % (len(X_train), len(X_test)))

Training cases: 193
Test cases: 84


In [18]:
# Linear classifier trained with SGD on polynomially expanded, robust-scaled inputs.
# The step names are the lower-cased class names (what make_pipeline generates),
# so the grid keys below address the intended parameters.
model = Pipeline(steps=[
    ('polynomialfeatures', PolynomialFeatures()),
    ('robustscaler', RobustScaler()),
    ('sgdclassifier', SGDClassifier(random_state=0)),
])

# Search space: polynomial degree 1-4 crossed with L1 / L2 regularisation.
params = dict(
    polynomialfeatures__degree=[1, 2, 3, 4],
    sgdclassifier__penalty=['l1', 'l2'],
)

# 4-fold cross-validated grid search, fitted on the training split only.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4).fit(X_train, y_train)

# Display the winning parameter combination as the cell output.
grid.best_params_

{'polynomialfeatures__degree': 4, 'sgdclassifier__penalty': 'l2'}

In [19]:
# Score the fitted grid search (`grid`, left in the kernel by an earlier cell)
# on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print('Confusion Matrix:\n', cm, '\n')

# Headline metrics, printed in the order accuracy -> precision -> recall.
for caption, scorer in (
    ('Accuracy:', accuracy_score),
    ("Overall Precision:", precision_score),
    ("Overall Recall:", recall_score),
):
    print(caption, scorer(y_test, predictions))

# Per-class precision / recall / F1 breakdown.
print(classification_report(y_test, predictions))

Confusion Matrix:
 [[36  8]
 [22 18]] 

Accuracy: 0.6428571428571429
Overall Precision: 0.6923076923076923
Overall Recall: 0.45
              precision    recall  f1-score   support

         0.0       0.62      0.82      0.71        44
         1.0       0.69      0.45      0.55        40

    accuracy                           0.64        84
   macro avg       0.66      0.63      0.63        84
weighted avg       0.65      0.64      0.63        84



In [13]:
# Support-vector classifier (gamma='auto') on polynomially expanded, robust-scaled inputs.
# Step names are the lower-cased class names, matching what make_pipeline generates.
model = Pipeline(steps=[
    ('polynomialfeatures', PolynomialFeatures()),
    ('robustscaler', RobustScaler()),
    ('svc', SVC(gamma='auto')),
])

# Only the polynomial degree is tuned.
params = dict(polynomialfeatures__degree=[1, 2, 3, 4])

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4).fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print('Confusion Matrix:\n', cm, '\n')
for caption, scorer in (
    ('Accuracy:', accuracy_score),
    ("Overall Precision:", precision_score),
    ("Overall Recall:", recall_score),
):
    print(caption, scorer(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 1}
Confusion Matrix:
 [[30 14]
 [25 15]] 

Accuracy: 0.5357142857142857
Overall Precision: 0.5172413793103449
Overall Recall: 0.375
              precision    recall  f1-score   support

         0.0       0.55      0.68      0.61        44
         1.0       0.52      0.38      0.43        40

    accuracy                           0.54        84
   macro avg       0.53      0.53      0.52        84
weighted avg       0.53      0.54      0.52        84



In [6]:
# Nu-support-vector classifier (default settings) on polynomially expanded,
# robust-scaled inputs. Step names are the lower-cased class names, matching
# what make_pipeline generates.
model = Pipeline(steps=[
    ('polynomialfeatures', PolynomialFeatures()),
    ('robustscaler', RobustScaler()),
    ('nusvc', NuSVC()),
])

# Only the polynomial degree is tuned.
params = dict(polynomialfeatures__degree=[1, 2, 3, 4])

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4).fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print('Confusion Matrix:\n', cm, '\n')
for caption, scorer in (
    ('Accuracy:', accuracy_score),
    ("Overall Precision:", precision_score),
    ("Overall Recall:", recall_score),
):
    print(caption, scorer(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 3}
Confusion Matrix:
 [[25 19]
 [20 20]] 

Accuracy: 0.5357142857142857
Overall Precision: 0.5128205128205128
Overall Recall: 0.5
              precision    recall  f1-score   support

         0.0       0.56      0.57      0.56        44
         1.0       0.51      0.50      0.51        40

    accuracy                           0.54        84
   macro avg       0.53      0.53      0.53        84
weighted avg       0.54      0.54      0.54        84



In [7]:
# 10-nearest-neighbours classifier on polynomially expanded, robust-scaled inputs.
# Step names are the lower-cased class names, matching what make_pipeline generates.
model = Pipeline(steps=[
    ('polynomialfeatures', PolynomialFeatures()),
    ('robustscaler', RobustScaler()),
    ('kneighborsclassifier', KNeighborsClassifier(n_neighbors=10)),
])

# Only the polynomial degree is tuned.
params = dict(polynomialfeatures__degree=[1, 2, 3, 4])

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4).fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print('Confusion Matrix:\n', cm, '\n')
for caption, scorer in (
    ('Accuracy:', accuracy_score),
    ("Overall Precision:", precision_score),
    ("Overall Recall:", recall_score),
):
    print(caption, scorer(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 1}
Confusion Matrix:
 [[28 16]
 [28 12]] 

Accuracy: 0.47619047619047616
Overall Precision: 0.42857142857142855
Overall Recall: 0.3
              precision    recall  f1-score   support

         0.0       0.50      0.64      0.56        44
         1.0       0.43      0.30      0.35        40

    accuracy                           0.48        84
   macro avg       0.46      0.47      0.46        84
weighted avg       0.47      0.48      0.46        84



In [8]:
# Random forest (50 trees) on polynomially expanded, robust-scaled inputs.
# random_state is pinned so the bootstrap samples and feature sub-sampling are
# identical on every run; without it the selected degree and every metric below
# change on each Restart & Run All.
# NOTE: output captured from an earlier, unseeded run will not match until this
# cell is re-executed.
model = make_pipeline(PolynomialFeatures(),
                     RobustScaler(),
                     RandomForestClassifier(n_estimators=50, random_state=0))

# Only the polynomial degree is tuned.
params = {
    'polynomialfeatures__degree': [1,2,3,4],
}

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(model, param_grid=params, cv=4)

grid.fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print ('Confusion Matrix:\n',cm, '\n')
print('Accuracy:', accuracy_score(y_test, predictions))
print("Overall Precision:",precision_score(y_test, predictions))
print("Overall Recall:",recall_score(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 2}
Confusion Matrix:
 [[25 19]
 [18 22]] 

Accuracy: 0.5595238095238095
Overall Precision: 0.5365853658536586
Overall Recall: 0.55
              precision    recall  f1-score   support

         0.0       0.58      0.57      0.57        44
         1.0       0.54      0.55      0.54        40

    accuracy                           0.56        84
   macro avg       0.56      0.56      0.56        84
weighted avg       0.56      0.56      0.56        84



In [9]:
# Single decision tree (seeded) on polynomially expanded, robust-scaled inputs.
# Step names are the lower-cased class names, matching what make_pipeline generates.
model = Pipeline(steps=[
    ('polynomialfeatures', PolynomialFeatures()),
    ('robustscaler', RobustScaler()),
    ('decisiontreeclassifier', DecisionTreeClassifier(random_state=0)),
])

# Only the polynomial degree is tuned.
params = dict(polynomialfeatures__degree=[1, 2, 3, 4])

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4).fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print('Confusion Matrix:\n', cm, '\n')
for caption, scorer in (
    ('Accuracy:', accuracy_score),
    ("Overall Precision:", precision_score),
    ("Overall Recall:", recall_score),
):
    print(caption, scorer(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 2}
Confusion Matrix:
 [[22 22]
 [16 24]] 

Accuracy: 0.5476190476190477
Overall Precision: 0.5217391304347826
Overall Recall: 0.6
              precision    recall  f1-score   support

         0.0       0.58      0.50      0.54        44
         1.0       0.52      0.60      0.56        40

    accuracy                           0.55        84
   macro avg       0.55      0.55      0.55        84
weighted avg       0.55      0.55      0.55        84



In [10]:
# SGD-trained linear classifier with hinge loss and L2 penalty, capped at 100
# epochs, on polynomially expanded, robust-scaled inputs.
# SGDClassifier shuffles the training data between epochs, so random_state is
# pinned to make the fit repeatable (consistent with the other seeded
# SGDClassifier in this notebook).
# NOTE: output captured from an earlier, unseeded run will not match until this
# cell is re-executed.
model = make_pipeline(PolynomialFeatures(),
                     RobustScaler(),
                     SGDClassifier(loss="hinge", penalty="l2", max_iter=100, random_state=0))

# Only the polynomial degree is tuned.
params = {
    'polynomialfeatures__degree': [1,2,3,4],
}

# 4-fold cross-validated search on the training split.
grid = GridSearchCV(model, param_grid=params, cv=4)

grid.fit(X_train, y_train)
print(grid.best_params_)

# Evaluate the selected model on the held-out test split.
predictions = grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)

print ('Confusion Matrix:\n',cm, '\n')
print('Accuracy:', accuracy_score(y_test, predictions))
print("Overall Precision:",precision_score(y_test, predictions))
print("Overall Recall:",recall_score(y_test, predictions))
print(classification_report(y_test, predictions))

{'polynomialfeatures__degree': 1}
Confusion Matrix:
 [[ 7 37]
 [10 30]] 

Accuracy: 0.44047619047619047
Overall Precision: 0.44776119402985076
Overall Recall: 0.75
              precision    recall  f1-score   support

         0.0       0.41      0.16      0.23        44
         1.0       0.45      0.75      0.56        40

    accuracy                           0.44        84
   macro avg       0.43      0.45      0.40        84
weighted avg       0.43      0.44      0.39        84

