In [117]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score

import plotly.express as px

In [118]:
df = pd.read_csv('C:/Users/BTK/Desktop/Eric/Git/backtest_model/database/STRAT_POLENTOVISK_WIN.csv')

In [119]:
df.set_index('date', drop=True, inplace=True)

In [120]:
y = df['Dummy'].shift(1).fillna(0)
X = df.drop(['Dummy'], axis=1)

In [121]:
X['Tipo'] = np.where(X['Tipo'] == 'buy', 1, 0)

In [122]:
# scaler = StandardScaler(with_mean=True, with_std=True)
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(X)
X_tr = scaler.transform(X)

In [123]:
X_train, X_test, y_train, y_test = train_test_split(X_tr, y, test_size=1/3, stratify=y, random_state=123)

In [124]:
model = SVC()
model.fit(X_train, y_train)
predict = model.predict(X_test)

print(accuracy_score(predict, y_test))
print(precision_score(predict, y_test))

0.5804195804195804
0.8795180722891566


In [125]:
px.violin(predict - y_test, points='all')

In [126]:
model_tree = DecisionTreeClassifier()
model_tree.fit(X_train, y_train)
predict = model_tree.predict(X_test)

In [127]:
importances = model_tree.feature_importances_
selection = np.where(importances < 0.04)
names = [col for col in X.columns]

features_to_drop = []

for i in selection[0]:
    features_to_drop.append(names[i])

In [128]:
features_to_drop

['Tipo',
 'Volume',
 'Price',
 'SL',
 'TP',
 'Open',
 'High',
 'Low',
 'Close',
 'Tick',
 'Scaled Returns',
 'Proba Returns',
 'Candle Size',
 'MM2']

In [129]:
print(names)
px.bar(importances)

['Tipo', 'Volume', 'Price', 'SL', 'TP', 'Saldo', 'Price Vol', 'Scaled Price Return', 'Retorno', 'Open', 'High', 'Low', 'Close', 'Tick', 'Vol', 'Returns', 'Scaled Returns', 'Proba Returns', 'Candle Size', 'MM20', 'MM10', 'MM5', 'MM2']


In [130]:
X_refat = X.drop(features_to_drop, axis=1)

In [131]:
scaler.fit(X_refat)
X_tr = scaler.transform(X_refat)
X_train, X_test, y_train, y_test = train_test_split(X_tr, y, test_size=1/3, stratify=y, random_state=123)
model.fit(X_train, y_train)
predict = model.predict(X_test)
print(accuracy_score(predict, y_test))
print(precision_score(predict, y_test))

0.5174825174825175
0.7349397590361446


In [132]:
px.violin(predict - y_test, points='all')