# Bibliotecas

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
from obterDados import obterSimbolo
import numpy as np
from tqdm import tqdm

# Obtendo dados

In [None]:
# Fetch the price history for the 'WDO$' symbol through the project helper
# `obterSimbolo`.
# NOTE(review): later cells use `.drop(columns=...)` and `.index.hour`, so the
# result is presumably a pandas DataFrame with a datetime index - confirm
# against `obterDados`.
rawDataM5 = obterSimbolo('WDO$')
# Bare expression so the notebook renders the fetched data.
rawDataM5

In [None]:
# Discard the 'spread' column and append the bar's hour (taken from the index)
# as the last column, then expose the table as a plain NumPy matrix.
rawDataM5 = (
    rawDataM5
    .drop(columns=['spread'])
    .assign(hour=lambda quadro: quadro.index.hour)
)
rawNP = rawDataM5.to_numpy()
# Bare expression so the notebook renders the resulting array.
rawNP

# Tratamento de Dados

### Gerando colunas com preços passados

In [None]:
# Build one look-back window per sample.
#   nPassado: number of past rows stored in each window.
#   nFuturo:  number of rows after the window reserved for labelling.
nPassado = 1000
nFuturo = 24

# Only rows that still have nPassado + nFuturo rows available can start a window.
nJanelas = len(rawNP) - (nPassado + nFuturo)
if nJanelas < 0:
    raise ValueError(
        f'rawNP has {len(rawNP)} rows; at least {nPassado + nFuturo} are required'
    )

# Take the feature count from the data instead of hard-coding it, so the
# allocation always matches the width of rawNP.
histNp = np.zeros((nJanelas, nPassado, rawNP.shape[1]))
for i in tqdm(range(nJanelas)):
    # Window i holds rows i .. i + nPassado - 1 of the raw matrix.
    histNp[i] = rawNP[i:i+nPassado]

### Achar quando comprar, vender ou fazer nada

In [None]:
# One-hot target per window: column 0 = do nothing, 1 = buy, 2 = sell.
# `pontos` is the price move (relative to the last value of column 3 in the
# window) that must be reached within the next nFuturo rows.
pontos = 30
y = np.zeros((len(histNp), 3))
y[:, 0] = 1  # every sample starts as "do nothing"
for idx in tqdm(range(len(y))):
    precoBase = histNp[idx, -1, 3]
    alvoCompra = precoBase + pontos
    alvoVenda = precoBase - pontos
    inicio = nPassado + idx
    for passo in range(nFuturo):
        barra = rawNP[inicio + passo]
        # do nothing once the hour (last column) is past 14
        if barra[-1] > 14:
            break
        # buy: column 1 reached the upper target first
        if barra[1] >= alvoCompra:
            y[idx] = (0, 1, 0)
            break
        # sell: column 2 reached the lower target first
        if barra[2] <= alvoVenda:
            y[idx] = (0, 0, 1)
            break

### Normalização

In [None]:
# Min-max normalise every window independently.
# Column groups that share one min/max per window:
#   0-3 -> prices, 4 -> tick volume, 5 -> real volume.
# Column 6 (hour) is simply scaled by 24.
histNpNorm = np.zeros(histNp.shape)
gruposDeColunas = (slice(0, 4), slice(4, 5), slice(5, 6))
for i in tqdm(range(len(histNpNorm))):
    for colunas in gruposDeColunas:
        janela = histNp[i, :, colunas]
        vmin = janela.min()
        amplitude = janela.max() - vmin
        # A constant group (max == min) would divide by zero and fill the
        # features with NaN; keep the pre-initialised zeros in that case.
        if amplitude != 0:
            histNpNorm[i, :, colunas] = (janela - vmin) / amplitude
    # hour
    histNpNorm[i, :, 6] = histNp[i, :, 6] / 24


### Separar dados de treino e teste

In [None]:
# Hold out the most recent `splitDays` samples for testing; everything before
# them is used for training. Each window is flattened into one feature vector.
splitDays = 108 * 200
nAmostras, nPassos, nAtributos = histNpNorm.shape
x = histNpNorm.reshape((nAmostras, nPassos * nAtributos))
corte = -splitDays  # negative index: count the test block from the end
x_train, x_test = x[:corte], x[corte:]
y_train, y_test = y[:corte], y[corte:]

# Criar Modelo

### Lazy Classifier

In [None]:
# from lazypredict.Supervised import LazyClassifier

# lazyReg = LazyClassifier()
# lazyReg.fit(x_train, x_test, y_train, y_test)

### Gerando pesos das classes

In [None]:
# import numpy as np

# pesos = y_train.copy()
# maximaOcorrencia = -1
# for i in range(3):
#     print(y_train[y_train == i].count())
#     if y_train[y_train == i].count() > maximaOcorrencia:
#         maximaOcorrencia = y_train[y_train == i].count()
# for i in range(3):
#     pesos[pesos == i] = maximaOcorrencia / (len(pesos) * y_train[y_train == i].count())
# pesos

### Criando um dos modelos encontrados

In [11]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import pickle

# modelo = MLPClassifier(verbose=True)
# modelo = AdaBoostClassifier()
# modelo = XGBClassifier()
# modelo.fit(x_train_class, y_train_class, sample_weight=pesos)
# modelo.fit(x_train, y_train)
# NOTE(review): every line that creates/fits `modelo` is commented out, so on a
# fresh kernel this cell raises NameError - re-enable one model before saving.
# NOTE(review): the evaluation cell loads 'models/modeloClassificadorAdaBoost.pickle',
# not the file written here - confirm which model is meant to be evaluated.
# Use a context manager so the file is flushed and closed after dumping.
with open('models/modeloClassificadorMLP.pickle', 'wb') as arquivoModelo:
    pickle.dump(modelo, arquivoModelo)
# pickle.dump(modelo, open('modeloClassificadorAdaBoost.pickle', 'wb'))

# Pontuação de treino e teste

In [None]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from pickle import load

# Load the persisted classifier; the context manager closes the file handle,
# which the previous `load(open(...))` form left open.
# NOTE: unpickling executes arbitrary code - only load files you created.
with open('models/modeloClassificadorAdaBoost.pickle', 'rb') as arquivoModelo:
    modelo = load(arquivoModelo)
# Per-class precision/recall/F1 on the training and the held-out samples.
print('Treino: ', classification_report(y_train, modelo.predict(x_train)))
print('Teste: ', classification_report(y_test, modelo.predict(x_test)))

In [None]:
# y_train is a one-hot (n, 3) array, which scikit-learn's confusion matrix
# rejects as "multilabel-indicator". Convert targets and predictions to class
# indices (0 = do nothing, 1 = buy, 2 = sell) before plotting.
rotulosTreino = y_train.argmax(axis=1)
previstoTreino = np.asarray(modelo.predict(x_train))
if previstoTreino.ndim == 2:
    # A predicted row with no active column falls back to index 0 ("do nothing").
    previstoTreino = previstoTreino.argmax(axis=1)
ConfusionMatrixDisplay.from_predictions(rotulosTreino, previstoTreino)

In [None]:
# y_test is a one-hot (n, 3) array, which scikit-learn's confusion matrix
# rejects as "multilabel-indicator". Convert targets and predictions to class
# indices (0 = do nothing, 1 = buy, 2 = sell) before plotting.
rotulosTeste = y_test.argmax(axis=1)
previstoTeste = np.asarray(modelo.predict(x_test))
if previstoTeste.ndim == 2:
    # A predicted row with no active column falls back to index 0 ("do nothing").
    previstoTeste = previstoTeste.argmax(axis=1)
ConfusionMatrixDisplay.from_predictions(rotulosTeste, previstoTeste)