# Bibliotecas

In [1]:
from sklearn.metrics import r2_score, mean_squared_error
from obterDados import obterSimbolo

# Obtendo dados

In [2]:
# Load the raw bars for the 'WDO$' symbol through the project helper obterSimbolo.
# NOTE(review): the variable name and the 5-minute spacing of the displayed index
# suggest M5 candles - confirm against obterDados.
rawDataM5 = obterSimbolo('WDO$')
# Bare last expression: render the frame as the cell output.
rawDataM5

Unnamed: 0_level_0,open,high,low,close,tick_volume,spread,real_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-11-19 13:15:00,5031.071,5031.667,5028.087,5029.877,2003,1,7607
2019-11-19 13:20:00,5029.877,5031.667,5029.877,5030.474,976,1,3893
2019-11-19 13:25:00,5030.474,5033.458,5029.877,5031.071,2472,1,9451
2019-11-19 13:30:00,5031.071,5031.667,5026.296,5026.893,2866,1,11183
2019-11-19 13:35:00,5027.490,5029.877,5015.554,5020.925,9905,1,45587
...,...,...,...,...,...,...,...
2023-07-21 17:35:00,4785.500,4786.000,4785.000,4785.500,483,0,4170
2023-07-21 17:40:00,4786.000,4786.500,4785.000,4786.000,598,0,5252
2023-07-21 17:45:00,4786.000,4787.500,4784.000,4787.500,913,0,10318
2023-07-21 17:50:00,4787.000,4789.500,4786.500,4788.000,867,0,9567


# Tratamento de Dados

### Gerando colunas com preços passados e futuros

In [None]:
import pandas as pd

# Base columns that get lagged (past) and led (future) copies.
colunas0 = ['open', 'high', 'low', 'close', 'tick_volume', 'real_volume']

# drop() already returns a new frame, so rawDataM5 is left untouched.
histBase = rawDataM5.drop(columns=['spread'])

# Past values: column '<name><i>' holds the value observed i bars earlier (i = 1..999).
# The Series are collected first and concatenated once below; inserting thousands of
# columns one at a time fragments the DataFrame and is very slow.
passadas = [
    histBase[coluna].shift(i).rename(coluna + str(i))
    for i in range(1, 1000)
    for coluna in colunas0
]

# Future values: column '<name>-<i>' holds the value observed i bars later (i = 1..24).
# Their names are recorded in colunasY so they can be excluded from the model inputs.
colunasY = []
futuras = []
for i in range(1, 25):
    for coluna in colunas0:
        nomeCol = f'{coluna}-{i}'
        futuras.append(histBase[coluna].shift(-i).rename(nomeCol))
        colunasY.append(nomeCol)

# Single concat keeps the original column order (base, past, future); dropna removes the
# rows at both ends that lack a complete past or future window.
hist = pd.concat([histBase, *passadas, *futuras], axis=1).dropna()
hist

# Modelo de Classificação

### Achar quando comprar, vender ou fazer nada

In [None]:
# Label each bar with the first target reached within the next 24 bars:
#   0 = neither target reached (or bar skipped because of its hour)
#   1 = a future high reaches close + ganhoEmPontos first
#   2 = a future low reaches close - ganhoEmPontos first
# Within the same future bar the high is tested before the low.
hist['action'] = 0
hist['hora'] = hist.index.hour
ganhoEmPontos = 30
horizonte = range(1, 25)

fechamentos = hist['close'].to_numpy()
horas = hist['hora'].to_numpy()
# Extract the future highs/lows once as (rows x 24) arrays; converting a column with
# to_numpy() inside the inner loop repeated the conversion for every row and step.
maximasFuturas = hist[[f'high-{j}' for j in horizonte]].to_numpy()
minimasFuturas = hist[[f'low-{j}' for j in horizonte]].to_numpy()

actions = [0] * len(fechamentos)
for i, fechamento in enumerate(fechamentos):
    # Bars whose hour is greater than 14 keep action 0.
    if horas[i] > 14:
        continue
    for maxima, minima in zip(maximasFuturas[i], minimasFuturas[i]):
        if maxima >= fechamento + ganhoEmPontos:
            actions[i] = 1
            break
        if minima <= fechamento - ganhoEmPontos:
            actions[i] = 2
            break
hist['action'] = actions

# Guarded so that re-running this cell does not append 'action' twice.
if 'action' not in colunasY:
    colunasY.append('action')
colunasY

### Colunas para normalização

In [None]:
# Column groups that are normalised together: the four price columns (first four entries
# of colunas0) and each volume column, every one followed by its lagged copies 1..999.
colunasPrecos = list(colunas0[:4]) + [
    nome + str(atraso)
    for atraso in range(1, 1000)
    for nome in colunas0[:4]
]
colunasTickV = ['tick_volume'] + ['tick_volume' + str(atraso) for atraso in range(1, 1000)]
colunasRealV = ['real_volume'] + ['real_volume' + str(atraso) for atraso in range(1, 1000)]
colunasPrecos

### Normalização

In [None]:
# Work on a copy so that writing normalised columns into histNorm leaves hist unchanged.
histNorm = hist.copy()

In [7]:
# Row-wise min-max scaling of the price columns: each row is rescaled using the minimum
# and maximum found across that row's own price columns.
precos = hist[colunasPrecos]
minVs = precos.min(axis=1)
maxVs = precos.max(axis=1)
amplitude = maxVs - minVs
histNormPart = precos.sub(minVs, axis='index').div(amplitude, axis='index')
# Write the scaled columns over the matching columns of histNorm.
for nome, serie in histNormPart.items():
    histNorm[nome] = serie

In [None]:
# Row-wise min-max scaling of the tick-volume columns: each row is rescaled using the
# minimum and maximum found across that row's own tick-volume columns.
volumesTick = hist[colunasTickV]
minVs = volumesTick.min(axis=1)
maxVs = volumesTick.max(axis=1)
amplitude = maxVs - minVs
histNormPart = volumesTick.sub(minVs, axis='index').div(amplitude, axis='index')
# Write the scaled columns over the matching columns of histNorm.
for nome, serie in histNormPart.items():
    histNorm[nome] = serie

In [None]:
# Row-wise min-max scaling of the real-volume columns: each row is rescaled using the
# minimum and maximum found across that row's own real-volume columns.
volumesReais = hist[colunasRealV]
minVs = volumesReais.min(axis=1)
maxVs = volumesReais.max(axis=1)
amplitude = maxVs - minVs
histNormPart = volumesReais.sub(minVs, axis='index').div(amplitude, axis='index')
# Write the scaled columns over the matching columns of histNorm.
for nome, serie in histNormPart.items():
    histNorm[nome] = serie

### Separar dados de treino e teste

In [None]:
# Features come from histNorm (the normalised copy built above); the original read the
# un-normalised `hist`, which left the whole normalisation step unused.
# Everything listed in colunasY (future values and the label) is removed from the inputs.
# The pandas objects are kept instead of `.values`: `.iloc` does not exist on NumPy
# arrays, and the class-weight cell calls Series methods on y_train_class.
xClass = histNorm.drop(columns=colunasY)
yClass = histNorm['action']

# Number of trailing rows held out for testing.
# NOTE(review): 108 * 200 presumably means 108 bars per session x 200 sessions - confirm.
splitDays = 108 * 200

# Chronological split: no shuffling, the most recent rows form the test set.
x_train_class = xClass.iloc[:-splitDays]
x_test_class = xClass.iloc[-splitDays:]
y_train_class = yClass.iloc[:-splitDays]
y_test_class = yClass.iloc[-splitDays:]

### Lazy Classifier

In [None]:
# Disabled experiment, kept for reference: fit lazypredict's LazyClassifier on the
# train/test split. NOTE(review): presumably used to shortlist candidate models - confirm.
# from lazypredict.Supervised import LazyClassifier

# lazyReg = LazyClassifier()
# lazyReg.fit(x_train_class, x_test_class, y_train_class, y_test_class)

### Gerando pesos das classes

In [None]:
import numpy as np

# Occurrences of each class label (0, 1, 2) in the training labels, counted once.
# reindex guarantees an entry for every label even when a class never occurs.
contagens = y_train_class.value_counts().reindex(range(3), fill_value=0)
for i in range(3):
    print(contagens[i])
maximaOcorrencia = contagens.max()

# Per-class weight: maximaOcorrencia / (number of samples * occurrences of the class).
# Classes with no samples are skipped, which avoids a division by zero.
pesosPorClasse = {
    classe: maximaOcorrencia / (len(y_train_class) * ocorrencias)
    for classe, ocorrencias in contagens.items()
    if ocorrencias > 0
}

# Map every label to its weight. The original wrote floats into a copy of the integer
# label Series while also using that Series as the selection mask.
pesos = y_train_class.map(pesosPorClasse)
pesos

### Criando um dos modelos encontrados

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

# Fixed random_state so weight initialisation and batch shuffling are reproducible
# across a "Restart & Run All"; verbose=True prints the training progress.
modelo = MLPClassifier(verbose=True, random_state=42)
# Alternative kept for reference: AdaBoost trained with the per-sample class weights.
# modelo = AdaBoostClassifier()
# modelo.fit(x_train_class, y_train_class, sample_weight=pesos)
modelo.fit(x_train_class, y_train_class)

### Salvar Modelo de Classificação

In [None]:
import pickle

# Persist the trained classifier. The context manager closes the file handle even if
# pickling fails; the original passed a bare open() that was never closed.
with open('modeloClassificadorMLP.pickle', 'wb') as arquivoModelo:
    pickle.dump(modelo, arquivoModelo)
# pickle.dump(modelo, open('modeloClassificadorAdaBoost.pickle', 'wb'))

# Pontuação de treino e teste

In [None]:
from sklearn.metrics import classification_report
from pickle import load

# Reload the classifier written by the save cell. The original read
# 'modeloClassificadorAdaBoost.pickle', a file this notebook no longer writes, so a fresh
# run either failed or scored a stale model.
# SECURITY: pickle can execute arbitrary code on load - only open files you created.
with open('modeloClassificadorMLP.pickle', 'rb') as arquivoModelo:
    modelo = load(arquivoModelo)

# Per-class precision / recall / F1 on the training and the held-out data.
print('Treino: ', classification_report(y_train_class, modelo.predict(x_train_class)))
print('Teste: ', classification_report(y_test_class, modelo.predict(x_test_class)))