# Bibliotecas

In [None]:
from obterDados import obterSimboloPosicao, obterSimboloData
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

# Obtendo dados

In [None]:
# Fetch the raw candle data for the 'WDO$N' symbol through the project helper.
# NOTE(review): presumably a pandas DataFrame indexed by candle timestamp
# (later cells use `.index.minute` and `.to_numpy()`) - confirm in obterDados.
rawDataM5 = obterSimboloPosicao('WDO$N')
# Bare expression: shown as the cell output in the notebook.
rawDataM5

In [None]:
# Preview only: the frame without its last 252*108 rows. The result is not
# assigned, so rawDataM5 itself is left untouched.
# NOTE(review): 252*108 presumably means 252 sessions x 108 M5 candles - confirm.
rawDataM5[:-252*108]

In [None]:
# Copy the calendar fields of the index into ordinary columns. Each column
# carries the same name as the index attribute it is read from, and they are
# appended in this order: minute, hour, day_of_week, day.
for campo in ('minute', 'hour', 'day_of_week', 'day'):
    rawDataM5[campo] = getattr(rawDataM5.index, campo)

# Plain NumPy view of the whole frame; the four calendar columns come last.
rawDataM5NP = rawDataM5.to_numpy()
# Disabled variant that drops the last 252*108 rows:
# rawNP = rawDataM5[:-252*108].to_numpy()
rawDataM5NP

In [None]:
# np.save(open('data/raw/rawDataM5.npy', 'wb'), rawNP)

# Tratamento de Dados

### Gerando colunas com preços passados

In [None]:
# Window sizes, in candles: how many past candles feed one sample and how
# many future candles are inspected when labelling it.
nPassado = 300
nFuturo = 24

# One sample per start position that still leaves nPassado past candles and
# nFuturo future candles available.
totalJanelas = len(rawDataM5NP) - (nPassado + nFuturo)

# Layout of one sample, shape (nPassado + 1, 6):
#   row 0   -> last four columns hold the calendar fields of the newest candle
#   rows 1+ -> first six columns of the nPassado candles, oldest first
histM5NP = np.zeros((totalJanelas, nPassado + 1, 6))
for i in tqdm(range(totalJanelas)):
    janela = rawDataM5NP[i:i + nPassado]
    histM5NP[i, 0, -4:] = janela[-1, -4:]
    histM5NP[i, 1:] = janela[:, :6]

### Achar quando comprar, vender ou fazer nada

In [None]:
# Price distance that must be reached for a sample to be labelled.
pontos = 30

# Label per sample: 0 = do nothing, 1 = buy, 2 = sell.
y = np.zeros((len(histM5NP)))
for i in tqdm(range(len(y))):
    # Column 3 of the newest candle in the window is the reference price.
    ultimoPreco = histM5NP[i, -1, 3]

    # The nFuturo candles that follow the window.
    # (A filter that skipped samples after 14h used to live here; it is disabled.)
    futuro = rawDataM5NP[nPassado + i:nPassado + i + nFuturo]

    # Column 1 reaching +pontos signals a buy, column 2 reaching -pontos a sell.
    subiu = futuro[:, 1] >= ultimoPreco + pontos
    caiu = futuro[:, 2] <= ultimoPreco - pontos

    # Only the first candle that triggers either rule counts; when the same
    # candle triggers both, the buy rule wins.
    disparou = subiu | caiu
    if disparou.any():
        primeiro = disparou.argmax()
        y[i] = 1 if subiu[primeiro] else 2

# Obtendo histórico D1

In [None]:
# Each sample is the M5 block (histM5NP) followed by 200 extra rows holding
# the daily (D1) history returned by obterSimboloData.
histNP = np.zeros((histM5NP.shape[0], histM5NP.shape[1] + 200, histM5NP.shape[2]))

for i in tqdm(range(len(histNP))):
    # Anchor the D1 lookup on the newest candle of the sample's M5 window
    # (raw row i + nPassado - 1). The financial test anchors it the same way
    # (`histM5.index[-1]`); the previous `i + nFuturo` anchor pointed at a
    # candle near the start of the window, so training and inference did not
    # see the same daily history.
    ultimoCandle = rawDataM5.index[i + nPassado - 1]
    rawDataD1 = obterSimboloData(ultimoCandle)
    rawDataD1NP = rawDataD1.to_numpy()
    # NOTE(review): the assignment below requires obterSimboloData to return
    # exactly 200 rows x histM5NP.shape[2] columns - confirm in obterDados.
    histNP[i, :histM5NP.shape[1]] = histM5NP[i]
    histNP[i, histM5NP.shape[1]:] = rawDataD1NP
    

### Normalização

In [None]:
def _escalarMinMax(bloco):
    """Scale `bloco` to [0, 1] using its own overall minimum and maximum.

    NOTE(review): a constant block makes the denominator zero and yields NaN.
    """
    menor = bloco.min()
    maior = bloco.max()
    return (bloco - menor) / (maior - menor)


histNPNorm = np.zeros(histNP.shape, dtype=np.float64)

# Row groups scaled independently: the M5 candles (rows 1..nPassado) and the
# rows appended after them (D1 history).
gruposLinhas = (slice(1, nPassado + 1), slice(nPassado + 1, None))
# Column groups scaled independently: prices (columns 0-3 share one scale),
# tick volume (column 4) and real volume (column 5).
gruposColunas = (slice(None, 4), 4, 5)

for i in tqdm(range(len(histNPNorm))):
    for linhas in gruposLinhas:
        for colunas in gruposColunas:
            histNPNorm[i, linhas, colunas] = _escalarMinMax(histNP[i, linhas, colunas])

# Row 0 holds the calendar fields; each is divided by a fixed constant:
# minute / 60, hour / 24, day of week / 4, day of month / 31.
for coluna, divisor in ((2, 60), (3, 24), (4, 4), (5, 31)):
    histNPNorm[:, 0, coluna] = histNP[:, 0, coluna] / divisor


### Sem Reshape para 2d no caso para CNNs

In [None]:
# Use the normalised samples as model input as-is (no reshape); `x` is another
# name for the same array, not a copy.
x = histNPNorm

### Balanceamento de Classes

In [None]:
# Distinct label values present in y and how many samples carry each one.
unicos, contagem = np.unique(y, return_counts=True)
print(contagem)
# Pie chart of the class distribution, one slice per label value.
plt.pie(contagem, labels=unicos)

In [None]:
# Undersample: every class contributes as many samples as the rarest one.
amostra = contagem.min()

# Same layout as x plus one trailing column that carries the class label, so
# features and labels can later be shuffled together.
Amostragem = np.zeros((amostra * 3, x.shape[1], x.shape[2] + 1), dtype=np.float64)

for classe in range(3):
    candidatos = x[y == classe]
    # Random draw without replacement among the samples of this class.
    escolhidos = np.random.choice(candidatos.shape[0], size=amostra, replace=False)
    # View on the slot reserved for this class; writes land in Amostragem.
    destino = Amostragem[classe * amostra:(classe + 1) * amostra]
    destino[:, :, :-1] = candidatos[escolhidos]
    destino[:, :, -1] = classe

Amostragem.shape

In [None]:
# Shuffle the samples in place along the first axis; the label travels with
# its features because it is stored in the last column of every row.
np.random.shuffle(Amostragem)

# Features are everything except the trailing label column.
x = Amostragem[:, :, :-1]
# The label is repeated on every row of a sample, so row 0 is enough.
y = Amostragem[:, 0, -1]
y

In [None]:
# Recount the label distribution of the current y to check the class balance.
unicos, contagem = np.unique(y, return_counts=True)
print(contagem)
# Pie chart of the class distribution, one slice per label value.
plt.pie(contagem, labels=unicos)

### Separar dados de treino e teste

In [None]:
# Persist features and labels. The files are opened with `with` so each handle
# is flushed and closed as soon as the array is written; passing a bare
# `open(...)` to np.save left the handles open.
with open('data/cnn/x.npy', 'wb') as arquivo:
    np.save(arquivo, x)
with open('data/cnn/y.npy', 'wb') as arquivo:
    np.save(arquivo, y)

In [None]:
# Reload the saved dataset, converting features to float64 and labels to int8.
# `with` closes each file handle once the array has been read; passing a bare
# `open(...)` to np.load left the handles open.
with open('data/cnn/x.npy', 'rb') as arquivo:
    x = np.float64(np.load(arquivo))
with open('data/cnn/y.npy', 'rb') as arquivo:
    y = np.int8(np.load(arquivo))

In [None]:
from keras.utils import to_categorical

# One-hot encode the integer labels into 3 columns, matching the 3-unit
# softmax output and the categorical cross-entropy loss of the model.
y_one_hot = to_categorical(y, 3)
y_one_hot

In [None]:
# from sklearn.model_selection import train_test_split

# x_train, x_test, y_train, y_test = train_test_split(x, y_one_hot, test_size=0.2)

# Criar Modelo

In [None]:
# Single convolution + pooling stage followed by a dense classifier.
# The input is a 501 x 6 grid with one channel; the output is a softmax over
# the three classes.
modelo = Sequential([
    Conv2D(256, (25, 5), activation='relu', input_shape=(501, 6, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(3, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot encoded targets.
modelo.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

modelo.summary()

# Treinar Modelo

In [None]:
# Training logs for TensorBoard are written under ./logs.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")

# Disabled: checkpoint of the best weights by validation accuracy.
# checkpoint_filepath = '/tmp/checkpoint'
# model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_filepath,
#     save_weights_only=True,
#     monitor='val_accuracy',
#     mode='max',
#     save_best_only=True
# )

# Stop once validation accuracy has not improved for 15 epochs and roll the
# model back to its best weights.
parada_callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=15,
    monitor="val_accuracy",
)

# 20% of the data is held out for validation.
opcoesTreino = dict(
    epochs=200,
    batch_size=256,
    shuffle=True,
    validation_split=0.2,
    callbacks=[tensorboard_callback, parada_callback],
)
modelo.fit(x, y_one_hot, **opcoesTreino)

In [None]:
# Persist the trained model under models/tf-cnn-model.
modelo.save('models/tf-cnn-model')

# Pontuação de treino e teste

In [None]:
from keras.models import load_model

# Reload the model saved under models/tf-cnn-model (replaces the in-memory
# `modelo`) and print its architecture.
modelo = load_model('models/tf-cnn-model')
modelo.summary()

In [None]:
# modelo.evaluate(x, y)

# Teste Financeiro

In [None]:
from json import dumps

# Back-test of the trained model over the most recent 21*108 candles.
# At most one position is open at a time. While flat, the model is asked for a
# signal (1 = open long, 2 = open short, anything else = stay flat). While in
# a position, only the exit rules are evaluated and no prediction is made.

target = 30      # take-profit distance from the entry price
stop = 10        # stop-loss distance from the entry price
timeLimit = 24   # maximum number of candles a position may stay open
total = 0        # accumulated result over all closed operations
long = False     # True while a long position is open
short = False    # True while a short position is open
position = 0     # entry price of the open position
candle = -1      # value of `i` (candle delay) at which the position was opened
# Per outcome: [number of operations, accumulated result].
operacoes = {
    'longSuccess' : [0, 0],
    'longFail' : [0, 0],
    'longEndSuccess' : [0, 0],
    'longEndFail' : [0, 0],
    'shortSuccess' : [0, 0],
    'shortFail' : [0, 0],
    'shortEndSuccess' : [0, 0],
    'shortEndFail' : [0, 0],
}


def _escalarMinMax(bloco):
    """Scale `bloco` to [0, 1] using its own overall minimum and maximum."""
    vmin = bloco.min()
    vmax = bloco.max()
    return (bloco - vmin) / (vmax - vmin)


def _montarEntrada(histM5):
    """Build the normalised (501, 6) model input from a 300-candle M5 frame.

    Adds the calendar columns to `histM5` in place, stacks the calendar row,
    the 300 M5 candles and the D1 history fetched for the newest candle, and
    applies the same normalisation used when the training set was built.
    """
    for campo in ('minute', 'hour', 'day_of_week', 'day'):
        histM5[campo] = getattr(histM5.index, campo)
    candles = histM5.to_numpy()

    entrada = np.zeros((501, 6))
    entrada[0, -4:] = candles[-1][-4:]
    entrada[1:301] = candles[:, :6]
    entrada[301:] = obterSimboloData(histM5.index[-1]).to_numpy()

    # M5 rows and D1 rows are scaled separately; within each, prices
    # (columns 0-3) share one scale and columns 4 and 5 get their own.
    for linhas in (slice(1, 301), slice(301, None)):
        for colunas in (slice(None, 4), 4, 5):
            entrada[linhas, colunas] = _escalarMinMax(entrada[linhas, colunas])

    # Calendar row: minute / 60, hour / 24, day of week / 4, day / 31.
    for coluna, divisor in ((2, 60), (3, 24), (4, 4), (5, 31)):
        entrada[0, coluna] /= divisor
    return entrada


# `i` is the delay, in candles, from the newest candle; it counts DOWN, so
# time moves forward as `i` decreases.
for i in tqdm(range(21*108, -1, -1)):
    histM5 = obterSimboloPosicao('WDO$N', n=300, delayCandles=i)
    priceNow = histM5['close'].iloc[-1]

    if long or short:
        if long:
            lado = 'long'
            resultado = priceNow - position
            atingiuAlvo = priceNow >= position + target
            atingiuStop = priceNow <= position - stop
        else:
            lado = 'short'
            resultado = position - priceNow
            atingiuAlvo = priceNow <= position - target
            atingiuStop = priceNow >= position + stop

        # Exactly one exit reason is booked per candle. The time limit is
        # measured as `candle - i` because `i` decreases over time; the former
        # test `i >= candle + timeLimit` could never be true, so positions
        # were never closed on time.
        if atingiuAlvo:
            desfecho = 'Success'
        elif atingiuStop:
            desfecho = 'Fail'
        elif candle - i >= timeLimit:
            desfecho = 'EndSuccess' if resultado > 0 else 'EndFail'
        else:
            desfecho = None

        if desfecho is not None:
            total += resultado
            operacoes[lado + desfecho][0] += 1
            operacoes[lado + desfecho][1] += resultado
            long = False
            short = False
        # No new signal is evaluated on a candle that started in a position.
        continue

    # Flat: build the model input (this is the only place the features and the
    # D1 fetch are needed) and ask the model for a signal.
    hist = _montarEntrada(histM5)
    previsao = np.argmax(modelo.predict(np.array([hist]), verbose=0)[0])
    if previsao in (1, 2):
        long = bool(previsao == 1)
        short = bool(previsao == 2)
        position = priceNow
        candle = i

# NOTE(review): a position still open when the loop ends is not settled and
# therefore not included in the figures below.
print('Total:', total)
print('Operações: ', dumps(operacoes, indent=4))