# Bibliotecas

In [1]:
from obterDados import obterSimbolo
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

# Obtendo dados

In [None]:
# Fetch the candle history for the 'WDO$N' symbol through the project helper.
# NOTE(review): the variable name suggests 5-minute bars — confirm in obterDados.
rawDataM5 = obterSimbolo('WDO$N')
rawDataM5

In [None]:
# Preview the history with the last 252*108 rows left out; 252*108 is the same
# count the financial back-test loop iterates over.
rawDataM5[:-252*108]

In [None]:
# Number of most-recent candles kept OUT of the training array. The financial
# back-test cell replays `range(252*108, -1, -1)` candle delays, so training on
# those same candles would let the model see the period it is later scored on.
candlesReservados = 252 * 108

# errors='ignore' keeps the cell re-runnable: on a second execution the
# 'spread' column is already gone and a plain drop would raise KeyError.
rawDataM5 = rawDataM5.drop(columns=['spread'], errors='ignore')

# Calendar features taken from the datetime-like index.
rawDataM5['minute'] = rawDataM5.index.minute
rawDataM5['hour'] = rawDataM5.index.hour
rawDataM5['day_of_week'] = rawDataM5.index.day_of_week
rawDataM5['day'] = rawDataM5.index.day

# Positional slice: everything except the reserved back-test tail.
rawNP = rawDataM5.iloc[:-candlesReservados].to_numpy()
rawNP

In [None]:
# np.save(open('data/raw/rawDataM5.npy', 'wb'), rawNP)

# Tratamento de Dados

### Gerando colunas com preços passados

In [None]:
# Window geometry: each sample sees `nPassado` consecutive candles, and
# `nFuturo` further candles are kept available after it for labelling.
nPassado = 1000
nFuturo = 24

totalJanelas = len(rawNP) - (nPassado + nFuturo)
totalColunas = rawDataM5.shape[1]

# One (nPassado, totalColunas) slice of the raw array per starting offset.
histNp = np.zeros((totalJanelas, nPassado, totalColunas))
for inicio in tqdm(range(len(histNp))):
    fim = inicio + nPassado
    histNp[inicio] = rawNP[inicio:fim]

### Achar quando comprar, vender ou fazer nada

In [None]:
# Label every window by what happens in the next `nFuturo` candles:
#   0 = do nothing (neither threshold touched), 1 = buy, 2 = sell.
# NOTE(review): columns 1, 2 and 3 are presumably high, low and close — confirm
# against the column order returned by obterSimbolo.
pontos = 30
y = np.zeros((len(histNp)))
for i in tqdm(range(len(y))):
    ultimoPreco = histNp[i, -1, 3]
    gatilhoCompra = ultimoPreco + pontos
    gatilhoVenda = ultimoPreco - pontos
    primeiroFuturo = nPassado + i  # first candle after the window
    for j in range(nFuturo):
        candleFuturo = rawNP[primeiroFuturo + j]
        # (A "skip after 14h" filter used to be sketched here; it is disabled.)
        # Buy is tested first, so it wins when one candle touches both levels.
        if candleFuturo[1] >= gatilhoCompra:
            y[i] = 1
            break
        if candleFuturo[2] <= gatilhoVenda:
            y[i] = 2
            break


### Normalização

In [None]:
# Per-window feature scaling into a float16 copy of histNp.
# Column groups that are min-max scaled inside each window:
#   0-3 prices (one shared scale), 4 tick volume, 5 real volume.
gruposMinMax = (slice(0, 4), slice(4, 5), slice(5, 6))

histNpNorm = np.zeros(histNp.shape, dtype=np.float16)
for i in tqdm(range(len(histNpNorm))):
    janela = histNp[i]
    for colunas in gruposMinMax:
        bloco = janela[:, colunas]
        vmin = bloco.min()
        amplitude = bloco.max() - vmin
        # A constant group (max == min) would evaluate 0/0 and fill the
        # features with NaN; map it to 0 instead.
        histNpNorm[i, :, colunas] = (bloco - vmin) / amplitude if amplitude != 0 else 0.0
    # minute
    histNpNorm[i, :, 6] = janela[:, 6] / 60
    # hour
    histNpNorm[i, :, 7] = janela[:, 7] / 24
    # day of week (divisor 4 assumes values 0..4, i.e. no weekend candles — TODO confirm)
    histNpNorm[i, :, 8] = janela[:, 8] / 4
    # day of month
    histNpNorm[i, :, 9] = janela[:, 9] / 31


### Sem Reshape para 2d no caso para CNNs

In [None]:
# Use the normalised windows as-is (same 3-D shape as histNp); no flattening.
x = histNpNorm

### Balanceamento de Classes

In [None]:
# Count how many samples carry each label value and plot the proportions.
# `contagem` is reused by the balancing cell to pick the per-class sample size.
unicos, contagem = np.unique(y, return_counts=True)
print(contagem)
plt.pie(contagem, labels=unicos)

In [None]:
# Under-sample every class down to the size of the rarest one.
# The label is stored as an extra trailing feature column so that samples and
# labels stay aligned when the array is shuffled later.
amostra = contagem.min()
totalClasses = 3
Amostragem = np.zeros(
    (amostra * totalClasses, x.shape[1], x.shape[2] + 1),
    dtype=np.float16,
)

for classe in range(totalClasses):
    # Draw row indices (without replacement) rather than copying the whole
    # class subset first; the legacy RNG yields the same selection either way.
    indicesClasse = np.flatnonzero(y == classe)
    sorteados = np.random.choice(indicesClasse, size=amostra, replace=False)
    destino = Amostragem[classe * amostra:(classe + 1) * amostra]  # view into Amostragem
    destino[:, :, :-1] = x[sorteados]
    destino[:, :, -1] = classe

Amostragem.shape

In [None]:
# Shuffle samples in place along the first axis, then separate features from
# the label column appended during balancing.
np.random.shuffle(Amostragem)
# The label was written to every time step of the last column, so reading it
# at time step 0 is enough.
x, y = Amostragem[:, :, :-1], Amostragem[:, 0, -1]
y

In [None]:
# Recount the labels after balancing; every class should now have the same size.
unicos, contagem = np.unique(y, return_counts=True)
print(contagem)
plt.pie(contagem, labels=unicos)

### Separar dados de treino e teste

In [None]:
# Persist the balanced dataset. Passing the path (instead of an open() handle
# that is never closed) lets NumPy open, flush and close the file itself.
np.save('data/cnn/x.npy', x)
np.save('data/cnn/y.npy', y)

In [2]:
# Reload the dataset saved above. np.load closes the file itself when given a
# path, unlike the previous open() handles that were left dangling.
x = np.load('data/cnn/x.npy').astype(np.float16)
y = np.load('data/cnn/y.npy').astype(np.int8)  # integer class ids for one-hot encoding

In [3]:
from keras.utils import to_categorical

# One-hot encode the integer labels into 3 columns
# (index 0 = do nothing, 1 = buy, 2 = sell, as assigned in the labelling cell).
y_one_hot = to_categorical(y, 3)
y_one_hot

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)

In [None]:
# from sklearn.model_selection import train_test_split

# x_train, x_test, y_train, y_test = train_test_split(x, y_one_hot, test_size=0.2)

# Criar Modelo

In [4]:
# CNN over a (1000 candles x 10 features x 1 channel) "image".
# Two conv/pool stages reduce it to 247 x 1 x 128 before the dense head,
# which ends in a 3-way softmax (one output per label class).
modelo = Sequential([
    Conv2D(64, (5, 3), activation='relu', input_shape=(1000, 10, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(128, (5, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(3, activation='softmax'),
])

# categorical_crossentropy matches the one-hot targets in y_one_hot.
modelo.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

modelo.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 996, 8, 64)        1024      
                                                                 
 max_pooling2d (MaxPooling2  (None, 498, 4, 64)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 494, 2, 128)       123008    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 247, 1, 128)       0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 31616)             0         
                                                                 
 dense (Dense)               (None, 256)               8

# Treinar Modelo

In [5]:
# Stop when validation accuracy has not improved for 15 epochs and restore the
# best weights seen so far.
parada_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=15,
    restore_best_weights=True,
)
# Training curves are written to ./logs for TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")
# (A ModelCheckpoint callback was sketched here previously; it is disabled.)

# NOTE(review): validation_split holds out the trailing 20% of `x`; since the
# windows overlap heavily and were shuffled beforehand, verify that the
# validation samples are really independent of the training ones.
opcoesTreino = dict(
    shuffle=True,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    callbacks=[tensorboard_callback, parada_callback],
)
modelo.fit(x, y_one_hot, **opcoesTreino)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<keras.src.callbacks.History at 0x1c79ed26740>

In [10]:
# Persist the trained model under 'models/tf-cnn-model' so it can be reloaded later.
modelo.save('models/tf-cnn-model')

INFO:tensorflow:Assets written to: models/tf-cnn-model\assets


INFO:tensorflow:Assets written to: models/tf-cnn-model\assets


# Pontuação de treino e teste

In [7]:
from keras.models import load_model

# Reload the saved model from 'models/tf-cnn-model' and print its architecture.
modelo = load_model('models/tf-cnn-model')
modelo.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 996, 8, 64)        1024      
                                                                 
 max_pooling2d (MaxPooling2  (None, 498, 4, 64)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 494, 2, 128)       123008    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 247, 1, 128)       0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 31616)             0         
                                                                 
 dense (Dense)               (None, 256)               8

In [8]:
# modelo.evaluate(x_test, y_test)

# Teste Financeiro

In [11]:
from json import dumps

# Back-test: walk candle by candle over the last 252*108 candles, open a
# position when the model predicts buy (1) or sell (2), and close it on
# target, stop or time limit. Only one position is open at a time.
target = 30      # profit needed to close as a success, in price points
stop = 10        # loss that closes the trade as a failure, in price points
timeLimit = 24   # maximum number of candles a position may stay open
total = 0        # accumulated result, in price points
long = False
short = False
position = 0     # entry price of the open position
candle = -1      # value of `i` at which the open position was entered
# Each entry is [number of trades, accumulated points].
operacoes = {
    'longSuccess' : [0, 0],
    'longFail' : [0, 0],
    'longEndSuccess' : [0, 0],
    'longEndFail' : [0, 0],
    'shortSuccess' : [0, 0],
    'shortFail' : [0, 0],
    'shortEndSuccess' : [0, 0],
    'shortEndFail' : [0, 0],
}
# `i` is passed as delayCandles and counts DOWN on every iteration.
for i in tqdm(range(252*108, -1, -1)):
    dados = obterSimbolo('WDO$N', n=1000, delayCandles=i)
    # Read the price from the untouched frame, before any scaling happens.
    priceNow = dados['close'].iloc[-1]

    if long or short:
        lado = 'long' if long else 'short'
        resultado = priceNow - position if long else position - priceNow
        # Exactly one exit reason per candle (elif chain), so a trade can never
        # be booked twice.
        if resultado >= target:
            desfecho = lado + 'Success'
        elif resultado <= -stop:
            desfecho = lado + 'Fail'
        elif candle - i >= timeLimit:
            # `i` decreases each iteration, so `candle - i` is the number of
            # candles elapsed since entry. The previous test
            # `i >= candle + timeLimit` could never be true.
            desfecho = lado + ('EndSuccess' if resultado > 0 else 'EndFail')
        else:
            desfecho = None
        if desfecho is not None:
            total += resultado
            operacoes[desfecho][0] += 1
            operacoes[desfecho][1] += resultado
            long = False
            short = False
        # Never open a new position on a candle that managed an existing one.
        continue

    # Flat: build the model input with the same features/scaling as training.
    hist = dados.copy().drop(columns=['spread'])
    hist['minute'] = hist.index.minute
    hist['hour'] = hist.index.hour
    hist['day_of_week'] = hist.index.day_of_week
    hist['day'] = hist.index.day
    histNP = hist.to_numpy()
    # Min-max groups: 0-3 prices (shared scale), 4 tick volume, 5 real volume.
    for colunas in (slice(0, 4), slice(4, 5), slice(5, 6)):
        bloco = histNP[:, colunas]
        vmin = bloco.min()
        amplitude = bloco.max() - vmin
        # Constant group -> 0 instead of the NaN produced by 0/0.
        histNP[:, colunas] = (bloco - vmin) / amplitude if amplitude != 0 else 0.0
    histNP[:, 6] /= 60
    histNP[:, 7] /= 24
    histNP[:, 8] /= 4
    histNP[:, 9] /= 31

    previsao = np.argmax(modelo.predict(np.array([histNP]), verbose=0)[0])
    if previsao == 1:
        long = True
        position = priceNow
        candle = i
    if previsao == 2:
        short = True
        position = priceNow
        candle = i

print('Total:', total)
print('Operações: ', dumps(operacoes, indent=4))

  1%|▏         | 385/27217 [00:20<24:12, 18.47it/s]


KeyboardInterrupt: 