# Bibliotecas

In [None]:
from obterDados import obterSimboloPosicao, obterSimboloData
import numpy as np
import tensorflow as tf
import gymnasium as gym
import gym_anytrading
from stable_baselines3 import DQN
from json import dumps

# Obtendo dados de M5 e D1

In [None]:
# Load the M5 history and drop its last 100 * 108 rows, so those rows are
# not part of the frame used to build the training environment.
# NOTE(review): 108 is presumably the number of M5 bars in one trading
# session (i.e. 100 sessions are held out) - confirm.
heldOutRows = 100 * 108
rawDataM5 = obterSimboloPosicao()[:-heldOutRows]
rawDataM5  # last expression of the cell: displayed by the notebook

In [None]:
# Request the daily (D1) data that accompanies the M5 training frame.
# The first argument is the index label of the last M5 row.
lastM5Label = rawDataM5.index[-1]
# One D1 row per 108 M5 rows, plus 200 extra rows.
# NOTE(review): the extra 200 rows are presumably history needed by
# features computed on the daily frame - confirm.
dailyRowCount = len(rawDataM5) // 108 + 200
rawDataD1 = obterSimboloData(lastM5Label, n=dailyRowCount)
rawDataD1  # last expression of the cell: displayed by the notebook

# Criando ambiente e testando aleatoriamente

In [None]:
# Build the environment used for training from the M5 and D1 frames.
from customTradingEnv2 import CustomTradingEnv

env = CustomTradingEnv(
    df_daily=rawDataD1,
    df_5min=rawDataM5,
)

In [None]:
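# Disabled smoke test: plays one episode with randomly sampled actions and
# then displays the last `info` returned by the environment.
# Uncomment the lines below to exercise the environment without a model.
# observation = env.reset()
# done = False
# cont = 0
# while not done:
#     action = env.action_space.sample()
#     observation, reward, done, info = env.step(action)

# info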

# Treinando modelo com aprendizado por reforço

In [None]:
# Train a fresh DQN agent on `env`, saving checkpoints along the way.
from stable_baselines3.common.callbacks import CheckpointCallback

# Checkpoints are written into models_dir with save_freq=10000.
models_dir = 'models/DQN'
checkpointCallback = CheckpointCallback(
    save_path=models_dir,
    save_freq=10000,
)

modelo = DQN(
    'MultiInputPolicy',
    env,
    exploration_final_eps=0.2,
    tensorboard_log='logs/dqn/',
    verbose=1,
)

# Training budget: 200 timesteps per row of the M5 training frame.
stepBudget = len(rawDataM5) * 200
modelo.learn(callback=checkpointCallback, total_timesteps=stepBudget)

### Continuar treinamento a partir de modelo salvo

In [None]:
# Resume training from a previously saved checkpoint instead of starting
# from scratch.
from stable_baselines3.common.callbacks import CheckpointCallback

models_dir = 'models/DQN'
model_path = f"{models_dir}/rl_model_3810000_steps.zip"
checkpointCallback = CheckpointCallback(
    save_path=models_dir,
    save_freq=10000,
)

modelo = DQN.load(model_path, env=env)

# Budget for this run: 300 timesteps per row of the M5 training frame.
stepBudget = len(rawDataM5) * 300
modelo.learn(
    total_timesteps=stepBudget,
    callback=checkpointCallback,
    # Do not reset the timestep counter of the loaded model.
    reset_num_timesteps=False,
)

# Testando modelo treinado

In [None]:
# Test window: the last 20 * 108 + 300 rows of the M5 history.
# NOTE(review): presumably 20 sessions of 108 bars plus 300 extra rows -
# confirm what the extra 300 rows are for.
testRowCount = 20 * 108 + 300
testDataM5 = obterSimboloPosicao()[-testRowCount:]

# Matching D1 data: one row per 108 M5 rows, plus 200 extra rows.
lastTestLabel = testDataM5.index[-1]
testDailyRowCount = len(testDataM5) // 108 + 200
testDataD1 = obterSimboloData(lastTestLabel, n=testDailyRowCount)

In [None]:
# Build the evaluation environment from the test frames.
# Note: this rebinds the notebook-level name `env`.
from customTradingEnv2 import CustomTradingEnv

env = CustomTradingEnv(
    df_daily=testDataD1,
    df_5min=testDataM5,
)

In [None]:
# Load the checkpoint to evaluate and attach it to the current `env`.
models_dir = 'models/DQN'
model_path = f"{models_dir}/rl_model_6020000_steps.zip"
modelo = DQN.load(
    model_path,
    env=env,
)

In [None]:
# Play one full episode on `env` with the loaded model, then display the
# `info` returned by the last step.
observation = env.reset()
done = False
while not done:
    # deterministic=True makes the agent act greedily. Without it,
    # DQN.predict applies epsilon-greedy exploration, so part of the
    # evaluation actions would be random (this notebook trains with
    # exploration_final_eps=0.2) and the results would not be reproducible.
    action, _state = modelo.predict(observation, deterministic=True)
    # predict returns the action as a NumPy value; the environment is
    # given a plain int.
    observation, reward, done, info = env.step(int(action))
    # env.render()

info

In [None]:
# Plot the test-window closing prices and overlay the executed trades.
from matplotlib import pyplot as plt

# Marker colour per trade side; trades with any other label are not drawn.
markerColors = {'Buy': 'g', 'Sell': 'r'}

plt.figure(figsize=(200, 50))
closePrices = testDataM5['Close'].to_numpy()
plt.plot(closePrices)
# Each entry of info['trades'] unpacks into (x position, side, price).
for barIdx, side, tradePrice in info['trades']:
    markerColor = markerColors.get(side)
    if markerColor is not None:
        plt.plot(barIdx, tradePrice, 'o', color=markerColor)
plt.show()