In [1]:
# Bloco 1: Preparar os Dados

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import time

# Load the M1 bar dataset and parse the timestamp column into datetimes.
data = pd.read_csv('D:\\dados\\bar_M1_data_07-08-2024.csv')
data['DateTime'] = pd.to_datetime(data['DateTime'])

# "Valor" keeps a copy of "Close" taken BEFORE scaling, so the raw price is
# still available after "Close" itself has been normalized.
data['Valor'] = data['Close']

# Columns that get min-max scaled ("Valor" and "Gatilho" are deliberately
# left out of this list).
cols_to_normalize = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'PavioSuperior', 'PavioInferior', 'Corpo', 'Range',
    'SMA50', 'SMA100', 'SMA200',
    'StochasticoK', 'StochasticoD', 'RSI',
    'MACD', 'MACDSignal', 'MACDHistogram',
]
scaler = MinMaxScaler()
data[cols_to_normalize] = scaler.fit_transform(data[cols_to_normalize])

# Cast the scaled columns plus "Valor" to float32 to avoid dtype mismatches
# further down the pipeline.
data = data.astype(dict.fromkeys(cols_to_normalize + ['Valor'], 'float32'))

# Bloco 2: Criar o Ambiente com TF-Agents

import tensorflow as tf
from tf_agents.environments import py_environment
from tf_agents.environments import tf_py_environment
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as ts

class TradingEnvTF(py_environment.PyEnvironment):
    """Trading environment that walks a bar DataFrame one row per step.

    Actions (see ``_step``):
        0 -- no trade, reward 0.
        1 -- buy: reward is the price change over the step minus the cost.
        2 -- sell: reward is the negated price change minus the cost.

    Prices are read from the ``Valor`` column. The observation for a row is
    every column except ``Valor``, ``DateTime`` and ``Gatilho``, as float32.

    Args:
        data: DataFrame containing at least the ``Valor``, ``DateTime`` and
            ``Gatilho`` columns plus the numeric feature columns. Must have
            at least two rows so that one price change can be computed.
        transaction_cost: Flat amount subtracted from the reward of every
            buy or sell action. Defaults to 0.25.

    Raises:
        ValueError: If ``data`` has fewer than two rows.
        KeyError: If one of the excluded columns is missing from ``data``.
    """

    # Columns that are not part of the observation vector.
    _NON_FEATURE_COLUMNS = ('Valor', 'DateTime', 'Gatilho')

    def __init__(self, data, transaction_cost=0.25):
        super(TradingEnvTF, self).__init__()
        if len(data) < 2:
            raise ValueError(
                "TradingEnvTF needs at least 2 rows to compute a price change, "
                f"got {len(data)}."
            )

        self.data = data
        self.transaction_cost = float(transaction_cost)

        # Materialize features and prices once as float32 arrays; indexing
        # these per step is far cheaper than building a pandas row Series
        # and dropping labels from it on every call.
        self._features = data.drop(columns=list(self._NON_FEATURE_COLUMNS)).to_numpy(dtype=np.float32)
        self._prices = data['Valor'].to_numpy(dtype=np.float32)

        self.current_step = 0
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=2, name='action')
        # NOTE(review): the [0, 1] bounds assume every feature column was
        # min-max scaled upstream -- confirm against the data preparation.
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(self._features.shape[1],), dtype=np.float32,
            minimum=0, maximum=1, name='observation')
        self._state = self._next_observation()
        self._episode_ended = False

    def action_spec(self):
        """Return the spec of the scalar int32 action in [0, 2]."""
        return self._action_spec

    def observation_spec(self):
        """Return the spec of the float32 observation vector."""
        return self._observation_spec

    def _reset(self):
        """Rewind to the first row and return the initial time step."""
        self.current_step = 0
        self._state = self._next_observation()
        self._episode_ended = False
        return ts.restart(self._state)

    def _next_observation(self):
        """Return the float32 feature vector of the row at ``current_step``."""
        return self._features[self.current_step]

    def _step(self, action):
        """Advance one row and return the resulting time step.

        If the previous step ended the episode, the environment is reset
        instead and the action is ignored.
        """
        if self._episode_ended:
            return self.reset()

        self.current_step += 1
        previous_price = self._prices[self.current_step - 1]
        current_price = self._prices[self.current_step]

        if action == 1:  # Buy: profit when the price rises.
            reward = current_price - previous_price - self.transaction_cost
        elif action == 2:  # Sell: profit when the price falls.
            reward = previous_price - current_price - self.transaction_cost
        else:
            reward = np.float32(0.0)

        # Refresh the observation before building the time step so that the
        # terminal step also carries the row it actually landed on.
        self._state = self._next_observation()

        if self.current_step >= len(self._prices) - 1:
            self._episode_ended = True
            return ts.termination(self._state, reward)
        return ts.transition(self._state, reward, discount=1.0)

# Create the TF-Agents environment: instantiate the Python environment over
# the prepared DataFrame, then wrap it in a TFPyEnvironment, which is the
# object the networks, agent and collection loop below are built against.
env_py = TradingEnvTF(data)
tf_env = tf_py_environment.TFPyEnvironment(env_py)

# Bloco 3: Criar o Agente com TF-Agents

from tf_agents.agents.ppo import ppo_agent
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import value_network
from tf_agents.utils import common

# Build the policy (actor) and value networks from the environment specs.
# Both use two fully connected hidden layers of 64 units.
actor_net = actor_distribution_network.ActorDistributionNetwork(
    tf_env.observation_spec(),
    tf_env.action_spec(),
    fc_layer_params=(64, 64)
)

value_net = value_network.ValueNetwork(
    tf_env.observation_spec(),
    fc_layer_params=(64, 64)
)

# Optimizer. This single instance is the one handed to the agent, so the
# learning rate is configured in exactly one place.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Define the PPO agent. The counter variable is passed to the agent as its
# train_step_counter.
train_step_counter = tf.Variable(0)
agente = ppo_agent.PPOAgent(
    tf_env.time_step_spec(),
    tf_env.action_spec(),
    actor_net=actor_net,
    value_net=value_net,
    optimizer=optimizer,
    train_step_counter=train_step_counter
)
agente.initialize()

# Bloco 4: Treinamento com Logs

from tf_agents.drivers import dynamic_step_driver
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.trajectories import trajectory
from tf_agents.metrics import tf_metrics

# Replay buffer. PPO trains on the trajectories collected during the current
# iteration, so the buffer must be able to hold one whole episode without
# wrapping around; an episode visits at most len(data) rows.
replay_buffer_capacity = max(10000, len(data))
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agente.collect_data_spec,
    batch_size=tf_env.batch_size,
    max_length=replay_buffer_capacity
)

# Policy used for data collection.
collect_policy = agente.collect_policy

# Evaluation metrics. They are only defined here; the manual loop below does
# not feed trajectories into them.
metrics = [
    tf_metrics.AverageReturnMetric(),
    tf_metrics.AverageEpisodeLengthMetric()
]

# Collect one full episode per iteration, then train the agent on it.
num_iterations = 10
for episode in range(num_iterations):
    time_step = tf_env.reset()
    episode_reward = 0
    episode_ended = False

    # --- Collection: roll the collect policy until the episode terminates.
    while not episode_ended:
        action_step = collect_policy.action(time_step)
        next_time_step = tf_env.step(action_step.action)
        traj = trajectory.from_transition(time_step, action_step, next_time_step)
        replay_buffer.add_batch(traj)
        episode_reward += next_time_step.reward.numpy()
        time_step = next_time_step
        episode_ended = time_step.is_last()

    # --- Training: update the agent on everything collected this iteration,
    # then empty the buffer so the next update only sees data gathered with
    # the updated policy.
    experience = replay_buffer.gather_all()
    loss_info = agente.train(experience=experience)
    replay_buffer.clear()

    print(f"Episode {episode + 1}: Reward: {episode_reward}")
    print(f"Episode {episode + 1}: Loss: {loss_info.loss.numpy()}")

print("Treinamento finalizado.")



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "c:\Python39\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\fabri\AppData\Roaming\Python\Python39\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\fabri\AppData\Roaming\Python\Python39\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\fabri\AppData\Roam

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "c:\Python39\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\fabri\AppData\Roaming\Python\Python39\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\fabri\AppData\Roaming\Python\Python39\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\fabri\AppData\Roam

AttributeError: _ARRAY_API not found

ImportError: numpy.core._multiarray_umath failed to import

ImportError: numpy.core.umath failed to import

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle