In [6]:
# Bloco 1: Preparar os Dados

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import time

# Carregar o dataset
data = pd.read_csv('D:\\dados\\bar_M1_data_07-08-2024.csv')
data['DateTime'] = pd.to_datetime(data['DateTime'])

# Criar a coluna "Valor", que é uma cópia de "Close" e não será normalizada
data['Valor'] = data['Close']

# Normalizar as colunas necessárias (exceto "Valor" e "Gatilho")
scaler = MinMaxScaler()
cols_to_normalize = ['Open', 'High', 'Low', 'Close', 'Volume', 'PavioSuperior', 'PavioInferior', 'Corpo', 'Range', 'SMA50', 'SMA100', 'SMA200', 'StochasticoK', 'StochasticoD', 'RSI', 'MACD', 'MACDSignal', 'MACDHistogram']
data[cols_to_normalize] = scaler.fit_transform(data[cols_to_normalize])

# Converter todos os valores para tipo float32 para evitar problemas de tipo
data = data.astype({col: 'float32' for col in cols_to_normalize + ['Valor']})

# Exibir as primeiras linhas do dataframe para verificação
#print(data.head())

# Bloco 2: Criar o Ambiente

import gym
from gym import spaces

class TradingEnv(gym.Env):
    def __init__(self, data):
        super(TradingEnv, self).__init__()
        self.data = data
        self.current_step = 0 #Inicializa o passo atual do ambiente em zero, indicando o início do período de simulação.
        self.action_space = spaces.Discrete(3)  # 0 = Manter, 1 = Comprar, 2 = Vender
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(data.columns) - 3,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._next_observation()

    def _next_observation(self):
        obs = self.data.iloc[self.current_step].drop(['Valor', 'DateTime', 'Gatilho']).values
        return obs.astype(np.float32)

    def step(self, action):
        self.current_step += 1

        done = self.current_step >= len(self.data) - 1
        reward = 0

        if action == 1:  # Comprar
            reward = self.data['Valor'].iloc[self.current_step] - self.data['Valor'].iloc[self.current_step - 1] - 0.25  # Subtrair custo de operação
        elif action == 2:  # Vender
            reward = self.data['Valor'].iloc[self.current_step - 1] - self.data['Valor'].iloc[self.current_step] - 0.25  # Subtrair custo de operação

        obs = self._next_observation()
        return obs, reward, done, {}

# Bloco 3: Criar o Agente

import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset

class TradingDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data) - 1

    def __getitem__(self, idx):
        obs = self.data.iloc[idx].drop(['Valor', 'DateTime', 'Gatilho']).values.astype(np.float32)
        action = int(self.data['Gatilho'].iloc[idx])
        return obs, action

class TradingAgent(pl.LightningModule):
    def __init__(self, input_dim, lr=1e-3):
        super(TradingAgent, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 3)
        )
        self.lr = lr
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        obs, action = batch
        actions = self(obs)
        loss = self.criterion(actions, action)
        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.lr)

# Criar o dataset e o dataloader
dataset = TradingDataset(data)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Criar o agente
tagente = TradingAgent(input_dim=len(data.columns) - 3)

# Treinar o modelo
trainer = pl.Trainer(max_epochs=10, accelerator='gpu', devices=1)
trainer.fit(tagente, dataloader)

# Bloco 4: Treinamento com Logs

class LightningTradingEnv(pl.LightningModule):
    def __init__(self, env, agent):
        super(LightningTradingEnv, self).__init__()
        self.env = env
        self.agent = agent

    def training_step(self, batch, batch_idx):
        obs = self.env.reset()
        done = False
        total_reward = 0
        actions_count = {0: 0, 1: 0, 2: 0}
        wins = 0
        losses = 0
        win_total = 0
        lose_total = 0
        start_time = time.time()

        while not done:
            action = torch.argmax(self.agent(torch.tensor(obs, dtype=torch.float32)))
            obs, reward, done, _ = self.env.step(action.item())

            actions_count[action.item()] += 1
            total_reward += reward
            if reward > 0:
                wins += 1
                win_total += reward
            elif reward < 0:
                losses += 1
                lose_total += reward

            gatilho = int(self.env.data['Gatilho'].iloc[self.env.current_step])
            if gatilho == 1:
                print(f"Episode: {batch_idx + 1}, Step: {self.env.current_step}, Action: {action.item()}, Reward: {reward}")

        win_rate = wins / (wins + losses) if (wins + losses) > 0 else 0
        training_time = time.time() - start_time
        print(f"Episode: {batch_idx + 1}, Total Reward: {total_reward}, Win Rate: {win_rate:.2f}, Win Total: {win_total}, Lose Total: {lose_total}, Ação 0: {actions_count[0]}, Ação 1: {actions_count[1]}, Ação 2: {actions_count[2]}, Wins: {wins}, Losses: {losses}")
        print(f"Tempo de Treinamento: {training_time:.2f} segundos")
        return total_reward

    def configure_optimizers(self):
        return optim.Adam(self.agent.parameters(), lr=1e-3)

# Criar o ambiente
env = TradingEnv(data)

# Treinar o ambiente
trainer = pl.Trainer(max_epochs=10, accelerator='gpu', devices=1)
trainer.fit(LightningTradingEnv(env, tagente))

print("Treinamento finalizado.")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Sequential       | 4.2 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
4.2 K     Trainable params
0         Non-trainable params
4.2 K     Total params
0.017     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode
c:\Python39\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 919/919 [00:18<00:00, 48.51it/s, v_num=4]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 919/919 [00:18<00:00, 48.47it/s, v_num=4]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type         | Params | Mode 
-----------------------------------------------
0 | agent | TradingAgent | 4.2 K  | train
-----------------------------------------------
4.2 K     Trainable params
0         Non-trainable params
4.2 K     Total params
0.017     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode


MisconfigurationException: `train_dataloader` must be implemented to be used with the Lightning Trainer