In [1]:
import pandas as pd
import numpy as np
import time

def calculate_rsi(series, period=14):
    """Calcula el Índice de Fuerza Relativa (RSI)."""
    delta = series.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.ewm(com=period - 1, min_periods=period).mean()
    avg_loss = loss.ewm(com=period - 1, min_periods=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def calculate_macd_hist(series, fast=12, slow=26, signal=9):
    """Calcula el Histograma MACD."""
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd - signal_line

def create_swing_trading_data() -> pd.DataFrame:
    """
    Genera datos horarios simulados para Swing Trading de Ethereum, 
    incluyendo el precio y los indicadores técnicos.
    """
    # 365 días * 24 horas = 8760 horas
    num_hours = 8760 
    dates = pd.date_range(end=pd.Timestamp.now(), periods=num_hours, freq='H')
    
    np.random.seed(int(time.time())) 

    # Simular un movimiento de precios con ruido (Walk aleatorio)
    initial_price = 3000  # Precio inicial de ETH simulado
    price_changes = np.random.normal(0, 5, num_hours).cumsum() # Simular tendencia
    noise = np.random.normal(0, 50, num_hours) # Simular volatilidad
    
    # Precio base + Tendencia + Ruido
    eth_price = initial_price + price_changes + noise
    
    # Crear el DataFrame base
    df = pd.DataFrame({'ETH_Price': eth_price}, index=dates)
    
    # ----------------------------------------------------
    # CALCULAR INDICADORES TÉCNICOS (ITs)
    # ----------------------------------------------------
    df['RSI'] = calculate_rsi(df['ETH_Price']).fillna(50)
    df['MACD_Hist'] = calculate_macd_hist(df['ETH_Price']).fillna(0)

    # ----------------------------------------------------
    # VARIABLES DE ENTORNO
    # ----------------------------------------------------
    # Costo de transacción más alto para una red más congestionada/compleja
    df['Transaction_Cost_USD'] = np.random.uniform(low=0.10, high=0.50, size=num_hours)
    
    # Normalización: Necesitamos precios normalizados para el Agente RL
    # Usaremos el precio como la diferencia porcentual con respecto al día anterior (rendimiento)
    df['Price_Change_Norm'] = df['ETH_Price'].pct_change().fillna(0)
    
    # Limpieza final
    df = df.dropna().reset_index(names=['timestamp'])
    
    # Guardar el archivo para su uso en la clase SwingTradingEnv
    df.to_csv('swing_trading_data.csv', index=False)
    
    print("--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---")
    print(f"Total de pasos horarios (timesteps): {len(df)}")
    print(df.head())
    
    return df

# Ejecutar la generación de datos
historical_swing_df = create_swing_trading_data()

--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---
Total de pasos horarios (timesteps): 8760
                   timestamp    ETH_Price   RSI  MACD_Hist  \
0 2024-10-13 18:52:57.285421  3079.440968  50.0   0.000000   
1 2024-10-13 19:52:57.285421  2956.210541  50.0  -7.864278   
2 2024-10-13 20:52:57.285421  3035.836389  50.0  -7.281737   
3 2024-10-13 21:52:57.285421  3012.107206  50.0  -8.014711   
4 2024-10-13 22:52:57.285421  2944.068089  50.0 -12.343290   

   Transaction_Cost_USD  Price_Change_Norm  
0              0.395208           0.000000  
1              0.341172          -0.040017  
2              0.400810           0.026935  
3              0.161008          -0.007816  
4              0.286840          -0.022589  


  dates = pd.date_range(end=pd.Timestamp.now(), periods=num_hours, freq='H')


In [4]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
import time
import os
from typing import Dict, Any

# =======================================================
# 1. FUNCIÓN DE GENERACIÓN DE DATOS Y CÁLCULO DE ITs
# =======================================================

def calculate_rsi(series, period=14):
    """Calcula el Índice de Fuerza Relativa (RSI)."""
    delta = series.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.ewm(com=period - 1, min_periods=period).mean()
    avg_loss = loss.ewm(com=period - 1, min_periods=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def calculate_macd_hist(series, fast=12, slow=26, signal=9):
    """Calcula el Histograma MACD."""
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd - signal_line

def create_swing_trading_data() -> pd.DataFrame:
    """
    Genera datos horarios simulados para Swing Trading de Altcoins, 
    incluyendo el precio y los indicadores técnicos.
    """
    num_hours = 8760 
    dates = pd.date_range(end=pd.Timestamp.now(), periods=num_hours, freq='H')
    
    np.random.seed(int(time.time())) 

    # Simular un movimiento de precios bajista (similar al último resultado)
    initial_price = 3000 
    # Aseguramos una volatilidad significativa
    price_changes = np.random.normal(0, 5, num_hours).cumsum() - np.linspace(0, 100, num_hours) 
    noise = np.random.normal(0, 50, num_hours) 
    
    eth_price = initial_price + price_changes + noise
    
    df = pd.DataFrame({'ETH_Price': eth_price}, index=dates)
    
    df['RSI'] = calculate_rsi(df['ETH_Price']).fillna(50)
    df['MACD_Hist'] = calculate_macd_hist(df['ETH_Price']).fillna(0)
    df['Transaction_Cost_USD'] = np.random.uniform(low=0.10, high=0.50, size=num_hours)
    df['Price_Change_Norm'] = df['ETH_Price'].pct_change().fillna(0)
    
    df = df.dropna().reset_index(names=['timestamp'])
    
    df.to_csv('swing_trading_data.csv', index=False)
    
    print("--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---")
    print(f"Total de pasos horarios (timesteps): {len(df)}")
    
    return df

# Ejecutar la generación de datos
historical_swing_df = create_swing_trading_data()


# =======================================================
# 2. CLASE DEL ENTORNO DE SWING TRADING (CORREGIDA)
# =======================================================

try:
    swing_df = pd.read_csv('swing_trading_data.csv')
    swing_df['timestamp'] = pd.to_datetime(swing_df['timestamp'])
except FileNotFoundError:
    print("ERROR FATAL: Ejecuta el Paso 1 (create_swing_trading_data) primero.")
    swing_df = pd.DataFrame() 


class SwingTradingEnv(gym.Env):
    """Entorno de Reinforcement Learning para Swing Trading agresivo de Altcoins."""

    def __init__(self, df=swing_df, initial_capital=400):
        super(SwingTradingEnv, self).__init__()
        self.df = df
        self.max_timesteps = len(df) - 1
        self.initial_capital = initial_capital
        
        # Parámetros de Riesgo y Trade
        self.TRANSACTION_FEE = 0.0005  # 0.05% de fee por trade
        
        # DEFINICIÓN DEL ESPACIO DE ACCIONES: 0=Hold, 1=Buy/Long, 2=Sell/Close
        self.action_space = spaces.Discrete(3) 
        
        # DEFINICIÓN DEL ESPACIO DE ESTADOS
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32 
        )
        self.reset()
        
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 1 
        self.capital_usd = self.initial_capital
        self.asset_quantity = 0  
        self.current_position = 0 # 0=Liquido (USDC), 1=Comprado (ETH)
        self.entry_value = self.initial_capital # Nuevo: Valor inicial de la posición abierta
        
        current_data = self.df.iloc[self.current_step]
        observation = self._get_observation(current_data)
        
        info = {'capital_usd': self.capital_usd, 'position': self.current_position}
        return observation, info

    def _get_observation(self, data_row):
        """Prepara el vector de estado (observación) para el agente."""
        return np.array([
            data_row['RSI'],
            data_row['MACD_Hist'],
            data_row['Price_Change_Norm'],
            self.current_position 
        ], dtype=np.float32)

    def _calculate_portfolio_value(self, price):
        """Calcula el valor total del portfolio en USD."""
        return self.capital_usd + (self.asset_quantity * price)

    def step(self, action):
        """Aplica la acción y calcula el nuevo estado y la recompensa."""
        
        prev_price = self.df.iloc[self.current_step - 1]['ETH_Price']
        current_price = self.df.iloc[self.current_step]['ETH_Price']
        
        portfolio_value_before_trade = self._calculate_portfolio_value(prev_price)
        reward = 0 
        
        # ⚠️ AJUSTE CRÍTICO 1: Recompensa flotante (pérdida/ganancia real, factor 1.0)
        if self.current_position == 1:
            # Recompensa es la ganancia porcentual *real* por hora * capital
            price_diff = current_price - prev_price
            reward = price_diff * self.asset_quantity # Ganancia/Pérdida en USD de esta hora
        
        # 2. Aplicar la ACCIÓN
        
        if action == 1: # BUY
            if self.current_position == 0:
                # Comprar con todo el capital USD
                self.entry_value = self.capital_usd # Guardamos el valor de entrada
                self.asset_quantity = self.capital_usd / current_price
                self.asset_quantity *= (1 - self.TRANSACTION_FEE) # Aplicar Fee
                self.capital_usd = 0
                self.current_position = 1
                reward -= self.TRANSACTION_FEE * self.entry_value # Penalizar el costo del trade

        elif action == 2: # SELL
            if self.current_position == 1:
                # 💥 AJUSTE CRÍTICO 2: La gran recompensa viene SOLO al vender
                
                # Valor del portfolio ANTES de la venta
                value_before_sell = self.asset_quantity * current_price
                
                # Ejecutar la venta y el fee
                self.capital_usd = value_before_sell * (1 - self.TRANSACTION_FEE)
                self.asset_quantity = 0
                self.current_position = 0
                
                # Ganancia/Pérdida total del Trade = Capital Venta - Capital Entrada
                net_profit_on_trade = self.capital_usd - self.entry_value
                
                # Recompensa Finalizada: Usamos un multiplicador alto (10) para priorizar el trade exitoso
                # El agente valora 1 USD de ganancia en 10 puntos de recompensa
                reward += net_profit_on_trade * 10
                
        # 3. Penalización por costo de gas
        gas_cost = self.df.iloc[self.current_step]['Transaction_Cost_USD']
        if action != 0: 
            reward -= gas_cost * 0.1 
        
        # 4. Actualización y Siguiente Paso
        self.current_step += 1
        terminated = self.current_step >= self.max_timesteps
        truncated = False
        
        if not terminated:
            next_data = self.df.iloc[self.current_step]
            observation = self._get_observation(next_data)
        else:
            observation = np.zeros(self.observation_space.shape, dtype=np.float32)
            
        final_price = self.df.iloc[self.current_step - 1]['ETH_Price']
        current_value = self._calculate_portfolio_value(final_price)
        
        info = {'current_value': current_value, 'net_profit_usd': current_value - self.initial_capital}
        
        return observation, reward, terminated, truncated, info


# =======================================================
# 3. ENTRENAMIENTO Y EVALUACIÓN DQN (FINAL)
# =======================================================

if not swing_df.empty:
    env_swing = SwingTradingEnv(df=swing_df, initial_capital=400)
    vec_env_swing = make_vec_env(lambda: env_swing, n_envs=1)

    # Configuración DQN: Optimizada para Altcoin Trading
    model_swing = DQN(
        "MlpPolicy", 
        vec_env_swing, 
        learning_rate=1e-3, 
        buffer_size=200000,  # Gran memoria para volatilidad
        gamma=0.9,           # Prioriza ganancias rápidas
        exploration_final_eps=0.05, 
        verbose=0,
        device="auto" 
    )

    print("\n\n--- INICIANDO ENTRENAMIENTO DQN (MODELO FINAL DE ALTA GANANCIA) ---")

    # Entrenar el modelo
    TIMESTEPS_SWING = 200000 
    model_swing.learn(total_timesteps=TIMESTEPS_SWING)
    
    # --- PRUEBA DE RENDIMIENTO ---
    print("\n--- INICIANDO PRUEBA DE RENDIMIENTO SWING TRADING ---")
    
    # La prueba siempre se realiza sobre el dataset completo
    obs, info = env_swing.reset()
    done = False
    initial_capital = env_swing.initial_capital

    while not done:
        action_array, _states = model_swing.predict(obs, deterministic=True) 
        try:
            action = int(action_array[0])
        except IndexError:
            action = int(action_array)
        
        obs, reward, terminated, truncated, info = env_swing.step(action)
        done = terminated or truncated

    final_capital = info['current_value']
    net_profit_usd = final_capital - initial_capital
    net_profit_perc = (net_profit_usd / initial_capital) * 100

    # Estrategia de comparación (Benchmark: Buy and Hold del activo)
    start_price = swing_df.iloc[1]['ETH_Price']
    end_price = swing_df.iloc[-1]['ETH_Price']
    price_change = (end_price - start_price) / start_price
    
    buy_hold_profit = initial_capital * price_change
    buy_hold_final = initial_capital + buy_hold_profit

    print("\n--- RESULTADOS SWING TRADING DE ETH ---")
    print(f"Capital Inicial: ${initial_capital:,.2f} USD")
    print(f"Capital Final (Agente RL): ${final_capital:,.2f} USD")
    print(f"Ganancia Neta (Agente RL): +{net_profit_usd:,.2f} USD ({net_profit_perc:.2f} %)")
    print("-" * 30)
    print(f"Benchmark (Buy & Hold ETH): +{buy_hold_profit:,.2f} USD ({price_change * 100:.2f} %)")

    if net_profit_usd > buy_hold_profit:
        print("✅ ¡El agente RL superó al Buy & Hold!")
    else:
        print("❌ El agente RL no superó al Buy & Hold.")

  dates = pd.date_range(end=pd.Timestamp.now(), periods=num_hours, freq='H')


--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---
Total de pasos horarios (timesteps): 8760


--- INICIANDO ENTRENAMIENTO DQN (MODELO FINAL DE ALTA GANANCIA) ---

--- INICIANDO PRUEBA DE RENDIMIENTO SWING TRADING ---

--- RESULTADOS SWING TRADING DE ETH ---
Capital Inicial: $400.00 USD
Capital Final (Agente RL): $400.00 USD
Ganancia Neta (Agente RL): +0.00 USD (0.00 %)
------------------------------
Benchmark (Buy & Hold ETH): +-19.36 USD (-4.84 %)
✅ ¡El agente RL superó al Buy & Hold!


In [7]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
import time
import os
from typing import Dict, Any

# =======================================================
# 1. FUNCIÓN DE GENERACIÓN DE DATOS Y CÁLCULO DE ITs
# =======================================================

def calculate_rsi(series, period=14):
    """Calcula el Índice de Fuerza Relativa (RSI)."""
    delta = series.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.ewm(com=period - 1, min_periods=period).mean()
    avg_loss = loss.ewm(com=period - 1, min_periods=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def calculate_macd_hist(series, fast=12, slow=26, signal=9):
    """Calcula el Histograma MACD."""
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd - signal_line

def create_swing_trading_data() -> pd.DataFrame:
    """
    Genera datos horarios simulados para Swing Trading agresivo de Altcoins, 
    incluyendo el precio y los indicadores técnicos.
    """
    # Usar 'h' en lugar de 'H' para evitar la advertencia de 'FutureWarning'
    num_hours = 8760 
    dates = pd.date_range(end=pd.Timestamp.now(), periods=num_hours, freq='h')
    
    np.random.seed(int(time.time())) 

    # Simular un movimiento de precios bajista
    initial_price = 3000 
    price_changes = np.random.normal(0, 5, num_hours).cumsum() - np.linspace(0, 100, num_hours) 
    noise = np.random.normal(0, 50, num_hours) 
    
    eth_price = initial_price + price_changes + noise
    
    df = pd.DataFrame({'ETH_Price': eth_price}, index=dates)
    
    df['RSI'] = calculate_rsi(df['ETH_Price']).fillna(50)
    df['MACD_Hist'] = calculate_macd_hist(df['ETH_Price']).fillna(0)
    df['Transaction_Cost_USD'] = np.random.uniform(low=0.10, high=0.50, size=num_hours)
    df['Price_Change_Norm'] = df['ETH_Price'].pct_change().fillna(0)
    
    df = df.dropna().reset_index(names=['timestamp'])
    
    df.to_csv('swing_trading_data.csv', index=False)
    
    print("--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---")
    print(f"Total de pasos horarios (timesteps): {len(df)}")
    
    return df

# Ejecutar la generación de datos
historical_swing_df = create_swing_trading_data()


# =======================================================
# 2. CLASE DEL ENTORNO DE SWING TRADING (CORREGIDA FINAL)
# =======================================================

try:
    swing_df = pd.read_csv('swing_trading_data.csv')
    swing_df['timestamp'] = pd.to_datetime(swing_df['timestamp'])
except FileNotFoundError:
    print("ERROR FATAL: Ejecuta el Paso 1 (create_swing_trading_data) primero.")
    swing_df = pd.DataFrame() 


class SwingTradingEnv(gym.Env):
    """Entorno de Reinforcement Learning para Swing Trading agresivo de Altcoins."""

    def __init__(self, df=swing_df, initial_capital=400):
        super(SwingTradingEnv, self).__init__()
        self.df = df
        self.max_timesteps = len(df) - 1
        self.initial_capital = initial_capital
        
        self.TRANSACTION_FEE = 0.0005 
        
        self.action_space = spaces.Discrete(3) 
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32 
        )
        self.reset()
        
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 1 
        self.capital_usd = self.initial_capital
        self.asset_quantity = 0  
        self.current_position = 0 
        self.entry_value = self.initial_capital 
        
        current_data = self.df.iloc[self.current_step]
        observation = self._get_observation(current_data)
        
        info = {'capital_usd': self.capital_usd, 'position': self.current_position}
        return observation, info

    def _get_observation(self, data_row):
        return np.array([
            data_row['RSI'],
            data_row['MACD_Hist'],
            data_row['Price_Change_Norm'],
            self.current_position 
        ], dtype=np.float32)

    def _calculate_portfolio_value(self, price):
        return self.capital_usd + (self.asset_quantity * price)

    def step(self, action):
        """Aplica la acción y calcula el nuevo estado y la recompensa."""
        
        prev_price = self.df.iloc[self.current_step - 1]['ETH_Price']
        current_price = self.df.iloc[self.current_step]['ETH_Price']
        
        portfolio_value_before_trade = self._calculate_portfolio_value(prev_price)
        reward = 0 
        
        # 💥 CORRECCIÓN FINAL: La recompensa por mantener la posición es CERO.
        # Esto elimina la fuga de recompensa acumulativa.
        if self.current_position == 1:
            reward = 0.0 # <-- CORRECCIÓN APLICADA AQUÍ
        
        # 2. Aplicar la ACCIÓN
        
        if action == 1: # BUY
            if self.current_position == 0:
                self.entry_value = self.capital_usd 
                self.asset_quantity = self.capital_usd / current_price
                self.asset_quantity *= (1 - self.TRANSACTION_FEE) 
                self.capital_usd = 0
                self.current_position = 1
                reward -= self.TRANSACTION_FEE * self.entry_value 

        elif action == 2: # SELL
            if self.current_position == 1:
                
                value_before_sell = self.asset_quantity * current_price
                
                self.capital_usd = value_before_sell * (1 - self.TRANSACTION_FEE)
                self.asset_quantity = 0
                self.current_position = 0
                
                net_profit_on_trade = self.capital_usd - self.entry_value
                
                # Recompensa fuerte por el trade completado (sin multiplicador exagerado)
                reward += net_profit_on_trade * 10.0 
                
        # 3. Penalización por costo de gas
        gas_cost = self.df.iloc[self.current_step]['Transaction_Cost_USD']
        if action != 0: 
            reward -= gas_cost * 0.1 
        
        # 4. Actualización y Siguiente Paso
        self.current_step += 1
        terminated = self.current_step >= self.max_timesteps
        truncated = False
        
        if not terminated:
            next_data = self.df.iloc[self.current_step]
            observation = self._get_observation(next_data)
        else:
            observation = np.zeros(self.observation_space.shape, dtype=np.float32)
            
        final_price = self.df.iloc[self.current_step - 1]['ETH_Price']
        current_value = self._calculate_portfolio_value(final_price)
        
        info = {'current_value': current_value, 'net_profit_usd': current_value - self.initial_capital}
        
        return observation, reward, terminated, truncated, info


# =======================================================
# 3. BACKTESTING Y ENTRENAMIENTO (FASE QA FINAL)
# =======================================================

if not swing_df.empty:
    # --- DIVISIÓN DE DATOS (TRAIN / TEST) ---
    split_point = int(len(swing_df) * 0.70)
    train_df = swing_df.iloc[1:split_point] 
    test_df = swing_df.iloc[split_point:]

    print(f"\n--- Preparación para Backtesting ---")
    print(f"Total de pasos (Horas): {len(swing_df)}")
    print(f"Pasos para Entrenamiento (Train): {len(train_df)}")
    print(f"Pasos para Prueba (Test): {len(test_df)}")

    # 1. ENTRENAMIENTO (SOLO con Train Set)
    env_train = SwingTradingEnv(df=train_df, initial_capital=400)
    vec_env_train = make_vec_env(lambda: env_train, n_envs=1)

    model_swing = DQN(
        "MlpPolicy", vec_env_train, learning_rate=1e-3, 
        buffer_size=200000, gamma=0.9, exploration_final_eps=0.05, 
        verbose=0, device="auto" 
    )

    print("\n--- INICIANDO ENTRENAMIENTO EN TRAIN SET ---")
    TIMESTEPS_SWING = 200000 
    model_swing.learn(total_timesteps=TIMESTEPS_SWING)
    
    # 2. EVALUACIÓN (BACKTESTING en Test Set)
    env_test = SwingTradingEnv(df=test_df, initial_capital=400)

    print("\n--- INICIANDO PRUEBA DE RENDIMIENTO (DATOS NO VISTOS) ---")
    obs, info = env_test.reset()
    done = False
    initial_capital = env_test.initial_capital

    while not done:
        action_array, _states = model_swing.predict(obs, deterministic=True) 
        try:
            action = int(action_array[0])
        except IndexError:
            action = int(action_array)
        
        obs, reward, terminated, truncated, info = env_test.step(action)
        done = terminated or truncated

    final_capital = info['current_value']
    net_profit_usd = final_capital - initial_capital
    net_profit_perc = (net_profit_usd / initial_capital) * 100

    # 3. BENCHMARK (Buy and Hold SOLO del Test Set)
    start_price_test = test_df.iloc[0]['ETH_Price']
    end_price_test = test_df.iloc[-1]['ETH_Price']
    price_change_test = (end_price_test - start_price_test) / start_price_test
    
    buy_hold_profit_test = initial_capital * price_change_test

    print("\n--- RESULTADOS DEL BACKTESTING (VALIDACIÓN FINAL) ---")
    print(f"Capital Inicial: ${initial_capital:,.2f} USD")
    print(f"Capital Final (Agente RL): ${final_capital:,.2f} USD")
    print(f"Ganancia Neta (Agente RL): +{net_profit_usd:,.2f} USD ({net_profit_perc:.2f} %)")
    print("-" * 30)
    print(f"Benchmark (Buy & Hold ETH): +{buy_hold_profit_test:,.2f} USD ({price_change_test * 100:.2f} %)")

    if net_profit_usd > buy_hold_profit_test:
        print("✅ ¡El agente RL superó al Buy & Hold en datos no vistos!")
    else:
        print("❌ El agente RL no superó al Buy & Hold en datos no vistos. Se necesita más ajuste.")

--- Datos de Swing Trading Horarios Generados y Guardados en 'swing_trading_data.csv' ---
Total de pasos horarios (timesteps): 8760

--- Preparación para Backtesting ---
Total de pasos (Horas): 8760
Pasos para Entrenamiento (Train): 6131
Pasos para Prueba (Test): 2628

--- INICIANDO ENTRENAMIENTO EN TRAIN SET ---

--- INICIANDO PRUEBA DE RENDIMIENTO (DATOS NO VISTOS) ---

--- RESULTADOS DEL BACKTESTING (VALIDACIÓN FINAL) ---
Capital Inicial: $400.00 USD
Capital Final (Agente RL): $44,656,124.27 USD
Ganancia Neta (Agente RL): +44,655,724.27 USD (11163931.07 %)
------------------------------
Benchmark (Buy & Hold ETH): +-99.17 USD (-24.79 %)
✅ ¡El agente RL superó al Buy & Hold en datos no vistos!


In [8]:
# El modelo ya está guardado en tu código:
model_path = f"dqn_yield_farming_model_{int(time.time())}"
model_swing.save(model_path)