In [None]:
# Custom data class for Backtrader
class AUDUSDData(bt.feeds.PandasData):
    """Backtrader pandas feed for AUD/USD bars carrying two extra indicator lines.

    Extends ``bt.feeds.PandasData`` with ``supertrend_direction`` and ``rsi``
    lines so that a strategy can read them straight off the data feed (as
    ``RLStrategy`` does via ``self.datas[0].supertrend_direction`` and
    ``self.datas[0].rsi``).

    NOTE(review): the ``params`` entries look like backtrader's column
    mapping (a string names the DataFrame column feeding that field, ``None``
    means the field is not supplied) -- confirm against the PandasData docs.
    """
    # Extra lines exposed in addition to the ones PandasData already defines
    lines = ('supertrend_direction', 'rsi',)
    
    # Feed field -> source column name in the DataFrame passed as `dataname`
    params = (
        ('datetime', 'Date'),
        ('open', 'Open'),
        ('high', 'High'),
        ('low', 'Low'),
        ('close', 'Close'),
        ('volume', None),        # no volume column is mapped
        ('openinterest', None),  # no open-interest column is mapped
        ('supertrend_direction', 'supertrend_direction'),
        ('rsi', 'RSI'),
    )
    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import backtrader as bt
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from datetime import datetime, timedelta
import os
import warnings
import torch
warnings.filterwarnings('ignore')

# Global configs
IGNORE_VOLUME = True  # When True, load_data() drops the CSV's Volume column

# Device selection. MPS (Apple Silicon) is probed only to tell the user what
# is happening: it is kept off either way because CPU appeared to be faster.
USE_MPS = False
print(
    "MPS (Apple Silicon acceleration) is available but disabled (using CPU for better performance)"
    if torch.backends.mps.is_available()
    else "MPS not available, using CPU"
)

# SuperTrend Indicator Implementation
def calculate_supertrend(df, period=10, multiplier=3.0):
    """
    Calculate SuperTrend indicator using manual ATR calculation

    The recurrence runs over positional NumPy arrays rather than per-cell
    ``df.loc[df.index[i], ...]`` writes. Label-based writes touch *every* row
    sharing a label, so the previous implementation produced wrong results on
    a non-unique index; positional arrays are also far cheaper than several
    pandas scalar lookups per bar.

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with 'High', 'Low' and 'Close' columns. It is not modified.
    period : int
        Period for ATR calculation (simple rolling mean of the true range)
    multiplier : float
        Multiplier for ATR

    Returns:
    --------
    pd.DataFrame
        Copy of ``df`` with four added columns: 'upperband', 'lowerband',
        'supertrend_direction' (bool, True = up-trend) and 'supertrend'.
        Row 0 always has direction False and supertrend 0.0; until the ATR
        window is full the bands (and therefore 'supertrend') are NaN.
        No scratch columns ('tr*', 'atr') are added to the result.
    """
    out = df.copy()

    high = out['High'].to_numpy(dtype=float)
    low = out['Low'].to_numpy(dtype=float)
    close = out['Close'].to_numpy(dtype=float)
    prev_close = out['Close'].shift().to_numpy(dtype=float)

    # True range = largest of the three candidate ranges. np.fmax ignores a
    # NaN operand, which mirrors DataFrame.max(axis=1) skipping the NaN that
    # the shifted close introduces on the first bar.
    true_range = np.fmax(
        np.abs(high - low),
        np.fmax(np.abs(high - prev_close), np.abs(low - prev_close)),
    )

    # ATR as a simple moving average; NaN until `period` bars are available
    atr = pd.Series(true_range).rolling(period).mean().to_numpy()

    mid_price = (high + low) / 2
    upper = mid_price + multiplier * atr
    lower = mid_price - multiplier * atr

    n_rows = len(out)
    direction = np.zeros(n_rows, dtype=bool)
    supertrend = np.zeros(n_rows, dtype=float)

    for i in range(1, n_rows):
        if close[i] > upper[i - 1]:
            direction[i] = True
        elif close[i] < lower[i - 1]:
            direction[i] = False
        else:
            # No breakout: keep the trend and ratchet the active band so it
            # never moves against the trend. (Comparisons against NaN bands
            # are False, so warm-up bars always land here.)
            direction[i] = direction[i - 1]
            if direction[i] and lower[i] < lower[i - 1]:
                lower[i] = lower[i - 1]
            if not direction[i] and upper[i] > upper[i - 1]:
                upper[i] = upper[i - 1]

        # The SuperTrend line is the band on the protective side of price
        supertrend[i] = lower[i] if direction[i] else upper[i]

    out['upperband'] = upper
    out['lowerband'] = lower
    out['supertrend_direction'] = direction
    out['supertrend'] = supertrend

    return out

# Custom Trading Environment for RL
class ForexTradingEnv(gym.Env):
    """
    Custom Environment for FOREX trading with Reinforcement Learning

    Actions (``Discrete(3)``) name the *target* position, not an order:
    0 = flat (closes any open position), 1 = long, 2 = short. Choosing the
    side already held keeps the position; choosing the opposite side closes
    it and opens the other side at the same bar's close.

    Observation: float32 array of shape ``(window_size, 7)`` built from the
    ``window_size`` rows *before* ``current_step``. Columns are Open, High,
    Low, Close (each divided by the window's first Close, minus 1),
    ``supertrend_direction`` as 0/1, ``RSI`` / 100 and the current position.

    Reward: realised PnL of positions closed during the step (net of the
    exit transaction cost) divided by ``initial_balance``. This includes the
    forced liquidation on the final step of an episode.

    The DataFrame must provide 'Date', 'Open', 'High', 'Low', 'Close',
    'supertrend_direction' and 'RSI' columns.
    """

    def __init__(self, df, window_size=14, initial_balance=1_000_000,
                 transaction_cost_pct=0.0001, position_size=1_000_000):
        """
        Parameters:
        -----------
        df : pd.DataFrame
            Bar data; copied and re-indexed 0..n-1 so steps are positional.
        window_size : int
            Number of past bars in each observation.
        initial_balance : float
            Starting cash, also the reward normaliser.
        transaction_cost_pct : float
            Cost charged on notional (price * position_size) at every entry
            and every exit.
        position_size : float
            Units traded per position (fixed size).
        """
        super().__init__()

        self.df = df.copy().reset_index(drop=True)
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.transaction_cost_pct = transaction_cost_pct
        self.position_size = position_size  # 1M AUD

        # Actions: 0 = Flat, 1 = Long, 2 = Short
        self.action_space = spaces.Discrete(3)

        # Observation space: normalized OHLC, SuperTrend, RSI, current position
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(window_size, 7), dtype=np.float32
        )

        # Define all episode attributes up front so they exist before reset()
        self._reset_state()

    def _reset_state(self):
        """Put every per-episode attribute back to its starting value."""
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.portfolio_value = self.initial_balance
        self.position = 0  # 0: no position, 1: long, -1: short
        self.entry_price = 0
        self.entry_step = None  # row index of the bar the open position was entered on
        self.trades = []
        self.max_portfolio_value = self.initial_balance  # running equity peak
        self.max_drawdown = 0.0  # worst peak-to-trough drop seen so far (fraction)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``."""
        super().reset(seed=seed)
        self._reset_state()
        return self._get_observation(), {}

    def _get_observation(self):
        """Return the current state observation"""
        start = max(0, self.current_step - self.window_size)
        end = self.current_step

        # Extract window of data (rows strictly before current_step)
        window = self.df.iloc[start:end].copy()

        # If window is smaller than window_size, pad with the first row
        if len(window) < self.window_size:
            padding = self.window_size - len(window)
            padding_df = pd.DataFrame([window.iloc[0]] * padding)
            window = pd.concat([padding_df, window]).reset_index(drop=True)

        # Prices are expressed relative to the first close in the window
        base_price = window['Close'].iloc[0]
        if base_price == 0:  # Avoid division by zero
            base_price = 1.0

        # Deliberately best-effort: a malformed window yields an all-zero
        # observation (with a printed message) instead of aborting training.
        try:
            observation = np.column_stack((
                window['Open'].values / base_price - 1,  # Normalized open
                window['High'].values / base_price - 1,  # Normalized high
                window['Low'].values / base_price - 1,   # Normalized low
                window['Close'].values / base_price - 1, # Normalized close
                window['supertrend_direction'].values.astype(float),  # SuperTrend direction
                window['RSI'].values / 100.0,  # Normalized RSI
                np.full(self.window_size, self.position)  # Current position
            ))

            # Ensure observation has correct shape
            if observation.shape != (self.window_size, 7):
                print(f"Warning: Unexpected observation shape {observation.shape}, reshaping...")
                observation = np.zeros((self.window_size, 7), dtype=np.float32)

            # Replace NaN values with zeros
            observation = np.nan_to_num(observation, nan=0.0)

        except Exception as e:
            print(f"Error creating observation: {e}")
            observation = np.zeros((self.window_size, 7), dtype=np.float32)

        return observation.astype(np.float32)

    def _directional_move(self, price):
        """Price change since entry, signed so that a gain is positive."""
        price_diff = price - self.entry_price
        return -price_diff if self.position == -1 else price_diff

    def _close_position(self, exit_price, exit_step):
        """Close the open position at ``exit_price``; return its net PnL.

        Books the PnL (minus the exit transaction cost) to the balance,
        appends the trade record and flattens the position.
        """
        pnl = self._directional_move(exit_price) * self.position_size
        pnl -= exit_price * self.position_size * self.transaction_cost_pct

        self.balance += pnl
        self.trades.append({
            'entry_time': self.df['Date'].iloc[self.entry_step],
            'exit_time': self.df['Date'].iloc[exit_step],
            'entry_price': self.entry_price,
            'exit_price': exit_price,
            'position': 'long' if self.position == 1 else 'short',
            'pnl': pnl,
            'return': pnl / self.position_size
        })
        self.position = 0
        return pnl

    def _mark_to_market(self, price):
        """Revalue the portfolio at ``price`` and update peak and drawdown."""
        self.portfolio_value = self.balance
        if self.position != 0:
            # Add unrealized PnL
            self.portfolio_value += self._directional_move(price) * self.position_size

        self.max_portfolio_value = max(self.max_portfolio_value, self.portfolio_value)
        if self.max_portfolio_value > 0:
            drawdown = (self.max_portfolio_value - self.portfolio_value) / self.max_portfolio_value
            self.max_drawdown = max(self.max_drawdown, drawdown)

    def step(self, action):
        """Take a trading action in the environment

        Returns ``(observation, reward, terminated, truncated, info)``.
        ``truncated`` is always False; ``info`` is empty except on the final
        step, where it holds the metrics from ``_calculate_metrics``.
        """
        current_price = self.df['Close'].iloc[self.current_step]

        reward = 0
        done = False
        info = {}

        # Close the open position unless the action asks for the side held
        if (self.position == 1 and (action == 2 or action == 0)) or \
           (self.position == -1 and (action == 1 or action == 0)):
            reward = self._close_position(current_price, self.current_step) / self.initial_balance

        # Open a position when flat (this also covers a reversal, because
        # the close above has just flattened the book)
        if self.position == 0 and action != 0:
            self.position = 1 if action == 1 else -1
            self.entry_price = current_price
            self.entry_step = self.current_step

            # Entry cost is charged to the balance immediately
            self.balance -= current_price * self.position_size * self.transaction_cost_pct

        self._mark_to_market(current_price)

        # Move to next step
        self.current_step += 1

        # The episode ends one bar before the data runs out
        if self.current_step >= len(self.df) - 1:
            done = True
            if self.position != 0:
                # Liquidate at the last close and credit the result to the
                # reward; otherwise holding a position into the end of the
                # episode would never be rewarded or penalised.
                final_price = self.df['Close'].iloc[-1]
                last_step = len(self.df) - 1
                reward += self._close_position(final_price, last_step) / self.initial_balance
                self._mark_to_market(final_price)

            # Calculate final metrics
            info = self._calculate_metrics()

        # Get new observation
        observation = self._get_observation()

        return observation, reward, done, False, info

    def _calculate_metrics(self):
        """Calculate trading performance metrics

        Returns a dict with 'win_rate', 'total_pnl', 'avg_pnl',
        'sharpe_ratio', 'max_drawdown', 'final_balance' and 'num_trades'.
        """
        metrics = {}

        # Calculate win rate
        if self.trades:
            win_trades = sum(1 for trade in self.trades if trade['pnl'] > 0)
            metrics['win_rate'] = win_trades / len(self.trades)
        else:
            metrics['win_rate'] = 0

        # Calculate PnL statistics
        pnls = [trade['pnl'] for trade in self.trades]
        metrics['total_pnl'] = sum(pnls)
        metrics['avg_pnl'] = np.mean(pnls) if pnls else 0

        # Sharpe ratio from per-trade returns, scaled by sqrt(252). A zero
        # spread (fewer than two trades, or identical returns) reports 0
        # instead of dividing by zero.
        returns = [trade['return'] for trade in self.trades]
        return_std = np.std(returns) if len(returns) > 1 else 0
        if return_std > 0:
            metrics['sharpe_ratio'] = np.mean(returns) / return_std * np.sqrt(252)
        else:
            metrics['sharpe_ratio'] = 0

        # Worst peak-to-trough drawdown observed over the whole episode
        metrics['max_drawdown'] = self.max_drawdown

        # Additional metrics
        metrics['final_balance'] = self.balance
        metrics['num_trades'] = len(self.trades)

        return metrics

# Callback for saving best model during training and showing progress metrics
class SaveBestModelCallback(BaseCallback):
    """Training callback that prints progress and checkpoints the best model.

    A progress report (timesteps, FPS, elapsed and estimated remaining time,
    plus the env's ``recent_info`` metrics when available) is printed every
    ``model.n_steps * print_freq`` calls or when more than 60 seconds have
    passed since the last report. Every ``check_freq`` calls the mean reward
    in ``model.ep_info_buffer`` is compared with the best seen so far and,
    when it improves, the model is saved as ``best_model`` and
    ``latest_best_model`` under ``save_path``.
    """

    def __init__(self, check_freq=1000, save_path="./models", verbose=1, env=None, total_timesteps=100000):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        self.env = env
        self.total_timesteps = total_timesteps
        self.best_mean_reward = -np.inf
        self.print_freq = 10  # Print every 10 iterations
        self.start_time = None
        self.last_print_time = None

    def _init_callback(self):
        """Create the save directory and start the wall-clock timers."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
        self.start_time = self.last_print_time = datetime.now()

    def _on_step(self):
        """Report progress and checkpoint when due; always continue training."""
        now = datetime.now()
        seconds_since_report = (now - self.last_print_time).total_seconds()
        on_report_boundary = self.n_calls % (self.model.n_steps * self.print_freq) == 0

        if on_report_boundary or seconds_since_report > 60:
            self._report_progress(now)

        if self.n_calls % self.check_freq == 0:
            self._save_if_best()

        return True

    def _report_progress(self, now):
        """Print the progress report; a no-op until some progress exists."""
        elapsed = (now - self.start_time).total_seconds()
        fraction_done = self.num_timesteps / self.total_timesteps
        if not fraction_done > 0:
            return

        # Linear extrapolation of the total run time from progress so far
        remaining = elapsed / fraction_done - elapsed
        remaining_text = str(timedelta(seconds=int(remaining)))
        fps = self.num_timesteps / elapsed if elapsed > 0 else 0

        print(f"\nProgress: {self.num_timesteps}/{self.total_timesteps} ({fraction_done:.1%} complete)")
        print(f"Current FPS: {fps:.1f}, Elapsed: {timedelta(seconds=int(elapsed))}")
        print(f"Estimated time remaining: {remaining_text}")

        # Metrics of the last finished episode, if the env exposes them
        if self.env is not None and hasattr(self.env, 'get_attr'):
            try:
                info = self.env.get_attr('recent_info')[0]
                if info:
                    if 'win_rate' in info:
                        print(f"Win rate: {info['win_rate']:.2%}")
                    if 'total_pnl' in info:
                        print(f"Total PnL: ${info['total_pnl']:.2f}")
                    if 'sharpe_ratio' in info:
                        print(f"Sharpe ratio: {info['sharpe_ratio']:.2f}")
            except (AttributeError, IndexError):
                # The env has no recent_info (yet); metrics are optional
                pass

        print("-" * 50)
        self.last_print_time = now

    def _save_if_best(self):
        """Save the model when the buffered mean episode reward improves."""
        try:
            episode_rewards = [episode["r"] for episode in self.model.ep_info_buffer]
            if len(episode_rewards) == 0:
                return

            mean_reward = np.mean(episode_rewards)
            if not mean_reward > self.best_mean_reward:
                return

            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print(f"New best mean reward: {mean_reward:.2f}")
                print(f"Saving new best model to {self.save_path}/best_model.zip")

            try:
                self.model.save(os.path.join(self.save_path, "best_model"))
                # Second copy in case best_model is read while being written
                self.model.save(os.path.join(self.save_path, "latest_best_model"))
            except Exception as e:
                print(f"Warning: Error saving model: {e}")
        except Exception as e:
            print(f"Warning: Error checking rewards: {e}")

# BackTrader implementation for strategy evaluation
class RLStrategy(bt.Strategy):
    """Backtrader strategy that uses the trained RL model for trading decisions

    On every bar the last ``window_size`` bars are turned into the same
    ``(window_size, 7)`` observation layout the model was trained on, and the
    predicted action is mapped to a target position: 0 = flat (close any
    open position), 1 = long, 2 = short.

    NOTE(review): the observation window here ends at the *current* bar,
    whereas ``ForexTradingEnv._get_observation`` uses the rows strictly
    before the bar being traded -- confirm whether this one-bar offset
    between training and backtesting is intended.
    """

    params = (
        ('model_path', 'models/best_model'),  # Path to saved model
        ('window_size', 14),                  # Observation window size
    )

    def __init__(self):
        self.model = self._load_model()

        # Indicator lines supplied by the data feed
        self.supertrend_direction = self.datas[0].supertrend_direction
        self.rsi = self.datas[0].rsi

        # Order / history state
        self.order = None
        self.position_size = 1_000_000  # 1M AUD
        self.price_history = []
        self.supertrend_history = []
        self.rsi_history = []

        # Trade tracking
        self.trades = []
        self.entry_price = 0
        self.entry_time = None
        self.current_position = 0  # 0: no position, 1: long, -1: short
        self._last_fill_price = None  # execution price of the latest completed order

    def _load_model(self):
        """Load the PPO model, retrying once with any '.zip' suffix removed."""
        try:
            model = PPO.load(self.params.model_path)
            print(f"Successfully loaded model from {self.params.model_path}")
            return model
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Trying alternative model path...")
            try:
                # Try without extension
                model = PPO.load(self.params.model_path.replace(".zip", ""))
                print(f"Successfully loaded model without .zip extension")
                return model
            except Exception as e2:
                print(f"Failed to load model without extension: {e2}")
                raise

    def next(self):
        window = self.params.window_size

        # Record every bar -- warm-up bars included -- so that the history is
        # already full when trading starts. Previously the history was only
        # filled after the warm-up, so the first decisions were made on an
        # all-zero placeholder observation.
        self.price_history.append({
            'Open': self.data.open[0],
            'High': self.data.high[0],
            'Low': self.data.low[0],
            'Close': self.data.close[0]
        })
        self.supertrend_history.append(self.supertrend_direction[0])
        self.rsi_history.append(self.rsi[0])

        # Keep only the last window_size bars
        for history in (self.price_history, self.supertrend_history, self.rsi_history):
            if len(history) > window:
                history.pop(0)

        # Skip the first window_size bars to have enough history
        if len(self.data) <= window:
            return

        # Never stack a new decision on top of an order that is still live
        if self.order:
            return

        observation = self._prepare_observation()

        try:
            # Get action from RL model
            action, _ = self.model.predict(observation, deterministic=True)

            # Execute action
            if action == 1:  # Buy
                if self.current_position <= 0:  # No position or short
                    if self.current_position < 0:  # Close short first
                        self.close()
                    # Enter long position
                    self.order = self.buy(size=self.position_size)
                    self.entry_price = self.data.close[0]
                    self.entry_time = self.data.datetime.datetime()
                    self.current_position = 1

            elif action == 2:  # Sell
                if self.current_position >= 0:  # No position or long
                    if self.current_position > 0:  # Close long first
                        self.close()
                    # Enter short position
                    self.order = self.sell(size=self.position_size)
                    self.entry_price = self.data.close[0]
                    self.entry_time = self.data.datetime.datetime()
                    self.current_position = -1

            elif action == 0 and self.current_position != 0:  # Flat: close the open position
                self.close()
                self.current_position = 0

        except Exception as e:
            self.log(f"Error predicting action: {e}")
            # If error occurs, default to closing position (safer approach)
            if self.current_position != 0:
                self.close()
                self.current_position = 0

    def _prepare_observation(self):
        """Prepare observation for the RL model

        Returns a float32 array of shape ``(window_size, 7)``; an all-zero
        array is returned when the history is not full yet or when building
        the observation fails.
        """
        if len(self.price_history) < self.params.window_size:
            # Not enough data yet, return a zeros array with correct shape
            return np.zeros((self.params.window_size, 7), dtype=np.float32)

        try:
            # Create price arrays
            opens = np.array([bar['Open'] for bar in self.price_history])
            highs = np.array([bar['High'] for bar in self.price_history])
            lows = np.array([bar['Low'] for bar in self.price_history])
            closes = np.array([bar['Close'] for bar in self.price_history])

            # Normalize prices relative to the first close price
            base_price = closes[0]
            if base_price == 0:  # Avoid division by zero
                base_price = 1.0

            opens = opens / base_price - 1
            highs = highs / base_price - 1
            lows = lows / base_price - 1
            closes = closes / base_price - 1

            # Ensure supertrend_history and rsi_history are the right length
            st_history = np.array(self.supertrend_history).astype(float)
            if len(st_history) < self.params.window_size:
                # Pad with zeros if needed
                st_history = np.pad(st_history, (self.params.window_size - len(st_history), 0))

            # Normalize RSI (handle possible NaN values)
            rsi_history = np.array(self.rsi_history)
            if len(rsi_history) < self.params.window_size:
                # Pad with 50 (neutral RSI) if needed
                rsi_history = np.pad(rsi_history, (self.params.window_size - len(rsi_history), 0),
                                     constant_values=50)

            # Replace possible NaN values with neutral value (50)
            rsi_history = np.nan_to_num(rsi_history, nan=50.0)
            rsi_norm = rsi_history / 100.0

            # Create observation array
            observation = np.column_stack((
                opens,
                highs,
                lows,
                closes,
                st_history,
                rsi_norm,
                np.full(self.params.window_size, self.current_position)
            ))

            # Final check to ensure correct shape
            if observation.shape != (self.params.window_size, 7):
                print(f"Warning: Unexpected observation shape {observation.shape}, reshaping...")
                observation = np.zeros((self.params.window_size, 7), dtype=np.float32)

            return observation.astype(np.float32)

        except Exception as e:
            print(f"Error creating observation: {e}")
            # Return zeros array with correct shape as fallback
            return np.zeros((self.params.window_size, 7), dtype=np.float32)

    def notify_order(self, order):
        """Track order completion and recover from orders that never fill."""
        if order.status == order.Completed:
            if order.isbuy():
                self.log(f'BUY EXECUTED, Price: {order.executed.price:.5f}')
            elif order.issell():
                self.log(f'SELL EXECUTED, Price: {order.executed.price:.5f}')
            self._last_fill_price = order.executed.price

        elif order.status in (order.Canceled, order.Expired, order.Margin, order.Rejected):
            # The order will never execute. Without clearing self.order the
            # strategy would wait on it forever, and current_position would
            # keep describing a position that was never opened -- so
            # re-derive it from the broker's actual position.
            self.log(f'ORDER {order.getstatusname()}')
            held = self.position.size
            self.current_position = (held > 0) - (held < 0)

        else:
            # Submitted / Accepted / Partial: the order is still live
            return

        self.order = None

    def notify_trade(self, trade):
        """Record a closed trade using the trade's own open time and price."""
        if not trade.isclosed:
            return

        # NOTE(review): assumes order notifications are delivered before
        # trade notifications, so the latest fill is the one that closed this
        # trade (on a reversal both orders fill on the same bar) -- confirm.
        exit_price = self._last_fill_price
        if exit_price is None:
            exit_price = self.data.close[0]

        self.trades.append({
            # self.entry_time / self.entry_price may already describe the
            # *next* position after a reversal, so take them from the trade.
            'entry_time': bt.num2date(trade.dtopen),
            'exit_time': self.data.datetime.datetime(),
            'entry_price': trade.price,  # average entry price of the trade
            'exit_price': exit_price,
            'pnl': trade.pnl,
            'pnlcomm': trade.pnlcomm,
            'commission': trade.commission
        })

        self.log(f'TRADE COMPLETED, PnL: {trade.pnlcomm:.2f}')

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (default: current bar time)."""
        dt = dt or self.data.datetime.datetime()
        print(f'{dt.isoformat()}, {txt}')

    def stop(self):
        """Print summary metrics over the recorded trades, if any."""
        if not self.trades:
            return

        trade_count = len(self.trades)

        # Win rate
        win_trades = sum(1 for t in self.trades if t['pnlcomm'] > 0)
        win_rate = win_trades / trade_count

        # PnL metrics
        total_pnl = sum(t['pnlcomm'] for t in self.trades)
        avg_pnl = total_pnl / trade_count

        # Sharpe ratio (simplified, using trade returns); 0 when the returns
        # have no spread, instead of dividing by zero
        returns = [t['pnlcomm'] / self.position_size for t in self.trades]
        return_std = np.std(returns) if trade_count > 1 else 0
        sharpe = np.mean(returns) / return_std * np.sqrt(252) if return_std > 0 else 0

        print('\n====== Strategy Metrics ======')
        print(f'Total Trades: {trade_count}')
        print(f'Win Rate: {win_rate:.2%}')
        print(f'Total PnL: ${total_pnl:.2f}')
        print(f'Average PnL per Trade: ${avg_pnl:.2f}')
        print(f'Sharpe Ratio: {sharpe:.2f}')

# Function to prepare data for RL training and backtesting
def load_data(csv_path: str) -> pd.DataFrame:
    """
    Loads OHLC data from a CSV file.
    Handles the specific format of AUDUSD_M5.csv with 'Local time' column.

    A 'Date' column of pandas timestamps is produced from either:
      * 'Local time' -- values like '01.02.2020 03:04:05.000 GMT+1100'; the
        ' GMT...' suffix is cut off and the rest parsed as naive day-first
        timestamps. 'Local time' is then overwritten with the parsed values.
      * 'Date' -- parsed with pandas' default format detection.
    If neither column exists the data is returned without a 'Date' column.

    The 'Volume' column is dropped when the module-level IGNORE_VOLUME flag
    is set.
    """
    data = pd.read_csv(csv_path)

    # Handle the datetime column
    if 'Local time' in data.columns:
        # Strip the timezone info to avoid parsing issues
        local_stamps = data['Local time'].str.split(' GMT').str[0]
        data['Date'] = pd.to_datetime(local_stamps, format='%d.%m.%Y %H:%M:%S.%f')
        # Keep the original column for reference but we'll use Date for processing
        data['Local time'] = data['Date']
    elif 'Date' in data.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (inference
        # is the default now), so it is no longer passed.
        data['Date'] = pd.to_datetime(data['Date'])

    # Drop Volume column if requested
    if 'Volume' in data.columns and IGNORE_VOLUME:
        data = data.drop(columns='Volume')

    return data

def calculate_rsi(close_prices, window=14):
    """
    Calculate RSI (Relative Strength Index) manually

    Uses simple rolling means of gains and losses and the closed form
    ``RSI = 100 * avg_gain / (avg_gain + avg_loss)``, which is algebraically
    the same as ``100 - 100 / (1 + avg_gain / avg_loss)`` but needs no
    division-by-zero workaround:
      * only gains in the window  -> 100
      * only losses in the window -> 0
      * no movement at all        -> 50 (neutral; previously reported as 0)

    Parameters:
    -----------
    close_prices : pd.Series
        Series of closing prices
    window : int
        RSI calculation window

    Returns:
    --------
    pd.Series
        RSI values in [0, 100], indexed like ``close_prices``. Positions
        without a full window are filled with the neutral value 50.
    """
    # Deliberately best-effort: on failure a neutral series is returned (and
    # the error printed) so the surrounding pipeline keeps running.
    try:
        # Calculate price differences
        delta = close_prices.diff()

        # Separate gains and losses (both non-negative; the undefined first
        # difference counts as no move)
        gains = delta.where(delta > 0, 0.0)
        losses = (-delta).where(delta < 0, 0.0)

        # Calculate average gains and losses
        avg_gains = gains.rolling(window=window).mean()
        avg_losses = losses.rolling(window=window).mean()

        # A window without any movement has no defined RSI: mask the zero
        # denominator so the result is NaN and becomes neutral below.
        total_move = avg_gains + avg_losses
        rsi = 100.0 * avg_gains / total_move.where(total_move != 0)

        # Warm-up rows and flat windows get the neutral RSI value
        return rsi.fillna(50)

    except Exception as e:
        print(f"Error calculating RSI: {e}")
        # Return a Series of default RSI values (50 = neutral)
        return pd.Series(50, index=close_prices.index)

def prepare_data(csv_path):
    """Build the feature frame used for both training and backtesting.

    Reads the CSV with ``load_data``, adds the SuperTrend columns
    (period 10, multiplier 3) and a 14-bar 'RSI' column, then discards every
    row that still contains a NaN and renumbers the index from 0.
    """
    frame = calculate_supertrend(load_data(csv_path), period=10, multiplier=3)
    frame['RSI'] = calculate_rsi(frame['Close'], window=14)
    return frame.dropna().reset_index(drop=True)

# Function to train the RL model
def train_rl_model(train_df, model_save_path='models', total_timesteps=100000):
    """Train the RL model on historical data

    Builds a ``ForexTradingEnv`` over ``train_df``, trains a PPO agent for
    ``total_timesteps`` steps and saves it as ``final_model`` under
    ``model_save_path`` (and as ``best_model`` when the callback did not
    save one during training).

    The env is wrapped in ``Monitor`` so that episode rewards reach
    ``model.ep_info_buffer``; without it the buffer stays empty and
    ``SaveBestModelCallback`` can never pick a best model.

    Returns the trained PPO model. A failure inside ``model.learn`` is
    printed and the partially trained model is still saved and returned; any
    other error is printed with its traceback and re-raised.
    """
    # Local imports: only this function needs them
    import traceback
    from stable_baselines3.common.monitor import Monitor

    try:
        # Create the environment
        env = ForexTradingEnv(
            df=train_df,
            window_size=14,
            initial_balance=1_000_000,
            transaction_cost_pct=0.0001,
            position_size=1_000_000
        )

        # Metrics of the most recently finished episode, read by the callback
        env.recent_info = None

        # Wrap step() on this instance to capture the end-of-episode metrics
        original_step = env.step

        def step_with_metrics(action):
            try:
                observation, reward, done, truncated, info = original_step(action)

                if done:
                    env.recent_info = info

                return observation, reward, done, truncated, info
            except Exception as e:
                print(f"Error in step_with_metrics: {e}")
                # Return safe defaults (ends the episode)
                return np.zeros((env.window_size, 7), dtype=np.float32), 0, True, False, {}

        env.step = step_with_metrics

        # Monitor records per-episode reward/length into the step info, which
        # is what fills model.ep_info_buffer; then vectorise for SB3.
        vec_env = DummyVecEnv([lambda: Monitor(env)])

        # Set device
        device = 'mps' if USE_MPS else 'cpu'  # MPS = Apple Silicon

        # Create PPO model
        model = PPO(
            "MlpPolicy",
            vec_env,
            learning_rate=3e-4,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.01,
            vf_coef=0.5,
            max_grad_norm=0.5,
            verbose=0,  # Reduce verbosity to show custom progress instead
            device=device
        )

        # Create callback for saving best model
        os.makedirs(model_save_path, exist_ok=True)
        callback = SaveBestModelCallback(
            check_freq=5000,  # Check model saving less frequently
            save_path=model_save_path,
            env=vec_env,
            total_timesteps=total_timesteps
        )

        # Print time estimate before training. timesteps / FPS is in
        # seconds, so divide by 60 to really report minutes.
        print(f"Starting training for {total_timesteps} timesteps...")
        if USE_MPS:
            print(f"Using Apple M chip acceleration (MPS)")
            print(f"Estimated time: ~{total_timesteps / 250 / 60:.1f} minutes (based on ~250 FPS)")
        else:
            print(f"Using CPU")
            print(f"Estimated time: ~{total_timesteps / 300 / 60:.1f} minutes (based on ~300 FPS)")
        print("Progress updates will be shown every 10 iterations or 60 seconds")
        print("-" * 50)

        # Train the model with error handling
        try:
            model.learn(total_timesteps=total_timesteps, callback=callback)
        except Exception as e:
            print(f"Error during training: {e}")
            print("Attempting to save current model state...")

        # Save final model
        try:
            final_model_path = os.path.join(model_save_path, "final_model")
            model.save(final_model_path)
            print(f"Model training complete, saved to {final_model_path}")

            # Also save as best model if no best model was saved during training
            best_model_path = os.path.join(model_save_path, "best_model")
            if not os.path.exists(best_model_path + ".zip"):
                print("No best model was saved during training. Using final model as best model.")
                model.save(best_model_path)
        except Exception as e:
            print(f"Error saving model: {e}")

        return model

    except Exception as e:
        print(f"Error in train_rl_model: {e}")
        traceback.print_exc()
        raise

# Backtesting of the trained model with backtrader (report helper + entry point)
def _print_trade_summary(trades):
    """Print trade counts, win rate and PnL figures from a TradeAnalyzer result.

    Parameters
    ----------
    trades : mapping
        Result of ``TradeAnalyzer.get_analysis()``. Every lookup goes through
        ``.get`` because the analyzer only fills in the keys it has data for
        (e.g. ``total['closed']`` is absent until a trade has closed), and
        indexing a missing key raises ``KeyError``.
    """
    total_info = trades.get('total') if trades else None
    opened_trades = total_info.get('total', 0) if total_info else 0
    if not opened_trades:
        print("No trades were executed during backtesting")
        return

    # Statistics are computed over closed trades only; a position that is
    # still open at the end of the data has no realised PnL yet.
    total_trades = total_info.get('closed', 0)
    winning_trades = (trades.get('won') or {}).get('total', 0)
    win_rate = (winning_trades / total_trades) if total_trades > 0 else 0

    print(f"Total Trades: {total_trades}")
    print(f"Winning Trades: {winning_trades}")
    print(f"Win Rate: {win_rate:.2%}")

    pnl = trades.get('pnl')
    if not pnl:
        return

    gross_pnl = (pnl.get('gross') or {}).get('total', 0.0)
    net_profit = (pnl.get('net') or {}).get('total', 0.0)
    # Gross minus net is the commission charged on the closed trades; it is
    # not a "gross loss", so it is reported under its real name.
    commission_paid = gross_pnl - net_profit

    print(f"Gross PnL: ${gross_pnl:.2f}")
    print(f"Commission Paid: ${commission_paid:.2f}")
    print(f"Net Profit/Loss: ${net_profit:.2f}")

    if total_trades > 0:
        print(f"Average Trade PnL: ${net_profit / total_trades:.2f}")


def backtest_with_backtrader(test_df, model_path='models/best_model',
                             window_size=14, initial_cash=1_000_000.0,
                             commission=0.0001):
    """Backtest the trained model using backtrader.

    Parameters
    ----------
    test_df : pd.DataFrame
        Data handed to ``AUDUSDData`` as ``dataname``.
    model_path : str
        Path of the saved model without the ``.zip`` suffix. If that file is
        missing, a fixed list of fallbacks under ``models/`` is tried.
    window_size : int
        Forwarded to ``RLStrategy``.
    initial_cash : float
        Starting broker cash.
    commission : float
        Broker commission passed to ``setcommission`` (0.0001 = 0.01%).

    Returns
    -------
    list or None
        The value returned by ``cerebro.run()``, or ``None`` if anything
        failed (the error and traceback are printed, not raised).
    """
    try:
        print(f"Attempting to load model from {model_path}")

        # Check if model exists, try alternatives if not found
        if not os.path.exists(f"{model_path}.zip"):
            print(f"Model file {model_path}.zip not found. Checking alternatives...")

            alternatives = [
                "models/latest_best_model",
                "models/final_model",
                "models/best_model",
            ]

            for alt_path in alternatives:
                if os.path.exists(f"{alt_path}.zip"):
                    print(f"Found alternative model at {alt_path}.zip")
                    model_path = alt_path
                    break
            else:
                # Loop finished without a break: nothing usable on disk
                raise FileNotFoundError("No model files found. Please train the model first.")

        # Assemble the backtrader engine: data feed, strategy, broker settings
        cerebro = bt.Cerebro()
        cerebro.adddata(AUDUSDData(dataname=test_df))
        cerebro.addstrategy(RLStrategy, model_path=model_path, window_size=window_size)
        cerebro.broker.setcash(initial_cash)
        cerebro.broker.setcommission(commission=commission)

        # Analyzers stay attached to the returned strategy, so callers can
        # still read any of them (including 'returns') from the result.
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
        cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
        cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trades')
        cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')

        # Run backtest
        print("Running backtest with Backtrader...")
        results = cerebro.run()
        strat = results[0]

        # Get analyzer results
        sharpe = strat.analyzers.sharpe.get_analysis()
        drawdown = strat.analyzers.drawdown.get_analysis()
        trades = strat.analyzers.trades.get_analysis()

        # Print metrics
        print("\n======= Backtrader Results =======")
        print(f"Final Portfolio Value: ${cerebro.broker.getvalue():.2f}")

        # The Sharpe ratio may be missing or None; formatting None would
        # raise TypeError, so fall back to N/A.
        sharpe_ratio = sharpe.get('sharperatio') if sharpe else None
        if sharpe_ratio is not None:
            print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
        else:
            print("Sharpe Ratio: N/A")

        max_drawdown = (drawdown.get('max') or {}).get('drawdown') if drawdown else None
        if max_drawdown is not None:
            print(f"Max Drawdown: {max_drawdown:.2f}%")
        else:
            print("Max Drawdown: N/A")

        _print_trade_summary(trades)

        # Plotting is best-effort: a failure here must not discard the results
        try:
            cerebro.plot(style='candlestick', barup='green', bardown='red')
        except Exception as e:
            print(f"Error plotting results: {e}")

        return results

    except Exception as e:
        print(f"Error during backtesting: {e}")
        import traceback
        traceback.print_exc()
        return None

def main(csv_path='data/AUDUSD_M5.csv', total_timesteps=100000, check_model_mode=False):
    """Run the entire pipeline: prepare data, train the RL model, backtest it.

    Parameters
    ----------
    csv_path : str
        Path passed to ``prepare_data``.
    total_timesteps : int
        Training budget forwarded to ``train_rl_model``.
    check_model_mode : bool
        When True and a saved model already exists under ``models/``, ask on
        stdin whether to train again; answering anything other than ``y``
        skips training. When False (default), always train.
    """
    # Prepare data
    print("Preparing data...")
    df = prepare_data(csv_path)
    print(f"Loaded {len(df)} bars of AUDUSD M5 data")

    # Chronological split (no shuffling): first 80% for training, rest for testing
    train_size = int(len(df) * 0.8)
    train_df = df[:train_size].copy()
    test_df = df[train_size:].copy()

    print(f"Training data: {len(train_df)} bars")
    print(f"Testing data: {len(test_df)} bars")

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs('models', exist_ok=True)

    should_train = True
    if check_model_mode:
        model_exists = (
            os.path.exists('models/best_model.zip')
            or os.path.exists('models/final_model.zip')
        )
        # Only prompt when there is something to reuse
        if model_exists and input("Model already exists. Train again? (y/n): ").lower() != 'y':
            should_train = False

    if should_train:
        # The trained model is reloaded from disk by the backtest, so the
        # returned object is not needed here.
        train_rl_model(train_df, model_save_path='models', total_timesteps=total_timesteps)
    else:
        print("Skipping training, using existing model")

    # Backtest with backtrader
    backtest_with_backtrader(test_df, model_path='models/best_model')

# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

MPS (Apple Silicon acceleration) is available but disabled (using CPU for better performance)
Preparing data...


### O1 Pro