# Library Setup

In [1]:
# Install the notebook's dependencies; this needs a requirements.txt in the kernel's working directory.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'


In [None]:
import os
import yfinance as yf
from vnstock import Vnstock
from typing import Tuple, List, Dict

from gymnasium import spaces
import gymnasium as gym

import torch
from torch.nn import functional as F
from torch import nn, optim
from torch.distributions import Normal

import talib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from datetime import datetime

from stable_baselines3 import SAC, PPO, A2C
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.policies import ActorCriticPolicy

from joblib import Parallel, delayed
import multiprocessing

import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Data Crawling

In [10]:
def fetch_stock_data(vn_stock_list, global_stock_list, starting, ending,
                     usd_to_vnd_rate: float = 25.5) -> pd.DataFrame:
    """
    Download OHLC prices for Vietnamese and global tickers into one wide frame.

    Every ticker is reindexed onto a daily calendar spanning ``starting`` to
    ``ending`` and labelled with a ``(Stock, Metric)`` column MultiIndex, where
    Metric is one of ``open``, ``high``, ``low``, ``close``.

    Args:
        vn_stock_list: Symbols fetched through ``Vnstock`` (source ``'VCI'``).
        global_stock_list: Symbols fetched through ``yfinance``.
        starting: Start date passed to both providers and to ``pd.date_range``.
        ending: End date passed to both providers and to ``pd.date_range``.
        usd_to_vnd_rate: Factor applied to the yfinance prices so they share a
            scale with the Vietnamese quotes. The default of 25.5 is the rate
            that was previously hard-coded.
            NOTE(review): presumably thousands of VND per USD - confirm.

    Returns:
        DataFrame indexed by calendar day with columns sorted by stock name.
        Gaps are forward-filled and then backward-filled, so days before a
        ticker's first quote carry that first quote.

    Raises:
        ValueError: If no ticker could be fetched at all.
    """
    price_columns = ['open', 'high', 'low', 'close']
    date_range = pd.date_range(start=starting, end=ending, freq='D')
    stock_data_dict = {}

    print(f"Fetching {len(vn_stock_list)} Vietnamese stock data...")
    for stock in vn_stock_list:
        try:
            data = Vnstock().stock(symbol=stock, source='VCI').quote.history(
                start=starting, end=ending
            )
            # Work on new frames instead of mutating the provider's result in place.
            data = data.reset_index().rename(columns={'time': 'date'})
            data['date'] = pd.to_datetime(data['date'])
            data = data.set_index('date')[price_columns]
            # Put the quotes on the full daily calendar; missing days become NA.
            data = data.reindex(date_range, fill_value=pd.NA)
            data.columns = pd.MultiIndex.from_product([[stock], data.columns], names=['Stock', 'Metric'])
            stock_data_dict[stock] = data
        except Exception as e:
            # Best effort: a failing ticker is reported and skipped.
            print(f"Error fetching data for {stock}: {e}")
            continue

    print(f"Fetching {len(global_stock_list)} Global stock data...")
    for stock in global_stock_list:
        try:
            data = yf.download(stock, start=starting, end=ending)
            data.index = pd.to_datetime(data.index)
            data = data.reindex(date_range, fill_value=pd.NA)
            # Copy the selection so the assignments below never touch a view.
            data = data[['Open', 'High', 'Low', 'Close']].copy()
            data.columns = list(price_columns)  # Lowercase for consistency

            # Convert to the Vietnamese price scale and keep two decimals.
            data[price_columns] = data[price_columns] * usd_to_vnd_rate
            data[price_columns] = data[price_columns].round(2)

            data.columns = pd.MultiIndex.from_product([[stock], data.columns], names=['Stock', 'Metric'])
            stock_data_dict[stock] = data
        except Exception as e:
            print(f"Error fetching data for {stock}: {e}")
            continue

    if not stock_data_dict:
        raise ValueError("No stock data was successfully fetched.")

    # Place all tickers side by side on the shared daily index.
    combined_data = pd.concat(stock_data_dict.values(), axis=1)

    # Forward fill first, then back fill whatever is still missing at the start.
    combined_data = combined_data.ffill().bfill()

    # Make sure both column levels carry their names.
    combined_data.columns = pd.MultiIndex.from_tuples(combined_data.columns, names=['Stock', 'Metric'])

    return combined_data.sort_index(axis=1, level='Stock')

# Environment

In [4]:
class CustomLoggingCallback(BaseCallback):
    """
    Callback that records the per-step reward of the first environment.

    With ``verbose > 0`` it prints reward statistics every ``log_interval``
    timesteps and, when training ends, prints totals and saves a reward plot
    as a PDF under ``save_path``.
    """

    def __init__(self, verbose=0, log_interval=500, save_path="plots", filename="training_rewards.pdf"):
        """
        Args:
            verbose: Values above 0 enable printing and the final plot.
            log_interval: Number of timesteps between two statistics lines, and
                the number of most recent rewards summarised in each line.
            save_path: Directory for the plot; created if it does not exist.
            filename: File name of the saved plot.
        """
        super().__init__(verbose)
        self.rewards = []
        self.steps = []
        self.log_interval = log_interval
        self.save_path = save_path
        self.filename = filename

        # Ensure the save directory exists
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Record the current reward and timestep; always lets training continue."""
        reward = self.locals['rewards'][0]
        step = self.num_timesteps

        self.rewards.append(reward)
        self.steps.append(step)

        if self.verbose > 0 and step % self.log_interval == 0:
            recent = self.rewards[-self.log_interval:]
            avg_reward = np.mean(recent)
            max_reward = np.max(recent)
            min_reward = np.min(recent)
            print(f"Step: {step}, Avg Reward: {avg_reward}, Max Reward: {max_reward}, Min Reward: {min_reward}")

        return True

    def _on_training_end(self) -> None:
        """Print totals and plot the rewards when verbose output is enabled."""
        if self.verbose > 0:
            total_rewards = sum(self.rewards)
            print(f"Training finished. Total rewards: {total_rewards}")
            # A run that ended before any step leaves `steps` empty.
            print(f"Total steps: {self.steps[-1] if self.steps else 0}")
            self.plot_training(self.rewards)

    def plot_training(self, rewards, sma_window=50):
        """
        Plot raw rewards with a simple moving average and save the figure as PDF.

        Args:
            rewards: Sequence of per-step rewards.
            sma_window: Length of the moving-average window. The average is
                only drawn when at least this many rewards are available.
        """
        rewards = np.asarray(rewards, dtype=float)
        if rewards.size == 0:
            # Nothing to draw, and np.convolve rejects empty input.
            return

        plt.figure()
        plt.title("Training Rewards", fontsize=14, fontweight='bold')
        plt.plot(rewards, label='Raw Reward', color='#F6CE3B', alpha=1)

        if rewards.size >= sma_window:
            sma = np.convolve(rewards, np.ones(sma_window) / sma_window, mode='valid')
            # Each average is drawn at the index of the last reward in its window.
            sma_x = np.arange(sma_window - 1, rewards.size)
            plt.plot(sma_x, sma, label=f'SMA {sma_window}', color='#385DAA')

        plt.xlabel("Step", fontsize=12, fontweight='bold')
        plt.ylabel("Rewards", fontsize=12, fontweight='bold')
        plt.legend()
        plt.tight_layout()
        plt.grid(True)

        # Save the plot as PDF
        filepath = os.path.join(self.save_path, self.filename)
        plt.savefig(filepath, format='pdf')

        plt.show()
        plt.clf()
        plt.close()

In [5]:
class TradingEnv(gym.Env):
    """
    Base class for a multi-stock trading environment.

    The input frame must use a ``(Stock, Metric)`` column MultiIndex with at
    least ``high``, ``low`` and ``close`` per stock. Subclasses implement
    ``_process_data``, ``_calculate_reward`` and ``_update_profit``.

    Actions are one value in ``[-1, 1]`` per stock. Observations are the last
    ``window_size`` rows of the feature matrix returned by ``_process_data``.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        window_size: int,
        frame_bound: tuple,
        initial_balance: int,
        max_shares: int,
        trade_max: int = 100,
        trade_fee_percent: float = 0.001,
        risk_free_rate: float = 0.01 / 252  # Annual risk-free rate converted to daily
    ):
        """
        Args:
            df: Price frame with a ``(Stock, Metric)`` column MultiIndex. It is
                copied, so the caller's frame is left untouched.
            window_size: Number of rows in each observation.
            frame_bound: ``(start, end)`` pair handed to ``_process_data``.
            initial_balance: Cash available at the start of every episode.
            max_shares: Stored for subclasses to use as a position limit.
            trade_max: Stored for subclasses to scale actions into share counts.
            trade_fee_percent: Stored for subclasses to compute trading fees.
            risk_free_rate: Stored for subclasses to use in the reward.
        """
        self.window_size = window_size
        self.frame_bound = frame_bound
        self.initial_balance = initial_balance
        self.max_shares = max_shares
        self.trade_max = trade_max
        self.trade_fee_percent = trade_fee_percent
        self.risk_free_rate = risk_free_rate

        self.df = self.add_technical_indicators(df)

        self.prices, self.signal_features = self._process_data()
        self.shape = (self.window_size, self.signal_features.shape[1])
        super().__init__()

        # Define action space - one action per stock
        self.action_space = spaces.Box(
            low=-1, high=1, shape=(len(self.df.columns.levels[0]),), dtype=np.float64
        )

        # Define observation space
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=self.shape,
            dtype=np.float64
        )

        # Episode state variables
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1
        self._truncated = None
        self._current_tick = None
        self._last_trade_tick = None
        self._total_reward = None
        self._total_profit = None
        self._trade_count = None
        self.history = None

        # Trading statistics variables
        self._balance = None
        self._shares_held = None
        self._portfolio_value = None
        self._prev_portfolio_value = None
        self._last_action = None
        self._peak_portfolio_value = None
        self._transaction_cost = 0
        self._step_transaction_cost = 0

        # Reward tracking variables
        self.min_reward = None
        self.max_reward = None
        self.daily_returns = []

        self.reset()

    def add_technical_indicators(self, df: pd.DataFrame):
        """
        Return a copy of ``df`` with TA-Lib indicator columns added per stock.

        Added metrics: BBUpper/BBMiddle/BBLower, MACD/MACDSignal/MACDHist,
        RSI, CCI, ADX and ATR. Missing values are replaced with 0.
        """
        # Copy first so the caller's frame is never modified.
        df = df.copy()
        for stock in df.columns.levels[0]:
            # Bollinger Bands
            df[(stock, 'BBUpper')], df[(stock, 'BBMiddle')], df[(stock, 'BBLower')] = talib.BBANDS(
                df[(stock, 'close')], timeperiod=20, nbdevup=2, nbdevdn=2
            )
            # MACD
            df[(stock, 'MACD')], df[(stock, 'MACDSignal')], df[(stock, 'MACDHist')] = talib.MACD(
                df[(stock, 'close')], fastperiod=12, slowperiod=26, signalperiod=9
            )
            # RSI - Relative Strength Index
            df[(stock, 'RSI')] = talib.RSI(df[(stock, 'close')], timeperiod=14)
            # CCI - Commodity Channel Index
            df[(stock, 'CCI')] = talib.CCI(df[(stock, 'high')], df[(stock, 'low')], df[(stock, 'close')], timeperiod=14)
            # ADX - Average Directional Index
            df[(stock, 'ADX')] = talib.ADX(df[(stock, 'high')], df[(stock, 'low')], df[(stock, 'close')], timeperiod=14)
            # ATR - Average True Range
            df[(stock, 'ATR')] = talib.ATR(df[(stock, 'high')], df[(stock, 'low')], df[(stock, 'close')], timeperiod=14)

            # Fill after every stock so the next stock's indicators see the same
            # zero-filled frame they always have.
            df.fillna(0, inplace=True)
        return df

    def reset(self, seed=None, options=None):
        """
        Start a new episode and return ``(observation, info)``.

        Cash is restored to ``initial_balance``, all positions are cleared and
        the tick pointer is placed at ``window_size``.
        """
        super().reset(seed=seed, options=options)
        if seed is not None:
            self.action_space.seed(int(self.np_random.uniform(0, seed)))

        # Reload processed data
        self.prices, self.signal_features = self._process_data()

        # Reset episode state variables
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1
        self._truncated = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._total_reward = 0.0
        self._total_profit = 0.0
        self._trade_count = 0
        self.history = {}

        # Reset trading statistics
        self.daily_returns = []
        self.min_reward = float('inf')
        self.max_reward = float('-inf')
        self._balance = self.initial_balance
        self._shares_held = {stock: 0 for stock in self.df.columns.levels[0]}
        self._last_action = np.zeros(len(self.df.columns.levels[0]))
        self._transaction_cost = 0
        self._step_transaction_cost = 0

        # Initialize portfolio value
        self._portfolio_value = self._balance
        self._prev_portfolio_value = self._portfolio_value
        self._peak_portfolio_value = self._portfolio_value

        return self._get_observation(), self._get_info()

    def step(self, action):
        """
        Advance one tick, apply ``action`` and return the Gymnasium 5-tuple.

        ``terminated`` is always False; ``truncated`` becomes True once the
        last tick of the price series is reached.
        """
        self._truncated = False
        self._current_tick += 1
        self._last_action = action

        # `>=` keeps the flag set even if a caller steps past the final tick.
        if self._current_tick >= self._end_tick:
            self._truncated = True

        # Update portfolio based on action
        trades_before = self._trade_count
        self._update_profit(action)

        # Calculate reward
        step_reward = self._calculate_reward()
        self._total_reward += step_reward

        # Move the last-trade marker only when this step executed a trade
        # (detected through `_trade_count`), so the reward can measure how long
        # positions have been held.
        if self._trade_count != trades_before:
            self._last_trade_tick = self._current_tick

        # Get observation and info
        observation = self._get_observation()
        info = self._get_info()

        # Update history
        self._update_history(info)

        return observation, step_reward, False, self._truncated, info

    def _get_info(self):
        """
        Return detailed information about current state
        """
        detailed_info = {
            'balance': self._balance,
            # Snapshots, so stored history entries do not change on later steps.
            'shares_held': dict(self._shares_held),
            'last_action': np.array(self._last_action),
            'portfolio_value': self._portfolio_value,
            'total_reward': self._total_reward,
            'total_profit': self._total_profit,
            'overall_transaction_cost': self._transaction_cost,
            'average_daily_return': np.mean(self.daily_returns) if self.daily_returns else 0,
        }
        return detailed_info

    def _get_observation(self):
        """
        Return the ``window_size`` feature rows ending at the current tick.
        """
        return self.signal_features[(self._current_tick - self.window_size + 1):self._current_tick + 1]

    def _update_history(self, info):
        """
        Append every value of ``info`` to the per-key lists in ``self.history``.
        """
        if not self.history:
            self.history = {key: [] for key in info.keys()}
        for key, value in info.items():
            self.history[key].append(value)

    def _process_data(self):
        """
        Return ``(prices, signal_features)``; must be implemented by subclasses.
        """
        raise NotImplementedError

    def _calculate_reward(self):
        """
        Return the reward for the current step; must be implemented by subclasses.
        """
        raise NotImplementedError

    def _update_profit(self, action):
        """
        Apply ``action`` to the portfolio; must be implemented by subclasses.
        """
        raise NotImplementedError

In [6]:
class VnStockEnv(TradingEnv):
    """
    Trading environment with min-max normalised indicator features, a composite
    reward and share-based order execution for every stock in the frame.
    """

    def _process_data(self):
        """
        Build the price series and the normalised feature matrix.

        Returns:
            prices: Dict mapping each stock to its close prices over
                ``frame_bound[0] - window_size`` to ``frame_bound[1]``.
            signal_features: 2-D array with one column per (stock, feature)
                pair. Each column is min-max scaled over that same range;
                constant columns are left unscaled.
        """
        feature_columns = [
            'close', 'RSI', 'MACD', 'MACDSignal', 'MACDHist',
            'BBUpper', 'BBLower', 'BBMiddle', 'CCI', 'ADX', 'ATR'
        ]
        start = self.frame_bound[0] - self.window_size
        end = self.frame_bound[1]
        available_metrics = set(self.df.columns.levels[1])

        prices = {}
        feature_arrays = []

        for stock in self.df.columns.levels[0]:
            # Close prices for this stock, limited to the configured range.
            prices[stock] = self.df[(stock, 'close')].to_numpy()[start:end]

            for feature in feature_columns:
                if feature not in available_metrics:
                    continue

                # Extract the feature and neutralise NaN/inf values.
                data = np.nan_to_num(self.df[(stock, feature)].iloc[start:end].values)

                lowest, highest = np.min(data), np.max(data)
                if highest - lowest != 0:
                    data = (data - lowest) / (highest - lowest)

                feature_arrays.append(data)

        if not feature_arrays:
            # No known feature present: keep the empty 2-D result shape.
            return prices, np.empty((0, 0))

        # Stack once instead of growing a DataFrame one column at a time.
        return prices, np.column_stack(feature_arrays).astype(np.float64, copy=False)

    def _calculate_reward(self) -> float:
        """
        Combine profit, risk and trading-behaviour terms into one reward.

        The raw reward is the change in portfolio value, minus this step's
        fees and a drawdown penalty, plus Sharpe, trade-efficiency,
        holding-time, diversification and momentum terms. It is then rescaled
        with the running minimum and maximum reward of the episode, so the
        returned value lies in ``[0, 1]``. The first step of an episode returns
        0 because minimum and maximum are still equal.
        """
        portfolio_value = self._portfolio_value

        # Basic reward: portfolio value change
        base_reward = portfolio_value - self._prev_portfolio_value

        # Calculate daily return for metrics
        daily_return = (portfolio_value - self._prev_portfolio_value) / self._prev_portfolio_value if self._prev_portfolio_value > 0 else 0
        self.daily_returns.append(daily_return)

        # Sharpe ratio component over at most the last 20 returns
        sharpe_component = 0
        if len(self.daily_returns) > 5:
            recent_returns = self.daily_returns[-20:]
            returns_std = np.std(recent_returns)
            avg_return = np.mean(recent_returns)

            # Prevent division by zero
            if returns_std > 0:
                sharpe_ratio = (avg_return - self.risk_free_rate) / returns_std
                sharpe_component = sharpe_ratio * 0.5  # Weight for Sharpe ratio

        # Drawdown penalty that grows with the square of the drawdown
        drawdown_penalty = 0
        if portfolio_value < self._peak_portfolio_value:
            drawdown_percentage = (self._peak_portfolio_value - portfolio_value) / self._peak_portfolio_value
            drawdown_penalty = 0.1 * (drawdown_percentage ** 2) * self._peak_portfolio_value

        # Transaction efficiency incentive
        trade_efficiency = 0
        if self._step_transaction_cost > 0:
            profit_ratio = max(0, base_reward) / (self._step_transaction_cost + 1e-9)  # Prevent division by zero
            if profit_ratio > 2.0:  # If profit is more than twice the transaction cost
                trade_efficiency = 0.2 * profit_ratio  # Reward efficient trades
            else:
                trade_efficiency = -0.1 * self._step_transaction_cost  # Small penalty for inefficient trades

        # Holding time incentive to discourage excessive trading
        holding_time_factor = 0
        if self._current_tick - self._last_trade_tick > 3:  # If held for more than 3 time steps
            holding_time_factor = 0.05 * base_reward if base_reward > 0 else 0

        # Portfolio diversification incentive
        diversification_score = 0
        non_zero_positions = sum(1 for shares in self._shares_held.values() if shares > 0)
        if non_zero_positions > 0:
            total_shares = sum(self._shares_held.values())
            if total_shares > 0:
                # Herfindahl index (measure of concentration) on share counts
                herfindahl = sum((shares / total_shares) ** 2 for shares in self._shares_held.values() if shares > 0)
                # Convert to diversification score (1 - herfindahl)
                diversification_score = (1 - herfindahl) * 0.2 * base_reward if base_reward > 0 else 0

        # Momentum following bonus
        momentum_bonus = 0
        if len(self.daily_returns) >= 3:
            recent_trend = np.mean(self.daily_returns[-3:])
            if (recent_trend > 0 and base_reward > 0) or (recent_trend < 0 and base_reward < 0):
                momentum_bonus = 0.15 * abs(base_reward)  # Reward for following the trend

        # Combine all components
        reward = base_reward - self._step_transaction_cost + sharpe_component - drawdown_penalty + trade_efficiency + holding_time_factor + diversification_score + momentum_bonus

        # Update tracking variables
        self._prev_portfolio_value = portfolio_value
        self._peak_portfolio_value = max(self._peak_portfolio_value, portfolio_value)

        # Track min and max rewards for normalization
        self.min_reward = min(self.min_reward, reward)
        self.max_reward = max(self.max_reward, reward)

        # Normalize reward while preventing division by zero
        if self.max_reward > self.min_reward:
            normalized_reward = (reward - self.min_reward) / (self.max_reward - self.min_reward)
        else:
            normalized_reward = 0

        return normalized_reward

    def _update_profit(self, action: np.ndarray):
        """
        Execute the buy/sell orders encoded in ``action`` and revalue the portfolio.

        Args:
            action: One value per stock. ``action[i] * trade_max`` truncated to
                an integer is the number of shares to buy (positive) or sell
                (negative).

        Buys are limited by the cash available after fees and by
        ``max_shares``; sells are limited by the shares held. Fees of
        ``trade_fee_percent`` are charged on both sides.
        """
        self._step_transaction_cost = 0

        for i, stock in enumerate(self.df.columns.levels[0]):
            current_price = self.prices[stock][self._current_tick]
            shares_to_trade = int(action[i] * self.trade_max)  # Scale action to trade_max

            # Only trade at a valid price. The tick indexes `self.prices`, so
            # the check uses that same series.
            if not current_price > 0:
                continue

            if shares_to_trade > 0:
                # Size the order so that cost plus fee still fits in the balance.
                price_with_fee = current_price * (1 + self.trade_fee_percent)
                shares_to_buy = min(
                    shares_to_trade,
                    int(self._balance / price_with_fee),
                    self.max_shares - self._shares_held[stock]
                )

                if shares_to_buy > 0:
                    cost = shares_to_buy * current_price
                    transaction_fee = cost * self.trade_fee_percent
                    self._balance -= (cost + transaction_fee)
                    self._shares_held[stock] += shares_to_buy
                    self._step_transaction_cost += transaction_fee
                    self._transaction_cost += transaction_fee
                    self._trade_count += 1

            elif shares_to_trade < 0:
                shares_to_sell = min(abs(shares_to_trade), self._shares_held[stock])
                if shares_to_sell > 0:
                    proceeds = shares_to_sell * current_price
                    transaction_fee = proceeds * self.trade_fee_percent
                    self._balance += (proceeds - transaction_fee)
                    self._shares_held[stock] -= shares_to_sell
                    self._step_transaction_cost += transaction_fee
                    self._transaction_cost += transaction_fee
                    self._trade_count += 1

        # Update portfolio value for all stocks
        self._portfolio_value = self._balance + sum(
            self._shares_held[stock] * self.prices[stock][self._current_tick]
            for stock in self.df.columns.levels[0]
        )
        # Update peak portfolio value
        self._peak_portfolio_value = max(self._peak_portfolio_value, self._portfolio_value)

        # Calculate profit/loss
        self._total_profit = self._portfolio_value - self.initial_balance

# Training and Testing

In [None]:
# Hyperparameters for each algorithm; adjust these before running an experiment.
# Every configuration needs two bookkeeping keys that are removed before the
# remaining entries are passed to the model constructor:
#   "name"        - used for the model/log directory names
#   "window_size" - observation window length of the trading environment
WINDOW_SIZE = [5, 10, 25]  # Candidate window sizes

HYPERPARAMS_PPO = [
    {
        "name": "PPO_default",
        "policy": "MlpPolicy",
        "learning_rate": 3e-4,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10,
        "gamma": 0.99,
        "clip_range": 0.2,
        "ent_coef": 0.0,
        "vf_coef": 0.5,
        "max_grad_norm": 0.5,
        "window_size": 25,
    }
]

HYPERPARAMS_A2C = [
    {
        "name": "A2C_default",
        "policy": "MlpPolicy",
        "learning_rate": 3e-4,
        "n_steps": 5,
        "gamma": 0.99,
        "ent_coef": 0.0,
        "vf_coef": 0.5,
        "max_grad_norm": 0.5,
        "window_size": 25,
    }
]

# NOTE(review): a 1,000,000-transition replay buffer of windowed observations
# may need a lot of memory - confirm before enabling SAC.
HYPERPARAMS_SAC = [
    {
        "name": "SAC_default",
        "policy": "MlpPolicy",
        "learning_rate": 3e-4,
        "buffer_size": 1000000,
        "batch_size": 256,
        "gamma": 0.99,
        "tau": 0.005,
        "train_freq": 1,
        "gradient_steps": 1,
        "ent_coef": "auto",
        "window_size": 25,
    }
]

HYPERPARAMS_RPPO = [
    {
        "name": "RPPO_default",
        "policy": "MlpLstmPolicy",
        "learning_rate": 3e-4,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10,
        "gamma": 0.99,
        "clip_range": 0.2,
        "ent_coef": 0.0,
        "vf_coef": 0.5,
        "max_grad_norm": 0.5,
        "window_size": 25,
    }
]

In [12]:
def create_environment(data, window_size, initial_balance, max_shares, trade_max):
    """
    Build a ``VnStockEnv`` covering all of ``data`` and report its observation shape.

    Args:
        data: Price frame handed to the environment as ``df``.
        window_size: Observation window length; also the first frame bound.
        initial_balance: Starting cash of the environment.
        max_shares: Passed through to the environment.
        trade_max: Passed through to the environment.

    Returns:
        The freshly constructed environment, still in its initial state.
    """
    # The frame runs from the first full window to the end of the data.
    frame_bound = (window_size, len(data))

    env = VnStockEnv(
        df=data,
        window_size=window_size,
        frame_bound=frame_bound,
        initial_balance=initial_balance,
        max_shares=max_shares,
        trade_max=trade_max
    )

    # Read the shape from the space rather than stepping with a random action,
    # which would place trades before the caller ever uses the environment.
    print(f"Observation shape: {env.observation_space.shape}")

    return env

def train_model(model_class, params, data, initial_balance, max_shares, trade_max, timesteps=500000):
    """
    Train one model configuration and save it under ``models/<name>/<name>``.

    Args:
        model_class: Algorithm class to instantiate.
        params: Configuration dict. ``"name"`` and ``"window_size"`` are read
            here; every other entry is passed to ``model_class``. The dict is
            not modified.
        data: Training price frame.
        initial_balance: Starting cash of the environment.
        max_shares: Passed through to the environment.
        trade_max: Passed through to the environment.
        timesteps: Total number of training timesteps.

    Returns:
        ``(model_path, window_size)``.

    Raises:
        KeyError: If ``params`` has no ``"window_size"`` or ``"name"`` entry.
    """
    # Read the bookkeeping keys without touching the caller's dict, so it stays
    # intact even when training fails part-way.
    window_size = params["window_size"]
    model_name = params["name"]
    model_kwargs = {
        key: value for key, value in params.items()
        if key not in ("window_size", "name")
    }

    # Create environment
    base_env = create_environment(data, window_size, initial_balance, max_shares, trade_max)

    # Setup monitoring
    os.makedirs(f"models/{model_name}", exist_ok=True)
    os.makedirs(f"logs/{model_name}", exist_ok=True)

    monitored_env = Monitor(base_env, f"logs/{model_name}")
    # Distinct names keep the lambda bound to the monitored environment.
    vec_env = DummyVecEnv([lambda: monitored_env])

    # Create callback
    callback = CustomLoggingCallback(
        verbose = 1,
        log_interval = 20000,
        save_path=f"models/{model_name}",
        filename=f"{model_name}_training.pdf"
    )

    try:
        # Create and train model
        model = model_class(**model_kwargs, env=vec_env, verbose=0, device=device)
        model.learn(total_timesteps=timesteps, callback=callback)

        # Save model
        model_path = f"models/{model_name}/{model_name}"
        model.save(model_path)
    finally:
        # Closing the vectorised env also closes the Monitor's log file.
        vec_env.close()

    return model_path, window_size

def evaluate_model(model_path, model_class, data, initial_balance, max_shares, trade_max, window_size):
    """
    Run a saved model for one episode on ``data`` and collect its performance.

    Args:
        model_path: Path the model was saved to.
        model_class: Algorithm class used to load the model.
        data: Evaluation price frame.
        initial_balance: Starting cash of the environment.
        max_shares: Passed through to the environment.
        trade_max: Passed through to the environment.
        window_size: Observation window length the model was trained with.

    Returns:
        ``(portfolio_values, returns, actions, dates)``. ``portfolio_values``,
        ``actions`` and ``dates`` have one entry per executed step;
        ``returns`` holds the step-to-step relative changes and is one shorter.
    """
    # Create test environment
    base_env = create_environment(data, window_size, initial_balance, max_shares, trade_max)
    monitored_env = Monitor(base_env, None)
    vec_env = DummyVecEnv([lambda: monitored_env])

    # Load model
    model = model_class.load(model_path)

    # Track performance metrics
    portfolio_values = []
    returns = []
    actions = []

    obs = vec_env.reset()
    # Recurrent policies need their hidden state carried between calls; other
    # policies simply pass it through.
    state = None
    episode_start = np.ones((vec_env.num_envs,), dtype=bool)
    episode_over = False

    try:
        while not episode_over:
            # Deterministic actions make the evaluation repeatable.
            action, state = model.predict(
                obs, state=state, episode_start=episode_start, deterministic=True
            )
            obs, _, dones, infos = vec_env.step(action)
            episode_start = dones
            # The vectorised env reports termination and truncation as `dones`.
            episode_over = bool(dones[0])

            portfolio_values.append(infos[0]['portfolio_value'])

            # Calculate daily return
            if len(portfolio_values) > 1:
                daily_return = (portfolio_values[-1] - portfolio_values[-2]) / portfolio_values[-2]
                returns.append(daily_return)

            actions.append(action)
    finally:
        vec_env.close()

    # The first step moves the environment to tick window_size + 1, so the
    # dates start there and match the number of recorded values.
    first_tick = window_size + 1
    dates = data.index[first_tick:first_tick + len(portfolio_values)]

    return portfolio_values, returns, actions, dates

# Evaluate / Visualize

In [None]:
def main():
    """
    Fetch train/test data, train every configured model, evaluate it and plot
    its portfolio value.

    Returns:
        Dict keyed by model name with the saved model path and the evaluation
        series (portfolio values, returns, actions, dates).
    """
    STOCK_LIST_VN = ['FPT', 'VCB', 'VIC', 'GAS', 'VHM', 'BID', 'MBB', 'HVN', 'HPG', 'GVR', 'VNM', 'SSI', 'GEX', 'ACB']
    STOCK_LIST_GLOBAL = ['AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HON', 'IBM', 'INTC', 'JPM', 'MSFT', 'NKE', 'GOOGL', 'AMZN', 'WMT', 'NVDA']

    # Settings shared by training and evaluation.
    INITIAL_BALANCE = 100000
    MAX_SHARES = 1000
    TRADE_MAX = 100
    TIMESTEPS = 500000

    print(f"Number of stocks using: {len(STOCK_LIST_VN) + len(STOCK_LIST_GLOBAL)}")

    print("Loading TRAIN data...")
    START_DATE = '2018-01-01'
    END_DATE = '2024-01-01'
    train_data = fetch_stock_data(STOCK_LIST_VN, STOCK_LIST_GLOBAL, START_DATE, END_DATE)

    print("Loading TEST data...")
    START_DATE = '2024-01-01'
    END_DATE = '2025-04-13'
    test_data = fetch_stock_data(STOCK_LIST_VN, STOCK_LIST_GLOBAL, START_DATE, END_DATE)

    print("Data loaded successfully.")

    # Create directories
    os.makedirs("models", exist_ok=True)
    os.makedirs("logs", exist_ok=True)
    os.makedirs("plots", exist_ok=True)

    # Dictionary to store results for all models
    all_results = {}

    # Define model classes and their hyperparameter lists
    models_to_train = [
        (PPO, HYPERPARAMS_PPO),
        # (A2C, HYPERPARAMS_A2C),
        # (SAC, HYPERPARAMS_SAC),
        # (RecurrentPPO, HYPERPARAMS_RPPO)
    ]

    # Train all models with different hyperparameters
    for model_class, hyperparams_list in models_to_train:
        model_type = model_class.__name__
        print(f"\nTraining {model_type} models...")

        for params in hyperparams_list:
            model_name = params["name"]
            print(f"  Training {model_name}...")

            # Create a copy of params to avoid modifying the original
            params_copy = params.copy()

            # Train model
            model_path, window_size = train_model(
                model_class=model_class,
                params=params_copy,
                data=train_data,
                initial_balance=INITIAL_BALANCE,
                max_shares=MAX_SHARES,
                trade_max=TRADE_MAX,
                timesteps=TIMESTEPS
            )

            # Evaluate model
            portfolio_values, returns, actions, dates = evaluate_model(
                model_path=model_path,
                model_class=model_class,
                data=test_data,
                initial_balance=INITIAL_BALANCE,
                max_shares=MAX_SHARES,
                trade_max=TRADE_MAX,
                window_size=window_size
            )

            # Guard against a date axis longer than the value series: keep the
            # trailing dates so both have the same length for plotting.
            if portfolio_values and len(dates) > len(portfolio_values):
                dates = dates[-len(portfolio_values):]

            all_results[model_name] = {
                "model_path": model_path,
                "portfolio_values": portfolio_values,
                "returns": returns,
                "actions": actions,
                "dates": dates,
            }

            # Visualize results
            plt.figure(figsize=(12, 6))
            plt.plot(dates, portfolio_values, label=f"{model_name} Portfolio Value")
            plt.title(f"{model_name} Portfolio Value Over Time")
            plt.xlabel("Date")
            plt.ylabel("Portfolio Value")
            plt.legend()
            plt.grid()
            plt.tight_layout()

            # Save and display the figure, then release it.
            plt.savefig(os.path.join("plots", f"{model_name}_portfolio_value.pdf"), format='pdf')
            plt.show()
            plt.close()

    return all_results

if __name__ == "__main__":
    main()

Number of stocks using: 31
Loading TRAIN data...
Fetching 14 Vietnamese stock data...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching 17 Global stock data...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Loading TEST data...
Fetching 14 Vietnamese stock data...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching 17 Global stock data...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Data loaded successfully.

Training PPO models...
  Training PPO_default...
Observation shape: (25, 341)
Step: 20000, Avg Reward: 0.5068773627281189, Max Reward: 1.0, Min Reward: 0.0
Step: 40000, Avg Reward: 0.6651334762573242, Max Reward: 1.0, Min Reward: 0.0
Step: 60000, Avg Reward: 0.7068259119987488, Max Reward: 1.0, Min Reward: 0.0
