# Library Setup

In [None]:
import os
import yfinance as yf
from vnstock import Vnstock
from typing import Tuple, List, Dict

from gymnasium import spaces
import gymnasium as gym

import torch
from torch.nn import functional as F
from torch import nn, optim
from torch.distributions import Normal

import talib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from datetime import datetime

from stable_baselines3 import SAC, PPO, A2C, TD3
from sb3_contrib import RecurrentPPO, TRPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.policies import ActorCriticPolicy

from joblib import Parallel, delayed
import multiprocessing

import warnings
warnings.filterwarnings('ignore')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data Crawling

In [None]:
def _align_to_calendar(prices, date_range, stock):
    """Place one stock's OHLC rows on the full daily calendar and label its columns.

    Args:
        prices: DataFrame indexed by date with the columns open/high/low/close.
        date_range: Daily DatetimeIndex the result must cover.
        stock: Ticker used as the first level of the resulting column MultiIndex.

    Returns:
        Float DataFrame indexed by ``date_range`` with ('Stock', 'Metric') columns.

    Raises:
        ValueError: If no price row falls inside ``date_range``.
    """
    if prices.empty:
        raise ValueError("no price rows returned")

    prices = prices.copy()
    index = pd.to_datetime(prices.index)
    if index.tz is not None:
        index = index.tz_localize(None)
    # Drop any time-of-day component so rows line up with the daily calendar.
    prices.index = index.normalize()
    prices = prices[~prices.index.duplicated(keep='last')]

    # reindex keeps a numeric dtype; building an empty frame and assigning with
    # .loc produced object-dtype columns.
    aligned = prices.reindex(date_range).astype(float)
    if aligned.isna().all().all():
        raise ValueError("no price rows inside the requested date range")

    # Fill weekend/holiday gaps with evenly distributed price changes
    aligned = fill_price_gaps_evenly(aligned)

    aligned.columns = pd.MultiIndex.from_product(
        [[stock], aligned.columns], names=['Stock', 'Metric']
    )
    return aligned


def fetch_stock_data(vn_stock_list, global_stock_list, starting, ending, usd_to_vnd=25.5) -> pd.DataFrame:
    """Download OHLC prices for Vietnamese and global tickers on one daily calendar.

    Vietnamese tickers are fetched through vnstock (source 'VCI'), global tickers
    through yfinance. Each series is placed on a calendar that includes weekends,
    gaps are filled by ``fill_price_gaps_evenly``, and all stocks are concatenated
    column-wise. A ticker whose download or processing fails is reported and skipped.

    Args:
        vn_stock_list: Tickers fetched through vnstock.
        global_stock_list: Tickers fetched through yfinance.
        starting: Start date passed to both providers and to ``pd.date_range``.
        ending: End date passed to both providers and to ``pd.date_range``.
        usd_to_vnd: Factor applied to yfinance prices. The default 25.5 is the
            value that was previously hard-coded; presumably thousand VND per USD
            to match the vnstock price unit -- TODO confirm.

    Returns:
        DataFrame indexed by day with ('Stock', 'Metric') MultiIndex columns,
        sorted by stock.

    Raises:
        ValueError: If no ticker could be fetched.
    """
    # Create a full date range including weekends
    date_range = pd.date_range(start=starting, end=ending, freq='D')
    price_columns = ['open', 'high', 'low', 'close']
    stock_data_dict = {}

    print(f"Fetching {len(vn_stock_list)} Vietnamese stock data...")
    for stock in vn_stock_list:
        try:
            data = Vnstock().stock(symbol=stock, source='VCI').quote.history(
                start=starting, end=ending
            )
            # Use the 'time' column as a datetime index named 'date'
            data = data.reset_index().rename(columns={'time': 'date'})
            data['date'] = pd.to_datetime(data['date'])
            data = data.set_index('date')[price_columns]

            stock_data_dict[stock] = _align_to_calendar(data, date_range, stock)
        except Exception as e:
            # Best effort: report the failing ticker and keep going with the rest.
            print(f"Error fetching data for {stock}: {e}")
            continue

    print(f"Fetching {len(global_stock_list)} Global stock data...")
    for stock in global_stock_list:
        try:
            data = yf.download(stock, start=starting, end=ending)

            # Select and rename columns to match the Vietnamese stock format;
            # copy so the arithmetic below does not write into a slice.
            data = data[['Open', 'High', 'Low', 'Close']].copy()
            data.columns = price_columns

            # Currency conversion, limited to 2 decimal places
            data[price_columns] = (data[price_columns] * usd_to_vnd).round(2)

            stock_data_dict[stock] = _align_to_calendar(data, date_range, stock)
        except Exception as e:
            print(f"Error fetching data for {stock}: {e}")
            continue

    if not stock_data_dict:
        raise ValueError("No stock data was successfully fetched.")

    # Concatenate all stock data horizontally
    combined_data = pd.concat(list(stock_data_dict.values()), axis=1)
    combined_data.columns = combined_data.columns.set_names(['Stock', 'Metric'])

    # Sort columns by stock name
    combined_data = combined_data.sort_index(axis=1, level='Stock')

    # Check for any remaining NaN values
    null_counts = combined_data.isnull().sum()
    if null_counts.sum() > 0:
        print("Warning: There are still NaN values in the data:")
        print(null_counts[null_counts > 0])

    return combined_data


def fill_price_gaps_evenly(df):
    """
    Fill gaps in price data by evenly distributing the price change across missing days.
    
    For each gap:
    1. Find the last value before the gap and the first value after the gap
    2. Calculate the total change for each metric (open, high, low, close)
    3. Distribute this change evenly across the missing days
    """
    result = df.copy()
    
    # Loop through each column (open, high, low, close)
    for col_name in result.columns:
        # Find sequences of NaN values
        is_nan = result[col_name].isna()
        
        if not is_nan.any():
            continue  # Skip if no NaN values in this column
        
        # Find the start and end indices of each gap
        gap_starts = []
        gap_ends = []
        
        in_gap = False
        for i in range(len(is_nan)):
            if is_nan.iloc[i] and not in_gap:
                gap_starts.append(i)
                in_gap = True
            elif not is_nan.iloc[i] and in_gap:
                gap_ends.append(i)
                in_gap = False
        
        # Handle the case where the gap extends to the end of the dataframe
        if len(gap_starts) > len(gap_ends):
            gap_ends.append(len(is_nan))
        
        # Process each gap
        for start_idx, end_idx in zip(gap_starts, gap_ends):
            # Skip gaps at the beginning or end where we can't calculate a gradient
            if start_idx == 0 or end_idx == len(result):
                continue
            
            # Get values before and after the gap
            start_date = result.index[start_idx - 1]
            end_date = result.index[end_idx]
            
            start_value = result.loc[start_date, col_name]
            end_value = result.loc[end_date, col_name]
            
            # Calculate the total price change and the change per day
            total_change = end_value - start_value
            gap_length = end_idx - start_idx
            change_per_day = total_change / (gap_length + 1)  # Include the end point in calculation
            
            # Fill gap days with evenly distributed price changes
            for i in range(gap_length):
                gap_date = result.index[start_idx + i]
                gap_value = start_value + change_per_day * (i + 1)
                result.loc[gap_date, col_name] = round(gap_value, 2)
    
    # Handle any remaining NaN values at the beginning or end
    result = result.ffill().bfill()
    
    return result

# Environment

In [3]:
class CustomLoggingCallback(BaseCallback):
    """Training callback that records the per-step reward and plots it at the end.

    Only the reward of the first environment (index 0 of ``locals['rewards']``)
    is recorded. Console output and the final plot are produced only when
    ``verbose > 0``.

    Args:
        verbose: Verbosity level forwarded to ``BaseCallback``.
        log_interval: Number of timesteps between console summaries; also the
            size of the trailing window the summary is computed over.
        save_path: Directory the reward plot is written to (created if missing).
        filename: File name of the PDF plot inside ``save_path``.
    """

    def __init__(self, verbose=0, log_interval=500, save_path="plots", filename="training_rewards.pdf"):
        super().__init__(verbose)
        self.rewards = []
        self.steps = []
        self.log_interval = log_interval
        self.save_path = save_path
        self.filename = filename

        # Ensure the save directory exists
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Record the current reward and print a summary every ``log_interval`` steps."""
        reward = self.locals['rewards'][0]
        step = self.num_timesteps

        self.rewards.append(reward)
        self.steps.append(step)

        if self.verbose > 0 and step % self.log_interval == 0:
            recent = self.rewards[-self.log_interval:]
            avg_reward = np.mean(recent)
            max_reward = np.max(recent)
            min_reward = np.min(recent)
            print(f"Step: {step}, Avg Reward: {avg_reward}, Max Reward: {max_reward}, Min Reward: {min_reward}")

        # Returning True keeps training running.
        return True

    def _on_training_end(self) -> None:
        """Print totals and save the reward plot (verbose mode only)."""
        if self.verbose > 0:
            total_rewards = sum(self.rewards)
            # Training can end before any step was recorded.
            last_step = self.steps[-1] if self.steps else 0
            print(f"Training finished. Total rewards: {total_rewards}")
            print(f"Total steps: {last_step}")
            self.plot_training(self.rewards)

    def plot_training(self, rewards, window=50):
        """Plot raw rewards with a simple moving average and save the figure as PDF.

        Args:
            rewards: Sequence of per-step rewards.
            window: Length of the moving average. The average is drawn only when
                at least ``window`` rewards are available.
        """
        rewards = np.asarray(rewards, dtype=float)
        if rewards.size == 0:
            # Nothing to draw.
            return

        plt.figure()
        plt.title("Training Rewards", fontsize=14, fontweight='bold')
        plt.plot(rewards, label='Raw Reward', color='#F6CE3B', alpha=1)

        if window > 0 and rewards.size >= window:
            sma = np.convolve(rewards, np.ones(window) / window, mode='valid')
            # The first average covers steps 0..window-1, so draw it at window-1
            # to keep it aligned with the raw curve.
            sma_x = np.arange(window - 1, rewards.size)
            plt.plot(sma_x, sma, label=f'SMA {window}', color='#385DAA')

        plt.xlabel("Step", fontsize=12, fontweight='bold')
        plt.ylabel("Rewards", fontsize=12, fontweight='bold')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        # Save the plot as PDF
        filepath = os.path.join(self.save_path, self.filename)
        plt.savefig(filepath, format='pdf')

        plt.show()
        plt.clf()
        plt.close()

In [4]:
class TradingEnv(gym.Env):
    """Base Gymnasium environment for trading several stocks at once.

    The input frame must have two-level columns (stock, metric) with at least
    'high', 'low' and 'close' per stock. Subclasses implement
    ``_process_data``, ``_calculate_reward`` and ``_update_profit``.

    Args:
        df: Price data with (stock, metric) MultiIndex columns.
        window_size: Number of rows in one observation.
        frame_bound: Pair of row bounds handed to ``_process_data``.
        initial_balance: Cash at the start of every episode.
        max_shares: Share limit, interpreted by the subclass.
        trade_max: Per-trade limit, interpreted by the subclass.
        trade_fee_percent: Fee as a fraction of traded value.
        risk_free_rate: Per-step risk-free rate, interpreted by the subclass.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        window_size: int,
        frame_bound: tuple,
        initial_balance: int,
        max_shares: int,
        trade_max: int = 100,
        trade_fee_percent: float = 0.001,
        risk_free_rate: float = 0.01 / 252
    ):
        self.window_size = window_size
        self.frame_bound = frame_bound
        self.initial_balance = initial_balance
        self.max_shares = max_shares
        self.trade_max = trade_max
        self.trade_fee_percent = trade_fee_percent
        self.risk_free_rate = risk_free_rate

        # Works on a copy, so the caller's frame is left untouched.
        self.df = self.add_technical_indicators(df)

        self.prices, self.signal_features = self._process_data()
        self.shape = (self.window_size, self.signal_features.shape[1])
        super().__init__()

        # Define action space - one action per stock
        n_stocks = len(self.df.columns.levels[0])
        self.action_space = spaces.Box(low=-1, high=1, shape=(n_stocks,), dtype=np.float64)

        # Define observation space
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=self.shape,
            dtype=np.float64
        )

        # Episode state variables
        self._start_tick = self.window_size
        self._end_tick = self._last_tick()
        self._truncated = None
        self._current_tick = None
        self._last_trade_tick = None
        self._total_reward = None
        self._total_profit = None
        self._trade_count = None
        self.history = None

        # Trading statistics variables
        self._balance = None
        self._shares_held = None
        self._portfolio_value = None
        self._prev_portfolio_value = None
        self._last_action = None
        self._peak_portfolio_value = None
        self._transaction_cost = 0
        self._step_transaction_cost = 0

        # Reward tracking variables
        self.min_reward = None
        self.max_reward = None
        self.daily_returns = []

        self.reset()

    def _last_tick(self) -> int:
        """Return the index of the last usable row.

        The row count is taken from ``signal_features`` because ``prices`` may be
        a per-stock dict, in which case ``len(prices)`` is the number of stocks
        rather than the number of rows.
        """
        return len(self.signal_features) - 1

    def add_technical_indicators(self, df: pd.DataFrame):
        """
        Return a copy of ``df`` with technical indicator columns added per stock.

        Adds Bollinger Bands, MACD, RSI, CCI, ADX and ATR computed with TA-Lib,
        then replaces every remaining NaN with 0.
        """
        # Copy so repeated environment construction does not keep mutating the
        # caller's frame.
        df = df.copy()

        for stock in df.columns.levels[0]:
            close = df[(stock, 'close')]
            high = df[(stock, 'high')]
            low = df[(stock, 'low')]

            # Bollinger Bands
            df[(stock, 'BBUpper')], df[(stock, 'BBMiddle')], df[(stock, 'BBLower')] = talib.BBANDS(
                close, timeperiod=20, nbdevup=2, nbdevdn=2
            )
            # MACD
            df[(stock, 'MACD')], df[(stock, 'MACDSignal')], df[(stock, 'MACDHist')] = talib.MACD(
                close, fastperiod=12, slowperiod=26, signalperiod=9
            )
            # RSI - Relative Strength Index
            df[(stock, 'RSI')] = talib.RSI(close, timeperiod=14)
            # CCI - Commodity Channel Index
            df[(stock, 'CCI')] = talib.CCI(high, low, close, timeperiod=14)
            # ADX - Average Directional Index
            df[(stock, 'ADX')] = talib.ADX(high, low, close, timeperiod=14)
            # ATR - Average True Range
            df[(stock, 'ATR')] = talib.ATR(high, low, close, timeperiod=14)

        # Handle NaN values once, after every stock has been processed, so all
        # indicators are computed from the unmodified prices.
        df = df.fillna(0)
        return df

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``."""
        super().reset(seed=seed, options=options)
        if seed is not None:
            self.action_space.seed(int(self.np_random.uniform(0, seed)))

        # Reload processed data
        self.prices, self.signal_features = self._process_data()

        # Reset episode state variables
        self._start_tick = self.window_size
        self._end_tick = self._last_tick()
        self._truncated = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._total_reward = 0.0
        self._total_profit = 0.0
        self._trade_count = 0
        self.history = {}

        # Reset trading statistics
        self.daily_returns = []
        self.min_reward = float('inf')
        self.max_reward = float('-inf')
        self._balance = self.initial_balance
        self._shares_held = {stock: 0 for stock in self.df.columns.levels[0]}
        self._last_action = np.zeros(len(self.df.columns.levels[0]))
        self._transaction_cost = 0
        self._step_transaction_cost = 0

        # Initialize portfolio value
        self._portfolio_value = self._balance
        self._prev_portfolio_value = self._portfolio_value
        self._peak_portfolio_value = self._portfolio_value

        return self._get_observation(), self._get_info()

    def step(self, action):
        """Advance one tick and apply ``action``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``terminated``
            is always False; ``truncated`` becomes True on the last row.
        """
        self._current_tick += 1
        self._last_action = action

        # '>=' so a tick beyond the end is still reported as truncated.
        self._truncated = self._current_tick >= self._end_tick

        # Update portfolio based on action
        self._update_profit(action)

        # Calculate reward
        step_reward = self._calculate_reward()
        self._total_reward += step_reward

        # Update trade tracking
        self._last_trade_tick = self._current_tick

        # Get observation and info
        observation = self._get_observation()
        info = self._get_info()

        # Update history
        self._update_history(info)

        return observation, step_reward, False, self._truncated, info

    def _get_info(self):
        """
        Return detailed information about current state
        """
        detailed_info = {
            'balance': self._balance,
            # Snapshots: the live dict/array would make every history entry
            # point at the same, latest state.
            'shares_held': dict(self._shares_held),
            'last_action': np.array(self._last_action, copy=True),
            'portfolio_value': self._portfolio_value,
            'total_reward': self._total_reward,
            'total_profit': self._total_profit,
            'overall_transaction_cost': self._transaction_cost,
            'average_daily_return': np.mean(self.daily_returns) if self.daily_returns else 0,
        }
        return detailed_info

    def _get_observation(self):
        """
        Return the feature rows of the window that ends at the current tick
        """
        first_row = max(0, self._current_tick - self.window_size + 1)
        return self.signal_features[first_row:self._current_tick + 1]

    def _update_history(self, info):
        """
        Append the values of ``info`` to the per-key history lists
        """
        if not self.history:
            self.history = {key: [] for key in info.keys()}
        for key, value in info.items():
            self.history[key].append(value)

    def _process_data(self):
        """
        Return ``(prices, signal_features)``; implemented by subclasses.

        ``signal_features`` must be a 2-D array with one row per tick.
        """
        raise NotImplementedError

    def _calculate_reward(self):
        """
        Calculate reward for current step; implemented by subclasses
        """
        raise NotImplementedError

    def _update_profit(self, action):
        """
        Update portfolio based on action; implemented by subclasses
        """
        raise NotImplementedError

In [None]:
class VnStockEnv(TradingEnv):
    """Trading environment whose observations are close prices plus indicators.

    Every action component in [-1, 1] is turned into a target share count for
    one stock; the environment buys or sells towards that target within the
    limits of cash and current holdings (no short positions).
    """

    def _process_data(self):
        """
        Build the per-stock close price arrays and the feature matrix.

        Returns:
            ``(prices, features)`` where ``prices`` maps each stock to its close
            prices and ``features`` is a float64 array with one column per
            (stock, feature) pair, ordered stock-major.
        """
        feature_columns = [
            'close', 'RSI', 'MACD', 'MACDSignal', 'MACDHist',
            'BBUpper', 'BBLower', 'BBMiddle', 'CCI', 'ADX', 'ATR'
        ]
        # Row range covered by this environment.
        first_row = self.frame_bound[0] - self.window_size
        last_row = self.frame_bound[1]

        available = set(self.df.columns.levels[1])
        present_features = [name for name in feature_columns if name in available]

        prices = {}
        feature_arrays = []

        for stock in self.df.columns.levels[0]:
            # Close prices for this stock, restricted to the frame
            prices[stock] = self.df[(stock, 'close')].to_numpy(dtype=np.float64)[first_row:last_row]

            for feature in present_features:
                # Cast to float first: nan_to_num leaves object arrays untouched.
                raw = self.df[(stock, feature)].iloc[first_row:last_row].to_numpy(dtype=np.float64)
                feature_arrays.append(np.nan_to_num(raw))

        if feature_arrays:
            signal_features = np.column_stack(feature_arrays)
        else:
            signal_features = np.empty((0, 0), dtype=np.float64)

        return prices, signal_features

    def _calculate_reward(self) -> float:
        """
        Calculate reward using a combination of portfolio return and penalties.

        The raw reward is the step's portfolio return minus a drawdown penalty,
        the step's transaction cost relative to the previous portfolio value and
        the risk-free rate. It is then min-max scaled with the smallest and
        largest raw reward seen so far in the episode.

        NOTE(review): fees are already deducted from the cash balance in
        ``_update_profit``, so they are reflected in the portfolio return and
        subtracted here a second time -- confirm this is intended.
        """
        if self._prev_portfolio_value == 0:
            return 0

        # Calculate portfolio return
        portfolio_return = (self._portfolio_value - self._prev_portfolio_value) / self._prev_portfolio_value

        # Calculate drawdown penalty
        if self._peak_portfolio_value > 0:
            drawdown = (self._peak_portfolio_value - self._portfolio_value) / self._peak_portfolio_value
        else:
            drawdown = 0
        drawdown_penalty = 0.01 * drawdown

        # Calculate reward as return minus penalties and transaction costs
        reward = (
            portfolio_return
            - drawdown_penalty
            - (self._step_transaction_cost / self._prev_portfolio_value)
            - self.risk_free_rate
        )

        # Update the running extremes used for scaling
        self.min_reward = min(self.min_reward, reward)
        self.max_reward = max(self.max_reward, reward)

        if self.max_reward == self.min_reward:
            return 0
        return (reward - self.min_reward) / (self.max_reward - self.min_reward)

    def _update_profit(self, action):
        """
        Trade every stock towards the target implied by ``action`` and refresh
        portfolio value, peak value, daily return and total profit.
        """
        # Store previous portfolio value for return calculation
        self._prev_portfolio_value = self._portfolio_value
        self._step_transaction_cost = 0

        stocks = self.df.columns.levels[0]
        # The largest target position is the same for every stock.
        max_shares_to_trade = min(self.max_shares, self.trade_max)
        # Cost of one share including the fee, relative to its price.
        fee_multiplier = 1 + self.trade_fee_percent

        for i, stock in enumerate(stocks):
            current_price = self.prices[stock][self._current_tick]

            if current_price <= 0:
                continue  # Skip invalid prices

            # Convert the action from [-1, 1] into a target share count
            target_shares = int(action[i] * max_shares_to_trade)
            shares_to_trade = target_shares - self._shares_held[stock]

            # Buy shares
            if shares_to_trade > 0:
                # Include the fee in the affordability check; otherwise a buy
                # sized to the whole balance fails the balance check below and
                # is dropped entirely.
                affordable = int(self._balance // (current_price * fee_multiplier))
                shares_to_buy = min(shares_to_trade, affordable)
                total_cost = shares_to_buy * current_price * fee_multiplier
                if shares_to_buy > 0 and total_cost > self._balance:
                    # Floating-point rounding can overshoot by one share.
                    shares_to_buy -= 1

                cost = shares_to_buy * current_price
                transaction_fee = cost * self.trade_fee_percent
                total_cost = cost + transaction_fee

                if self._balance >= total_cost and shares_to_buy > 0:
                    self._balance -= total_cost
                    self._shares_held[stock] += shares_to_buy
                    self._step_transaction_cost += transaction_fee
                    self._transaction_cost += transaction_fee
                    self._trade_count += 1

            # Sell shares
            elif shares_to_trade < 0:
                # Never sell more than is held.
                shares_to_sell = min(abs(shares_to_trade), self._shares_held[stock])
                revenue = shares_to_sell * current_price
                transaction_fee = revenue * self.trade_fee_percent

                if shares_to_sell > 0:
                    self._balance += revenue - transaction_fee
                    self._shares_held[stock] -= shares_to_sell
                    self._step_transaction_cost += transaction_fee
                    self._transaction_cost += transaction_fee
                    self._trade_count += 1

        # Update portfolio value
        self._portfolio_value = self._balance + sum(
            self._shares_held[stock] * self.prices[stock][self._current_tick]
            for stock in stocks
        )

        # Update peak portfolio value for drawdown calculation
        self._peak_portfolio_value = max(self._peak_portfolio_value, self._portfolio_value)

        # Calculate daily return
        if self._prev_portfolio_value > 0:
            daily_return = (self._portfolio_value - self._prev_portfolio_value) / self._prev_portfolio_value
            self.daily_returns.append(daily_return)

        # Calculate profit
        self._total_profit = self._portfolio_value - self.initial_balance

# Training and Testing

In [None]:
# Hyperparameters for the algorithms compared in this notebook.
# Each entry is passed as keyword arguments to the algorithm constructor after
# its "name" key has been removed, so every other key must be a valid
# constructor argument of that algorithm.

# Observation window lengths to train and evaluate with.
WINDOW_SIZE = [5, 10, 15, 25]

# NOTE(review): the "CnnPolicy" entries below rely on Stable-Baselines3's default
# CNN feature extractor, which is designed for image observations, while the
# environment exposes a 2-D float Box of shape (window, features). Confirm these
# configurations run, or supply a custom features extractor via policy_kwargs.

HYPERPARAMS_PPO = [
    {
        "name": "PPO",
        "policy": "MlpPolicy",
        "learning_rate": 4e-4,
        "n_steps": 2048,
        "batch_size": 128,
        "n_epochs": 20,
        "gamma": 0.995,
        "clip_range": 0.2,
    },
    {
        "name": "PPO_CNN",
        "policy": "CnnPolicy",
        "learning_rate": 4e-4,
        "n_steps": 2048,
        "batch_size": 128,
        "n_epochs": 20,
        "gamma": 0.995,
        "clip_range": 0.2,
    },
]

HYPERPARAMS_A2C = [
    {
        "name": "A2C",
        "policy": "MlpPolicy",
        "learning_rate": 4e-4,
        "n_steps": 5,
        "gamma": 0.99,
        "max_grad_norm": 0.5,
    },
    {
        "name": "A2C_CNN",
        "policy": "CnnPolicy",
        "learning_rate": 4e-4,
        "n_steps": 5,
        "gamma": 0.99,
        "max_grad_norm": 0.5,
    },
]

# NOTE(review): a replay buffer of 1,000,000 transitions is allocated up front;
# with (window, features) float64 observations this can need a lot of memory.
HYPERPARAMS_SAC = [
    {
        "name": "SAC",
        "policy": "MlpPolicy",
        "learning_rate": 4e-4,
        "buffer_size": 1000000,
        "batch_size": 256,
        "gamma": 0.99,
        "tau": 0.005,
    },
    {
        "name": "SAC_CNN",
        "policy": "CnnPolicy",
        "learning_rate": 4e-4,
        "buffer_size": 1000000,
        "batch_size": 256,
        "gamma": 0.99,
        "tau": 0.005,
    },
]

HYPERPARAMS_RPPO = [
    {
        "name": "RPPO",
        "policy": "MlpLstmPolicy",
        "learning_rate": 4e-4,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10,
        "gamma": 0.99,
        "clip_range": 0.2,
        "max_grad_norm": 0.5,
    },
]

# sb3-contrib's TRPO constructor has no "n_epochs" or "max_grad_norm" argument
# (the policy update is a constrained step, not clipped gradient epochs), so
# passing them raised a TypeError. They are omitted here.
HYPERPARAMS_TRPO = [
    {
        "name": "TRPO",
        "policy": "MlpPolicy",
        "learning_rate": 4e-4,
        "n_steps": 2048,
        "batch_size": 64,
        "gamma": 0.99,
    },
]

In [None]:
def create_environment(data, window_size, initial_balance, max_shares, trade_max):
    """Create a ``VnStockEnv`` over all rows of ``data`` and smoke-test it.

    Args:
        data: Price frame with (stock, metric) columns.
        window_size: Observation window length.
        initial_balance: Starting cash.
        max_shares: Share limit forwarded to the environment.
        trade_max: Per-trade limit forwarded to the environment.

    Returns:
        The environment, reset to the start of an episode.
    """
    # The frame starts at window_size so the first observation has a full window.
    frame_bound = (window_size, len(data))

    # Create environment
    env = VnStockEnv(
        df=data,
        window_size=window_size,
        frame_bound=frame_bound,
        initial_balance=initial_balance,
        max_shares=max_shares,
        trade_max=trade_max
    )

    print("Testing environment...")
    # One random step to verify the environment runs end to end.
    action = env.action_space.sample()
    observation, _reward, _terminated, _truncated, _info = env.step(action)
    print(f"Observation shape: {observation.shape}")

    # Undo the probe step so callers receive a fresh episode.
    env.reset()

    return env

def train_model(model_class, params, data, initial_balance, max_shares, trade_max, window_size, timesteps=500000):
    """Train one model on ``data`` and save it.

    Args:
        model_class: Algorithm class; called as
            ``model_class(**kwargs, env=..., verbose=0, device=device)``.
        params: Constructor keyword arguments plus a "name" key used for the
            output folders. The dict is not modified.
        data: Training price frame.
        initial_balance: Starting cash for the environment.
        max_shares: Share limit for the environment.
        trade_max: Per-trade limit for the environment.
        window_size: Observation window length.
        timesteps: Total number of training timesteps.

    Returns:
        Path of the saved model, ``models/<name>/<name>_<window_size>.zip``.
    """
    # Create environment
    env = create_environment(data, window_size, initial_balance, max_shares, trade_max)

    # Work on a copy so the caller's dict keeps its "name" key even when
    # training raises.
    model_kwargs = dict(params)
    model_name = model_kwargs.pop("name")

    # Setup monitoring
    model_dir = f"models/{model_name}/{window_size}"
    log_dir = f"logs/{model_name}/{window_size}"
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    monitored_env = Monitor(env, log_dir)
    vec_env = DummyVecEnv([lambda: monitored_env])

    # Create callback
    callback = CustomLoggingCallback(
        verbose=1,
        log_interval=20000,
        save_path=model_dir,
        filename=f"{model_name}_{window_size}_training.pdf"
    )

    # Create and train model
    model = model_class(**model_kwargs, env=vec_env, verbose=0, device=device)
    model.learn(total_timesteps=timesteps, callback=callback)

    # Save model
    model_path = f"models/{model_name}/{model_name}_{window_size}.zip"
    model.save(model_path)

    return model_path

def evaluate_model(model_path, model_class, data, initial_balance, max_shares, trade_max, window_size, deterministic=False):
    """Run a saved model for one episode on ``data`` and record the outcome.

    Args:
        model_path: Path of the saved model.
        model_class: Algorithm class providing ``load``.
        data: Test price frame.
        initial_balance: Starting cash for the environment.
        max_shares: Share limit for the environment.
        trade_max: Per-trade limit for the environment.
        window_size: Observation window length.
        deterministic: Forwarded to ``model.predict``. The default keeps the
            previous (stochastic) behaviour; pass True for reproducible runs.

    Returns:
        ``(portfolio_values, returns, actions, dates)`` with one entry per
        environment step. The first return is measured against
        ``initial_balance``.
    """
    # Create test environment
    raw_env = create_environment(data, window_size, initial_balance, max_shares, trade_max)
    monitored_env = Monitor(raw_env, None)
    env = DummyVecEnv([lambda: monitored_env])

    # Load model
    model = model_class.load(model_path)

    obs = env.reset()

    # Track performance metrics
    portfolio_values = []
    returns = []
    actions = []

    previous_value = initial_balance
    # Carry the recurrent state between calls; non-recurrent policies ignore it.
    policy_state = None
    episode_start = np.ones((env.num_envs,), dtype=bool)

    episode_over = False
    while not episode_over:
        action, policy_state = model.predict(
            obs,
            state=policy_state,
            episode_start=episode_start,
            deterministic=deterministic,
        )
        # A vectorized env returns 4 values; 'dones' covers termination and truncation.
        obs, _reward, dones, infos = env.step(action)
        episode_start = dones
        episode_over = bool(dones[0])

        current_value = infos[0]['portfolio_value']
        portfolio_values.append(current_value)

        # Calculate daily return
        if previous_value:
            returns.append((current_value - previous_value) / previous_value)
        else:
            returns.append(0.0)
        previous_value = current_value

        actions.append(action)

    # The first step acts on row window_size + 1 of `data`
    # (assumes create_environment starts the frame at row 0 -- confirm).
    first_row = window_size + 1
    dates = data.index[first_row:first_row + len(portfolio_values)]

    if len(dates) == len(portfolio_values) == len(returns) == len(actions):
        # Store the results next to the model, in the folder train_model creates.
        results_dir = os.path.join(os.path.dirname(model_path), str(window_size))
        os.makedirs(results_dir, exist_ok=True)

        df = pd.DataFrame({
            'Date': np.asarray(dates),
            'Portfolio Value': portfolio_values,
            'Daily Returns': returns,
            # Flatten the (n_envs, n_stocks) arrays so each cell holds one list.
            'Actions': [np.asarray(step_action).ravel().tolist() for step_action in actions]
        })
        df.to_csv(os.path.join(results_dir, "evaluation_results.csv"), index=False)
    else:
        print("Can't save evaluation results!")

    return portfolio_values, returns, actions, dates

In [8]:
def calculate_metrics(portfolio_values, returns):
    """Calculate trading performance metrics.

    Args:
        portfolio_values: Sequence of portfolio values; must not be empty.
        returns: Sequence of per-period returns.

    Returns:
        Dict with total_return, annualized_return, max_drawdown, sharpe_ratio,
        sortino_ratio, calmar_ratio and win_rate. A ratio whose denominator is
        zero (no returns, zero volatility, no drawdown) is reported as 0.

    Raises:
        IndexError: If ``portfolio_values`` is empty.
    """
    trading_days = 252  # assumed trading days per year

    # Convert to numpy arrays for calculations
    portfolio_values = np.asarray(portfolio_values, dtype=float)
    returns = np.asarray(returns, dtype=float)
    n_days = returns.size

    # Total return
    total_return = (portfolio_values[-1] / portfolio_values[0]) - 1

    # Annualized return
    if n_days > 0:
        annualized_return = (1 + total_return) ** (trading_days / n_days) - 1
    else:
        annualized_return = 0.0

    # Maximum drawdown (most negative distance from the running peak)
    peak = np.maximum.accumulate(portfolio_values)
    drawdown = (portfolio_values - peak) / peak
    max_drawdown = drawdown.min()

    # Sharpe ratio (assuming risk-free rate of 0.01 annually)
    risk_free_daily = 0.01 / trading_days
    excess_returns = returns - risk_free_daily
    mean_excess = np.mean(excess_returns) if n_days > 0 else 0.0
    volatility = np.std(returns) if n_days > 0 else 0.0

    sharpe_ratio = 0.0
    if volatility > 0:
        sharpe_ratio = (mean_excess / volatility) * np.sqrt(trading_days)

    # Sortino ratio (deviation of the negative returns only)
    negative_returns = returns[returns < 0]
    sortino_ratio = 0.0
    if negative_returns.size > 0:
        downside_deviation = np.std(negative_returns)
        # A single negative return has zero deviation.
        if downside_deviation > 0:
            sortino_ratio = (mean_excess / downside_deviation) * np.sqrt(trading_days)

    # Calmar ratio (annualized return divided by max drawdown)
    calmar_ratio = 0.0
    if max_drawdown != 0:
        calmar_ratio = annualized_return / abs(max_drawdown)

    # Win rate
    win_rate = np.sum(returns > 0) / n_days if n_days > 0 else 0.0

    return {
        "total_return": total_return,
        "annualized_return": annualized_return,
        "max_drawdown": max_drawdown,
        "sharpe_ratio": sharpe_ratio,
        "sortino_ratio": sortino_ratio,
        "calmar_ratio": calmar_ratio,
        "win_rate": win_rate
    }

# Evaluate / Visualize

In [None]:
def main():
    """Fetch data, then train and evaluate every configured model.

    For each algorithm, hyperparameter set and window size a model is trained on
    the 2020-2023 data, evaluated on the 2024-2025 data, and its metrics are
    printed.

    Returns:
        Dict mapping ``(model_name, window_size)`` to the metrics dict.
    """
    STOCK_LIST_VN = ['FPT', 'VCB', 'VIC', 'GAS', 'VHM', 'BID', 'MBB', 'HVN', 'HPG', 'GVR', 'VNM', 'SSI', 'GEX', 'ACB']
    STOCK_LIST_GLOBAL = ['AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HON', 'IBM', 'INTC', 'JPM', 'MSFT', 'NKE', 'GOOGL', 'AMZN', 'WMT', 'NVDA']
    print(f"Number of stocks using: {len(STOCK_LIST_VN) + len(STOCK_LIST_GLOBAL)}")

    print("Loading TRAIN data...")
    train_data = fetch_stock_data(STOCK_LIST_VN, STOCK_LIST_GLOBAL, '2020-01-01', '2024-01-01')

    print("Loading TEST data...")
    test_data = fetch_stock_data(STOCK_LIST_VN, STOCK_LIST_GLOBAL, '2024-01-01', '2025-04-13')

    print("Data loaded successfully.")

    # Create directories
    os.makedirs("models", exist_ok=True)
    os.makedirs("logs", exist_ok=True)
    os.makedirs("plots", exist_ok=True)

    # Environment settings shared by training and evaluation
    env_settings = {
        "initial_balance": 10000,
        "max_shares": 500,
        "trade_max": 100,
    }

    # Dictionary to store results for all models
    all_results = {}

    # Define model classes and their hyperparameter lists
    models_to_train = [
        (PPO, HYPERPARAMS_PPO),
        (A2C, HYPERPARAMS_A2C),
        (SAC, HYPERPARAMS_SAC),
        (RecurrentPPO, HYPERPARAMS_RPPO),
        (TRPO, HYPERPARAMS_TRPO)
    ]

    # Train all models with different hyperparameters
    for model_class, hyperparams_list in models_to_train:
        model_type = model_class.__name__
        print(f"\nTraining {model_type} models...")

        for params in hyperparams_list:
            model_name = params["name"]

            for win_size in WINDOW_SIZE:
                print(f"  Training {model_name}...")

                # train_model returns only the path of the saved model; a copy of
                # the params keeps the shared configuration untouched.
                model_path = train_model(
                    model_class=model_class,
                    params=params.copy(),
                    data=train_data,
                    timesteps=200000,
                    window_size=win_size,
                    **env_settings
                )

                # Evaluate model
                portfolio_values, returns, actions, dates = evaluate_model(
                    model_path=model_path,
                    model_class=model_class,
                    data=test_data,
                    window_size=win_size,
                    **env_settings
                )

                # Calculate performance metrics
                metrics = calculate_metrics(portfolio_values, returns)
                all_results[(model_name, win_size)] = metrics

                print(f"  {model_name} metrics:")
                for key, value in metrics.items():
                    print(f"    {key}: {value:.4f}")

    return all_results

if __name__ == "__main__":
    main()

Number of stocks using: 31
Loading TEST data...
Fetching 14 Vietnamese stock data...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching 17 Global stock data...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Stock          AAPL                               ACB                       \
Metric        close     high      low     open  close   high    low   open   
2024-01-01  4705.57  4776.54  4661.21  4743.84  20.54  20.79  20.12  20.12   
2024-01-02  4705.57  4776.54  4661.21  4743.84  20.54  20.79  20.12  20.12   
2024-01-03  4670.33  4711.65  4649.55  4669.57  21.00  21.00  20.37  20.58   
2024-01-04  4611.02  4640.93  4584.91  4617.10  21.25  21.55  21.08  21.17   
2024-01-05  4592.52  4632.57  4566.92  4613.05  21.38  21.38  21.04  21.25   

Stock          AMZN           ...   VIC           VNM                       \
Metric        close     high  ...   low   open  close   high    low   open   
2024-01-01  3823.21  3885.69  ...  44.0  44.95  64.65  64.84  64.27  64.56   
2024-01-02  3823.21  3885.69  ...  44.0  44.95  64.65  64.84  64.27  64.56   
2024-01-03  3785.99  3851.78  ...  43.5  43.50  65.31  65.31  64.37  64.75   
2024-01-04  3686.54  3758.19  ...  43.8  44.15  65.31  65.79  6

ValueError: Error: Unexpected observation shape (1, 10, 341) for Box environment, please use (5, 341) or (n_env, 5, 341) for the observation shape.