In [9]:
import gymnasium as gym
import numpy as np
import pandas as pd
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import talib

Xây Dựng Môi Trường

In [10]:
class AVGOTradingEnv(gym.Env):
    """Single-asset daily trading environment driven by a price DataFrame.

    The agent emits one continuous action in [-1, 1]. Only the sign is used:
    a positive action buys, a negative action sells, zero does nothing. The
    size of each trade is drawn at random (see ``BUY_FRACTION_RANGE`` and
    ``SELL_FRACTION_RANGE``); the magnitude of the action is not used.

    Observation (10 float32 values, in this order):
        Close, cash, shares_held, total_assets, days_without_trade,
        SMA_10, SMA_50, SMA_200, RSI, MACD

    Args:
        df: DataFrame with at least a "Close" column ("Date" is also needed
            by ``render``). Rows are assumed to be in chronological order
            -- TODO confirm against the caller.
        initial_cash: Cash balance at the start of every episode.
        penalty: Cash deducted on each step once the agent has gone
            ``MAX_IDLE_DAYS`` or more consecutive steps without a trade.
    """

    # Fraction of available cash spent on a buy: uniform in [low, high].
    BUY_FRACTION_RANGE = (0.39, 0.75)
    # Fraction of held shares sold on a sell: uniform in [low, high].
    SELL_FRACTION_RANGE = (0.23, 0.75)
    # Consecutive idle steps after which the inactivity penalty is charged.
    MAX_IDLE_DAYS = 35

    def __init__(self, df, initial_cash=100000, penalty=3072):
        super().__init__()

        # reset_index() returns a new frame, so the caller's df is not mutated
        # and rows can be addressed by integer step via .loc / .iloc.
        self.df = df.reset_index()
        self.initial_cash = initial_cash
        self.cash = initial_cash
        self.shares_held = 0
        self.total_assets = initial_cash
        self.current_step = 0
        self.days_without_trade = 0
        self.penalty = penalty

        # Technical indicators computed from the closing price.
        close = self.df["Close"]
        self.df["SMA_10"] = talib.SMA(close, timeperiod=10)
        self.df["SMA_50"] = talib.SMA(close, timeperiod=50)
        self.df["SMA_200"] = talib.SMA(close, timeperiod=200)
        self.df["RSI"] = talib.RSI(close, timeperiod=14)
        macd, signal, _ = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        # Stored under "MACD", but the value is the MACD line minus its signal line.
        self.df["MACD"] = macd - signal
        upper, _middle, lower = talib.BBANDS(close, timeperiod=20)
        # The Bollinger bands are stored but are not part of the observation.
        self.df["BB_Upper"] = upper
        self.df["BB_Lower"] = lower

        # Fill the leading NaNs of each indicator's warm-up window with the
        # first valid value. `fillna(method=...)` is deprecated in pandas 2.x,
        # so use DataFrame.bfill() instead.
        # NOTE(review): back-filling copies future indicator values into early
        # rows (look-ahead), and a frame shorter than the longest window
        # (200 rows) keeps NaNs in SMA_200 -- confirm this is acceptable.
        self.df = self.df.bfill()

        # Action space: one continuous value between -1 (sell) and 1 (buy).
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # Observation space: raw (unnormalised) values of the 10 features.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Start a new episode at the first row with the initial portfolio.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` (used for trade sizing) is reproducible.
            options: Unused; accepted for Gymnasium API compatibility.

        Returns:
            Tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        # Seeds self.np_random as required by the Gymnasium Env contract.
        super().reset(seed=seed)

        self.current_step = 0
        self.cash = self.initial_cash
        self.shares_held = 0
        self.total_assets = self.initial_cash
        self.days_without_trade = 0

        return self._next_observation(), {}

    def _next_observation(self):
        """Build the 10-value float32 observation for ``self.current_step``."""
        row = self.df.iloc[self.current_step]
        return np.array([
            row["Close"], self.cash, self.shares_held, self.total_assets, self.days_without_trade,
            row["SMA_10"], row["SMA_50"], row["SMA_200"], row["RSI"], row["MACD"]
        ], dtype=np.float32)

    def _buy(self, price):
        """Spend a random fraction of cash on whole shares; return True if any were bought."""
        # With no positive cash there is nothing to spend. Without this guard a
        # negative balance yields a negative floor-division result, i.e. the
        # "buy" would silently remove shares and add cash.
        if self.cash <= 0:
            return False
        low, high = self.BUY_FRACTION_RANGE
        budget = self.np_random.uniform(low * self.cash, high * self.cash)
        shares_bought = budget // price  # whole shares only
        if shares_bought <= 0:
            # Cannot afford a single share: this is not a trade.
            return False
        self.cash -= shares_bought * price
        self.shares_held += shares_bought
        return True

    def _sell(self, price):
        """Sell a random fraction of the held shares; return True if any were sold."""
        if self.shares_held <= 0:
            return False
        low, high = self.SELL_FRACTION_RANGE
        # NOTE(review): unlike buys, the sold quantity is not rounded to whole
        # shares, so holdings can become fractional. Kept as in the original.
        shares_sold = self.np_random.uniform(low * self.shares_held, high * self.shares_held)
        self.cash += shares_sold * price
        self.shares_held -= shares_sold
        return True

    def step(self, action):
        """Apply one action and advance to the next row.

        Args:
            action: Array-like (or scalar) whose first element is the trading
                signal; it is clipped to [-1, 1]. > 0 buys, < 0 sells,
                0 does nothing.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is True when cash <= -5000, total assets
            >= 1,000,000 or total assets < 1000. ``truncated`` is True when
            the step lands on the last row of the data.
        """
        last_step = len(self.df) - 1

        # Stepping an already exhausted episode: report it as finished. Kept so
        # loops that only watch the `terminated` flag still stop.
        if self.current_step >= last_step:
            return self._next_observation(), 0, True, False, {}

        # Accept both shape-(1,) arrays and bare scalars, then clamp to [-1, 1].
        signal = float(np.clip(np.asarray(action, dtype=np.float32).reshape(-1)[0], -1, 1))
        current_price = self.df.loc[self.current_step, "Close"]

        if signal > 0:  # Buy
            traded = self._buy(current_price)
        elif signal < 0:  # Sell
            traded = self._sell(current_price)
        else:  # Hold
            traded = False

        if traded:
            self.days_without_trade = 0
        else:
            self.days_without_trade += 1
            if self.days_without_trade >= self.MAX_IDLE_DAYS:
                # Penalise staying out of the market for too long.
                self.cash -= self.penalty

        self.total_assets = self.cash + self.shares_held * current_price
        # NOTE(review): the reward is the cumulative profit since the start of
        # the episode, not the per-step change, so summed episode returns grow
        # very large (see the logged ep_rew_mean) -- confirm this is intended.
        reward = self.total_assets - self.initial_cash

        terminated = bool(
            self.cash <= -5000 or self.total_assets >= 1000000 or self.total_assets < 1000
        )

        self.current_step += 1
        # Evaluate after advancing: the data is exhausted once the new step is
        # the last row. Checking before the increment could never be True here.
        truncated = self.current_step >= last_step

        return self._next_observation(), reward, terminated, truncated, {}

    def render(self, mode="human"):
        """Print the date of the current row and the portfolio state."""
        current_date = self.df.loc[self.current_step, "Date"]
        print(f"Day: {current_date}, Cash: {self.cash}, Shares: {self.shares_held}, Total Assets: {self.total_assets}")

Train Agent

In [11]:
from stable_baselines3 import DDPG
from stable_baselines3.common.env_util import make_vec_env

In [12]:
# Fixed seed so the training run is reproducible across Restart & Run All.
SEED = 42

# Load the OHLC training data.
df = pd.read_csv("data/data_train.csv")
# Reverse the row order.
# NOTE(review): presumably the CSV is stored newest-first and this puts it in
# chronological order -- confirm against the data file.
df = df.iloc[::-1].reset_index(drop=True)

# Pass the frame through env_kwargs instead of a lambda that closes over the
# global `df`, which a later cell rebinds to the test data.
env = make_vec_env(AVGOTradingEnv, n_envs=1, env_kwargs={"df": df})

# Train DDPG. `seed` seeds Python, NumPy and PyTorch as well as the env.
# NOTE(review): no `action_noise` is passed, so DDPG adds no exploration noise
# to its actions -- consider adding one.
model = DDPG("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=100000)

# Save the trained model.
model.save("ddpg_trading_model")

Using cpu device
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.96e+03  |
|    ep_rew_mean     | 6.21e+08  |
| time/              |           |
|    episodes        | 4         |
|    fps             | 54        |
|    time_elapsed    | 144       |
|    total_timesteps | 7828      |
| train/             |           |
|    actor_loss      | -9.69e+06 |
|    critic_loss     | 7.64e+10  |
|    learning_rate   | 0.001     |
|    n_updates       | 7727      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.95e+03  |
|    ep_rew_mean     | 6.34e+08  |
| time/              |           |
|    episodes        | 8         |
|    fps             | 53        |
|    time_elapsed    | 291       |
|    total_timesteps | 15624     |
| train/             |           |
|    actor_loss      | -1.68e+07 |
|    critic_loss     | 1.66e+11  |
|    learning_rate   | 0.001     |
|  

Test Agent

In [13]:
# Load the held-out test data and reverse the row order.
# NOTE(review): presumably the CSV is stored newest-first -- confirm.
df = pd.read_csv("data/data_test.csv")
df = df.iloc[::-1].reset_index(drop=True)

model = DDPG.load("ddpg_trading_model")

env = AVGOTradingEnv(df)
obs, _ = env.reset()
done = False
truncated = False

# A Gymnasium episode ends when it is either terminated or truncated, so stop
# on both flags rather than relying on the env reporting `done` one call late.
while not (done or truncated):
    # Evaluate with the deterministic policy output.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = env.step(action)
    env.render()


Day: 2023-02-07, Cash: 38727.53, Shares: 1019.0, Total Assets: 100000.0
Day: 2023-02-08, Cash: 20847.034999999996, Shares: 1310.0, Total Assets: 101339.98500000002
Day: 2023-02-09, Cash: 6225.4819999999945, Shares: 1553.0, Total Assets: 99671.045
Day: 2023-02-10, Cash: 2988.667999999994, Shares: 1607.0, Total Assets: 99313.855
Day: 2023-02-13, Cash: 1090.2679999999941, Shares: 1639.0, Total Assets: 98323.943
Day: 2023-02-14, Cash: 669.4839999999941, Shares: 1646.0, Total Assets: 99613.836
Day: 2023-02-15, Cash: 247.8669999999941, Shares: 1653.0, Total Assets: 99809.71
Day: 2023-02-16, Cash: 126.3289999999941, Shares: 1655.0, Total Assets: 100699.024
Day: 2023-02-17, Cash: 66.2749999999941, Shares: 1656.0, Total Assets: 99515.699
Day: 2023-02-21, Cash: 66.2749999999941, Shares: 1656.0, Total Assets: 98695.979
Day: 2023-02-22, Cash: 66.2749999999941, Shares: 1656.0, Total Assets: 96369.299
Day: 2023-02-23, Cash: 66.2749999999941, Shares: 1656.0, Total Assets: 95480.02699999999
Day: 2023-