<a href="https://colab.research.google.com/github/Likelipop/DQL_Trading_based/blob/main/new_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**PART 0: DEPENDENCY**

In [None]:
!pip install vnstock3

In [2]:
!pip install stable-baselines3 gymnasium

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [3]:
import pandas as pd
from vnstock import Vnstock


    Khi tiếp tục sử dụng Vnstock3, bạn xác nhận rằng bạn đã đọc, hiểu và đồng ý với Chính sách quyền riêng tư và Điều khoản, điều kiện về giấy phép sử dụng Vnstock3.

    Chi tiết:

    - Giấy phép sử dụng phần mềm: https://vnstocks.com/docs/tai-lieu/giay-phep-su-dung
    - Chính sách quyền riêng tư: https://vnstocks.com/docs/tai-lieu/chinh-sach-quyen-rieng-tu
    


**PART 1: DATA PREPARATION**

Ở đây, ta sẽ xây dựng một lớp để xử lý việc tải và xử lý dữ liệu từ VNSTOCK3

In [4]:
def load_data(symbol, date_start, date_end):
    """Loads data from vnstock3 for a given symbol.

    Args:
      symbol: The stock symbol (e.g., 'TCB', 'VIC').
      date_start: starting query date: 'YYYY-MM-DD'.
      date_end: ending query date: 'YYYY-MM-DD'.

    Returns:
      A pandas DataFrame containing the historical data for the given symbol,
      or None if an error occurs.
    """
    try:
        # Replace with actual vnstock3 data loading logic
        # Example using a placeholder CSV file
        # df = pd.read_csv(f"path/to/data/{symbol}.csv")
        stock = Vnstock().stock(symbol=symbol, source="VCI")
        return stock.quote.history(start=date_start, end=date_end)
    except FileNotFoundError:
        print(f"Error: Data for symbol '{symbol}' not found.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [5]:
df_data = load_data(symbol = "VNM",date_start = "2015-01-01", date_end = "2025-01-01")
df_data



Unnamed: 0,time,open,high,low,close,volume
0,2015-01-05,30.34,30.50,30.18,30.34,70290
1,2015-01-06,30.34,30.66,30.03,30.34,100890
2,2015-01-07,30.34,31.13,30.34,30.97,194130
3,2015-01-08,30.97,31.29,30.82,31.13,70650
4,2015-01-09,31.45,32.55,31.29,32.55,290390
...,...,...,...,...,...,...
2495,2024-12-26,64.20,64.20,63.80,63.80,1711855
2496,2024-12-27,63.90,64.10,63.70,63.80,2445187
2497,2024-12-30,63.60,63.70,63.30,63.30,1492983
2498,2024-12-31,63.40,63.80,63.30,63.40,1643941


In [6]:
close_column = df_data["close"].to_numpy()
Xtrain, Xtest = close_column[:int(len(close_column)*0.8)], close_column[int(len(close_column)*0.8):]
print(f"X_train: {Xtrain.shape}, X_test: {Xtest.shape}")

X_train: (2000,), X_test: (500,)


In [7]:
Xtrain

array([30.34, 30.34, 30.97, ..., 70.02, 70.02, 69.2 ])

**PART 2: ENVIROMENT**

In [None]:
import gymnasium as gym
from gymnasium.spaces import Discreate, Box
import numpy as np
import matplotlib.pyplot as plt
class TradingEnv(gym.Env):
    def __init__(self, window_size,data):
        super(TradingEnv, self).__init__()
        #Keep the stock prices in the form of np array, also keep tra
        self.data = data
        self.window_size = window_size
        self.action_space = Discreate(3)  # [0: Sell, 1: Hold, 2: Buy]
        self.observation_space = Box(
            low=np.array([0]*self.window_size),
            high=np.array([1]*self.window_size))
        #Keep track of some pillars parameter
        self.state = self._get_state(self.window_size)
        self.total_profit = 0
        self.done = False
        self.current_day = 0
        #Keep track of the price when buying.
        self.inventory = []

        # Log the date information perform each actions
        self.logs = {'states_buy': [], 'states_sell': []}


    def reset(self):
        self.state = self._get_state(self.window_size)
        self.total_profit = 0
        self.done = False
        self.current_day = 0
        self.inventory = []
        self.logs = {'states_buy': [], 'states_sell': []}

        return self._next_observation(),{}

    def _get_state(self,t):
      """
      Description: get the state via self.data, including all the days back
      Arg: days -> int: number of days to look back
      Return: np.array of the state
      """
      block = []
      obs = []
      try :
        if self.window_size > t:
          delta = self.window_size - t
          block = np.concatenate(([0]*delta,self.data[:t]))
        else
          block = self.data[t-self.window_size:t]

        for i in range(1,len(block)):
          obs.append(normalized_obs(block[i] - block[i-1]))
        return np.array(obs)

      except:
        return np.array([])

    def _normalize(self, data):
        return 1 / (1 + np.exp(-data))  # Sigmoid normalization

    def step(self, action):
        reward = 0
        current_price = self.data[self.current_day]

        # Action logic

        if (action == 0) and (len(self.inventory) > 0):  # Decide to sell and there is something for selling.
            #reward estimate
            bought_price = self.inventory.pop()
            reward = max(current_price - bought_price, 0)
            #logging
            self.logs['states_sell'].append(self.current_day)
        elif action == 2:  # Buy
            #reward estimate
            self.inventory.append(current_price)
            reward = 0
            #logging
            self.logs['states_buy'].append(self.current_day)

        # Hold action doesn't change inventory or immediate reward

        #update parameter
        self.current_day += 1
        self.state = self._get_state(self.current_day)
        self.total_profit += reward

        #check if the env terminal
        if self.current_day >= len(self.data):
            self.done = True
            self.render()

        return observation, reward, self.done, {}

    def render(self, mode='rgb'):
        plt.figure(figsize=(12, 6))
        plt.plot(self.data, color='r', lw=2.)
        plt.plot(self.data, 'v', markersize=10, color='m', label='Selling signal', markevery=self.logs["states_sell"])
        plt.plot(self.data, "^", markersize=10, color="k", label="Buy signal", markevery = self.logs["states_buy"])
        plt.title('Total gains: %f'%self.total_profit)
        plt.legend()
        plt.show()


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


In [None]:
import os
import random
import numpy as np
from stable_baselines3 import DQN

# Initialize environment
env = TradingEnv(window_size=10)

states = env.observation_space.shape
actions = env.action_space.n



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
New model created.


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


AttributeError: 'DQN' object has no attribute '_logger'

In [None]:
from stable_baselines3.common.callbacks import BaseCallback

class SaveOnEpisodeEndCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(SaveOnEpisodeEndCallback, self).__init__(verbose)

    def _on_step(self):
        # Check if the episode is done
        if self.locals.get('dones', [False])[0]:
            # Save the model and replay buffer
            self.model.save(model_filename)
            self.model.save_replay_buffer(replay_buffer_filename)
            if self.verbose > 0:
                print(f"Episode {self.num_timesteps} completed. Model and replay buffer saved.")
        return True


In [None]:
import os
import random
import numpy as np
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback

# Set random seeds


# Initialize the environment
env = TradingEnv(window_size=10)

# File names
model_filename = 'dqn_trading_model.zip'
replay_buffer_filename = 'dqn_replay_buffer.pkl'

# Load or create a model
if os.path.exists(model_filename):
    model = DQN.load(model_filename, env=env, verbose=1)
    print("Model loaded.")
    if os.path.exists(replay_buffer_filename):
        model.load_replay_buffer(replay_buffer_filename)
        print("Replay buffer loaded.")
    else:
        print("No replay buffer found. Starting with an empty buffer.")
else:
    model = DQN('MlpPolicy', env, verbose=1)
    print("New model created.")

# Custom callback to save the model at the end of each episode
class SaveOnEpisodeEndCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(SaveOnEpisodeEndCallback, self).__init__(verbose)

    def _on_step(self):
        # Check if the episode is done
        if self.locals.get('dones', [False])[0]:
            # Save the model and replay buffer
            self.model.save(model_filename)
            self.model.save_replay_buffer(replay_buffer_filename)
            if self.verbose > 0:
                print(f"Episode at timestep {self.num_timesteps} completed. Model and replay buffer saved.")
        return True

callback = SaveOnEpisodeEndCallback(verbose=1)

# Train the model for one episode
total_timesteps_per_episode = len(env.data) - env.window_size  # Steps in one episode

model.learn(
    total_timesteps=total_timesteps_per_episode,
    callback=callback,
    reset_num_timesteps=False
)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
New model created.


TypeError: TradingEnv.reset() got an unexpected keyword argument 'seed'