In [1]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
from stable_baselines3 import DQN  # Or A2C, PPO, etc.
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
class TradingEnv(gym.Env):
    """Minimal single-asset trading environment (classic Gym API).

    Each step the agent may hold, buy one share, or sell one share at the
    current row's ``Close`` price. The observation is the current row of
    ``data`` followed by the cash balance and the number of shares held.

    This class implements the classic Gym interface: ``reset()`` returns only
    the observation and ``step()`` returns ``(obs, reward, done, info)``.

    Args:
        data: pandas DataFrame of numeric market data, one row per time step.
            Must contain a ``"Close"`` column and at least two rows.
        start_balance: Cash available at the start of every episode.

    Raises:
        ValueError: If ``data`` has no ``"Close"`` column or fewer than two rows.
    """

    def __init__(self, data, start_balance=1000):
        super().__init__()

        if "Close" not in data.columns:
            raise ValueError('data must contain a "Close" column')
        if len(data) < 2:
            # With a single row, the first step() would index past the end
            # when building the next observation.
            raise ValueError("data must contain at least two rows")

        self.data = data  # market data the episode walks through, row by row
        self.current_step = 0  # index of the row currently being traded on
        self.start_balance = start_balance  # initial cash; never modified after construction
        self.balance = self.start_balance  # current cash
        self.shares_held = 0  # current position size
        # Portfolio value after the previous step; the reward is the change
        # relative to this. Kept separate from start_balance so that reset()
        # always restores the true initial cash.
        self._last_portfolio_value = self.start_balance

        # 3 actions: 0 = Hold, 1 = Buy, 2 = Sell
        self.action_space = spaces.Discrete(3)

        # One entry per data column plus balance and shares held. The values
        # are raw (unnormalized), so the bounds are left unbounded rather than
        # claiming a [0, 1] range the observations do not respect.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(data.shape[1] + 2,), dtype=np.float32
        )

    def reset(self):
        """Start a new episode and return the first observation."""
        self.current_step = 0
        self.balance = self.start_balance
        self.shares_held = 0
        self._last_portfolio_value = self.start_balance
        return self._next_observation()

    def _next_observation(self):
        """Return the current data row plus [balance, shares_held] as float32."""
        stock_data = self.data.iloc[self.current_step].values
        obs = np.append(stock_data, [self.balance, self.shares_held])
        return obs.astype(np.float32)

    def step(self, action):
        """Apply ``action`` at the current Close price and advance one row.

        Args:
            action: 0 = Hold, 1 = Buy one share (only if cash covers the
                price), 2 = Sell one share (only if any are held). Trades
                that cannot be executed are silently treated as Hold.

        Returns:
            Tuple ``(observation, reward, done, info)``. ``reward`` is the
            change in portfolio value (cash + shares * current Close) since
            the previous step, or since reset on the first step. ``done`` is
            True once the last row of ``data`` has been reached. ``info`` is
            always an empty dict.
        """
        current_price = self.data.iloc[self.current_step]["Close"]

        if action == 1:  # Buy
            if self.balance >= current_price:
                self.shares_held += 1
                self.balance -= current_price
        elif action == 2:  # Sell
            if self.shares_held > 0:
                self.shares_held -= 1
                self.balance += current_price

        # Mark the portfolio to market at the current price. A trade swaps
        # cash for a share at that same price, so the reward reflects the
        # price move on shares held coming into this step.
        portfolio_value = self.balance + self.shares_held * current_price
        reward = portfolio_value - self._last_portfolio_value
        self._last_portfolio_value = portfolio_value

        self.current_step += 1
        # Stop on the last row so the observation below is still in range.
        done = self.current_step >= len(self.data) - 1

        return self._next_observation(), reward, done, {}

    def render(self, mode="human"):
        """Rendering is not implemented; this is a no-op."""
        pass

In [3]:
# Synthetic placeholder market data: independent uniform draws in [0, 1) for
# the price columns and integer volumes in [1, 1000). A seeded generator is
# used so that Restart & Run All produces the same frame every time.
N_ROWS = 100
DATA_SEED = 42
data_rng = np.random.default_rng(DATA_SEED)

data = pd.DataFrame({
    'Open': data_rng.random(N_ROWS),
    'High': data_rng.random(N_ROWS),
    'Low': data_rng.random(N_ROWS),
    'Close': data_rng.random(N_ROWS),
    'Volume': data_rng.integers(1, 1000, size=N_ROWS),
})

In [4]:
def _make_trading_env():
    """Factory handed to DummyVecEnv: builds one TradingEnv over `data`."""
    return TradingEnv(data)


# DummyVecEnv takes a list of zero-argument factories, one per environment.
env = DummyVecEnv([_make_trading_env])



In [5]:
import stable_baselines3

In [6]:


# Training settings, named so they are easy to find and tune.
TOTAL_TIMESTEPS = 5000
MODEL_PATH = "dqn_trading_model"

# Build a DQN agent with the default MLP policy on the vectorized environment.
model = DQN(policy="MlpPolicy", env=env, verbose=1)

# Train, then persist the learned weights to disk.
model.learn(total_timesteps=TOTAL_TIMESTEPS)
model.save(MODEL_PATH)


Using cpu device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.248    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 2857     |
|    time_elapsed     | 0        |
|    total_timesteps  | 396      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3419     |
|    time_elapsed     | 0        |
|    total_timesteps  | 792      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12       |
|    fps              | 3564     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1188     |
----------------------------------
----------------------------------
| r

In [7]:
# Report which stable_baselines3 build the kernel loaded: version, then install path.
print(stable_baselines3.__version__, stable_baselines3.__file__, sep="\n")

2.0.0
c:\Users\sriee\OneDrive\AppData\Desktop\Trading Code\trading_env\Lib\site-packages\stable_baselines3\__init__.py
