In [3]:
!pip install gymnasium



In [5]:
!pip install pandas



In [6]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd

In [13]:
class StockMarketEnv(gym.Env):
  """Single-asset, all-in/all-out trading environment on a "Close" price series.

  Actions (``spaces.Discrete(3)``):
    0 = Hold, 1 = Buy as many whole shares as the cash balance allows,
    2 = Sell every share currently held.

  Observation (``float32`` vector of length 4):
    [current_price, balance, shares held, portfolio value]

  Reward: portfolio value minus the initial balance (cumulative profit/loss),
  where the portfolio is valued at the price the step traded at, i.e. the
  price of the row the step counter pointed to *before* it was advanced.

  Args:
    stock_data: DataFrame with a "Close" column; its index is reset to
      0..n-1 so rows can be addressed by step number.
    initial_balance: Starting cash.

  Raises:
    ValueError: If ``stock_data`` has no "Close" column or has no rows.
  """

  metadata = {"render_modes": ["human"]}

  def __init__(self, stock_data : pd.DataFrame, initial_balance= 100000):
    super().__init__()

    if "Close" not in stock_data.columns:
      raise ValueError('stock_data must contain a "Close" column')
    if len(stock_data) == 0:
      raise ValueError("stock_data must contain at least one row")

    # Positional 0..n-1 index so that .loc[step, "Close"] addresses row `step`.
    self.stock_data = stock_data.reset_index(drop=True)
    self.n_steps = len(self.stock_data)

    self.initial_balance = initial_balance
    self.current_balance = initial_balance
    self.current_step = 0
    self.total_shares = 0
    self.portfolio_value = initial_balance
    self.action_space = spaces.Discrete(3)

    # Observation space consists of [current_price, balance, shares held, portfolio value]
    self.observation_space = spaces.Box(
        low=0, high=np.inf, shape=(4,), dtype=np.float32
    )

  def reset(self, *, seed=None, options=None):
    """Restore the starting cash/position and rewind to the first row.

    ``seed`` and ``options`` are keyword-only and optional, so a plain
    ``env.reset()`` keeps working. ``seed`` is forwarded to ``gym.Env.reset``;
    ``options`` is accepted for API compatibility and is not used.

    Returns:
      ``(observation, info)`` where ``info`` is an empty dict.
    """
    super().reset(seed=seed)

    self.current_balance = self.initial_balance
    self.current_step = 0
    self.total_shares = 0
    self.portfolio_value = self.initial_balance

    return self._get_obs(), {}

  def _current_price(self):
    """Return the "Close" price for the current step, clamped to the last row."""
    # Clamp so that a step counter past the end of the data (single-row data,
    # or a step() issued after the episode ended) does not raise KeyError.
    row = min(self.current_step, self.n_steps - 1)
    return self.stock_data.loc[row, "Close"]

  def _get_obs(self):
    """Build the observation [price, balance, shares, portfolio value] as float32."""
    return np.array(
        [self._current_price(), self.current_balance, self.total_shares, self.portfolio_value],
        dtype=np.float32,
    )

  def step(self, action):
    """Apply one action at the current price and advance one row.

    Action: 0 = Hold, 1 = Buy, 2 = Sell. Any other value leaves the position
    unchanged, exactly like Hold.

    Returns:
      ``(observation, reward, terminated, truncated, info)``; ``truncated`` is
      always ``False`` and ``info`` is an empty dict.
    """
    current_price = self._current_price()

    if action == 1:
      # A non-positive price cannot be bought at (and would divide by zero).
      if current_price > 0:
        shares_to_buy = self.current_balance // current_price
        self.total_shares += shares_to_buy
        self.current_balance -= shares_to_buy * current_price
    elif action == 2:
      self.current_balance += self.total_shares * current_price
      self.total_shares = 0

    # Mark to market on every step, including Hold, so that price moves on
    # held shares are reflected in the portfolio value and the reward.
    self.portfolio_value = self.current_balance + self.total_shares * current_price

    self.current_step += 1

    # Reward is cumulative profit/loss relative to the starting cash.
    reward = self.portfolio_value - self.initial_balance
    # The episode ends once the step counter reaches the last row.
    done = self.current_step >= self.n_steps - 1
    return self._get_obs(), reward, done, False, {}

  def render(self, mode="human"):
    """Print the current step, price, cash balance, shares and portfolio value."""
    current_price = self._current_price()
    print(f"Step: {self.current_step}")
    print(f"Price: {current_price}")
    print(f"Balance: {self.current_balance}")
    print(f"Shares: {self.total_shares}")
    print(f"Portfolio Value: {self.portfolio_value}")

  def close(self):
    """No resources to release."""
    pass







In [14]:
import pandas as pd

# Tiny synthetic price series, just enough to exercise the environment.
data = pd.DataFrame({
    "Close": [100, 102, 101, 105, 110, 108]  # Example prices
})

env = StockMarketEnv(stock_data=data)

# Seed the action sampler so the random rollout is reproducible across runs.
env.action_space.seed(0)

# To test
obs, info = env.reset()
done = False
truncated = False

# Roll out one episode with random actions; stop on either end-of-episode
# signal returned by step() (terminated or truncated).
while not (done or truncated):
    action = env.action_space.sample()  # Random action
    obs, reward, done, truncated, info = env.step(action)
    env.render()


Step: 1
Price: 102
Balance: 100000
Shares: 0
Portfolio Value: 100000
Step: 2
Price: 101
Balance: 40
Shares: 980
Portfolio Value: 100000
Step: 3
Price: 105
Balance: 99020
Shares: 0
Portfolio Value: 99020
Step: 4
Price: 110
Balance: 5
Shares: 943
Portfolio Value: 99020
Step: 5
Price: 108
Balance: 5
Shares: 943
Portfolio Value: 103735
