In [41]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class StockMarketEnv(gym.Env):

    def __init__(self, stock_price_history,initial_cash=1000):
        self.stock_price_history = stock_price_history
        self.initial_cash = initial_cash

        self.action_space = spaces.Discrete(3, start=-1) # -1: Sell, 0: Hold, 1: Buy
        self.observation_space = spaces.Box(low=0,high=np.inf,shape=(5,), dtype=np.float32) # Shape = (Open, High, Low, Close, Volume)
        self.reset()

    def reset(self):
        self.current_step = 0
        self.cash_in_hand = self.initial_cash
        self.stock_holding = 0
        self.step_info = []  # Initialize an empty list to store step information
        self.stock_price = self.stock_price_history[self.current_step]
        self.total_portfolio_value = self.cash_in_hand + (self.stock_holding * self.stock_price)
    
    def step(self, action):
        assert self.action_space.contains(action)

        prev_valuation = self.total_portfolio_value
        step_data = {
            'Step': self.current_step,
            'Portfolio Value': self.total_portfolio_value,
            'Cash': self.cash_in_hand,
            'Stock Value': self.stock_price * self.stock_holding, 
            'Stock Holdings': self.stock_holding,
            'Stock Price': self.stock_price,
            "Action": action
        }


        
        if action == -1: # Sell
            if self.stock_holding > 0:
                self.num_stocks_sell = self.stock_holding # Sell all stocks (Current Mehtod)
                self.cash_in_hand += self.num_stocks_sell * self.stock_price  # No commission fee can be added later
                self.stock_holding -= self.num_stocks_sell
                self.num_stocks_sell = 0
        elif action == 0: # Hold
            pass
        elif action == 1: # Buy
            if self.cash_in_hand >= self.stock_price:
                self.num_stocks_buy = np.floor(self.cash_in_hand/self.stock_price) # Buy Maximum allowed (Current Method)
                self.cash_in_hand -= self.num_stocks_buy * self.stock_price
                self.stock_holding = self.num_stocks_buy
                self.num_stocks_buy = 0

        self.total_portfolio_value = self.cash_in_hand + (self.stock_holding * self.stock_price)
        reward = self.total_portfolio_value - prev_valuation

        done = self.current_step >= len(self.stock_price_history) - 1
        print(step_data)
        self.step_info.append(step_data)

        if not done:
            self.current_step += 1
            self.stock_price = self.stock_price_history[self.current_step]

        return np.array([self.cash_in_hand, self.stock_holding]), reward, done
    
    def get_step_data(self):
        return pd.DataFrame(self.step_info)  # Generate a DataFrame from stored step information

    
# Example usage:
# Define a stock price history (this could be real data or simulated)
stock_prices = [100, 105, 98, 110, 102]  # Replace this with your own stock price data
env = StockMarketEnv(stock_prices, initial_cash=1000)

observation = env.reset()
actions = [0,1, 0, 1, -1]  # List of actions (replace with your RL algorithm's actions)

# Start the loop 
for action in actions:
    assert env.action_space.contains(action), f"Action {action} is not in the action space"
    observation, reward, done = env.step(action)
    

env.close()  # Close the environment

# Retrieve the accumulated step information and create a DataFrame
full_sequence = env.get_step_data()
display(full_sequence)

{'Step': 0, 'Portfolio Value': 1000, 'Cash': 1000, 'Stock Value': 0, 'Stock Holdings': 0, 'Stock Price': 100, 'Action': 0}
{'Step': 1, 'Portfolio Value': 1000, 'Cash': 1000, 'Stock Value': 0, 'Stock Holdings': 0, 'Stock Price': 105, 'Action': 1}
{'Step': 2, 'Portfolio Value': 1000.0, 'Cash': 55.0, 'Stock Value': 882.0, 'Stock Holdings': 9.0, 'Stock Price': 98, 'Action': 0}
{'Step': 3, 'Portfolio Value': 937.0, 'Cash': 55.0, 'Stock Value': 990.0, 'Stock Holdings': 9.0, 'Stock Price': 110, 'Action': 1}
{'Step': 4, 'Portfolio Value': 1045.0, 'Cash': 55.0, 'Stock Value': 918.0, 'Stock Holdings': 9.0, 'Stock Price': 102, 'Action': -1}


Unnamed: 0,Step,Portfolio Value,Cash,Stock Value,Stock Holdings,Stock Price,Action
0,0,1000.0,1000.0,0.0,0.0,100,0
1,1,1000.0,1000.0,0.0,0.0,105,1
2,2,1000.0,55.0,882.0,9.0,98,0
3,3,937.0,55.0,990.0,9.0,110,1
4,4,1045.0,55.0,918.0,9.0,102,-1
