In [12]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class DiscretizedOHLCVEnv(gym.Env):
       
    def __init__(self, ohlcv_data,bins_per_feature:list,initial_cash=1000):
        self.ohlcv_raw_data = ohlcv_data
        self.initial_cash = initial_cash
        self.bins_per_feature = bins_per_feature
        self.action_space = spaces.Discrete(3, start=-1) # -1: Sell, 0: Hold, 1: Buy
        self.available_actions = set(range(self.action_space.start,self.action_space.n-1))
        self.observation_space = spaces.MultiDiscrete([bins_per_feature] * len(ohlcv_data[0])) # Shape = (Open, High, Low, Close, Volume)
        self.ohlcv_binned_data = self.discretize_ohlcv(self.ohlcv_raw_data,self.bins_per_feature)
        self.reset()

    def reset(self):
        self.current_step = 0
        self.cash_in_hand = self.initial_cash
        self.stock_holding = 0
        self.step_info = []  # Initialize an empty list to store step information
        self.stock_price = self.ohlcv_raw_data[self.current_step][3] #Stock price set to closing price
        self.total_portfolio_value = self.cash_in_hand + (self.stock_holding * self.stock_price)
    
    def step(self, action):
        assert self.action_space.contains(action) ##move to available_actions

        prev_valuation = self.total_portfolio_value
        step_data = {
            'Step': self.current_step,
            'Portfolio Value': self.total_portfolio_value,
            'Cash': self.cash_in_hand,
            'Stock Value': self.stock_price * self.stock_holding, 
            'Stock Holdings': self.stock_holding,
            'Stock Price': self.stock_price,
            "Action": action
        }
        

        
        if action == -1: # Sell
            if self.stock_holding > 0:
                self.num_stocks_sell = self.stock_holding # Sell all stocks (Current Mehtod)
                self.cash_in_hand += self.num_stocks_sell * self.stock_price  # No commission fee can be added later
                self.stock_holding -= self.num_stocks_sell
                self.num_stocks_sell = 0
                self.available_actions.remove(-1)
                self.available_actions.add(1)
        elif action == 0: # Hold
            pass
        elif action == 1: # Buy
            if self.cash_in_hand >= self.stock_price:
                self.num_stocks_buy = np.floor(self.cash_in_hand/self.stock_price) # Buy Maximum allowed (Current Method)
                self.cash_in_hand -= self.num_stocks_buy * self.stock_price
                self.stock_holding = self.num_stocks_buy
                self.num_stocks_buy = 0
                self.available_actions.remove(1)
                self.available_actions.add(-1)

        self.total_portfolio_value = self.cash_in_hand + (self.stock_holding * self.stock_price)
        reward = self.total_portfolio_value - prev_valuation

        done = self.current_step >= len(self.ohlcv_raw_data) - 1
        print(step_data)
        self.step_info.append(step_data)

        if not done:
            self.current_step += 1
            self.stock_price = self.ohlcv_raw_data[self.current_step][3] ## Assuming Closing price for stock price, 2nd place implemented...need to simplify

        next_observation = self.get_observation()
        return next_observation, reward, done
    
    def get_observation(self):
        return(self.ohlcv_binned_data[self.current_step])

    def get_step_data(self):
        return pd.DataFrame(self.step_info)  # Generate a DataFrame from stored step information

    def discretize_ohlcv(self, data, bins_for_feature):
        discretized_data = []
        for column,num_bins in zip(data.T, bins_for_feature):  # Transpose to iterate through columns
            min_val = np.min(column)
            max_val = np.max(column)
            bin_width = (max_val - min_val) / num_bins
            bins = [min_val + i * bin_width for i in range(num_bins)]
            digitized = np.digitize(column, bins)
            discretized_data.append(digitized)
        return np.array(discretized_data).T  # Transpose back to original shape
    
# Example usage:
# Define a stock price history (this could be real data or simulated)
stock_prices = np.array([[100, 103, 95, 102, 50000],
                [105, 105, 90, 96, 75000],
                [98, 105, 98, 104, 65000],
                [110, 110, 100, 101, 100000],
                [102, 111, 102, 110, 100000]])  # Replace this with your own stock price data
env = DiscretizedOHLCVEnv(stock_prices,[20,10,10,20,20], initial_cash=1000)

observation = env.reset()
actions = [1,0,-1, 1, -1]  # List of actions (replace with your RL algorithm's actions)

# Start the loop 
for action in actions:
    assert env.action_space.contains(action), f"Action {action} is not in the action space"
    observation, reward, done = env.step(action)
    print(env.available_actions)
    print(observation)
env.close()  # Close the environment

# Retrieve the accumulated step information and create a DataFrame
full_sequence = env.get_step_data()
display(full_sequence)


{'Step': 0, 'Portfolio Value': 1000, 'Cash': 1000, 'Stock Value': 0, 'Stock Holdings': 0, 'Stock Price': 102, 'Action': 1}
{0, -1}
[12  3  1  1 11]
{'Step': 1, 'Portfolio Value': 1000.0, 'Cash': 82.0, 'Stock Value': 864.0, 'Stock Holdings': 9.0, 'Stock Price': 96, 'Action': 0}
{0, -1}
[ 1  3  7 12  7]
{'Step': 2, 'Portfolio Value': 946.0, 'Cash': 82.0, 'Stock Value': 936.0, 'Stock Holdings': 9.0, 'Stock Price': 104, 'Action': -1}
{0, 1}
[20  9  9  8 20]
{'Step': 3, 'Portfolio Value': 1018.0, 'Cash': 1018.0, 'Stock Value': 0.0, 'Stock Holdings': 0.0, 'Stock Price': 101, 'Action': 1}
{0, -1}
[ 7 10 10 20 20]
{'Step': 4, 'Portfolio Value': 1018.0, 'Cash': 8.0, 'Stock Value': 1100.0, 'Stock Holdings': 10.0, 'Stock Price': 110, 'Action': -1}
{0, 1}
[ 7 10 10 20 20]


Unnamed: 0,Step,Portfolio Value,Cash,Stock Value,Stock Holdings,Stock Price,Action
0,0,1000.0,1000.0,0.0,0.0,102,1
1,1,1000.0,82.0,864.0,9.0,96,0
2,2,946.0,82.0,936.0,9.0,104,-1
3,3,1018.0,1018.0,0.0,0.0,101,1
4,4,1018.0,8.0,1100.0,10.0,110,-1
