# Stock Market Environment

In [22]:
# https://pettingzoo.farama.org/content/environment_creation/
from __future__ import annotations

import functools
from typing import Any, Dict, Optional, Sequence, Tuple, Union

import gymnasium
import numpy as np
from gymnasium.spaces import Box, Discrete
from gymnasium.spaces import Tuple as TupleSpace
from gymnasium.utils import seeding
from pettingzoo.utils.env import AECEnv, ParallelEnv
from pettingzoo.utils.agent_selector import agent_selector

from src.environment.stock_market import StockMarketEnv

In [2]:
# Instantiate the packaged reference environment with ten agents.
env = StockMarketEnv(num_agents=10)
# Reset it; as the last expression, the return value of `reset()` is displayed.
env.reset()

(array(100.),
 {'correlated_stocks': array([  1.        , 154.97147265,   1.        ,  98.27648006,
           1.        ,  24.96401415,  35.80929104,  15.62987943,
         154.86335629,  37.32828658, 135.13664923, 310.14316109,
          41.75586891, 127.32227573,   1.        ,  94.99460969,
          48.77161991,   1.        , 197.78679955]),
  'uncorrelated_stocks': array([233.46535223, 132.33239285,  29.9598907 , 206.89127044,
          92.70236164, 165.94849926,   1.        ,  37.04451169,
         103.73522043, 201.44036382]),
  'budgets': array([6529.216 , 9136.279 , 5085.9067, 6105.6934, 9710.354 , 7322.015 ,
         6359.48  , 5481.886 , 5643.1816, 9357.217 ], dtype=float32),
  'shares': array([27735, 81585, 67087,   274, 39415, 85740, 55431,  3359, 76489,
         72965]),
  'valid_mask': array([[False,  True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,  True,  True,  True,  True,
           True,  True,  True,  True,  True,

## Design Idea

- The shared full state of the environment is a numpy array of shape [2, $N_{stocks}$]
- Each agent's local observation is the element-wise product of the share states and that agent's mask
- Each agent has their own initial holdings and budget limit.

In [24]:
class StockMarket(ParallelEnv):
    """Work-in-progress multi-agent stock market on PettingZoo's ``ParallelEnv``.

    The market holds one primary stock (``current_price``) plus a set of
    correlated and a set of uncorrelated stocks.  Every agent gets a random
    budget, a random number of shares, a random risk-aversion coefficient
    ``eta`` and a boolean ``valid_mask`` row selecting the stock groups it can
    see.  Market dynamics (``step``) are not implemented yet.

    Args:
        num_agents: Number of agents; they are named ``agent_0`` ...
        num_company: Stored on the instance; not used by this class yet.
        num_correlated_stocks: Number of stocks in the correlated group.
        num_uncorrelated_stocks: Number of stocks in the uncorrelated group.
        max_shares: Exclusive upper bound for the initial share holdings.
        start_prices: Either a scalar (price of the primary stock; the other
            prices are sampled around it) or a sequence holding one price per
            stock, primary stock first.
        budget_range: ``(low, high)`` bounds of the uniform budget sampling.
        budget_discount: Stored on the instance; not used by this class yet.
        step_size: Stored as ``dt``; not used by this class yet.
        price_std: Standard deviation of the sampled start prices.
        noise_std: Stored on the instance; not used by this class yet.
        worth_of_stocks: Stored on the instance; not used by this class yet.
        seed: Seed of the environment's random number generator.
    """

    metadata = {"render_modes": ["human"], "name": "stock_market_v2"}

    def __init__(self,
                 num_agents: int,
                 num_company: int = 5,
                 num_correlated_stocks: int = 19,
                 num_uncorrelated_stocks: int = 10,
                 max_shares: int = 100000,
                 start_prices: Union[float, Sequence[float]] = 100.0,
                 budget_range: Tuple[float, float] = (100.0, 10000.0),
                 budget_discount: float = 0.99,
                 step_size: float = 1.0,
                 price_std: float = 100.0,
                 noise_std: float = 10.0,
                 worth_of_stocks: float = 0.1,
                 seed: int = 0) -> None:
        super().__init__()

        # Agent parameters
        self.num_company = num_company
        self.budget_range = budget_range
        self.budget_discount = budget_discount
        self.max_shares = max_shares  # plain int (a trailing comma made it a tuple)
        self.worth_of_stocks = worth_of_stocks
        self.possible_agents = [f'agent_{i}' for i in range(num_agents)]
        self.agents = self.possible_agents[:]  # Just to follow AECEnv
        self._index_map = {
            name: idx for idx, name in enumerate(self.possible_agents)
        }

        # Stock market parameters
        self.dt = step_size
        self.start_prices = start_prices
        self.price_std = price_std
        self.noise_std = noise_std
        self.num_correlated_stocks = num_correlated_stocks
        self.num_uncorrelated_stocks = num_uncorrelated_stocks
        # +1 accounts for the primary stock tracked by `current_price`.
        self.num_stocks = 1 + num_correlated_stocks + num_uncorrelated_stocks
        # Honour the constructor's seed so that construction is reproducible.
        self.seed(seed)
        self._reset_market(self._np_rng)

        # Observation and Action Spaces
        self._observation_spaces: Dict[str, Any] = {}
        self._action_spaces: Dict[str, Any] = {}
        for agent in self.agents:
            # TODO: Agent specific spaces
            self._observation_spaces[agent] = Box(
                low=1.0,
                high=+np.float32(np.inf),
                shape=(1, )
            )
            # Placeholder action space: a one-element tuple holding a single
            # positive value.  NOTE(review): the final layout of an action is
            # still undecided -- confirm against the planned `step` logic.
            self._action_spaces[agent] = TupleSpace(
                (Box(low=1.0, high=+np.float32(np.inf), shape=(1, )), )
            )
        self._state_space = Box(
            low=1.0,  # minimum price is $1.0
            high=+np.float32(np.inf),
            shape=(self.num_stocks,),
            dtype=np.float32
        )

        self._agent_selector = agent_selector(self.agents)

    def observation_space(self, agent: str) -> gymnasium.spaces.Space:
        """Return the observation space registered for ``agent``."""
        return self._observation_spaces[agent]

    def action_space(self, agent: str) -> gymnasium.spaces.Space:
        """Return the action space registered for ``agent``."""
        return self._action_spaces[agent]

    def reset(self,
              seed: Optional[int] = None,
              return_info: bool = False,
              options: Optional[Dict] = None) -> Dict[str, Any]:
        """Re-sample the market and return the initial observations.

        Args:
            seed: When given, the random number generator is re-seeded first.
            return_info: Accepted for API compatibility; currently unused.
            options: Accepted for API compatibility; currently unused.

        Returns:
            A dict mapping every agent name to its observation.
        """
        if seed is not None:
            self.seed(seed)
        # `num_agents` is derived from `agents`, so restore the roster first.
        self.agents = self.possible_agents[:]
        self._reset_market(self._np_rng)

        # TODO: replace with agent-specific observations.  Until then every
        # agent receives the primary stock price, which matches the declared
        # (1,)-shaped observation space.
        return {
            agent: np.array([self.current_price], dtype=np.float32)
            for agent in self.agents
        }

    def seed(self, seed: Optional[int] = None) -> None:
        """(Re)create the environment's random number generator from ``seed``."""
        self._np_rng, seed = seeding.np_random(seed)

    def step(self,
             actions: Dict[str, Tuple[np.ndarray, np.ndarray]]
             ) -> Tuple[Dict, Dict, Dict, Dict, Dict]:
        """Advance the market by one step.

        TODO: market dynamics are not implemented; the call is forwarded
        unchanged to the base class.
        """
        return super().step(actions)

    @staticmethod
    def _crra_utility(c: float, eta: float) -> float:
        """Evaluate the utility of ``c`` for risk aversion ``eta`` (log at eta == 1)."""
        if eta != 1.:
            return (c ** (1. - eta) - 1.) / (1. - eta)
        return np.log(c)

    def _reset_market(self,
                      np_rng: Optional[np.random.Generator] = None) -> None:
        """Sample prices, visibility masks, budgets, holdings and utilities.

        Args:
            np_rng: Generator used for every random draw; defaults to the
                environment's own generator.

        Raises:
            ValueError: If ``start_prices`` is a sequence whose length differs
                from the number of stocks.
        """
        rng = self._np_rng if np_rng is None else np_rng
        num_other = self.num_correlated_stocks + self.num_uncorrelated_stocks

        if np.ndim(self.start_prices) == 0:
            # Scalar start price (int or float): sample the remaining stocks
            # around it.
            self.current_price = float(self.start_prices)
            other_prices = np.clip(
                rng.normal(loc=self.current_price,
                           scale=self.price_std,
                           size=(num_other,)),
                a_min=1.0, a_max=None  # Can never have prices below $1
            )
        else:
            prices = np.asarray(self.start_prices, dtype=float)
            if prices.shape != (self.num_stocks,):
                raise ValueError(
                    f"start_prices must hold {self.num_stocks} prices, "
                    f"got shape {prices.shape}"
                )
            self.current_price = float(prices[0])
            other_prices = prices[1:].copy()

        # Split by explicit offset: a negative slice `[-0:]` would select the
        # whole array when there are no uncorrelated stocks.
        self._corr_prices = other_prices[:self.num_correlated_stocks]
        self._uncorr_prices = other_prices[self.num_correlated_stocks:]

        # Randomly initialize masks for agents
        uncorr_start = 1 + self.num_correlated_stocks
        self.valid_mask = np.zeros(shape=(self.num_agents, self.num_stocks),
                                   dtype="bool")
        sees_correlated = rng.choice(2, self.num_agents).astype(bool)
        self.valid_mask[sees_correlated, 1:uncorr_start] = True
        sees_uncorrelated = rng.choice(2, self.num_agents).astype(bool)
        self.valid_mask[sees_uncorrelated, uncorr_start:] = True

        # Initialize budget and holdings
        budget_low, budget_high = self.budget_range[0], self.budget_range[1]
        self.budgets = budget_low + rng.random(
            size=(self.num_agents, ), dtype='float32'
        ) * (budget_high - budget_low)
        self.shares = rng.integers(low=1,
                                   high=self.max_shares,
                                   size=(self.num_agents,))

        # Randomly initialize utility functions.  Drawn from the seeded
        # generator (not the global `np.random`) so resets are reproducible.
        self.eta = np.clip(
            rng.normal(loc=1.5, scale=1.5, size=(self.num_agents,)),
            a_min=0, a_max=10
        )
        self.CRRA_utility = np.vectorize(self._crra_utility)

        self.timestep = 0
        # NOTE(review): presumably the number of minutes in a 6.5 h trading
        # session -- confirm.
        self.ep_len = 390

# Smoke test: build the environment for five agents and list their names.
env = StockMarket(num_agents=5)
env.agents

['agent_0', 'agent_1', 'agent_2', 'agent_3', 'agent_4']

In [29]:
from pettingzoo.mpe import simple_adversary_v2

# Reference run of a stock PettingZoo parallel environment: the displayed
# result shows what a `step` call returns.
env = simple_adversary_v2.parallel_env()
env.reset()
# Sample one random action per live agent from that agent's action space.
actions = {
    agent: env.action_space(agent).sample()
    for agent in env.agents
}
env.step(actions)

({'adversary_0': array([1.3983899 , 1.0659161 , 0.31051356, 0.4426825 , 1.0245105 ,
         0.0577454 , 0.8874174 , 0.23579477], dtype=float32),
  'agent_0': array([-0.7139969 ,  0.3849371 ,  0.37387943,  1.0081707 , -0.7139969 ,
          0.3849371 , -1.0245105 , -0.0577454 , -0.13709307,  0.17804937],
        dtype=float32),
  'agent_1': array([-0.5769038 ,  0.20688774,  0.5109725 ,  0.83012134, -0.5769038 ,
          0.20688774, -0.8874174 , -0.23579477,  0.13709307, -0.17804937],
        dtype=float32)},
 defaultdict(int,
             {'adversary_0': -0.5407277307482516,
              'agent_0': -0.07215118644408491,
              'agent_1': -0.07215118644408491}),
 {'adversary_0': False, 'agent_0': False, 'agent_1': False},
 {'adversary_0': False, 'agent_0': False, 'agent_1': False},
 {'adversary_0': {}, 'agent_0': {}, 'agent_1': {}})