# Multi-Agent Deep Deterministic Policy Gradient for Stock Market

In [8]:
from src.environment.stock_market import StockMarketEnv

# Build the stock-market environment with a fixed seed (presumably for reproducible runs).
env = StockMarketEnv(seed=42)
# reset() is unpacked as a pair; only the first element, the state dictionary, is kept.
state_dict, _ = env.reset()

In [22]:
# Inspect the shape of the `agent_views` entry of the state dictionary.
state_dict['agent_views'].shape

(10, 30)

## Stock Market Environment

- __Hyperparameters__
- __Observation Space__
  - `stock_price`: `ndarray` of shape $[N_{stock}, ]$
  - `correlated_stock`: `ndarray` of shape $[N_{correlated}, ]$
  - `uncorrelated_stock`: `ndarray` of shape $[N_{uncorrelated}, ]$
  - `budgets`: `ndarray` of shape $[N_{agents}, ]$
  - `shares_held`: `ndarray` of shape $[N_{agents}, ]$
  - `agent_views`: `ndarray` of shape $[N_{agents}, N_{stock}]$
  - `company_states`: `ndarray` of shape $[N_{company}, ]$
- __Action Space__
  - dimension_1: log buy/sell prices $\log p\in\mathbb{R}$
  - dimension_2: discrete shares $s\in\mathbb{N}$


In [11]:
# Process state dictionary
# =========================================
# The number of agents is taken as the length of the `budgets` entry
# of the state dictionary returned by env.reset().
n_agents: int = len(state_dict["budgets"])
print(n_agents)

10


---

In [1]:
import gym
import torch as th
from src.critic.ddpg_critic import DDPGCritic
from src.memory.replay_buffer import ReplayBuffer
from src.policy.ddpg_policy import DDPGPolicy

In [2]:
# Create CartPole with the new step API enabled and keep the unwrapped base env.
env = gym.make(
    'CartPole-v1',
    new_step_api=True,
    render_mode='single_rgb_array',
).unwrapped

# An observation counts as an image when its shape has more than two dimensions.
is_image = len(env.observation_space.shape) > 2
# Flag whether the action space is a gym `Discrete` space.
is_discrete = isinstance(env.action_space, gym.spaces.Discrete)

# Image observations keep their full shape; otherwise only the first
# dimension of the observation shape is used as the size.
if is_image:
    observation_size = env.observation_space.shape
else:
    observation_size = env.observation_space.shape[0]

# Discrete spaces use their `n`; otherwise the first dimension of the
# action-space shape is used.
if is_discrete:
    action_size = env.action_space.n
else:
    action_size = env.action_space.shape[0]

# Critic and policy are both configured with an 'mlp' network using the
# same hidden size and layer count (each gets its own kwargs dict).
critic = DDPGCritic(
    observation_size=observation_size,
    action_size=action_size,
    critic_net='mlp',
    critic_net_kwargs=dict(hidden_size=64, num_layers=2),
)
policy = DDPGPolicy(
    observation_size=observation_size,
    action_size=action_size,
    discrete_action=is_discrete,
    policy_net='mlp',
    policy_net_kwargs=dict(hidden_size=64, num_layers=2),
)

# Replay buffer constructed with max_size=1000.
replay_buffer = ReplayBuffer(max_size=1000)

In [3]:
# Reset the environment; the return value is used directly as the observation.
s = env.reset()
# Convert the observation to a tensor, add a leading dimension with unsqueeze(0),
# and ask the policy for an action.
a = policy.get_action(th.from_numpy(s).unsqueeze(0))
# Step with the first entry of the returned action. step() is unpacked into
# five values; only the next observation, reward and `done` are kept.
# NOTE(review): the fourth value is presumably the `truncated` flag of the
# new step API and is discarded here — confirm this is intended.
s_, r, done, _, _ = env.step(a[0])

In [5]:
# Display the observation before (s) and after (s_) the environment step.
s, s_

(array([-0.00554833, -0.00416659, -0.00582624,  0.00763587]),
 array([-0.00830804,  0.00192193, -0.13183148,  0.29397739]))