# Multi-Agent Deep Deterministic Policy Gradient for Stock Market

In [7]:
from src.environment.stock_market import StockMarketEnv

# Seed handed to the environment constructor, named so it is easy to find and change.
SEED = 42

env = StockMarketEnv(seed=SEED)
# Keep whatever reset() returns for inspection in the following cell.
state_dict = env.reset()

In [8]:
# Inspect the value returned by env.reset() (this cell only prints it)
# =========================================
# NOTE(review): the recorded output below opens with "(", so `state_dict`
# appears to be a tuple wrapping the dict rather than the dict itself --
# confirm what StockMarketEnv.reset() returns and unpack it if so.
print(state_dict)

({'stock_price': array(100.), 'correlated_stocks': array([149.6714153 ,  86.17356988, 164.76885381, 252.30298564,
        76.58466253,  76.58630431, 257.92128155, 176.74347292,
        53.05256141, 154.25600436,  53.65823072,  53.42702464,
       124.19622716,   1.        ,   1.        ,  43.77124708,
         1.        , 131.42473326,   9.19759245]), 'uncorrelated_stocks': array([  1.        , 246.56487689,  77.42236995, 106.75282047,
         1.        ,  45.56172755, 111.09225897,   1.        ,
       137.56980183,  39.93613101]), 'budgets': array([  100.,   100.,   100.,   100.,   100.,  1000.,  1000.,  1000.,
        1000., 10000.]), 'shares_held': array([500., 500., 500., 500., 500., 500., 500., 500., 500., 500.]), 'agent_views': array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

---

In [1]:
import gym
import torch as th
from src.critic.ddpg_critic import DDPGCritic
from src.memory.replay_buffer import ReplayBuffer
from src.policy.ddpg_policy import DDPGPolicy

In [2]:
# CartPole using the five-value step API. `.unwrapped` drops the wrappers
# added by gym.make() and keeps only the underlying environment.
env = gym.make(
    'CartPole-v1',
    new_step_api=True,
    render_mode='single_rgb_array',
).unwrapped

# Observation spaces with more than two dimensions are treated as images.
is_image = len(env.observation_space.shape) > 2
# Detect whether the action space is discrete.
is_discrete = isinstance(env.action_space, gym.spaces.Discrete)

# Image observations keep their full shape tuple; otherwise use the length of
# the first (only) dimension.
if is_image:
    observation_size = env.observation_space.shape
else:
    observation_size = env.observation_space.shape[0]

# Discrete spaces are sized by their number of actions, continuous ones by the
# length of the action vector.
if is_discrete:
    action_size = env.action_space.n
else:
    action_size = env.action_space.shape[0]

# Critic and policy are configured with the same MLP settings, but each one
# receives its own kwargs dict.
critic = DDPGCritic(
    observation_size=observation_size,
    action_size=action_size,
    critic_net='mlp',
    critic_net_kwargs=dict(hidden_size=64, num_layers=2),
)
policy = DDPGPolicy(
    observation_size=observation_size,
    action_size=action_size,
    discrete_action=is_discrete,
    policy_net='mlp',
    policy_net_kwargs=dict(hidden_size=64, num_layers=2),
)

replay_buffer = ReplayBuffer(max_size=1000)

In [3]:
# Take a single environment step using the current policy.
s = env.reset()
# unsqueeze(0) adds a leading batch dimension of size 1 before the policy
# call; a[0] then selects the first entry of what the policy returns.
# NOTE(review): assumes policy.get_action returns one action per batch row in
# a form env.step accepts -- confirm against DDPGPolicy.get_action.
a = policy.get_action(th.from_numpy(s).unsqueeze(0))
# env.step yields five values here. The last two are bound to real names
# instead of `_`: in IPython a user-assigned `_` stops the automatic
# `_`/`__`/`___` last-output variables from being updated, and the discarded
# values stay available for inspection.
# NOTE(review): with new_step_api=True the third value is presumably the
# "terminated" flag; the name `done` is kept so other cells keep working.
s_, r, done, truncated, info = env.step(a[0])

In [5]:
# Display the observation from env.reset() (s) next to the one returned by env.step() (s_).
s, s_

(array([-0.00554833, -0.00416659, -0.00582624,  0.00763587]),
 array([-0.00830804,  0.00192193, -0.13183148,  0.29397739]))