# Learning Environment Dynamics in Partially Observable and Multi-agent Settings with Feed-forward and Recurrent Networks

## Single-agent environments with partial observability

Dependencies: OpenAI Gym, fancyimpute, Keras, MuJoCo

The TensorBoard log is written to "./out/dynamics_learning/...".

In [None]:
import MDP_learning.single_agent.dynamics_learning as ml
import gym

# Create the Gym task whose dynamics will be learned.
env_name = "Hopper-v2"
env = gym.make(env_name)

# The learner is given the environment's spaces explicitly.
observation_space, action_space = env.observation_space, env.action_space

# NOTE(review): partial_obs_rate presumably controls how much of each
# observation is hidden from the model -- confirm in dynamics_learning.
ML = ml.ModelLearner(
    env_name,
    observation_space,
    action_space,
    partial_obs_rate=0.25,
    sequence_length=3,
    epochs=10,
)
ML.run(env)

## Multi-Agent environment
Dependencies: OpenAI multi-agent environments (https://github.com/openai/multiagent-particle-envs)

The TensorBoard log will be written to "./out/multi/<MultiAgentEnv instance>...".

Try different values of `sequence_length` to see how giving the model more information (i.e. more time steps) changes the results.

In [None]:
from MDP_learning.multi_agent import multi, make_env2

# Scenario handed to make_env2.make_env and, as scenario_name, to the learner.
env_name = 'simple'
env = make_env2.make_env(env_name)

# Sequence length of 0 uses a feed-forward network; any other value (100
# here) presumably selects the recurrent model -- verify in multi.py.
MAML = multi.MultiAgentModelLearner(
    env,
    mem_size=100000,
    sequence_length=100,
    scenario_name=env_name,
    epochs=100,
)
MAML.run()


## ATARI environment
Dependencies: keras-rl (https://github.com/keras-rl/keras-rl)

The TensorBoard log will be written to "./dqn_logs/...".

In [None]:
from MDP_learning.from_pixels import dqn_kerasrl_modellearn
import gym

# Build the Atari configuration and the Gym environment it names.
env_name = 'PongDeterministic-v4'
cfg = dqn_kerasrl_modellearn.AtariConfig(env_name)
environment = gym.make(cfg.env_name)
print('Playing: {}'.format(environment))
num_actions = environment.action_space.n

# Set up the DQN agent; setupDQN also returns the hidden-state size that
# is passed on to trainML below.
processor = dqn_kerasrl_modellearn.AtariProcessor(cfg.INPUT_SHAPE)
dqn_agent, hidden_state_size = dqn_kerasrl_modellearn.setupDQN(
    cfg, num_actions, processor
)

# The agent is trained before any dynamics model is fitted.
dqn_kerasrl_modellearn.trainDQN(cfg, environment, dqn_agent)

# Run trainML once per sequence length. Each pass rebinds dynamics_model
# and dqn_convolutions, so only the results of the last pass (16) remain
# bound after the loop.
for seq_len in (1, 4, 16):
    dynamics_model, dqn_convolutions = dqn_kerasrl_modellearn.trainML(
        cfg,
        dqn_agent,
        sequence_length=seq_len,
        hstate_size=hidden_state_size,
        layer_width=2048,
    )