# Taxi OOMDP

This notebook tests and evaluates different reinforcement-learning algorithms on the Taxi World object-oriented MDP (OOMDP).

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and switch it to mode 2 so that edited modules
# are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

Import the Taxi environment and the general-purpose packages used in this notebook.

In [None]:
import os
import sys
import numpy as np

from baselines.her.envs.oomdp_taxi import TaxiEnv

Create the Taxi domain (the `diuk_7x7` layout with a fixed seed) and keep a reference to its underlying MDP.

In [None]:
# Instantiate the Taxi environment on the 'diuk_7x7' layout with a fixed seed,
# then keep a handle on the MDP object it exposes for the cells below.
env = TaxiEnv(
    seed=1,
    layout='diuk_7x7',
)
mdp = env.mdp

## Simple RL tests

Tests of the Taxi MDP using the agents and experiment runners provided by the `simple_rl` package.

In [None]:
from simple_rl.agents import QLearningAgent, RandomAgent, LinearQAgent, DQNAgent, RMaxAgent
from simple_rl.run_experiments import run_agents_on_mdp, run_single_agent_on_mdp

In [None]:
# Build every agent used in this section from the MDP's action set.
ql_agent = QLearningAgent(
    actions=mdp.get_actions(),
)
rand_agent = RandomAgent(
    actions=mdp.get_actions(),
)

# The linear-Q and DQN agents are additionally sized from the MDP's
# state-feature count.
linear_ql_agent = LinearQAgent(
    actions=mdp.get_actions(),
    num_features=mdp.get_num_state_feats(),
    rbf=False,
    alpha=0.2,
)
dqn_agent = DQNAgent(
    actions=mdp.get_actions(),
    num_channels=1,
    x_dim=mdp.get_num_state_feats(),
    y_dim=1,
)

rmax_agent = RMaxAgent(
    s_a_threshold=2,
    actions=mdp.get_actions(),
)

In [None]:
# Run the Q-learning agent on its own against the Taxi MDP, with verbose output.
# NOTE(review): `ql_agent` is also passed to `run_agents_on_mdp` further down —
# confirm whether what it learns here is meant to carry over into that comparison.
run_single_agent_on_mdp(
    ql_agent,
    mdp,
    steps=500,
    episodes=500,
    verbose=True,
)

In [None]:
# Hand the random agent to the MDP's `visualize_agent` helper.
# NOTE(review): the preceding cell trains `ql_agent`, but this one displays
# `rand_agent` — confirm that the random agent is the one meant to be shown.
mdp.visualize_agent(rand_agent)

In [None]:
# Compare the DQN, linear-Q, Q-learning and random agents on the same MDP
# (instances=3, episodes=500, steps=200), with reset_at_terminal enabled and
# open_plot disabled.
run_agents_on_mdp(
    [
        dqn_agent,
        linear_ql_agent,
        ql_agent,
        rand_agent,
    ],
    mdp,
    steps=200,
    episodes=500,
    instances=3,
    open_plot=False,
    reset_at_terminal=True,
)

## Baselines RL tests

Tests of the Taxi MDP with the HER training pipeline (`baselines.her`) from the `baselines` package.

In [None]:
from baselines.her.experiment import train

In [None]:
# Launch `baselines.her` training for the environment named 'Taxi-OOMDP-v1'
# with a single CPU, a fixed seed and no explicit log directory.
train.launch(
    'Taxi-OOMDP-v1',
    seed=1,
    num_cpu=1,
    n_epochs=500,
    logdir=None,
    clip_return=1,
    replay_strategy="future",
    policy_save_interval=5,
)