In [1]:
from agents.AbstractAgent import AbstractAgent
from environments.AbstractEnvironment import AbstractEnvironment
from environments.Runner import Runner
from environments.TwoDMazeEnvironment import TwoDMazeEnvironment


def test_agent(
    agent: AbstractAgent,
    environment: AbstractEnvironment,
    n_training_episodes: int = 1000,
    max_steps_per_episode: int = 200,
) -> None:
    """Train ``agent`` on ``environment``, then replay one verbose episode.

    The agent is first run for ``n_training_episodes`` episodes with learning
    enabled, then for a single episode with ``verbose=True`` and
    ``learn=False`` so the resulting behaviour is printed.

    Args:
        agent: The agent to train and demonstrate.
        environment: The environment the agent acts in. It is reset before
            this function returns, including when a run raises.
        n_training_episodes: Number of learning episodes (default 1000).
        max_steps_per_episode: Step cap handed to ``Runner`` (default 200).
    """
    runner = Runner(agent, max_steps_per_episode=max_steps_per_episode)
    try:
        # Learning phase.
        runner.run(environment, n_episodes=n_training_episodes, learn=True)
        # Demonstration phase: one printed episode without further learning.
        runner.run(environment, n_episodes=1, verbose=True, learn=False)
    finally:
        # The same environment object is reused by every later call, so
        # reset it even if a run fails or is interrupted part-way through.
        environment.reset()
    
# Single goal
environment = TwoDMazeEnvironment(
    position_shape=(5, 5),
    initial_agent_position=(0, 0),
    exit_positions=[(4, 4)],
    initial_position_rewards={(4, 4): 10},
)
    
# Dual goal
# environment = TwoDMazeEnvironment(position_shape=(10,10), initial_agent_position=(3,3), exit_positions=[(0,0),(9,9)], initial_position_rewards={(0,0):1, (9,9): 10})

In [2]:
from agents.QLearningAgent import QLearningAgent


# Train a QLearningAgent on the shared environment, then print one non-learning episode.
test_agent(agent=QLearningAgent(environment), environment=environment)

Episode: 1
Step: 1
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ |A| | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ |A| | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| |E]
Step: 9
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 10
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [3]:
from agents.DoubleQLearningAgent import DoubleQLearningAgent


# Train a DoubleQLearningAgent on the shared environment, then print one non-learning episode.
test_agent(agent=DoubleQLearningAgent(environment), environment=environment)

Episode: 1
Step: 1
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 9
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 10
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 11
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | | ]
[ | | | |E]
Step: 12
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | |E]
Step: 13
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | |E]
Step: 14
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ |A| | | ]
[ | | | |E]
Step: 15
[ | | | | ]
[ | | | | ]

In [4]:
from agents.SarsaAgent import SarsaAgent


# Train a SarsaAgent on the shared environment, then print one non-learning episode.
test_agent(agent=SarsaAgent(environment), environment=environment)

Episode: 1
Step: 1
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 9
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 10
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 11
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 12
[ | | | | ]
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 13
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 14
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 15
[ | |A| | ]
[ | | | | ]

In [5]:
from agents.ExpectedSarsaAgent import ExpectedSarsaAgent


# Train an ExpectedSarsaAgent on the shared environment, then print one non-learning episode.
test_agent(agent=ExpectedSarsaAgent(environment), environment=environment)

Episode: 1
Step: 1
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 9
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [6]:
from agents.StateValueTemporalDifferenceZeroAgent import StateValueTemporalDifferenceZeroAgent


# Train a StateValueTemporalDifferenceZeroAgent on the shared environment,
# then print one non-learning episode.
test_agent(
    agent=StateValueTemporalDifferenceZeroAgent(environment),
    environment=environment,
)

Episode: 1
Step: 1
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | |E]
Step: 9
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 10
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [7]:
from agents.TemporalDifferenceNStepsToExpectedSarsaAgent import TemporalDifferenceNStepsToExpectedSarsaAgent


# Train a TemporalDifferenceNStepsToExpectedSarsaAgent (n_steps=2) on the shared
# environment, then print one non-learning episode.
test_agent(
    agent=TemporalDifferenceNStepsToExpectedSarsaAgent(environment, n_steps=2),
    environment=environment,
)

Episode: 1
Step: 1
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [8]:
from agents.MonteCarloAgent import MonteCarloAgent


# Train a MonteCarloAgent on the shared environment, then print one non-learning episode.
test_agent(agent=MonteCarloAgent(environment), environment=environment)

Episode: 1
Step: 1
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | |A]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | |E]
Step: 9
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 10
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [9]:
from agents.PolicyIterationAgent import PolicyIterationAgent


# Run a PolicyIterationAgent through the same train-then-replay routine on the shared environment.
test_agent(agent=PolicyIterationAgent(environment), environment=environment)

Policy evaluation stopped at iteration 0
Policy improvement stopped at iteration 8
Episode: 1
Step: 1
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[A| | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ |A| | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [10]:
from agents.ValueIterationAgent import ValueIterationAgent


# Run a ValueIterationAgent through the same train-then-replay routine on the shared environment.
test_agent(agent=ValueIterationAgent(environment), environment=environment)

Stopped at iteration 59
Episode: 1
Step: 1
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | | | | ]
[ | | | | ]
[A| | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[A| | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ |A| | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A|E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [11]:
from agents.ReinforceAgent import ReinforceAgent


# Train a ReinforceAgent on the shared environment, then print one non-learning episode.
test_agent(agent=ReinforceAgent(environment), environment=environment)

Episode: 1
Step: 1
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[A| | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | |A| ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 6
[ | | | |A]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | |A]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | | ]
[ | | | |E]
Step: 9
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | | ]
[ | | | |E]
Step: 10
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | |E]
Step: 11
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]


In [12]:
from agents.ActorCriticAgent import ActorCriticAgent


# Train an ActorCriticAgent on the shared environment, then print one non-learning episode.
test_agent(agent=ActorCriticAgent(environment), environment=environment)

Episode: 1
Step: 1
[ |A| | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 2
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 3
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | | ]
[ | | | |E]
Step: 4
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | | ]
[ | | | |E]
Step: 5
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | |A| | ]
[ | | | |E]
Step: 6
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | |A| ]
[ | | | |E]
Step: 7
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]
[ | | | |E]
Step: 8
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | | ]
[ | | | |A]
