In [2]:
from agents.AbstractAgent import AbstractAgent
from agents.HumanAgent import HumanAgent
from agents.RandomAgent import RandomAgent
from environments.Runner import Runner
from environments.NoughtsAndCrossesVersusAgentEnvironment import NoughtsAndCrossesVersusAgentEnvironment


def test_agent(
    agent: AbstractAgent,
    environment: NoughtsAndCrossesVersusAgentEnvironment,
    n_training_episodes: int = 1000,
    max_steps_per_episode: int = 200,
) -> None:
    """Train ``agent`` against a random opponent, then print one game.

    The environment's opposition is set to a fresh ``RandomAgent``; the agent
    is run for ``n_training_episodes`` episodes with ``learn=True`` and then
    for a single episode with ``verbose=True`` and ``learn=False``. The
    environment is reset before returning so the next caller does not inherit
    the final board.

    Args:
        agent: The agent to train and then show playing.
        environment: The noughts-and-crosses environment to play in. Its
            opposition agent is replaced by this call.
        n_training_episodes: Number of learning episodes to run before the
            verbose game. Defaults to the previously hard-coded 1000.
        max_steps_per_episode: Step cap handed to ``Runner``. Defaults to the
            previously hard-coded 200.
    """
    environment.set_opposition_agent(RandomAgent(environment))
    runner = Runner(agent, max_steps_per_episode=max_steps_per_episode)
    # Learning phase: silent, many episodes.
    runner.run(environment, n_episodes=n_training_episodes, learn=True)
    # Demonstration phase: one printed game with learning switched off.
    runner.run(environment, n_episodes=1, verbose=True, learn=False)
    environment.reset()
    
def train_then_compete_with_agent(
    agent: AbstractAgent,
    environment: NoughtsAndCrossesVersusAgentEnvironment,
    n_training_episodes: int = 1000,
    max_steps_per_episode: int = 200,
) -> None:
    """Train ``agent`` against a random opponent, then play it against a human.

    The agent is first run for ``n_training_episodes`` learning episodes with
    a ``RandomAgent`` as the opposition. The environment is then reset, the
    opposition is swapped for a ``HumanAgent``, and one verbose episode is run
    with ``learn=False``. The environment is reset again before returning.

    Args:
        agent: The agent to train and then compete against.
        environment: The noughts-and-crosses environment to play in. Its
            opposition agent is replaced twice by this call and is left set
            to the ``HumanAgent``.
        n_training_episodes: Number of learning episodes to run against the
            random opponent. Defaults to the previously hard-coded 1000.
        max_steps_per_episode: Step cap handed to ``Runner``. Defaults to the
            previously hard-coded 200.
    """
    environment.set_opposition_agent(RandomAgent(environment))
    runner = Runner(agent, max_steps_per_episode=max_steps_per_episode)
    # Learning phase against the random opponent.
    runner.run(environment, n_episodes=n_training_episodes, learn=True)
    environment.reset()
    # Swap the opponent: the same runner (and trained agent) now faces a human.
    environment.set_opposition_agent(HumanAgent(environment))
    runner.run(environment, n_episodes=1, verbose=True, learn=False)
    environment.reset()

# Single environment instance, created once here and passed to every agent
# cell below (both into the agent constructor and into test_agent).
environment = NoughtsAndCrossesVersusAgentEnvironment()

In [2]:
from agents.QLearningAgent import QLearningAgent


# Train a QLearningAgent against the random opponent, then print one game
# played with learning switched off.
test_agent(QLearningAgent(environment), environment)

Episode: 1
Step: 1
  |   |  
---------
  |   |  
---------
O |   | X
Step: 2
O |   |  
---------
  | X |  
---------
O |   | X
Step: 3
O | X | O
---------
  | X |  
---------
O |   | X
Step: 4
O | X | O
---------
  | X |  
---------
O | X | X


In [3]:
from agents.DoubleQLearningAgent import DoubleQLearningAgent


# Same train-then-show routine, this time with a DoubleQLearningAgent.
test_agent(DoubleQLearningAgent(environment), environment)

Episode: 1
Step: 1
  |   |  
---------
  |   |  
---------
X |   | O
Step: 2
O | X |  
---------
  |   |  
---------
X |   | O
Step: 3
O | X |  
---------
  |   |  
---------
X |   | O
Step: 4
O | X | X
---------
  |   |  
---------
X | O | O
Step: 5
O | X | X
---------
  |   |  
---------
X | O | O
Step: 6
O | X | X
---------
  | X |  
---------
X | O | O


In [4]:
from agents.SarsaAgent import SarsaAgent


# Train a SarsaAgent against the random opponent and print one non-learning game.
test_agent(SarsaAgent(environment), environment)

Episode: 1
Step: 1
  |   |  
---------
O |   |  
---------
  | X |  
Step: 2
  |   |  
---------
O |   |  
---------
  | X |  
Step: 3
  | O |  
---------
O | X |  
---------
  | X |  
Step: 4
  | O |  
---------
O | X |  
---------
  | X |  
Step: 5
  | O | O
---------
O | X |  
---------
  | X | X
Step: 6
  | O | O
---------
O | X |  
---------
  | X | X
Step: 7
X | O | O
---------
O | X |  
---------
  | X | X


In [5]:
from agents.ExpectedSarsaAgent import ExpectedSarsaAgent


# Train an ExpectedSarsaAgent against the random opponent and print one non-learning game.
test_agent(ExpectedSarsaAgent(environment), environment)

Episode: 1
Step: 1
X |   |  
---------
  |   |  
---------
  |   | O
Step: 2
X |   |  
---------
  | X |  
---------
O |   | O
Step: 3
X |   | X
---------
  | X | O
---------
O |   | O
Step: 4
X | X | X
---------
  | X | O
---------
O |   | O


In [6]:
from agents.StateValueTemporalDifferenceZeroAgent import StateValueTemporalDifferenceZeroAgent


# Train a StateValueTemporalDifferenceZeroAgent against the random opponent
# and print one non-learning game.
test_agent(StateValueTemporalDifferenceZeroAgent(environment), environment)

Episode: 1
Step: 1
  |   |  
---------
  |   | O
---------
  | X |  
Step: 2
  |   | O
---------
  | X | O
---------
  | X |  
Step: 3
  | X | O
---------
  | X | O
---------
  | X |  


In [7]:
from agents.TemporalDifferenceNStepsToExpectedSarsaAgent import TemporalDifferenceNStepsToExpectedSarsaAgent


# Train the n-step agent (constructed with n_steps=2) against the random
# opponent and print one non-learning game.
# presumably n_steps is the number of steps the agent looks ahead before
# bootstrapping; confirm in the agent class.
# NOTE(review): in the output recorded with this notebook the board stops
# changing after step 3 and the log is cut off at step 17. Check whether the
# trained agent keeps selecting a move that does not alter the board.
test_agent(TemporalDifferenceNStepsToExpectedSarsaAgent(environment, n_steps=2), environment)

Episode: 1
Step: 1
X |   | O
---------
  |   |  
---------
  |   |  
Step: 2
X |   | O
---------
  |   | O
---------
  |   | X
Step: 3
X | O | O
---------
  |   | O
---------
X |   | X
Step: 4
X | O | O
---------
  |   | O
---------
X |   | X
Step: 5
X | O | O
---------
  |   | O
---------
X |   | X
Step: 6
X | O | O
---------
  |   | O
---------
X |   | X
Step: 7
X | O | O
---------
  |   | O
---------
X |   | X
Step: 8
X | O | O
---------
  |   | O
---------
X |   | X
Step: 9
X | O | O
---------
  |   | O
---------
X |   | X
Step: 10
X | O | O
---------
  |   | O
---------
X |   | X
Step: 11
X | O | O
---------
  |   | O
---------
X |   | X
Step: 12
X | O | O
---------
  |   | O
---------
X |   | X
Step: 13
X | O | O
---------
  |   | O
---------
X |   | X
Step: 14
X | O | O
---------
  |   | O
---------
X |   | X
Step: 15
X | O | O
---------
  |   | O
---------
X |   | X
Step: 16
X | O | O
---------
  |   | O
---------
X |   | X
Step: 17
X | O | O
---------
  |   | O
---------
X |  

In [8]:
from agents.MonteCarloAgent import MonteCarloAgent


# Train a MonteCarloAgent against the random opponent and print one non-learning game.
test_agent(MonteCarloAgent(environment), environment)

Episode: 1
Step: 1
X |   |  
---------
  |   |  
---------
  | O |  
Step: 2
X |   |  
---------
  | X | O
---------
  | O |  
Step: 3
X | O | X
---------
  | X | O
---------
  | O |  
Step: 4
X | O | X
---------
  | X | O
---------
  | O | X


In [6]:
from agents.PolicyIterationAgent import PolicyIterationAgent


# Slow cell: according to the original author's note this takes about
# 13 minutes to run. It goes through the same test_agent routine as the
# other agent cells.
test_agent(PolicyIterationAgent(environment), environment)

Policy improvement stopped at iteration 7
Episode: 1
Step: 1
  |   |  
---------
O |   |  
---------
  | X |  
Step: 2
  | X |  
---------
O |   |  
---------
O | X |  
Step: 3
  | X |  
---------
O | X |  
---------
O | X |  


In [5]:
from agents.ValueIterationAgent import ValueIterationAgent


# Run a ValueIterationAgent through test_agent: learning episodes against the
# random opponent, then one printed game with learning switched off.
test_agent(ValueIterationAgent(environment), environment)

Stopped at iteration 5
Episode: 1
Step: 1
X | O |  
---------
  |   |  
---------
  |   |  
Step: 2
X | O |  
---------
X |   |  
---------
O |   |  
Step: 3
X | O | O
---------
X | X |  
---------
O |   |  
Step: 4
X | O | O
---------
X | X | X
---------
O |   |  


In [3]:
from agents.ReinforceAgent import ReinforceAgent


# Train a ReinforceAgent against the random opponent and print one non-learning game.
test_agent(ReinforceAgent(environment), environment)

Episode: 1
Step: 1
X |   |  
---------
O |   |  
---------
  |   |  
Step: 2
X | O |  
---------
O | X |  
---------
  |   |  
Step: 3
X | O | X
---------
O | X |  
---------
O |   |  
Step: 4
X | O | X
---------
O | X | O
---------
O | X |  
Step: 5
X | O | X
---------
O | X | O
---------
O | X |  
Step: 6
X | O | X
---------
O | X | O
---------
O | X |  
Step: 7
X | O | X
---------
O | X | O
---------
O | X | X


In [4]:
from agents.ActorCriticAgent import ActorCriticAgent


# Train an ActorCriticAgent against the random opponent and print one non-learning game.
test_agent(ActorCriticAgent(environment), environment)

Episode: 1
Step: 1
O |   |  
---------
  | X |  
---------
  |   |  
Step: 2
O |   |  
---------
  | X | O
---------
X |   |  
Step: 3
O |   |  
---------
  | X | O
---------
X |   |  
Step: 4
O |   | X
---------
  | X | O
---------
X |   |  
