# Monte Carlo Tree Search

In [1]:
import os
import sys
# Put the parent directory on the import path (only once) so that the
# project-local imports below resolve -- presumably `game` and `agent`
# live one level above this notebook.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import numpy as np
import matplotlib.pyplot as plt

from game import TicTacToe, ConnectFour
from game.manager import GameManager
from agent.player import IOPlayer, RandomPlayer, BFSPlayer, DFSPlayer
from agent.player.mcts import MCTS

In [2]:
# Interactive Tic-Tac-Toe game: an MCTS agent (built from the game's start
# state) is passed before the IOPlayer.
# NOTE(review): argument order presumably determines who moves first -- confirm
# against GameManager.
tic_tac_toe = TicTacToe()
start_state = tic_tac_toe.start()
io_player = IOPlayer(from_str='list[int]')

manager = GameManager(
    tic_tac_toe,
    MCTS(start_state),
    io_player,
)
manager.run_single_game()

It`s a draw.
Final state:
o|x|x
x|x|o
o|o|x


In [8]:
# Interactive Connect Four game: an MCTS agent (built from the game's start
# state) is passed before the IOPlayer.
# `ConnectFour` is already imported in the notebook's first cell, so it is not
# re-imported here.
# NOTE(review): argument order presumably determines who moves first -- confirm
# against GameManager.
connect_four = ConnectFour()
start_state = connect_four.start()

manager = GameManager(connect_four, MCTS(start_state), IOPlayer('int'))
manager.run_single_game()

You win!
Final state:
 0 1 2 3 4 5 6 
 v v v v v v v 
|x|o|o|o| | | |
|x|x|o|o|o|o| |
|o|x|o|x|x|x| |
|x|x|x|o|o|o| |
|x|o|o|o|x|x| |
|x|o|x|x|o|x|_|



np.int64(2)

In [9]:
# Benchmark on Connect Four: MCTS against RandomPlayer, 10 games with
# shuffling='circular'.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    RandomPlayer(),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 5., 0.],
       [0., 5., 0.]])

In [10]:
# Benchmark on Connect Four: MCTS against BFSPlayer(1), 10 games with
# shuffling='circular'.
# NOTE(review): the BFSPlayer argument is presumably a search depth -- confirm.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    BFSPlayer(1),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 5., 0.],
       [0., 5., 0.]])

In [11]:
# Benchmark on Connect Four: MCTS against BFSPlayer(2), 10 games with
# shuffling='circular'.
# NOTE(review): the BFSPlayer argument is presumably a search depth -- confirm.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    BFSPlayer(2),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 5., 0.],
       [0., 5., 0.]])

In [12]:
# Benchmark on Connect Four: MCTS against BFSPlayer(3), 10 games with
# shuffling='circular'.
# NOTE(review): the BFSPlayer argument is presumably a search depth -- confirm.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    BFSPlayer(3),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 5., 1.],
       [0., 4., 0.]])

In [13]:
# Build a DFSPlayer and fit it on Tic-Tac-Toe. With verbose=True, fit() prints
# progress (the recorded output reports how many states were found).
# DFSPlayer is imported together with the other `agent.player` names in the
# notebook's first cell rather than here.
# The name `ts` is kept because the next benchmark cell reads it.
ts = DFSPlayer(verbose=True)

tic_tac_toe = TicTacToe()
ts.fit(tic_tac_toe)

Found 10 states.
Found 100 states.
Found 1000 states.
Found 5478 states in total.


In [14]:
# Benchmark on Tic-Tac-Toe: the fitted DFSPlayer (`ts`, from the previous cell)
# against MCTS, 10 games with shuffling='circular'.
manager = GameManager(
    TicTacToe(),
    ts,
    MCTS(TicTacToe().start()),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[10.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [15]:
# MCTS self-play on Connect Four: both players are MCTS instances built with
# the same arguments; 10 games with shuffling='no'.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    MCTS(ConnectFour().start()),
    n_plays=10,
    shuffling='no',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 9., 0.],
       [0., 0., 1.]])

In [16]:
# MCTS vs. MCTS on Connect Four where only the second agent sets
# time_for_action=0.1; the first uses the default (not visible in this
# notebook). 10 games with shuffling='circular'.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    MCTS(ConnectFour().start(), time_for_action=0.1),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[1., 0., 0.],
       [0., 4., 0.],
       [0., 5., 0.]])

In [17]:
# MCTS vs. MCTS on Connect Four where only the second agent is given
# rollout_policy=BFSPlayer(1); the first uses the default rollout policy
# (not visible in this notebook). 10 games with shuffling='circular'.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    MCTS(ConnectFour().start(), rollout_policy=BFSPlayer(1)),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 4., 1.],
       [0., 4., 1.]])

In [18]:
# MCTS vs. MCTS on Connect Four where only the second agent is given
# rollout_policy=BFSPlayer(2); the first uses the default rollout policy
# (not visible in this notebook). 10 games with shuffling='circular'.
manager = GameManager(
    ConnectFour(),
    MCTS(ConnectFour().start()),
    MCTS(ConnectFour().start(), rollout_policy=BFSPlayer(2)),
    n_plays=10,
    shuffling='circular',
)
results = manager.run(verbose=True)
# Show the "winners matrix" entry of the results as the cell output.
results["winners matrix"]

  0%|          | 0/10 [00:00<?, ?it/s]

array([[0., 0., 0.],
       [0., 4., 0.],
       [0., 5., 1.]])

In [14]:
# Interactive Connect Four game with the IOPlayer passed before the MCTS agent
# (the reverse of the earlier interactive Connect Four cell).
# NOTE(review): argument order presumably determines who moves first -- confirm
# against GameManager.
connect_four = ConnectFour()
start_state = connect_four.start()
manager = GameManager(
    connect_four,
    IOPlayer('int'),
    MCTS(start_state),
)
manager.run_single_game()

You win!
Final state:
 0 1 2 3 4 5 6 
 v v v v v v v 
|x|x| |x| |o|x|
|o|x| |x|x|x|o|
|x|o| |o|o|o|x|
|o|x| |x|x|x|o|
|x|x|x|x|o|o|o|
|o|o|o|x|o|o|o|



1

In [23]:
# Interactive Connect Four game with the IOPlayer passed before an MCTS agent
# that is constructed with time_for_action=1.0.
# NOTE(review): argument order presumably determines who moves first -- confirm
# against GameManager.
connect_four = ConnectFour()
start_state = connect_four.start()
manager = GameManager(
    connect_four,
    IOPlayer('int'),
    MCTS(start_state, time_for_action=1.0),
)
manager.run_single_game()

You win!
Final state:
 0 1 2 3 4 5 6 
 v v v v v v v 
| | | |x| | | |
|x| |o|x|o| | |
|o|x|o|o|x| |o|
|x|o|x|x|o| |x|
|o|x|o|x|x| |o|
|o|x|x|x|o|_|o|



1