In [12]:
import ipytest

from build.Board import Board
from build.player.QPlayer import QPlayer, QUtils
from build.player.qlearner.QTableLearner import QTableLearner
from build.Game import Game
from unittest import mock
from collections import defaultdict

# Apply ipytest's default configuration once, before the %%ipytest test cells further down are run.
ipytest.autoconfig()

## QTableLearner

### PyTest
see docs/presentations/Pytest.ipynb

In [25]:
%%ipytest

def test_q_table_type():
    """A freshly constructed learner keeps its Q-table in a ``defaultdict``."""
    learner = QTableLearner()
    table_type = type(learner.q_table)
    assert table_type is defaultdict

def test_q_table_size():
    """A freshly constructed learner starts out with an empty Q-table."""
    entry_count = len(QTableLearner().q_table)
    assert entry_count == 0

def test_q_table_select_move():
    """An untrained learner still returns some move for the field of a new board."""
    learner = QTableLearner()
    fresh_field = Board().field
    chosen_move = learner.select_move(fresh_field)
    assert chosen_move is not None

def test_q_table_update():
    """After one rewarded update, the rewarded action is chosen for that state."""
    # Arrange: a new board field and a successor field with 'x' placed at [0, 0].
    learner = QTableLearner()
    field_before = Board().field
    field_after = field_before.copy()
    field_after[0, 0] = 'x'
    rewarded_action, payoff = 4, 100

    # Act: feed a single (state, next state, action, reward) transition to the learner.
    learner.update(field_before, field_after, rewarded_action, payoff)

    # Assert: asked again for the original state with theta=0, the learner
    # picks the action that was just rewarded.
    picked = learner.select_move(field_before, theta=0)
    assert picked == rewarded_action

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                         [100%][0m
[32m[32m[1m4 passed[0m[32m in 0.02s[0m[0m


### Mock
see docs/presentations/Mocking.ipynb

Testet, wie gut der QTableLearner gegen einen Gegner lernt, der stets dieselben Züge spielt.

In [28]:
%%ipytest

def test_q_table_learner_stupid():
    """A single game against a scripted opponent is recorded as one loss for the Q-player."""
    # The opponent is a mock whose make_move() yields 1, 4, 7 on successive calls.
    scripted_opponent = mock.Mock()
    scripted_opponent.representation_char = 'x'
    scripted_opponent.make_move.side_effect = [1, 4, 7]

    q_player = QPlayer('o', QTableLearner(theta=0))

    Game([scripted_opponent, q_player]).run()

    losses = q_player.stats.get_lost()
    assert losses == 1

def _train_against_scripted_opponent(n_games, opponent_moves=(1, 4, 7, 2, 3, 5, 6, 8)):
    """Play ``n_games`` games of a Q-player against a mock opponent with a fixed move script.

    The opponent is a ``mock.Mock`` playing 'x'; before every game its
    ``make_move.side_effect`` is reset to ``opponent_moves`` so each game sees
    the same sequence from the start. The Q-player plays 'o' and uses a
    ``QTableLearner(theta=0)``. All games are run on one ``Game`` instance, so
    the same learner is used across games.

    Args:
        n_games: Number of times ``game.run()`` is called.
        opponent_moves: Moves the mock opponent returns, in order, per game.

    Returns:
        The ``QPlayer`` instance, so callers can inspect ``stats``.
    """
    scripted_opponent = mock.Mock()
    scripted_opponent.representation_char = 'x'

    q_player = QPlayer('o', QTableLearner(theta=0))

    game = Game([scripted_opponent, q_player])
    for _ in range(n_games):
        # A fresh list per game: Mock consumes side_effect as an iterator,
        # so it has to be re-armed before every run.
        scripted_opponent.make_move.side_effect = list(opponent_moves)
        game.run()

    return q_player


def test_q_table_learner_smart():
    """Over four games the Q-player loses three times and wins once."""
    q_player = _train_against_scripted_opponent(4)

    assert q_player.stats.get_lost() == 3
    assert q_player.stats.get_won() == 1


def test_q_table_learner_smarter():
    """Over 1000 games the Q-player wins more than 950."""
    q_player = _train_against_scripted_opponent(1000)

    assert q_player.stats.get_won() > 950


def test_q_table_learner_smartest():
    """Over 10000 games the Q-player wins more than 9950."""
    q_player = _train_against_scripted_opponent(10000)

    assert q_player.stats.get_won() > 9950

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                         [100%][0m
[32m[32m[1m4 passed[0m[32m in 14.07s[0m[0m


In [17]:
# Load the stored Q-table and print it. The path uses forward slashes so the
# cell works on POSIX as well as Windows; the former '..\\output\\...' form
# is a single file name on non-Windows systems.
# NOTE(review): the .pkl extension suggests a pickle file; only load files you
# trust. Confirm how QUtils.get_dict_from_file deserializes.
QUtils.pretty_print_q_table(QUtils.get_dict_from_file('../output/storage/qtable.pkl'))

 [' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' ' ' ' ']

 [206.37 232.29 201.79]
 [178.57 230.32 189.74]
 [182.43 190.74 178.8 ]

-------------------------

 [' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' 'o' 'x']

 [ 147.83  140.12  139.93]
 [  96.63  153.14  196.4 ]
 [  85.84 -852.26 -849.45]

-------------------------

 [' ' ' ' ' ']
 [' ' 'o' ' ']
 ['x' 'o' 'x']

 [  43.12  105.97    4.92]
 [  59.77 -884.73   69.31]
 [-816.42 -718.1  -836.75]

-------------------------

 ['o' ' ' ' ']
 [' ' 'o' 'x']
 ['x' 'o' 'x']

 [-789.31  184.96  263.23]
 [  86.38 -658.85 -636.95]
 [-719.41 -691.1  -636.7 ]

-------------------------

 [' ' 'x' ' ']
 [' ' ' ' ' ']
 [' ' ' ' 'o']

 [ 234.64 -753.52  224.11]
 [ 120.1   321.85  160.35]
 [ 121.75  224.94 -755.98]

-------------------------

 [' ' 'x' ' ']
 [' ' 'x' ' ']
 ['o' ' ' 'o']

 [ 227.92 -668.46  213.45]
 [ 239.18 -672.62  227.83]
 [-672.65  438.84 -682.01]

-------------------------

 ['o' 'x' ' ']
 ['x' 'x' ' ']
 ['o' ' ' 'o']

 [-754.07 -735.69  182.97]
 [-738