In [None]:
from collections import defaultdict
from build.player.qlearner.QLearner import QLearner
from build.Board import Result

import numpy as np
import random

In [37]:
class QTableLearner(QLearner):
    """
    Tabular variant of QLearner: Q-values live in a mapping from state to a
    3x3 array holding one value per board cell.

    Terms:
        state
            snapshot of the board fields; it is turned into a string with
            ``np.array_str`` before being used as a table key
            example: [[ 'x',  'o',  'o'],
                      [None,  'x', None],
                      [None, None, None]]

        action
            one entry of ``possible_actions``
            (not defined in this class -- presumably provided by QLearner)
            example: [3, 3]
    """

    def __init__(self, q_table=None, learn_rate=0.1, discount_factor=0.8):
        """
        :param q_table: existing table to continue with; when None a fresh
                        defaultdict yielding 3x3 zero arrays is created
        :param learn_rate: step size applied to the temporal difference
        :param discount_factor: weight of the best follow-up state value
        """
        self.q_table = (
            defaultdict(lambda: np.zeros(shape=[3, 3]))
            if q_table is None
            else q_table
        )
        self.learn_rate = learn_rate
        self.discount_factor = discount_factor

    #@save_q_table  (decorator currently disabled)
    def update(self, prev_state, state, prev_action_idx, result):
        """
        Apply one Q-learning step for the move that led from ``prev_state``
        to ``state``.

        :param prev_state: board before the move
        :param state: board after the move
        :param prev_action_idx: flat index of the played cell; it is split
                                into (row, column) by dividing by 3
        :param result: outcome of the move; compared against the terminal
                       ``Result`` members and also added numerically as
                       the reward
        :return: the q-table (the same object that was updated in place)
        """
        prev_key = np.array_str(prev_state)
        next_key = np.array_str(state)
        cell = divmod(prev_action_idx, 3)  # (row, column) on the 3x3 grid

        current = self.q_table[prev_key][cell]

        # A finished game has no follow-up state, so nothing to bootstrap from.
        if result in (Result.GAME_WON, Result.GAME_LOST, Result.GAME_DRAW):
            best_next = 0
        else:
            best_next = np.max(self.q_table[next_key])

        target = result + self.discount_factor * best_next
        td_error = target - current

        self.q_table[prev_key][cell] = current + self.learn_rate * td_error

        return self.q_table

    def select_move(self, state, theta=0.9):
        """
        Choose the next action, either greedily from the table or at random.

        :param state: current board
        :param theta: threshold for the uniform draw from [0, 1); a draw
                      above it exploits the table, anything else explores
        :return: the selected entry of ``possible_actions``
        """
        if np.random.uniform(0, 1) > theta:
            # Exploit: argmax yields the flat index of the highest Q-value
            # stored for this state.
            best_idx = np.argmax(self.q_table[np.array_str(state)])
            return self.possible_actions[best_idx]

        # Explore: pick any of the possible actions uniformly at random.
        return random.choice(self.possible_actions)