# In [None]:
from player.QLearner import QLearner
from Board import Result
from player.Player import Player
from Decorators import debug, log
import numpy as np

class QPlayer(Player):
    """
    Player that chooses its moves with a Q-learner.

    The class derives from ``Player`` only. The Q-learning behaviour is
    borrowed by calling ``QLearner`` functions explicitly with this instance
    passed as ``self`` (see ``__init__``, ``makeMove`` and ``giveResult``).
    """

    def __init__(self, representationChar, q_learner, q_table_ttt=None):
        """
        Initialise the player and the learning state kept on the instance.

        :param representationChar: forwarded unchanged to ``Player.__init__``
        :param q_learner: stored as ``self.q_learner``; this class does not
            read it again
        :param q_table_ttt: forwarded to ``QLearner.__init__`` as ``q_table``
        """
        self.q_learner = q_learner
        Player.__init__(self, representationChar)
        # Action index -> 1-based coordinate pair on the 3x3 board.
        self.actions = {
            0: [1, 1], 1: [1, 2], 2: [1, 3],
            3: [2, 1], 4: [2, 2], 5: [2, 3],
            6: [3, 1], 7: [3, 2], 8: [3, 3],
        }
        # QLearner is not a base class of QPlayer, so its initialiser is
        # invoked explicitly to set up the learner state on this object.
        QLearner.__init__(self, env_action_space=9, actions=self.actions, q_table=q_table_ttt)
        # Transition bookkeeping consumed by giveResult().
        self.prev_state = np.empty(shape=(3, 3), dtype=Player)
        self.last_action = []
        self.action = []
        self.state = np.empty(shape=(3, 3), dtype=Player)
        self.last_reward = 0

    @debug
    #@log(aiReadable=True)
    def makeMove(self, board):
        """
        Choose the next move for the given board with the Q-learner.

        :param board: current board; only its ``field`` attribute is read
        :return: the move returned by ``QLearner.select_move`` (presumably
            one of the coordinate pairs in ``self.actions`` -- TODO confirm)
        """
        # Take a snapshot instead of keeping a reference: if the board
        # updates ``field`` in place, ``prev_state`` and ``state`` would
        # otherwise be the very same object when the Q-table is updated.
        # np.copy is shallow for object arrays, so cell objects keep their
        # identity.
        self.state = np.copy(board.field)
        self.action = QLearner.select_move(self, s=self.state)
        return self.action

    def _action_index(self, move):
        """
        Return the key of ``self.actions`` whose coordinates equal ``move``.

        :param move: coordinate pair (any sequence of two values)
        :return: the matching action index
        :raises ValueError: if ``move`` matches no entry of ``self.actions``
        """
        wanted = list(move)
        for index, coordinates in self.actions.items():
            if coordinates == wanted:
                return index
        raise ValueError("{!r} is not a known action".format(move))

    def giveResult(self, result):
        """
        Translate a result into a reward and update the Q-table.

        The update is applied one call late: this call learns the transition
        remembered by the previous call (``prev_state`` --``last_action``-->
        ``state`` with ``last_reward``), then remembers the current
        action/state/reward for the next call. The reward computed here is
        therefore not used until ``giveResult`` is called again.

        :param result: a ``Result`` value; unknown values yield reward 0
        :raises ValueError: if the remembered action is not in ``self.actions``
        """
        rewards = {
            Result.INVALID_MOVE: -100,
            Result.GAME_LOST: -10,
            Result.GAME_WON: 100,
            Result.GAME_DRAW: -1,
        }
        reward = rewards.get(result, 0)

        # Nothing to learn from until at least one move has been remembered.
        if len(self.last_action) > 0:
            action_idx = self._action_index(self.last_action)
            QLearner.update(self, self.prev_state, self.state, action_idx, self.last_reward)

        self.last_action = self.action
        self.prev_state = self.state
        self.last_reward = reward
