# In [None]:
from player.QLearner import QLearner
from Board import Result
from collections import defaultdict
from player.Player import Player
from Decorators import debug, log
import numpy as np

class QPlayer(Player, QLearner):
    """
    Player that picks its moves through QLearner and feeds game results back
    to it as rewards.

    Both base-class initialisers and the QLearner methods are invoked
    explicitly on their class (not through ``super()``/``self``), so the
    method resolution order of this class does not affect which
    implementation runs.
    """

    def __init__(self, representationChar, exp_factor=0.9):
        """
        Set up the player and its learning state.

        :param representationChar: forwarded unchanged to ``Player.__init__``
        :param exp_factor: stored as ``self.exp_factor``; not read anywhere in
            this class (presumably the exploration factor used by QLearner --
            TODO confirm)
        """
        # All [first, second] pairs with both entries in 1..3, first entry
        # varying slowest: [1,1], [1,2], [1,3], [2,1], ... , [3,3].
        move_grid = np.array(
            [[first, second] for first in (1, 2, 3) for second in (1, 2, 3)]
        )

        Player.__init__(self, representationChar)
        QLearner.__init__(self, env_action_space=9, actions=move_grid)

        # NOTE(review): the assignments below run after QLearner.__init__, so
        # they replace any attribute of the same name it may have set
        # (env_action_space, q_table) -- confirm this is intended.
        self.env_action_space = 9
        # Unseen keys start with one zero per action; the length is read from
        # self.env_action_space at lookup time, not at construction time.
        self.q_table = defaultdict(lambda: np.zeros(self.env_action_space))
        self.exp_factor = exp_factor

        # Meant to record all positions taken; nothing in this class appends
        # to it.
        self.states_value = []

        # Placeholders until the first makeMove/giveResult call overwrites them.
        self.state = np.empty(shape=(3, 3), dtype=Player)
        self.prev_state = np.empty(shape=(3, 3), dtype=Player)
        self.last_action = []

    @debug
    #@log(aiReadable=True)
    def makeMove(self, board):
        """
        Choose a move for the current board via ``QLearner.select_move``.

        The board's ``field`` is remembered as ``self.state`` and the chosen
        move as ``self.last_action`` so that ``giveResult`` can use them.
        The chosen move is also printed.

        :param board: current state of the board; only ``board.field`` is read
        :return: the value returned by ``QLearner.select_move`` (presumably one
            of the [column, row] pairs passed as ``actions`` -- TODO confirm)
        """
        # NOTE(review): this keeps a reference to board.field, not a copy. If
        # the board mutates that object in place, self.state (and prev_state,
        # once giveResult has run) will change with it -- confirm.
        self.state = board.field
        chosen_move = QLearner.select_move(self, s=self.state)
        self.last_action = chosen_move
        print(chosen_move)
        return chosen_move

    def giveResult(self, result):
        """
        Translate ``result`` into a reward and update the q table with it.

        Rewards: invalid move -100, game lost -10, game won 100, game draw -1,
        anything else 0. Afterwards the current state becomes the previous
        state for the next update.

        :param result: outcome to score, compared against ``Result`` members
        """
        reward_table = (
            (Result.INVALID_MOVE, -100),
            (Result.GAME_LOST, -10),
            (Result.GAME_WON, 100),
            (Result.GAME_DRAW, -1),
        )

        # Every entry is checked (no early exit), so if several entries compare
        # equal to result the last one determines the reward.
        reward = 0
        for outcome, value in reward_table:
            if outcome == result:
                reward = value

        # The return value of update is not needed here.
        QLearner.update(self, self.prev_state, self.state, self.last_action, reward)
        self.prev_state = self.state

