# Content

In [6]:
from Board import Result
from player.Player import Player
from Decorators import debug, log
from abc import abstractmethod
from collections import defaultdict


import os
import dill
import numpy as np
import random

In [2]:
class QPlayer(Player):
    """
    Player whose moves are picked by a Q-learner.

    The player remembers the latest and the previous state/action as well as
    the reward of the previous result, so that the learner is updated one
    step late (when the follow-up state is already known).
    """

    def __init__(self, representationChar, q_learner):
        """
        :param representationChar: forwarded unchanged to ``Player.__init__``
        :param q_learner: learner object; this class uses its ``select_move``,
                          ``update`` and ``possible_actions`` members
        """
        Player.__init__(self, representationChar)
        self.q_learner = q_learner
        # Nothing has been observed or played yet.
        self.prev_state = None
        self.state = None
        self.prev_action = []
        self.action = []
        self.prev_reward = 0

    @debug
    #@log(aiReadable=True)
    def makeMove(self, board):
        """
        Ask the Q-learner for a move on the given board.

        :param board: board object; its ``field`` attribute is stored as the
                      current state and handed to the learner
        :return: the action returned by ``q_learner.select_move``
        """
        current_field = board.field
        self.state = current_field

        chosen_action = self.q_learner.select_move(state=current_field)
        self.action = chosen_action

        return chosen_action

    def giveResult(self, result):
        """
        Translate a result into a reward and update the q-learner.

        The learner is updated for the *previous* action, using the reward
        remembered from the previous call; the reward computed here is only
        stored and used on the next call.

        :param result: value compared against the ``Result`` constants
        """
        reward_by_outcome = (
            (Result.INVALID_MOVE, -100),
            (Result.GAME_LOST, -10),
            (Result.GAME_WON, 100),
            (Result.GAME_DRAW, -1),
        )

        # Any result that matches none of the outcomes above yields 0.
        reward = 0
        for outcome, outcome_reward in reward_by_outcome:
            if outcome == result:
                reward = outcome_reward

        if len(self.prev_action) > 0:
            # Reverse lookup: find the key whose value equals the previous action.
            actions = self.q_learner.possible_actions
            action_keys = list(actions.keys())
            position = list(actions.values()).index(self.prev_action)
            action_idx = action_keys[position]

            self.q_learner.update(self.prev_state, self.state, action_idx, self.prev_reward)

        # Shift the current step into the "previous" slots for the next call.
        self.prev_action = self.action
        self.prev_state = self.state
        self.prev_reward = reward

In [3]:
class QLearner:
    """
    Common interface of the Q-learner implementations.

    Both methods are declared with ``@abstractmethod`` and have no body here,
    so they return None unless a subclass overrides them.
    NOTE: the class does not use an ABC metaclass, therefore the decorator
    alone does not prevent instantiating this class.
    """

    @abstractmethod
    def update(self, state, new_state, action, result):
        """
        Compute the new q value for a state/action pair.

        :param state: board state before the action
        :param new_state: board state after the action was applied
        :param action: the action that was chosen
        :param result: reward received for that action
        :return: the q table containing the updated value
        """

    @abstractmethod
    def select_move(self, state, theta=0.9):
        """
        Pick an action for the given state.

        :param state: state of the environment
        :param theta: selection parameter; how it is used is up to the
                      implementing class
        :return: the selected action
        """


In [4]:
class QTableLearner(QLearner):
    """
    QLearner implementation backed by a q table.

    The table maps ``np.array_str(state)`` to a numpy array that holds one
    q value per action index (the keys of ``possible_actions``).
    """

    def __init__(self, q_table=None, learn_rate=0.1, discount_factor=0.8):
        """
        :param q_table: existing q table to keep learning on; it is used as is
                        (not copied). When None, a new empty table is created.
        :param learn_rate: learning rate of this q learner
        :param discount_factor: discount factor of this q learner
        """
        self.possible_actions = {
            0: [1, 1], 1: [1, 2], 2: [1, 3],
            3: [2, 1], 4: [2, 2], 5: [2, 3],
            6: [3, 1], 7: [3, 2], 8: [3, 3],
        }

        if q_table is None:
            # Bind the size to a local so the factory does not capture `self`;
            # otherwise serialising the table would drag the learner along.
            n_actions = len(self.possible_actions)
            q_table = defaultdict(lambda: np.zeros(n_actions))

        self.q_table = q_table
        self.learn_rate = learn_rate
        self.discount_factor = discount_factor

    def _q_values(self, state_key):
        """
        Return the q value row of a state, creating a zero row if it is missing.

        :param state_key: string key of the state (see ``np.array_str``)
        :return: numpy array with one q value per action index
        """
        try:
            # A defaultdict fills in missing rows by itself.
            return self.q_table[state_key]
        except KeyError:
            # Plain dicts (e.g. a table passed in by the caller) need the row
            # to be created explicitly.
            row = np.zeros(len(self.possible_actions))
            self.q_table[state_key] = row
            return row

    #@save_q_table
    def update(self, state, new_state, action, result):
        """
        Calculates the new q value from the new state and action pair

        new = old * (1 - learn_rate)
              + learn_rate * (result + discount_factor * max(q[new_state]))

        :param state: last state of the board
        :param new_state: new state of the board, including the new action
        :param action: index of the chosen action (key of ``possible_actions``)
        :param result: reward for chosen action
        :return: updated q table with the new value
        """
        state_key, new_state_key = np.array_str(state), np.array_str(new_state)

        q_values = self._q_values(state_key)
        best_next_value = np.max(self._q_values(new_state_key))

        q_values[action] = q_values[action] * (1 - self.learn_rate) \
                           + self.learn_rate \
                           * (result + self.discount_factor * best_next_value)

        return self.q_table

    def select_move(self, state, theta=0.9):
        """
        Choose an action for the given state (epsilon-greedy).

        A uniform random number in [0, 1) is drawn; if it is greater than
        ``theta`` the action with the highest q value is used, otherwise a
        random action is returned. No softmax is involved.

        :param state: state of the environment
        :param theta: threshold for the random draw; the higher, the more
                      often a random action is chosen
        :return: selected action, one of the values of ``possible_actions``
        """
        if np.random.uniform(0, 1) > theta:
            # Exploit: take the best known action for this state.
            best_idx = int(np.argmax(self._q_values(np.array_str(state))))
            return self.possible_actions[best_idx]

        # Explore: sample from all actions. The board is not consulted here,
        # so the sampled action may be one that is not available any more.
        return random.choice(list(self.possible_actions.values()))

In [5]:
class QUtils:
    """
    Static helpers to print, load, store and merge q table dictionaries.
    """

    @staticmethod
    def pretty_print_q_table(dictQTable):
        """
        Print one line per table entry.

        Every value is rounded to one decimal and right-aligned in a column of
        width 7. The keys of the table are not printed.

        :param dictQTable: dictionary whose values are iterables of numbers
        """
        for row in dictQTable.values():
            print("".join(str(round(cell, 1)).rjust(7) for cell in row))

    @staticmethod
    def get_dict_from_file(filepath):
        """
        Load an object from a file with dill.

        NOTE: unpickling can execute arbitrary code; only load trusted files.

        :param filepath: path of the file to read
        :return: the loaded object, or None if the path is not a file or the
                 file is empty
        """
        if not os.path.isfile(filepath) or os.path.getsize(filepath) <= 0:
            return None

        with open(filepath, "rb") as source:
            return dill.load(source)

    @staticmethod
    def save_dict_to_file(filepath, dict):
        """
        Write an object to a file with dill, overwriting existing content.

        :param filepath: path of the file to write
        :param dict: object to store
        """
        with open(filepath, 'wb') as target:
            dill.dump(dict, target)

    @staticmethod
    def merge_dicts(d0:dict, d1:dict):
        """
        Merge two dictionaries without modifying either of them.

        :param d0: base dictionary; its entries always win
        :param d1: dictionary whose entries are added for keys missing in d0
        :return: shallow copy of d0 extended by the missing entries of d1
        """
        merged = d0.copy()
        for key, value in d1.items():
            # setdefault only inserts when the key is not present yet.
            merged.setdefault(key, value)

        return merged

# Tests

In [None]:
# QUtils.pretty_print_q_table(QUtils.get_dict_from_file("..\\qtable.pkl"))

# Documentation