# Content

In [6]:
from Board import Result
from player.Player import Player
from Decorators import debug
from abc import abstractmethod
from collections import defaultdict

import os
import dill
import numpy as np
import random

In [2]:
class QPlayer(Player):
    """
    Player whose moves are chosen by a pluggable q-learning strategy.

    The learning algorithm itself lives in a QLearner object: the player only records
    states, actions and rewards and hands them to the learner, which updates its value
    estimates and proposes the next move (player-role pattern).
    """

    def __init__(self, representationChar, q_learner):
        """
        Create a Q-Player.
        :param representationChar: symbol passed on to the Player base class. A single character is
        expected; longer strings are accepted but give a worse representation on the board.
        :param q_learner: the QLearner that handles the learning process and selects the moves
        """

        Player.__init__(self, representationChar)
        self.q_learner = q_learner

        # Data of the current move ...
        self.state = None
        self.action = None

        # ... and of the move before it, consumed by the delayed update in giveResult.
        self.prev_state = None
        self.prev_action = None
        self.prev_reward = 0

    @debug
    #@log(aiReadable=True)
    def makeMove(self, board):
        """
        Ask the learner for a move on the given board and remember state and action.
        :param board: current board; its ``field`` attribute is used as the state
        :return: the action selected by the learner (column first, row second)
        """
        current_state = board.field
        self.state = current_state

        chosen_action = self.q_learner.select_move(state=current_state)
        self.action = chosen_action

        return chosen_action

    def giveResult(self, result):
        """
        Translate the result into a reward, update the win/loss/draw statistics and
        let the learner update its values for the *previous* move.
        :param result: the Result reported for the latest move
        """
        # Reward scheme: invalid move -100, loss -10, win +100, draw -1, anything else 0.
        if Result.INVALID_MOVE == result:
            reward = -100
        elif Result.GAME_LOST == result:
            reward = -10
            self.stats.incrLost()
        elif Result.GAME_WON == result:
            reward = 100
            self.stats.incrWon()
        elif Result.GAME_DRAW == result:
            reward = -1
            self.stats.incrDraw()
        else:
            reward = 0

        # The update is delayed by one call: the previous move is scored with the previous
        # reward, using the current state as its successor state.
        # NOTE(review): the reward of the very last call is only applied on the next call to
        # giveResult - confirm that this is intended for terminal results.
        if self.prev_action is not None:
            actions = self.q_learner.possible_actions
            # Reverse lookup: find the key whose action equals the previous action.
            position = list(actions.values()).index(self.prev_action)
            action_idx = list(actions.keys())[position]

            self.q_learner.update(self.prev_state, self.state, action_idx, self.prev_reward)

        # Shift the current move into the "previous" slots for the next call.
        self.prev_action, self.prev_state, self.prev_reward = self.action, self.state, reward

In [3]:
class QLearner:
    """
    Common interface shared by all q-learning methods.

    Concrete learners are expected to provide ``update`` and ``select_move``.
    Note: this class does not derive from ``abc.ABC``, so the ``@abstractmethod``
    markers document the intent but are not enforced on instantiation.
    """

    @abstractmethod
    def update(self, state, new_state, action, result):
        """
        Compute the new q value for a state/action pair.
        :param state: board state before the action was taken
        :param new_state: board state after the action, i.e. including the new action
        :param action: the action that was chosen
        :param result: the reward received for that action
        :return: defined by the concrete learner
        """

    @abstractmethod
    def select_move(self, state, theta=0.9):
        """
        Pick the action to play in the given state.
        :param state: state of the environment
        :param theta: tuning parameter of the selection policy (described as a softmax
        "temperature" by this interface; concrete learners define how it is used)
        :return: the action determined to be the next move
        """


In [4]:
class QTableLearner(QLearner):
    """
    QLearner specification backed by a q-table.

    The table maps the string form of a board state (``np.array_str``) to a 3x3 array
    holding one q value per board cell.
    """

    def __init__(self, q_table=None, learn_rate=0.1, discount_factor=0.8):
        """
        :param q_table: existing q-table to continue learning with; when None a fresh table is created
        :param learn_rate: learning rate of this q learner
        :param discount_factor: discount factor of this q learner
        """

        if q_table is None:
            # Unseen states must start with well-defined values. np.empty would hand out
            # uninitialised memory, i.e. arbitrary q values, so zeros are used instead.
            q_table = defaultdict(lambda: np.zeros([3, 3]))

        # Action index k (row-major cell index) -> [column, row], both 1-based:
        # k maps to [k % 3 + 1, k // 3 + 1].
        self.possible_actions = {
            0: [1, 1], 1: [2, 1], 2: [3, 1],
            3: [1, 2], 4: [2, 2], 5: [3, 2],
            6: [1, 3], 7: [2, 3], 8: [3, 3],
        }
        self.q_table = q_table
        self.learn_rate = learn_rate
        self.discount_factor = discount_factor

    #@save_q_table
    def update(self, state, new_state, action, result):
        """
        Apply the q-learning update rule to the entry of ``action`` in ``state``:
        Q(s, a) = (1 - learn_rate) * Q(s, a) + learn_rate * (result + discount_factor * max Q(s'))
        :param state: last state of the board
        :param new_state: new state of the board, including the new action
        :param action: index (0-8) of the chosen action, see ``possible_actions``
        :param result: reward for the chosen action
        :return: the updated q-table
        """
        state_key, new_state_key = np.array_str(state), np.array_str(new_state)
        row, col = divmod(action, 3)

        q_values = self.q_table[state_key]
        best_next = np.max(self.q_table[new_state_key])

        q_values[row, col] = q_values[row, col] \
                             * (1 - self.learn_rate) \
                             + self.learn_rate \
                             * (result + self.discount_factor * best_next)

        return self.q_table

    def select_move(self, state, theta=0.9):
        """
        Choose an action epsilon-greedily.
        :param state: state of the environment
        :param theta: probability of exploring; with probability ``theta`` a random action is
        sampled, otherwise the action with the highest q value for ``state`` is taken
        :return: the selected action as [column, row]
        """
        if np.random.uniform(0, 1) > theta:
            # Exploit: use the best known entry of the q-table for this state.
            idx = int(np.argmax(self.q_table[np.array_str(state)]))
            return self.possible_actions[idx]

        # Explore: sample uniformly from the actions themselves. Sampling the dict directly
        # only works while its keys happen to be exactly 0..n-1.
        return random.choice(list(self.possible_actions.values()))

In [5]:
class QUtils:
    """
    Helper functions for printing, persisting and merging q-tables.
    """

    @staticmethod
    def pretty_print_q_table(dict_q_table):
        """
        Print a human readable representation of the given q_table.
        :param dict_q_table: mapping of state key to an array of q values
        """
        for key, values in dict_q_table.items():
            # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
            values = np.round(values, 2)

            pretty_key = str(key).replace("None", "' '").replace('[[', ' [').replace(']]', ']')
            pretty_value = str(values).replace('[[', ' [').replace(']]', ']')
            print(f"{pretty_key}\n\n{pretty_value}\n\n-------------------------\n")

    @staticmethod
    def get_dict_from_file(filepath):
        """
        Read a dict from the specified file.
        :param filepath: the file to read in
        :return: the dict read from the file, or None if the file does not exist or is empty
        """

        if not (os.path.isfile(filepath) and os.path.getsize(filepath) > 0):
            return None

        # SECURITY: dill.load can execute arbitrary code while unpickling.
        # Only load files that come from a trusted source.
        with open(filepath, "rb") as file:
            return dill.load(file)

    @staticmethod
    def save_dict_to_file(filepath, dict):
        """
        Save a dict to the specified file, overwriting existing content.
        :param filepath: the file to write to
        :param dict: the dict to save
        """

        with open(filepath, 'wb') as file:
            dill.dump(dict, file)

    @staticmethod
    def merge_dicts(d0: dict, d1: dict):
        """
        Merge two dicts without modifying either of them.
        :param d0: first dict; its entries win when a key is present in both
        :param d1: second dict; only keys missing from d0 are taken over
        :return: a copy of d0 extended by the entries that exist only in d1
        """

        merged = d0.copy()
        for key, value in d1.items():
            # setdefault keeps the existing entry of d0 and only adds missing keys.
            merged.setdefault(key, value)

        return merged

# Test

In [None]:
import ipytest
import pytest
import sys
from unittest import mock

# Set up ipytest with its default configuration so the `%%ipytest` cells below can run.
ipytest.autoconfig()

## QTableLearner

### PyTest

In [None]:
%%ipytest
# Build a QPlayer whose learner continues from a q-table stored on disk.
# get_dict_from_file returns None for a missing or empty file, in which case
# QTableLearner creates a fresh table.
# NOTE(review): `filepath` is not defined in the visible part of this notebook - it must be
# assigned before this cell runs, otherwise a NameError is raised.
player1 = QPlayer('x', QTableLearner(q_table=QUtils.get_dict_from_file(filepath)))

### Mock