In [None]:
from Decorators import debug, log, save_q_table
import numpy as np
import random

class QLearner:
    """
    Tabular Q-learning player.

    The Q-table is indexed first by the string form of a state (as produced by
    ``np.array_str``) and then by an action index/key, i.e. ``q_table[key][a]``.
    """

    def __init__(self, alpha=0.1, gamma=0.8, filename="qtabletest.pkl", env_action_space=0, actions=None, q_table=None):
        """
        :param alpha: learning rate of this q learner
        :param gamma: discount factor of this q learner
        :param filename: stored on the instance; not read by the methods of this class
        :param env_action_space: stored on the instance; not read by the methods of this class
        :param actions: sequence of selectable actions; defaults to a fresh empty list
        :param q_table: mapping from ``np.array_str(state)`` to per-action q values;
            must be supplied before ``update``/``select_move`` can use it
        """
        self.state = None
        self.next_state = None
        self.gamma = gamma
        self.alpha = alpha
        self.env_action_space = env_action_space
        self.filename = filename
        # A ``None`` sentinel replaces the former ``actions=[]`` default: a list
        # literal in the signature is created once and would be shared (and
        # mutated) across every instance built without an explicit ``actions``.
        self.actions = [] if actions is None else actions
        self.q_table = q_table

    #@save_q_table
    def update(self,s,s_next,a,r):
        """
        Apply one Q-learning update for the transition (s, a, r, s_next).

        The stored table is modified in place:
        ``Q[s][a] = (1 - alpha) * Q[s][a] + alpha * (r + gamma * max(Q[s_next]))``

        :param s: last state of the board (converted to a key with ``np.array_str``)
        :param s_next: new state of the board, including the new action
        :param a: chosen action, used directly as the index into ``Q[s]``
        :param r: reward for chosen action
        :return: the (same) q table object, with the new value written
        """
        state_key = np.array_str(s)
        next_key = np.array_str(s_next)

        q_table = self.q_table

        # Read the old estimate first, then the bootstrap target, so lookups
        # happen in the same order as before.
        old_value = q_table[state_key][a]
        td_target = r + self.gamma * np.max(q_table[next_key])
        q_table[state_key][a] = old_value * (1 - self.alpha) + self.alpha * td_target

        return q_table

    def select_move(self, s, theta=0.9):
        """
        Choose an action in state s with an epsilon-greedy rule.

        With probability ``theta`` a uniformly random action is returned
        (exploration); otherwise the action whose q value is largest for ``s``
        is returned (exploitation). ``s`` is also remembered in ``self.state``.

        :param s: state of the environment
        :param theta: exploration probability in [0, 1]; higher means more random moves
        :return: selected action (an element of ``self.actions``)
        """
        self.state = s

        if np.random.uniform(0, 1) > theta:
            # Exploit: pick the action at the position of the best q value.
            best_idx = np.argmax(self.q_table[np.array_str(s)])
            return self.actions[best_idx]

        # Explore: sample uniformly from the available actions.
        return random.choice(self.actions)
