# Q学習用クラス
## Q data
`Q[x座標][y座標]["上","下","右","左","B"]`

`"上","下","右","左"`->Q値

`"B"`->その時点での最大Q値の方向("上","下","右","左", "")。コード上では `Action.BEST` キーに、方向を表す `Action` の値(UP=1, DOWN=2, LEFT=4, RIGHT=8)のビット和として保持し、0 は未設定を表す

QData側では壁と道の区別はしない

In [3]:
from abc import ABCMeta, abstractmethod
import random
from enum import Enum

In [22]:
# 行動
class Action(Enum):
    """Moves available to the agent, plus the ``BEST`` bookkeeping key.

    The four directions have distinct power-of-two values, so several of
    them can be OR-ed together into one integer bitmask.
    """

    UP = 1
    DOWN = 2
    LEFT = 4
    RIGHT = 8
    BEST = 64  # not a move: key of the "best direction" entry in a Q cell

    @staticmethod
    def action_to_transition(action):
        """Return the ``(dx, dy)`` grid offset produced by *action*.

        ``UP`` decreases ``y`` and ``DOWN`` increases it, i.e. ``y`` grows
        downwards.

        Raises:
            ValueError: if *action* is not one of the four directions
                (for example ``Action.BEST``).
        """
        # Built inside the method on purpose: a dict assigned in an Enum
        # class body would itself be turned into an enum member.
        transitions = {
            Action.UP: (0, -1),
            Action.DOWN: (0, 1),
            Action.LEFT: (-1, 0),
            Action.RIGHT: (1, 0),
        }
        try:
            return transitions[action]
        except (KeyError, TypeError):
            # Previously this fell through every branch and failed with an
            # UnboundLocalError on ``dx``; report the real problem instead.
            raise ValueError(
                "{!r} is not a directional action".format(action)
            ) from None

In [24]:
#Q値更新
class QUpdate(metaclass=ABCMeta):
    """Abstract interface for a Q-value update rule."""

    @abstractmethod
    def update_QData(self, parameter):
        """Update the Q data from *parameter*; subclasses must override this.

        The contents of *parameter* are not defined by this base class.
        The base implementation does nothing and returns ``None``.
        """
        return None
    
class QLearning(QUpdate):
    """Q-learning variant of ``QUpdate``; the update itself is not implemented yet."""

    def update_QData(self, parameter):
        """Placeholder: accept *parameter*, do nothing and return ``None``."""
        return None

In [15]:
# 行動選択方法
class ActionSelect(metaclass=ABCMeta):
    """Abstract interface for an action-selection strategy."""

    @abstractmethod
    def get_next_state(self, qData):
        """Choose an action from *qData*; subclasses must override this.

        The base implementation does nothing and returns ``None``.
        """
        return None
    
class EpGreedy(ActionSelect):
    """Epsilon-greedy action selection.

    A uniformly random direction is returned when the cell has no recorded
    best direction or when a random draw falls at or below ``epsilon``;
    otherwise one of the recorded best directions is returned.
    """

    def __init__(self, epsilon):
        """Store *epsilon*, the threshold compared against ``random.random()``."""
        self.epsilon = epsilon

    def set_epsilon(self, epsilon):
        """Replace the exploration threshold."""
        self.epsilon = epsilon

    def get_next_state(self, qData):
        """Pick the next action for one cell.

        Args:
            qData: mapping for a single cell. Only ``qData[Action.BEST]`` is
                read; it is treated as an integer bitmask built from the
                values of the directional ``Action`` members, with 0
                meaning "nothing recorded".

        Returns:
            One of ``Action.UP``, ``Action.DOWN``, ``Action.LEFT`` or
            ``Action.RIGHT``.
        """
        # Only real moves are candidates. Drawing from list(Action) could
        # return Action.BEST, which has no grid transition.
        directions = (Action.UP, Action.DOWN, Action.LEFT, Action.RIGHT)

        best = qData[Action.BEST]
        if best == 0 or random.random() <= self.epsilon:
            return random.choice(directions)

        # Test every bit independently so tied best directions are all
        # collected; an if/elif chain would keep only the first match.
        candidates = [d for d in directions if best & d.value]
        if not candidates:
            # Mask carries no direction bit: explore rather than crash.
            return random.choice(directions)
        return random.choice(candidates)

In [23]:
class ReinforcementLearning:
    """Q table for a ``width`` x ``height`` grid plus the policy used to move in it.

    ``qData[x][y]`` is a dict holding one Q value per direction
    (``Action.UP``/``DOWN``/``RIGHT``/``LEFT``) and an ``Action.BEST`` entry,
    all initialised to 0.
    """

    PATH = 0  # path cell
    WALL = 1  # wall cell

    def __init__(self, width, height, rewardData, actionSelect):
        """Create the learner with an all-zero Q table.

        Args:
            width: number of cells along x.
            height: number of cells along y.
            rewardData: reward table, indexed as ``rewardData[x][y]``.
            actionSelect: object whose ``get_next_state(cell)`` returns the
                ``Action`` to take for a single ``qData`` cell.
        """
        self.width = width
        self.height = height
        self.rewardData = rewardData
        self.actionSelect = actionSelect
        # Every cell gets its own dict so updates never alias each other.
        self.qData = [
            [
                {
                    Action.UP: 0,
                    Action.DOWN: 0,
                    Action.RIGHT: 0,
                    Action.LEFT: 0,
                    Action.BEST: 0,
                }
                for _ in range(self.height)
            ]
            for _ in range(self.width)
        ]

    def get_next_state(self, x, y):
        """Return the ``(x, y)`` reached from the given cell by one selected move.

        Actions are requested from ``self.actionSelect`` until one of them
        lands inside the grid; moves that would leave the grid are
        discarded and a new action is drawn.
        """
        while True:
            action = self.actionSelect.get_next_state(self.qData[x][y])
            dx, dy = Action.action_to_transition(action)
            next_x, next_y = x + dx, y + dy
            if self.is_in_maze(next_x, next_y):
                return next_x, next_y

    def update_QData(self, parameter):
        """Placeholder for the Q-value update; currently does nothing."""
        pass

    def is_in_maze(self, x, y):
        """Return ``True`` when ``(x, y)`` is a valid index into ``qData``.

        Valid coordinates satisfy ``0 <= x < width`` and ``0 <= y < height``;
        ``x == width`` or ``y == height`` is already outside the table.
        """
        return 0 <= x < self.width and 0 <= y < self.height
        