In [20]:
import random
from enum import Enum
import numpy as np

#状態を表すクラス
class State():
    def __init__(self, row=-1, column=-1):
        self.column = column
        self.row = row

    #状態の表現
    def __repr__(self):
        return "<State:[{}, {}]>".format(self.row, self.column)
    #クローン生成
    def clone(self):
        return State(self.row, self.column)
    #ハッシュ型のクローン?
    def __hash__(self):
        return hash((self.row, self.column))

    #同値判定
    def __eq__(self, other):
        return self.row == other.row and self.column == other.column

#行動の定義
class Action(Enum):
    UP = 1
    DOWN = -1
    LEFT = 2
    RIGHT = -2
    STAY = 0

#Agent_Base
class Agent():
    def __init__(self, env):
        self.observer = Observer(env)
        #self.planner = planner
        self.actions = env.actions()

    def act(self, state): #TODO 行動確率を返したい．行動を返す関数 & Plannerに行動選択も委託しよう
        a = random.choice(self.actions)
        return a

    def learn(self): #TODO 学習方法について(引数には必要な要素) & Plannerに役割を分散できるか．
        pass
        
#環境の情報取得のためのクラス
class Observer():

    def __init__(self, env):
        self.env = env

    def get_state(self): #状態観測
        return self.env.agent_state

    def reset(self): 
        #TODO
        pass

    def transform(self, state): #観測情報扱いやすい形に変換する
        #TODO
        pass

class Planner():

    def __init__(self):
        pass
    
    def learn(self, state, action, reward): #学習方法
        pass

    def policy(self): #行動選択
        pass

class Logger(): #記録用クラス

    def __init__(self):
        pass

    def log_func(self):
        pass

In [21]:
#environment関連のクラス

#ライブラリimport
import numpy as np

class Maze():

    def __init__(self, grid):

        self.grid = grid
        self.agent_state = State()

        self.default_reward = -0.04
        self.collision_reward = -10 #マルチエージェントの時に使用
    
    @property
    def row_length(self):
        return len(self.grid)
    @property
    def column_length(self):
        return len(self.grid[0])

    def actions(self):
        return [Action.UP, Action.DOWN, Action.LEFT, Action.RIGHT, Action.STAY]

    #環境の初期化を行う
    def reset(self):
        row_len = self.row_length
        self.agent_state = State(row_len - 1, 0) #エージェントの位置を初期化 *とりまこれだけ
        return self.agent_state
        
    #遷移のための関数    return 遷移確率
    def transit_func(self, state, action):
        transition_probs = {}
        #動けないときは空の辞書
        if not self.can_action_at(state):
            return transition_probs

        for a in self.actions:
            prob = 0
            #選んだ行動を確実にとる
            if a == action:
                prob = 1
            
            next_state = self._move(state, a)
            if next_state not in transition_probs:
                transition_probs[next_state] = prob
            else:
                transition_probs[next_state] += prob
        return transition_probs
    
    #遷移を行う
    def transit(self, state, action): #遷移確率をエージェントから獲得する．
        transition_probs = self.transit_func(state, action)
        if len(transition_probs) == 0:
            return None, None, True

        next_states = []
        probs = []

        for s in transition_probs:
            next_states.append(s)
            probs.append(transition_probs[s])

        #おそらく選択行動がそのまま反映されるはず
        next_state = np.random.choice(next_states, p=probs)

        #報酬獲得と終了判定
        reward, done = self.reward_func(next_state)
        return next_state, reward, done
    
    #1 step turn
    def step(self, action):
        #TODO

    #行動可能かの判定
    def can_action_at(self, state):
        #現在空マスにいるならTrue
        if self.grid[state.row][state.column] == 0:
            return True
        else:
            return False

    def _move(self, state, action):

        if not self.can_action_at(state):
            raise Exception("Can't move from here")

        next_state = state.clone()

        #移動
        if action == Action.UP:
            next_state.row -= 1
        if action == Action.DOWN:
            next_state.row += 1
        if action == Action.LEFT:
            next_state.column -= 1
        if action == Action.RIGHT:
            next_state.column += 1
        
        #移動可能かのチェック 無理なら元に戻す
        #迷路外に出たか
        if not (0 <= next_state.row < self.row_length):
            next_state = state
        if not (0 <= next_state.column < self.column_length):
            next_state = state

        #壁にいるか
        if self.grid[next_state.row][next_state.column] == 9:
            next_state = state

        return next_state

    #報酬を与える関数
    def reward_func(self, state):
        reward = self.default_reward
        done = False

        attribute = self.grid[state.row][state.column]

        if attribute == 1:
            reward = 10
            done = True
        elif attribute == -1:
            reward = -10
            done = True

        return reward, done

In [22]:
#Training全体を受け持つクラス


class Trainer():

    def __init__(self, agent, env, episode=1):
        self.env = env
        self.agent = agent

        self.episode = episode

    def train(self):
        for i in range(self.episode):
            print("Episode {}: Agent gets {} reward.".format(i, self.one_episode()))

    def one_episode(self):
        state = self.env.reset()
        total_reward = 0
        done = False

        while not done:
            action = self.agent.act(state)
            next_state, reward, done = self.env.step(action)
            total_reward += reward
            state = next_state

        return total_reward 

    def learn(self):
        pass

    def log(self):
        pass


def main():
    #環境データ
     grid = [
         [0,0,0,1],
         [0,9,0,-1],
         [0,0,0,0]
     ]

     env = Maze(grid)
     agent = Agent(env)

     trainer = Trainer(agent, env)

     trainer.train()

if __name__=="__main__":
    main()


    

AttributeError: 'Maze' object has no attribute 'step'