# Q-Learning

In [1]:
import numpy as np
import random
from Q_env import Env
from collections import defaultdict

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy.

    Q-values live in ``q_table``, a dictionary keyed by ``str(state)``. Each
    entry is a list with one value per action; a row of zeros is created the
    first time a state key is accessed.
    """

    def __init__(self, actions):
        """Set up hyperparameters and an empty Q-table.

        Args:
            actions: Sequence of actions the agent may choose from. Each
                action is used directly as an index into a state's Q-value
                row, so the actions are assumed to be ``0 .. len(actions) - 1``.
        """
        self.actions = actions
        self.step_size = 0.01
        self.discount_factor = 0.9
        self.epsilon = 0.1
        # Size every new row from the action set rather than assuming exactly
        # four actions; for a 4-action environment this is unchanged.
        n_actions = len(actions)
        self.q_table = defaultdict(lambda: [0.0] * n_actions)

    def learn(self, state, action, reward, next_state):
        """Update Q(state, action) from one <s, a, r, s'> sample.

        Args:
            state: State in which the action was taken (keyed by ``str``).
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: State reached after the action (keyed by ``str``).
        """
        state, next_state = str(state), str(next_state)
        q_1 = self.q_table[state][action]
        # Bellman optimality target: reward plus the discounted best
        # Q-value available in the next state.
        q_2 = reward + self.discount_factor * max(self.q_table[next_state])
        self.q_table[state][action] += self.step_size * (q_2 - q_1)

    def get_action(self, state):
        """Choose an action for ``state`` with the epsilon-greedy policy.

        With probability ``epsilon`` a random action is drawn from
        ``self.actions``; otherwise an action with the highest Q-value for
        the state is returned (ties are resolved by ``arg_max``).
        """
        if np.random.rand() < self.epsilon:
            # Explore: pick any action uniformly at random.
            action = np.random.choice(self.actions)
        else:
            # Exploit: act greedily with respect to the current Q-values.
            state = str(state)
            q_list = self.q_table[state]
            action = arg_max(q_list)
        return action


# Return the best action according to the given Q-values.
def arg_max(q_list):
    """Return the index of the largest value in ``q_list``.

    When several entries share the maximum, one of their indices is picked
    uniformly at random with ``random.choice``.
    """
    values = np.asarray(q_list)
    is_best = values == values.max()
    candidates = np.flatnonzero(is_best).tolist()
    return random.choice(candidates)


if __name__ == "__main__":
    env = Env()
    agent = QLearningAgent(actions=list(range(env.n_actions)))

    for episode in range(1000):
        # Reset the game environment and take the state it returns as the
        # starting state of this episode.
        state = env.reset()
        done = False

        while not done:
            env.render()

            # Pick an action for the current state, apply it, and receive
            # the next state, the reward and the episode-finished flag.
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)

            # Update the Q-function with the <s, a, r, s'> sample, then
            # advance to the next state.
            agent.learn(state, action, reward, next_state)
            state = next_state

            # Show all Q-values on screen.
            env.print_value_all(agent.q_table)

        # Episode finished: refresh the environment's label and report
        # the 1-based episode number.
        env.label_update()
        print(f'episode : {episode + 1}번째')

episode : 1번째
episode : 2번째
episode : 3번째
episode : 4번째
episode : 5번째
episode : 6번째
episode : 7번째
episode : 8번째
episode : 9번째
episode : 10번째
episode : 11번째
episode : 12번째
episode : 13번째
episode : 14번째
episode : 15번째
episode : 16번째
episode : 17번째
episode : 18번째
episode : 19번째
episode : 20번째
episode : 21번째
episode : 22번째
episode : 23번째
episode : 24번째
episode : 25번째
episode : 26번째
episode : 27번째
episode : 28번째
episode : 29번째
episode : 30번째
episode : 31번째
episode : 32번째
episode : 33번째
episode : 34번째
episode : 35번째
episode : 36번째
episode : 37번째
episode : 38번째
episode : 39번째
episode : 40번째
episode : 41번째
episode : 42번째
episode : 43번째
episode : 44번째
episode : 45번째
episode : 46번째
episode : 47번째
episode : 48번째
episode : 49번째
episode : 50번째
episode : 51번째
episode : 52번째
episode : 53번째
episode : 54번째
episode : 55번째
episode : 56번째
episode : 57번째
episode : 58번째
episode : 59번째
episode : 60번째
episode : 61번째
episode : 62번째
episode : 63번째
episode : 64번째
episode : 65번째
episode : 66번째
episode : 67번째
epis

TclError: invalid command name ".!canvas"