<a href="https://colab.research.google.com/github/JSJeong-me/AI-Innovation-2024/blob/main/RL/6-3-Taxi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gym
import numpy as np
import random
from IPython.display import clear_output

# Load the Taxi-v3 environment. `.env` takes the environment held inside the
# object that gym.make() returns.
# NOTE(review): presumably this drops the outer step-limit wrapper so episodes
# end only when the environment itself reports done — confirm for the
# installed gym version.
env = gym.make("Taxi-v3").env

# Q-table initialised to zeros: one row per discrete state, one column per
# action.
q_table = np.zeros(shape=(env.observation_space.n, env.action_space.n))

# Hyperparameters.
alpha = 0.1    # learning rate
gamma = 0.6    # discount factor
epsilon = 0.1  # exploration probability

# Number of training episodes.
num_episodes = 10000

# Training process: tabular Q-learning driven by an epsilon-greedy policy.
for i in range(num_episodes):
    state = env.reset()
    epochs = 0
    penalties = 0
    reward = 0
    done = False

    while not done:
        # With probability epsilon take a random action (explore); otherwise
        # take the action with the highest Q-value for this state (exploit).
        explore = random.uniform(0, 1) < epsilon
        action = env.action_space.sample() if explore else np.argmax(q_table[state])

        next_state, reward, done, info = env.step(action)

        # Q-value update: blend the current estimate with the one-step target
        # (reward plus the discounted best value of the next state).
        current_q = q_table[state, action]
        best_next_q = np.max(q_table[next_state])
        target = reward + gamma * best_next_q
        q_table[state, action] = (1 - alpha) * current_q + alpha * target

        # A reward of -10 is counted as a penalty.
        if reward == -10:
            penalties += 1
        epochs += 1

        # Move on to the next state.
        state = next_state

    # Print progress every 1000 episodes, replacing the previous cell output.
    if i % 1000 == 0:
        clear_output(wait=True)
        print(f"에피소드: {i}")

print("학습 완료!")

# Performance evaluation: run the purely greedy policy (always the
# highest-valued action in q_table) and report the average episode length and
# the average number of penalties.
total_epochs, total_penalties = 0, 0
episodes = 100

# Safety cap on steps per evaluation episode. `env` is created with `.env`,
# and a greedy policy takes no random actions, so a Q-table that has not
# converged can cycle between states and never reach `done`. Without this cap
# the loop below would then hang forever.
# NOTE(review): 200 matches the step limit Taxi-v3 is normally registered
# with — confirm against the installed gym version.
max_steps_per_episode = 200
timed_out_episodes = 0

for _ in range(episodes):
    state = env.reset()
    epochs, penalties = 0, 0
    done = False

    while not done and epochs < max_steps_per_episode:
        action = np.argmax(q_table[state])
        state, reward, done, _info = env.step(action)

        # A reward of -10 is counted as a penalty.
        if reward == -10:
            penalties += 1

        epochs += 1

    # The loop exited because of the cap rather than because the episode ended.
    if not done:
        timed_out_episodes += 1

    total_penalties += penalties
    total_epochs += epochs

print(f"에피소드 {episodes} 동안 평균 시간 스텝: {total_epochs / episodes}")
print(f"에피소드 {episodes} 동안 평균 패널티: {total_penalties / episodes}")

# Surface capped episodes so the averages above are not misread as coming
# only from completed episodes.
if timed_out_episodes:
    print(
        f"경고: {timed_out_episodes}개의 에피소드가 "
        f"{max_steps_per_episode} 스텝 내에 종료되지 않아 중단되었습니다."
    )


에피소드: 9000
학습 완료!
