In [1]:
import random
import numpy as np
from colorama import Fore, Style
from Sensor import add_breeze, add_glitter, add_stench

class GridWorld:
    """Randomly generated Wumpus-world board.

    The board is a ``grid_size`` x ``grid_size`` list of rows; every cell is a
    list of string markers.  The outermost ring of cells is a wall marked
    ``'~~~'``; the start cell ``(1, 1)`` is marked ``'Safe'`` and the gold sits
    at ``(4, 4)``.  ``'Pit'`` and ``'Wumpus'`` markers are scattered at random
    over the remaining interior cells.

    Attributes:
        grid_size: Side length of the board, walls included (6).
        grid: The board itself, indexed as ``grid[row][col]``.
        wumpus_location: ``(row, col)`` of the most recently placed Wumpus, or
            ``None`` when the current layout has none.  Several Wumpuses can be
            placed; only the last one is remembered here.
    """

    START = (1, 1)
    GOLD = (4, 4)
    HAZARD_CHANCE = 0.1  # threshold used for both the Pit and the Wumpus draw

    def __init__(self):
        """Allocate an empty board and immediately populate it."""
        self.grid_size = 6
        self.grid = [[[] for _ in range(self.grid_size)] for _ in range(self.grid_size)]
        self.wumpus_location = None
        self.setup_grid()

    def setup_grid(self):
        """(Re)build the board layout in place.

        Clears every cell, places the start and gold markers, rolls for a Pit
        or Wumpus on each remaining interior cell, and finally overwrites the
        border ring with ``['~~~']``.  May be called again to re-roll the
        layout; the row lists in ``self.grid`` are reused, only their cells are
        replaced.
        """
        size = self.grid_size

        # Forget the previous layout entirely.  Without resetting
        # wumpus_location a re-roll that places no Wumpus would keep reporting
        # the position from the old board.
        self.wumpus_location = None
        for row in self.grid:
            for j in range(size):
                row[j] = []

        start_x, start_y = self.START
        gold_x, gold_y = self.GOLD
        self.grid[start_x][start_y].append('Safe')
        self.grid[gold_x][gold_y].append('Gold')
        # Sensor helper imported from the Sensor module (not visible here);
        # presumably it tags cells around the gold with a percept marker.
        add_glitter(self.grid, gold_x, gold_y, size)

        # Interior cells only, visited row by row.  The order and number of
        # np.random.rand() calls is deliberate: one draw decides Pit, and only
        # when that fails a second, independent draw decides Wumpus.
        for i in range(1, size - 1):
            for j in range(1, size - 1):
                if (i, j) in (self.START, self.GOLD):
                    continue
                if np.random.rand() < self.HAZARD_CHANCE:
                    self.grid[i][j].append('Pit')
                    add_breeze(self.grid, i, j, size)
                elif np.random.rand() < self.HAZARD_CHANCE:
                    self.grid[i][j].append('Wumpus')
                    add_stench(self.grid, i, j, size)
                    self.wumpus_location = (i, j)

        # Border cells become walls.  This runs last and replaces the cell
        # list, so any marker written onto the border above is discarded.
        last = size - 1
        for i in range(size):
            for j in range(size):
                if i in (0, last) or j in (0, last):
                    self.grid[i][j] = ['~~~']

    def _format_cell(self, cell, is_agent, width):
        """Return the text for one cell, centred to ``width`` and colourised.

        Priority: agent position, empty cell, Wumpus, Pit, Gold, anything else.
        Centring is applied before the colour codes are added so the escape
        sequences do not count towards the width.
        """
        if is_agent:
            return Fore.GREEN + 'Agent'.center(width) + Style.RESET_ALL
        if not cell:
            return 'None'.center(width)

        text = ','.join(cell).center(width)
        for marker, colour in (('Wumpus', Fore.MAGENTA), ('Pit', Fore.RED), ('Gold', Fore.YELLOW)):
            if marker in cell:
                return colour + text + Style.RESET_ALL
        return text

    def print_grid(self, agent_x=None, agent_y=None, step=None):
        """Print the board as a bordered table.

        Args:
            agent_x: Row of the agent; that cell is drawn as ``Agent``.
            agent_y: Column of the agent.
            step: Optional step counter printed as a header line.
        """
        if step is not None:
            print(f"Step: {step}")

        cell_width = 14
        horizontal_line = "-" * (self.grid_size * (cell_width + 1) + 1)

        for i, row in enumerate(self.grid):
            print(horizontal_line)
            for j, cell in enumerate(row):
                here = agent_x == i and agent_y == j
                print("|" + self._format_cell(cell, here, cell_width), end="")
            print("|")
        print(horizontal_line + "\n")




In [2]:
class QLearningAgent:
    """Tabular Q-learning agent for the Wumpus grid world.

    The state is the agent's ``(row, col)`` cell, so the Q-table has shape
    ``(grid_size, grid_size, len(actions))``.  The remaining arrow count and
    whether a Wumpus is still alive are not part of the state.

    The ``world`` object passed to the methods only needs ``grid`` (rows of
    cells, each cell a list of string markers), ``grid_size`` and, for
    ``test_agent``, a ``print_grid(agent_x=, agent_y=, step=)`` method.

    Every episode (training or test) starts from the same conditions: the
    quiver is refilled and any Wumpus shot in an earlier episode is put back.
    The world is left in its end-of-episode state until the next episode
    begins.
    """

    ARROWS_PER_EPISODE = 2
    START_STATE = (1, 1)
    GOLD_STATE = (4, 4)

    # (row delta, column delta) for each movement action.
    _MOVES = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self, grid_size, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Create an agent with an all-zero Q-table.

        Args:
            grid_size: Side length of the board (walls included).
            alpha: Learning rate.
            gamma: Discount factor.
            epsilon: Probability of picking a uniformly random action.
        """
        self.grid_size = grid_size
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.actions = ['up', 'down', 'left', 'right', 'shoot_up', 'shoot_down', 'shoot_left', 'shoot_right']
        self.q_table = np.zeros((grid_size, grid_size, len(self.actions)))
        self.arrows = self.ARROWS_PER_EPISODE
        # (cell list, index the 'Wumpus' marker had) for every kill made since
        # the current episode began; used to undo the kills afterwards.
        self._slain = []

    def _begin_episode(self):
        """Restore the per-episode starting conditions.

        Refills the arrows and re-inserts every 'Wumpus' marker removed by
        ``shoot`` since the previous call, at its original position in the
        cell so the printed marker order is unchanged.
        """
        self.arrows = self.ARROWS_PER_EPISODE
        for cell, position in self._slain:
            if 'Wumpus' not in cell:
                cell.insert(position, 'Wumpus')
        self._slain.clear()

    def choose_action(self, state):
        """Pick an action name epsilon-greedily for ``state``.

        Ties in the greedy branch resolve to the first action in
        ``self.actions`` (``np.argmax`` behaviour).
        """
        if np.random.rand() < self.epsilon:
            return random.choice(self.actions)
        x, y = state
        return self.actions[np.argmax(self.q_table[x, y])]

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition just taken."""
        x, y = state
        x_next, y_next = next_state
        action_index = self.actions.index(action)
        best_next_value = np.max(self.q_table[x_next, y_next])
        td_target = reward + self.gamma * best_next_value
        td_error = td_target - self.q_table[x, y, action_index]
        self.q_table[x, y, action_index] += self.alpha * td_error

    def take_action(self, world, state, action, bump_count, wumpus_kill_count):
        """Execute ``action`` from ``state``.

        Returns:
            ``(next_state, reward, bump_count, wumpus_kill_count)`` with the
            two counters updated.  Walking into a wall leaves the agent in
            place, costs -0.04 and increments ``bump_count``.
        """
        if action.startswith('shoot'):
            next_state, reward, wumpus_kill_count = self.shoot(world, state, action, wumpus_kill_count)
            return next_state, reward, bump_count, wumpus_kill_count

        x, y = state
        dx, dy = self._MOVES.get(action, (0, 0))
        new_x, new_y = x + dx, y + dy

        # The outer ring of the board is wall: refuse the move and count a bump.
        last = self.grid_size - 1
        if new_x in (0, last) or new_y in (0, last):
            bump_count += 1
            return state, -0.04, bump_count, wumpus_kill_count

        reward = self.get_reward(world, new_x, new_y)
        return (new_x, new_y), reward, bump_count, wumpus_kill_count

    def _arrow_path(self, world, state, action):
        """Return the cells an arrow crosses, nearest first.

        The path starts on the agent's own cell.  Upward/leftward shots stop
        at index 1; downward/rightward shots run to the last row/column.
        """
        x, y = state
        if action == 'shoot_up':
            return [(i, y) for i in range(x, 0, -1)]
        if action == 'shoot_down':
            return [(i, y) for i in range(x, world.grid_size)]
        if action == 'shoot_left':
            return [(x, j) for j in range(y, 0, -1)]
        if action == 'shoot_right':
            return [(x, j) for j in range(y, world.grid_size)]
        return []

    def shoot(self, world, state, action, wumpus_kill_count):
        """Fire an arrow in the direction encoded in ``action``.

        The agent never moves.  With an empty quiver nothing happens and the
        reward is -0.5.  Otherwise one arrow is spent; the first Wumpus on the
        arrow's path is removed from the world (reward 1, kill count + 1), and
        a miss costs -0.5.

        Returns:
            ``(state, reward, wumpus_kill_count)``.
        """
        if self.arrows == 0:
            return state, -0.5, wumpus_kill_count

        self.arrows -= 1
        for i, j in self._arrow_path(world, state, action):
            cell = world.grid[i][j]
            if 'Wumpus' in cell:
                # Remember where the marker was so the next episode can
                # start with the Wumpus alive again.
                self._slain.append((cell, cell.index('Wumpus')))
                cell.remove('Wumpus')
                return state, 1, wumpus_kill_count + 1

        return state, -0.5, wumpus_kill_count

    def get_reward(self, world, x, y):
        """Reward for stepping onto ``(x, y)``: -1 hazard, 1 gold, else -0.04."""
        cell = world.grid[x][y]
        if 'Wumpus' in cell or 'Pit' in cell:
            return -1
        if 'Gold' in cell:
            return 1
        return -0.04

    def learn(self, world, episodes=1000):
        """Train for ``episodes`` episodes, printing a summary line for each.

        An episode ends when the agent reaches the gold cell or receives a
        reward of -1 (stepped on a Pit or Wumpus).
        """
        for episode in range(episodes):
            self._begin_episode()
            state = self.START_STATE
            total_reward = 0
            steps = 0
            bump_count = 0
            wumpus_kill_count = 0
            while True:
                action = self.choose_action(state)
                next_state, reward, bump_count, wumpus_kill_count = self.take_action(
                    world, state, action, bump_count, wumpus_kill_count)
                self.update_q_value(state, action, reward, next_state)
                state = next_state
                total_reward += reward
                steps += 1
                if state == self.GOLD_STATE or reward == -1:
                    break
            print(f"Episode {episode + 1}: Total Reward: {total_reward}, Steps: {steps}, Bumps: {bump_count}, Wumpus Kills: {wumpus_kill_count}")

    def test_agent(self, world):
        """Run one episode without learning, printing the board at each step.

        Actions are still chosen with ``choose_action`` (epsilon-greedy).  The
        episode starts like a training episode: full quiver, Wumpuses alive.
        """
        self._begin_episode()
        state = self.START_STATE
        step = 0
        total_reward = 0
        bump_count = 0
        wumpus_kill_count = 0
        while state != self.GOLD_STATE:
            world.print_grid(agent_x=state[0], agent_y=state[1], step=step)
            action = self.choose_action(state)
            next_state, reward, bump_count, wumpus_kill_count = self.take_action(
                world, state, action, bump_count, wumpus_kill_count)
            total_reward += reward
            state = next_state
            step += 1
            if reward == -1:
                print(f"Agent died at step {step}. Total Reward: {total_reward}, Bumps: {bump_count}, Wumpus Kills: {wumpus_kill_count}")
                return
        world.print_grid(agent_x=state[0], agent_y=state[1], step=step)
        print(f"Agent found the gold! Total Reward: {total_reward}, Steps: {step}, Bumps: {bump_count}, Wumpus Kills: {wumpus_kill_count}")


In [3]:
if __name__ == "__main__":
    # Board generation and the epsilon-greedy policy draw from both the
    # NumPy global RNG and the stdlib `random` module.  Seed both so that
    # Restart Kernel -> Run All reproduces the same board and training run.
    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)

    world = GridWorld()
    agent = QLearningAgent(world.grid_size)
    agent.learn(world)
    agent.test_agent(world)

Episode 1: Total Reward: -5.920000000000001, Steps: 32, Bumps: 7, Wumpus Kills: 0
Episode 2: Total Reward: -3.9800000000000004, Steps: 18, Bumps: 2, Wumpus Kills: 0
Episode 3: Total Reward: -5.5200000000000005, Steps: 22, Bumps: 3, Wumpus Kills: 0
Episode 4: Total Reward: -5.800000000000001, Steps: 29, Bumps: 6, Wumpus Kills: 0
Episode 5: Total Reward: -4.34, Steps: 31, Bumps: 6, Wumpus Kills: 0
Episode 6: Total Reward: -1.6600000000000006, Steps: 33, Bumps: 8, Wumpus Kills: 0
Episode 7: Total Reward: -1.44, Steps: 12, Bumps: 3, Wumpus Kills: 0
Episode 8: Total Reward: -1.24, Steps: 7, Bumps: 2, Wumpus Kills: 0
Episode 9: Total Reward: -2.6800000000000006, Steps: 23, Bumps: 3, Wumpus Kills: 1
Episode 10: Total Reward: -0.30000000000000027, Steps: 22, Bumps: 7, Wumpus Kills: 0
Episode 11: Total Reward: -1.2, Steps: 6, Bumps: 3, Wumpus Kills: 0
Episode 12: Total Reward: -0.18000000000000038, Steps: 19, Bumps: 2, Wumpus Kills: 0
Episode 13: Total Reward: -2.4400000000000013, Steps: 41, Bu