<a href="https://colab.research.google.com/github/BartonThomas/sem-3/blob/main/gridworld_para_gera_o_de_rotas_com_programa_o_din_mica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random

# Define actions
ACTIONS = ['up', 'down', 'left', 'right']

class Agent:
    def __init__(self, rows, cols):
        self.rows = rows
        self.cols = cols
        self.q_table = {(x, y): {action: 0 for action in ACTIONS} for x in range(rows) for y in range(cols)}
        self.epsilon = 0.1  # Exploration rate
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.position = (0, 0)  # Starting position

    def choose_action(self):
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(ACTIONS)
        else:
            return max(self.q_table[self.position], key=self.q_table[self.position].get)

    def move(self, action):
        x, y = self.position
        if action == 'up':
            x -= 1
        elif action == 'down':
            x += 1
        elif action == 'left':
            y -= 1
        elif action == 'right':
            y += 1

        # Check if the move is valid
        if 0 <= x < self.rows and 0 <= y < self.cols:
            if grid[x][y] == '|':  # If there's a mountain
                reward = -1
            elif grid[x][y] == '*':  # If there's sand (agent dies)
                reward = -10
                x, y = 0, 0  # Reset position
            else:
                reward = 0
            self.position = (x, y)
        else:
            reward = -1  # Move not allowed
        return reward

    def update_q_table(self, action, reward, next_position):
        x, y = self.position
        next_x, next_y = next_position
        max_next_q = max(self.q_table[next_position].values())
        self.q_table[x, y][action] += self.alpha * (reward + self.gamma * max_next_q - self.q_table[x, y][action])

    def train(self, episodes):
        for _ in range(episodes):
            self.position = (0, 0)  # Reset position for each episode
            while True:
                action = self.choose_action()
                reward = self.move(action)
                if reward == -10:  # If the agent dies, end episode
                    break
                self.update_q_table(action, reward, self.position)

# Define grid
rows = 8
cols = 8
grid = [['.' for _ in range(cols)] for _ in range(rows)]
num_sand = 2
num_mountains = 4
for _ in range(num_sand):
    x = random.randint(0, rows - 1)
    y = random.randint(0, cols - 1)
    grid[x][y] = '*'
for _ in range(num_mountains):
    x = random.randint(0, rows - 1)
    y = random.randint(0, cols - 1)
    grid[x][y] = '|'

# Initialize and train agent
agent = Agent(rows, cols)
agent.train(episodes=100000)

for position, actions in agent.q_table.items():
    print(f"Posição: {position}, Q-values: {actions}")


In [11]:
def navigate_grid(agent, grid):
    current_position = (0, 0)
    rows = len(grid)
    cols = len(grid[0])

    print("Grid inicial:")
    print_grid(grid)

    while True:
        action = agent.choose_action()
        reward = agent.move(action)
        x, y = agent.position
        if reward == -10:
            print("Agente morto.")
            agent.position = (0, 0)
            current_position = (0, 0)
            continue
        elif reward == -1:
            continue

        print("\nPosição do agente:")
        for i in range(rows):
            for j in range(cols):
                if (i, j) == (x, y):
                    print('X', end=' ')
                else:
                    print(grid[i][j], end=' ')
            print()

        current_position = (x, y)

        # Check if the agent reached the end of the grid
        if current_position == (rows - 1, cols - 1):
            print("Agente atingiu o final!")
            break

# Call the function to navigate the grid
navigate_grid(agent, grid)


Grid inicial:


NameError: name 'print_grid' is not defined