In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ql_core import Oriented2DGrid, QLAgent, train
from  utils import plot_rewards, visualize_policy

In [None]:
# Read the saved grid array from disk (presumably the obstacle map, since it is
# later handed to the environment as `obs_grid` -- confirm against ql_core).
grid_obs = np.load(file="grids/grid_relatorio.npy")

# Echo the raw array so the layout can be checked by eye before training.
print(grid_obs)

In [None]:
# 0. Reproducibility
# Seed NumPy's global RNG so a fresh Restart & Run All repeats the same run.
# NOTE(review): assumes ql_core draws its randomness from np.random -- if it
# uses the stdlib `random` module or its own Generator, seed that as well.
SEED = 42
np.random.seed(SEED)

# 1. Environment definition
# NOTE(review): the grid dimensions are hard-coded while the grid array itself
# is loaded from disk in an earlier cell; keep the two in sync if the file changes.
X_SIZE = 10
Y_SIZE = 10
GRID_SIZE = (X_SIZE, Y_SIZE)
START_STATE = (0, 0, 0)  # (x, y, psi)
GOAL_STATE = (X_SIZE-1, Y_SIZE-1, 0)  # (x, y, psi) -- last cell on both axes

print("Inicializando o ambiente...")
environment = Oriented2DGrid(
    grid_size=GRID_SIZE, start=START_STATE, goal=GOAL_STATE, actions_type="omni",
    # How each gain is signed/applied is decided inside Oriented2DGrid (not visible here).
    reward_gains={"goal": 100.0, "invalid": 100.0, "step": 1.0, "turn": 1.0, "nearby_obs": 20.0},
    obs_grid=grid_obs
)

# 2. Agent definition with its hyperparameters
print("Inicializando o agente Q-Learning...")
agent = QLAgent(
    # Table dimensions are taken from the environment so the two always agree.
    state_shape=environment.state_shape,
    n_actions=environment.n_actions,
    learning_rate=0.2,
    discount_factor=0.9,
    e_greedy_type="linear"
)

# 3. Training
print("Iniciando o treinamento do agente...")
# `train` returns a mapping; later cells read its "rewards_history" and
# "epsilon_history" entries.
data_backup = train(
    agent=agent,
    environment=environment,
    n_episodes=20000,
    verbose=True,
    verbose_interval=1000,
)
print("\nTreinamento concluído.")

In [None]:
# 4. Results visualisation
print("Gerando visualizações...")

# Reward history recorded during training.
plot_rewards(data_backup["rewards_history"])

# Exploration-rate (epsilon) history.
# Drawn on its own labelled axes instead of going through `plot_rewards`, whose
# titles/labels are presumably reward-specific and would mislabel this curve.
# NOTE(review): the x-axis label assumes one epsilon value per episode -- confirm.
fig, ax = plt.subplots()
ax.plot(data_backup["epsilon_history"])
ax.set(title="Taxa de Exploração (Epsilon)", xlabel="Episódio", ylabel="Epsilon")
plt.show()

# Map of the learned policy.
visualize_policy(agent, environment)