In [None]:
import torch
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# A small fully connected network used as the Q-function approximator.
class SimpleQNetwork(torch.nn.Module):
    """Feed-forward Q-network: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of outputs (one value per action).
        hidden_size: Width of both hidden layers. Defaults to 128.
    """

    def __init__(self, input_size, output_size, hidden_size=128):
        super().__init__()
        make_layer = torch.nn.Linear
        # The attribute names fc1/fc2/fc3 become the state_dict keys, so they
        # are kept exactly as they are.
        self.fc1 = make_layer(input_size, hidden_size)
        self.fc2 = make_layer(hidden_size, hidden_size)
        self.fc3 = make_layer(hidden_size, output_size)

    def forward(self, x):
        """Return the output of the last linear layer for input ``x``."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = hidden_layer(activations).relu()
        return self.fc3(activations)

# Load a trained model from disk.
def load_model(env_name, model_type, hidden_size):
    """Build a SimpleQNetwork sized for ``env_name`` and load its saved weights.

    A temporary environment is created only to read the sizes of its
    observation and action spaces (both are read through ``.n``), and is
    closed before the function returns.

    Args:
        env_name: Gymnasium environment id passed to ``gym.make``.
        model_type: Suffix of the checkpoint name. It is concatenated to
            ``env_name`` with no separator, e.g.
            ``trained_models/FrozenLake-v1CQL-DQN.pth``.
        hidden_size: Hidden-layer width; must match the saved checkpoint.

    Returns:
        The network with the checkpoint loaded, switched to evaluation mode.
    """
    env = gym.make(env_name)
    try:
        input_size = env.observation_space.n
        output_size = env.action_space.n
    finally:
        # Release the environment even if reading the space sizes fails.
        env.close()

    model = SimpleQNetwork(input_size, output_size, hidden_size)
    # map_location="cpu" lets checkpoints saved on a GPU load on CPU-only
    # machines.
    # NOTE(review): torch.load unpickles the file; only load checkpoints that
    # come from a trusted source.
    state_dict = torch.load(
        f"trained_models/{env_name}{model_type}.pth", map_location="cpu"
    )
    model.load_state_dict(state_dict)
    model.eval()  # Put the model in evaluation mode.
    return model

In [None]:
# Evaluate a model by running greedy episodes.
def evaluate_model(env, model, num_episodes=50, max_steps_per_episode=1000):
    """Run ``model`` greedily on ``env`` and summarise rewards and episode lengths.

    Each state returned by the environment is one-hot encoded to a
    ``(1, model.fc1.in_features)`` float tensor, and the action with the
    largest Q-value is taken. An episode ends when the environment reports
    termination or truncation, or after ``max_steps_per_episode`` steps.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(state, reward, terminated, truncated, info)``.
        model: Callable network exposing ``fc1.in_features``.
        num_episodes: Number of episodes to run.
        max_steps_per_episode: Hard cap on steps per episode.

    Returns:
        Tuple ``(avg_reward, std_reward, avg_steps, std_steps, total_rewards)``
        where ``total_rewards`` is the list of per-episode returns.
    """
    num_states = model.fc1.in_features  # loop-invariant, read once
    total_rewards = []
    total_steps = []

    for episode in range(num_episodes):
        print(f"Episódio {episode + 1}/{num_episodes}")
        state, _ = env.reset()
        done = False
        episode_reward = 0
        episode_steps = 0

        while not done and episode_steps < max_steps_per_episode:
            state_index = torch.tensor([state], dtype=torch.long)
            # Shape (1, num_states): a batch holding the single current state.
            state_tensor = torch.nn.functional.one_hot(
                state_index, num_classes=num_states
            ).float()
            with torch.no_grad():
                q_values = model(state_tensor)
                action = torch.argmax(q_values).item()
            state, reward, terminated, truncated, _ = env.step(action)
            # Truncation (e.g. a time limit) ends the episode too; stepping
            # past it would inflate the step and reward totals.
            done = terminated or truncated
            episode_reward += reward
            episode_steps += 1

        total_rewards.append(episode_reward)
        total_steps.append(episode_steps)

    avg_reward = np.mean(total_rewards)
    std_reward = np.std(total_rewards)
    avg_steps = np.mean(total_steps)
    std_steps = np.std(total_steps)

    return avg_reward, std_reward, avg_steps, std_steps, total_rewards

In [None]:
# Compare the two methods on one environment.
def compare_methods(env_name, hidden_size, num_episodes=50):
    """Evaluate the CQL-DQN and FQI-DQN checkpoints on ``env_name`` and report results.

    Loads both models with ``load_model``, evaluates each with
    ``evaluate_model`` on the same environment instance, prints the mean and
    standard deviation of rewards and steps, and plots per-episode rewards.

    Args:
        env_name: Gymnasium environment id passed to ``gym.make``.
        hidden_size: Hidden-layer width used to rebuild both networks.
        num_episodes: Number of evaluation episodes per model.

    Returns:
        None. Results are printed and plotted.
    """
    env = gym.make(env_name)
    try:
        # Load the CQL-DQN and FQI-DQN models.
        cql_model = load_model(env_name, "CQL-DQN", hidden_size)
        fqi_model = load_model(env_name, "FQI-DQN", hidden_size)

        # Evaluate the models.
        cql_avg_reward, cql_std_reward, cql_avg_steps, cql_std_steps, cql_rewards = evaluate_model(env, cql_model, num_episodes)
        fqi_avg_reward, fqi_std_reward, fqi_avg_steps, fqi_std_steps, fqi_rewards = evaluate_model(env, fqi_model, num_episodes)
    finally:
        # Always release the environment, including when loading or
        # evaluation raises.
        env.close()

    # Print the results.
    print(f"Ambiente: {env_name}")
    print(f"Método: CQL-DQN")
    print(f"  Recompensa média: {cql_avg_reward:.2f} ± {cql_std_reward:.2f}")
    print(f"  Passos médios por episódio: {cql_avg_steps:.2f} ± {cql_std_steps:.2f}")
    print(f"Método: FQI-DQN")
    print(f"  Recompensa média: {fqi_avg_reward:.2f} ± {fqi_std_reward:.2f}")
    print(f"  Passos médios por episódio: {fqi_avg_steps:.2f} ± {fqi_std_steps:.2f}")
    print("-" * 60)

    # Plot the per-episode rewards of both methods.
    plt.figure(figsize=(10, 6))
    plt.plot(cql_rewards, label="CQL-DQN", alpha=0.7)
    plt.plot(fqi_rewards, label="FQI-DQN", alpha=0.7)
    plt.xlabel("Episódio")
    plt.ylabel("Recompensa")
    plt.title(f"Comparação de Recompensas - {env_name}")
    plt.legend()
    plt.grid()
    plt.show()

# One-hot encode a batch of state indices.
def to_one_hot(state_tensor, num_states):
    """Return a ``(len(state_tensor), num_states)`` tensor with a 1 at each state index.

    Args:
        state_tensor: 1-D integer (long) tensor of state indices.
        num_states: Width of the encoding (number of columns).

    Returns:
        Tensor of zeros in the default dtype with a single 1 per row.
    """
    batch_size = state_tensor.size(0)
    column_index = state_tensor[:, None]  # one column index per row
    encoded = torch.zeros(batch_size, num_states)
    encoded.scatter_(1, column_index, 1)
    return encoded


In [None]:
# Compare the methods on every environment.
if __name__ == "__main__":
    # Environment id -> hidden-layer width passed to compare_methods.
    envs = list(
        {
            "FrozenLake-v1": 128,
            "Taxi-v3": 512,
            "CliffWalking-v0": 256,
        }.items()
    )

    for env_name, hidden_size in envs:
        compare_methods(env_name, hidden_size)