In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from implementation import *

### Testing the Grid World Simulation

In [None]:
# Smoke-test the environment: construct it, reset it, then take a handful of
# random actions while printing and rendering every transition.
# NOTE(review): the meaning of each constructor argument is defined in
# `implementation`; in particular, confirm that an empty `possible_monster_pos`
# together with `num_possible_monster_points=5` is the intended configuration.
env = DynamicGridWorldEnv(
    prize_spawn_prob=0.5,
    repair_shop_pos=None,
    monster_spawn_prob=0.7,
    num_possible_monster_points=5,
    possible_monster_pos=[],
)

obs = env.reset()
print("Initial Observation:", obs)

for _ in range(5):
    action = env.action_space.sample()  # Random action
    obs, reward, done, info = env.step(action)
    print(f"Action: {env.action_map[action]}, Observation: {obs}, Reward: {reward}")
    env.render()
    if done:
        # Stepping an environment after its episode has ended is undefined for
        # gym-style environments, so start a fresh episode before continuing.
        obs = env.reset()
        print("Episode finished, environment reset. Observation:", obs)

### Q-Learning in the Simulator

In [None]:
# Build the tabular Q-learning agent. The environment itself is not created
# here; the `env` object from the smoke-test cell above is reused for training.
# NOTE(review): `state_space=(5, 5, 5, 2)` and `action_space=4` presumably have
# to match the environment's observation layout and number of actions; confirm
# against `implementation`.
agent = QLearningAgent(
    state_space=(5, 5, 5, 2),
    action_space=4,
    lr=1e-3,
    gamma=0.9,
    epsilon=1,
    epsilon_decay=0.999,
    epsilon_min=0.001,
)

In [None]:
# Train the tabular agent on `env`; `train_agent` returns one reward entry per
# episode, which is plotted and averaged below.
episodes = 50000
rewards = train_agent(env, agent, episodes=episodes, max_steps=200)

# Learning curve: reward collected in each training episode.
plt.plot(rewards)
plt.title("Agent Performance Over Episodes")
plt.xlabel("Episodes")
plt.ylabel("Cumulative Reward")
plt.show()

# Average over the whole run, including the early, mostly exploratory episodes
# (epsilon starts at 1 for this agent).
mean_training_reward = sum(rewards) / len(rewards)
print("Mean reward per episode", mean_training_reward)

In [None]:
# Evaluate the trained tabular agent for a few episodes without rendering and
# report the average reward per evaluation episode.
test_rewards = test_agent(
    env,
    agent,
    episodes=10,
    max_steps=200,
    render=False,
)
mean_test_reward = sum(test_rewards) / len(test_rewards)
print("Mean reward per test episode", mean_test_reward)

### Approximate Q-Learning in the Simulator

In [None]:
# Settings shared by both agents in the comparison. The environment is not
# re-created here; the existing `env` is reused.
# Note: `episodes` is rebound here and replaces the value set in the earlier
# training cell.
episodes = 10000
max_steps = 200
lr = 1e-3
gamma = 0.9
epsilon = 1
epsilon_decay = 0.999
epsilon_min = 0.001

# Both agents receive exactly the same learning hyperparameters so that the
# comparison differs only in how Q-values are represented.
shared_hyperparams = dict(
    lr=lr,
    gamma=gamma,
    epsilon=epsilon,
    epsilon_decay=epsilon_decay,
    epsilon_min=epsilon_min,
)

# Approximate Q-learning agent, built from the feature extractor and the env.
approx_agent = ApproxQLearningAgent(
    feature_extractor,
    env,
    action_space=4,
    **shared_hyperparams,
)

# Tabular Q-learning agent used as the baseline for the comparison.
standard_agent = QLearningAgent(
    state_space=(5, 5, 5, 2),
    action_space=4,
    **shared_hyperparams,
)

In [None]:
# Train the approximate agent and then the tabular agent, one after the other,
# on the same `env` instance with the same episode budget and step cap.
approx_rewards = train_approx_agent(env, approx_agent, episodes, max_steps=max_steps)
standard_rewards = train_agent(env, standard_agent, episodes, max_steps=max_steps)

# Overlay both per-episode reward curves on one figure for comparison.
plt.plot(approx_rewards, label="Approximate Q-Learning")
plt.plot(standard_rewards, label="Standard Q-Learning")
plt.xlabel("Episodes")
plt.ylabel("Cumulative Reward")
plt.title("Performance Comparison")
plt.legend()
plt.show()


print("Mean reward per episode - Approximate method:", sum(approx_rewards)/len(approx_rewards))

print("Mean reward per episode - Standard method:", sum(standard_rewards)/len(standard_rewards))

# Evaluate both trained agents without rendering. The step cap reuses the
# `max_steps` setting (instead of a second hard-coded 200) so the evaluation
# horizon always matches the one used for training.
# The misspelled name `test_aprox_reward` is kept because later cells may read it.
test_aprox_reward = test_approx_agent(env, approx_agent, episodes=10, max_steps=max_steps, render=False)
print("Mean reward per test episode - Approximate method:", sum(test_aprox_reward)/len(test_aprox_reward))

test_standard_reward = test_agent(env, standard_agent, episodes=10, max_steps=max_steps, render=False)
print("Mean reward per test episode - Standard method:", sum(test_standard_reward)/len(test_standard_reward))