In [1]:
# Cell 1: Imports
import numpy as np
import torch
import matplotlib.pyplot as plt
from env import TreasureGuardianEnv
from maddpg import MADDPG

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)




Using device: cpu


In [2]:
env = TreasureGuardianEnv()
print("Observation Space:", env.observation_space)
print("Observation Space Type:", type(env.observation_space))


Observation Space: Dict('guardian': Box(0, 9, (2,), int32), 'keys': Box(0, 9, (3, 2), int32), 'pits': Box(0, 9, (2, 2), int32), 'treasure': Box(0, 9, (2,), int32), 'villains': Box(0, 9, (1, 2), int32), 'walls': Box(0, 9, (15, 2), int32))
Observation Space Type: <class 'gymnasium.spaces.dict.Dict'>


In [3]:
env = TreasureGuardianEnv()
num_agents = 1 + env.num_villains  # Guardian + Villains
maddpg = MADDPG(env=env, num_agents=num_agents)
print("MADDPG initialized successfully!")


MADDPG initialized successfully!


In [4]:
print("Observation Space Keys:", env.observation_space.spaces.keys())


Observation Space Keys: dict_keys(['guardian', 'keys', 'pits', 'treasure', 'villains', 'walls'])


In [6]:
# Cell 3: Training Loop

n_episodes = 3000
max_steps = 100
reward_log = []

for ep in range(1, n_episodes + 1):
    obs = env.reset()
    total_reward = np.zeros(num_agents)
    done = [False] * num_agents

    for step in range(max_steps):
        actions = maddpg.act(obs)
        next_obs, rewards, done, _ = env.step(actions)
        maddpg.step(obs, actions, rewards, next_obs, done)

        obs = next_obs
        total_reward += np.array(rewards)

        if any(done):
            break

    reward_log.append(total_reward)

    if ep % 100 == 0:
        avg_rewards = np.mean(reward_log[-100:], axis=0)
        print(f"Episode {ep} - Avg Reward: {avg_rewards}")


AttributeError: 'MADDPG' object has no attribute 'act'

In [None]:
# Cell 4: Plot Rewards

reward_log = np.array(reward_log)
plt.figure(figsize=(12, 6))
for i in range(num_agents):
    plt.plot(reward_log[:, i], label=f"Agent {i}")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("Training Rewards per Agent")
plt.legend()
plt.grid()
plt.show()


In [None]:
# Cell 5: Save Trained Models
maddpg.save("maddpg_models/")
print("Models saved to 'maddpg_models/'")
