Run this cell if the sprites are not visible (it sets the working directory to the `distributed_project` root)

In [None]:
from pathlib import Path
import sys
import os

# Walk upward from the current directory (inclusive) until a folder named
# "distributed_project" is found, then make it the working directory.
current_path = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in [current_path, *current_path.parents]
        if candidate.name == "distributed_project"
    ),
    None,
)
if project_root is None:
    # Without this guard os.chdir(None) fails with an opaque TypeError.
    raise FileNotFoundError(
        f"No 'distributed_project' directory found at or above {current_path}"
    )
os.chdir(project_root)
print(f"Set /distributed_project as working directory: {project_root}")

## Training with sparse reward

In [2]:
%load_ext autoreload
%autoreload 2

from DQN import *
from myenv_5_sparse_reward import MyGridWorld

  from pkg_resources import resource_stream, resource_exists


In [3]:
# Hyperparameters for the sparse-reward experiment.
GRID_SIZE = 10
NUM_EPISODES = 5000
EPSILON_DECAY_RATE = 0.9995
MIN_EPSILON = 0.01

# Build the environment and read the space sizes reported for "agent1";
# the same sizes are used for every agent below.
env = MyGridWorld(grid_size=GRID_SIZE)
action_size = env.action_space("agent1").n
obs_shape = env.observation_space("agent1").shape[0]

print(f"Agents: {env.possible_agents}\nObservation space: {obs_shape}\nAction space: {action_size}")

# One DQNAgent instance per agent id, all created with identical settings.
agents = {}
for agent_id in env.possible_agents:
    agents[agent_id] = DQNAgent(
        obs_dim=obs_shape,
        action_dim=action_size,
        lr=0.001,
        gamma=0.9,
        epsilon=0.9,
    )

Agents: ['agent1', 'agent2']
Observation space: 7
Action space: 5


Train or load

In [None]:
# TRAINING
# Run the training routine with the hyperparameters from the setup cell.
trained_agents = train_agents(env, agents, NUM_EPISODES, EPSILON_DECAY_RATE, MIN_EPSILON)

# Save every agent under the name "<agent_id>_sparse".
for agent_id in env.possible_agents:
    checkpoint_name = f"{agent_id}_sparse"
    agents[agent_id].save(checkpoint_name)

In [None]:
# Alternative to the training cell: call load() on each agent built in the
# setup cell, using the name the training cell saves under.
for agent_id in env.possible_agents:
    agents[agent_id].load(f"{agent_id}_sparse")

# The test cell reads `trained_agents`; bind it here so the load path also
# works on a fresh kernel where the training cell was never executed.
# Presumably load() updates each agent in place - TODO confirm in DQN.
trained_agents = agents

Test

In [None]:
# TESTING
# Evaluate the agents for 100 episodes on a separate environment instance.
env_test = MyGridWorld(grid_size=GRID_SIZE)
try:
    test_agents(env_test, trained_agents, num_test_episodes=100)
finally:
    # Close both environments even if evaluation raises.
    env.close()
    env_test.close()


IQ TEST PHASE
Num testing episodes: 100
Test Ep. 1/100: Status=TRUNCATED, Reward=-304.00, Steps=100
Test Ep. 21/100: Status=TRUNCATED, Reward=-206.00, Steps=100
Test Ep. 41/100: Status=TRUNCATED, Reward=-200.00, Steps=100
Test Ep. 61/100: Status=TRUNCATED, Reward=-200.00, Steps=100
Test Ep. 81/100: Status=TRUNCATED, Reward=-202.00, Steps=100
END OF TESTING PHASE

TEST METRICS
Success rate: 0.00%
Avg reward per episode: -256.64
Avg steps per episode: 100.00


## Dense reward, v1

In [None]:
%load_ext autoreload
%autoreload 2

from myenv_5_dense_reward1 import MyGridWorld
from DQN import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Hyperparameters for the dense-reward (v1) experiment.
GRID_SIZE = 13
NUM_EPISODES = 5000
EPSILON_DECAY_RATE = 0.9996
MIN_EPSILON = 0.01

# Build the environment and read the space sizes reported for "agent1";
# the same sizes are used for every agent below.
env = MyGridWorld(grid_size=GRID_SIZE)
action_size = env.action_space("agent1").n
obs_shape = env.observation_space("agent1").shape[0]

print(f"Agents: {env.possible_agents}\nObservation space: {obs_shape}\nAction space: {action_size}")

# One DQNAgent instance per agent id, all created with identical settings.
agents = {}
for agent_id in env.possible_agents:
    agents[agent_id] = DQNAgent(
        obs_dim=obs_shape,
        action_dim=action_size,
        lr=0.001,
        gamma=0.9,
        epsilon=1,
    )

Agents: ['agent1', 'agent2']
Observation space: 7
Action space: 5
IQ TRAINING PHASE
Num training episodes: 5000
Espilon decaying rate: 0.9996
Episode 0/5000, Epsilon: 1.000, Episode joint_total_reward: -1005
Episode 100/5000, Epsilon: 0.960, Episode joint_total_reward: -1226
Episode 200/5000, Epsilon: 0.923, Episode joint_total_reward: -817
Episode 300/5000, Epsilon: 0.887, Episode joint_total_reward: -1056
Episode 400/5000, Epsilon: 0.852, Episode joint_total_reward: -1095
Episode 500/5000, Epsilon: 0.818, Episode joint_total_reward: -618
Episode 600/5000, Epsilon: 0.786, Episode joint_total_reward: -702
Episode 700/5000, Epsilon: 0.755, Episode joint_total_reward: 4
Episode 800/5000, Epsilon: 0.726, Episode joint_total_reward: -620
Episode 900/5000, Epsilon: 0.697, Episode joint_total_reward: 21
Episode 1000/5000, Epsilon: 0.670, Episode joint_total_reward: 22
Episode 1100/5000, Epsilon: 0.644, Episode joint_total_reward: -497
Episode 1200/5000, Epsilon: 0.618, Episode joint_total_re

Train or load

In [None]:
# TRAINING
# Run the training routine with the hyperparameters from the setup cell.
trained_agents = train_agents(env, agents, NUM_EPISODES, EPSILON_DECAY_RATE, MIN_EPSILON)

# Save every agent under the name "<agent_id>_densev1".
for agent_id in env.possible_agents:
    checkpoint_name = f"{agent_id}_densev1"
    agents[agent_id].save(checkpoint_name)

In [None]:
# Alternative to the training cell: call load() on each agent built in the
# setup cell, using the name the training cell saves under.
for agent_id in env.possible_agents:
    agents[agent_id].load(f"{agent_id}_densev1")

# The test cell reads `trained_agents`; bind it here so the load path also
# works on a fresh kernel where the training cell was never executed.
# Presumably load() updates each agent in place - TODO confirm in DQN.
trained_agents = agents

Test

In [20]:
# TESTING
# Evaluate the agents for 100 episodes on a separate environment instance
# created with render_mode="human".
env_test = MyGridWorld(grid_size=GRID_SIZE, render_mode="human")
try:
    test_agents(env_test, trained_agents, num_test_episodes=100)
finally:
    # Close both environments even if evaluation raises.
    env.close()
    env_test.close()


IQ TEST PHASE
Num testing episodes: 100
Test Ep. 1/100: Status=SUCCESS, Reward=179.00, Steps=8
Test Ep. 21/100: Status=SUCCESS, Reward=185.00, Steps=9
Test Ep. 41/100: Status=SUCCESS, Reward=198.00, Steps=3
Test Ep. 61/100: Status=SUCCESS, Reward=158.00, Steps=11
Test Ep. 81/100: Status=SUCCESS, Reward=166.00, Steps=10
END OF TESTING PHASE

TEST METRICS
Success rate: 100.00%
Avg reward per episode: 180.38
Avg steps per episode: 7.90


## Dense reward, v2 with coherent reward function

**Version with grid 13×13**

In [5]:
%load_ext autoreload
%autoreload 2

from myenv_5_dense_reward2 import MyGridWorld
from DQN import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Hyperparameters for the dense-reward (v2) experiment on the 13x13 grid.
GRID_SIZE = 13
NUM_EPISODES = 2500
EPSILON_DECAY_RATE = 0.9996
MIN_EPSILON = 0.01

# Build the environment and read the space sizes reported for "agent1";
# the same sizes are used for every agent below.
env = MyGridWorld(grid_size=GRID_SIZE)
action_size = env.action_space("agent1").n
obs_shape = env.observation_space("agent1").shape[0]

# Label spelled "Agents" (was "Agnets") to match the earlier sections.
print(f"Agents: {env.possible_agents}\nObservation space: {obs_shape}\nAction space: {action_size}")

# One DQNAgent instance per agent id, all created with identical settings.
agents = {}
for agent_id in env.possible_agents:
    agents[agent_id] = DQNAgent(
        obs_dim=obs_shape,
        action_dim=action_size,
        lr=0.001,
        gamma=0.9,
        epsilon=1,
    )

Agnets: ['agent1', 'agent2']
Observation space: 7
Action space: 5


Train or load

In [7]:
# TRAINING
# Run the training routine with the hyperparameters from the setup cell.
trained_agents = train_agents(env, agents, NUM_EPISODES, EPSILON_DECAY_RATE, MIN_EPSILON)

# Save every agent under the name "<agent_id>_densev2".
for agent_id in env.possible_agents:
    checkpoint_name = f"{agent_id}_densev2"
    agents[agent_id].save(checkpoint_name)

IQ TRAINING PHASE
Num training episodes: 2500
Espilon decaying rate: 0.9996
Episode 0/2500, Epsilon: 1.000, Episode joint_total_reward: -63.30962558794381
Episode 100/2500, Epsilon: 0.960, Episode joint_total_reward: -59.70669941596052
Episode 200/2500, Epsilon: 0.923, Episode joint_total_reward: -40.4247505070384
Episode 300/2500, Epsilon: 0.887, Episode joint_total_reward: -49.00839023734399
Episode 400/2500, Epsilon: 0.852, Episode joint_total_reward: -41.72739739731466
Episode 500/2500, Epsilon: 0.818, Episode joint_total_reward: 199.43042787807866
Episode 600/2500, Epsilon: 0.786, Episode joint_total_reward: -35.08342325487675
Episode 700/2500, Epsilon: 0.755, Episode joint_total_reward: -37.536405365534236
Episode 800/2500, Epsilon: 0.726, Episode joint_total_reward: -19.96435480840395
Episode 900/2500, Epsilon: 0.697, Episode joint_total_reward: -34.54499767178958
Episode 1000/2500, Epsilon: 0.670, Episode joint_total_reward: -25.805962835460182
Episode 1100/2500, Epsilon: 0.644

In [None]:
# Alternative to the training cell: call load() on each agent built in the
# setup cell.
# NOTE(review): the training cell saves to "<agent_id>_densev2" but this
# loads "<agent_id>_densev2_2" - confirm which checkpoint is intended.
for agent_id in env.possible_agents:
    agents[agent_id].load(f"{agent_id}_densev2_2")

# The test cell reads `trained_agents`; bind it here so the load path also
# works on a fresh kernel where the training cell was never executed.
# Presumably load() updates each agent in place - TODO confirm in DQN.
trained_agents = agents

Test

In [8]:
# TESTING
# Evaluate the agents for 100 episodes on a separate environment instance
# created with render_mode="human".
env_test = MyGridWorld(grid_size=GRID_SIZE, render_mode="human")
try:
    test_agents(env_test, trained_agents, num_test_episodes=100)
finally:
    # Close both environments even if evaluation raises.
    env.close()
    env_test.close()


IQ TEST PHASE
Num testing episodes: 100
Test Ep. 1/100: Status=SUCCESS, Reward=199.02, Steps=5
Test Ep. 21/100: Status=SUCCESS, Reward=198.88, Steps=10
Test Ep. 41/100: Status=SUCCESS, Reward=196.93, Steps=10
Test Ep. 61/100: Status=SUCCESS, Reward=197.19, Steps=13
Test Ep. 81/100: Status=SUCCESS, Reward=197.03, Steps=11
END OF TESTING PHASE

TEST METRICS
Success rate: 100.00%
Avg reward per episode: 198.62
Avg steps per episode: 8.40


**Version with grid 15×15**

In [12]:
# Hyperparameters for the dense-reward (v2) experiment on the 15x15 grid.
GRID_SIZE = 15
NUM_EPISODES = 5000
EPSILON_DECAY_RATE = 0.9997
MIN_EPSILON = 0.01

# Build the environment and read the space sizes reported for "agent1";
# the same sizes are used for every agent below.
env = MyGridWorld(grid_size=GRID_SIZE)
action_size = env.action_space("agent1").n
obs_shape = env.observation_space("agent1").shape[0]

# Label spelled "Agents" (was "Agnets") to match the earlier sections.
print(f"Agents: {env.possible_agents}\nObservation space: {obs_shape}\nAction space: {action_size}")

# One DQNAgent instance per agent id, all created with identical settings.
agents = {}
for agent_id in env.possible_agents:
    agents[agent_id] = DQNAgent(
        obs_dim=obs_shape,
        action_dim=action_size,
        lr=0.001,
        gamma=0.9,
        epsilon=1,
    )

Agnets: ['agent1', 'agent2']
Observation space: 7
Action space: 5


Train or load

In [13]:
# TRAINING
# Run the training routine with the hyperparameters from the setup cell.
trained_agents = train_agents(env, agents, NUM_EPISODES, EPSILON_DECAY_RATE, MIN_EPSILON)

# Save every agent under the name "<agent_id>_densev2_1515".
for agent_id in env.possible_agents:
    checkpoint_name = f"{agent_id}_densev2_1515"
    agents[agent_id].save(checkpoint_name)

IQ TRAINING PHASE
Num training episodes: 5000
Espilon decaying rate: 0.9997
Episode 0/5000, Epsilon: 1.000, Episode joint_total_reward: -86.06103659210683
Episode 100/5000, Epsilon: 0.970, Episode joint_total_reward: -60.30122087323233
Episode 200/5000, Epsilon: 0.941, Episode joint_total_reward: -64.67133872604983
Episode 300/5000, Epsilon: 0.914, Episode joint_total_reward: -45.581371742268395
Episode 400/5000, Epsilon: 0.887, Episode joint_total_reward: -38.31011504814913
Episode 500/5000, Epsilon: 0.860, Episode joint_total_reward: -45.43268121644287
Episode 600/5000, Epsilon: 0.835, Episode joint_total_reward: -37.43825153838172
Episode 700/5000, Epsilon: 0.810, Episode joint_total_reward: -49.182498901777535
Episode 800/5000, Epsilon: 0.786, Episode joint_total_reward: -35.3108054410063
Episode 900/5000, Epsilon: 0.763, Episode joint_total_reward: -51.55158768297595
Episode 1000/5000, Epsilon: 0.741, Episode joint_total_reward: 191.01129968299873
Episode 1100/5000, Epsilon: 0.719

In [None]:
# Alternative to the training cell: call load() on each agent built in the
# setup cell.
# NOTE(review): the training cell saves to "<agent_id>_densev2_1515" but this
# loads "<agent_id>_densev2_1515_1" - confirm which checkpoint is intended.
for agent_id in env.possible_agents:
    agents[agent_id].load(f"{agent_id}_densev2_1515_1")

# The test cell reads `trained_agents`; bind it here so the load path also
# works on a fresh kernel where the training cell was never executed.
# Presumably load() updates each agent in place - TODO confirm in DQN.
trained_agents = agents

In [14]:
# TESTING
# Evaluate the agents for 100 episodes on a separate environment instance
# created with render_mode="human".
env_test = MyGridWorld(grid_size=GRID_SIZE, render_mode="human")
try:
    test_agents(env_test, trained_agents, num_test_episodes=100)
finally:
    # Close both environments even if evaluation raises.
    env.close()
    env_test.close()


IQ TEST PHASE
Num testing episodes: 100
Test Ep. 1/100: Status=SUCCESS, Reward=197.78, Steps=12
Test Ep. 21/100: Status=SUCCESS, Reward=195.76, Steps=13
Test Ep. 41/100: Status=SUCCESS, Reward=198.06, Steps=7
Test Ep. 61/100: Status=SUCCESS, Reward=197.23, Steps=10
Test Ep. 81/100: Status=SUCCESS, Reward=197.70, Steps=8
END OF TESTING PHASE

TEST METRICS
Success rate: 100.00%
Avg reward per episode: 197.30
Avg steps per episode: 10.34
