In [1]:
from simulator import *

seeds = [0, 101, 202, 303, 404, 505, 606, 707, 808, 909]

# Simple Maze Environment


In [2]:
simple_maze = """
||||||||||||||||||||||
|....................|
|..|||||.....|..|....|
|..|   |...||||||||..|
|..||| |........|....|
|.....| |||||||.|||..|
|.....|       |.|....|
|.....|       |......|
|||||||  ||||||......|
         |.......|...|
         |.......|||.|
         |...........|
         |||||||||||||
"""


def simple_env_generator(seed):
    return create_env(make_map(simple_maze, 5, premapped=False, start=(3, 6), stairs=(10, 10), seed=seed),
                      apple_reward=1,
                      penalty_time=-0.01)


simple_env = simple_env_generator(0)
simple_env.reset()
simple_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Complex Maze Environment

In [3]:
complex_maze = """
               |||||||||||||||||
 ||||||||||    |...............|
 |........|    |............|..|
 |........|    |............|..|
 ||||||...|    |..|||||..||||..|
      |...|    |..|   |........|
      |...|    |..|   |........|  ||||||
|||||||...||||||..|   |.....||||  |....|
|.................|   |.....|     |....|
|...||||....|..|..|   |.....|||||||....|
|....| |..|||||||.|   |................|
|....| |.......|..|   |................|
|....| |||||||.||.|   |....||||||||....|
|....|       |.|..|   |....|      |....|
|....|       |....|   |....||||   |....|
||||||  ||||||....|   |.......|   ||||||
        |......|..|   |.......|
        |......||.|   |.......|
        |.........|   |||||||||
        |||||||||||
"""


def complex_env_generator(seed):
    return create_env(make_map(complex_maze, 5, premapped=False, start=(3, 6), stairs=(37, 13), seed=seed),
                      apple_reward=1,
                      penalty_time=-0.01)


complex_env = complex_env_generator(0)
complex_env.reset()
complex_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Benchmarking Online Algorithms

The simulation code have been adapted to not rely on a fixed, fully observable map, but rather to generate the path dynamically based on the current state of the environment. This allows for more flexibility and adaptability in the pathfinding algorithms but comes with a performance cost due to the need to recompute the path at each step.

As the stairs won't be visible until the agent reaches the end of the maze, we will use a Frontier Search algorithm to decide the target position to explore the environment. Once again, we will use the same algorithms as in the offline benchmark to path to the chosen target, to see which one manage to find the stairs first and collect the most apples along the way.



## A* Star Online Algorithm

In [4]:
from algorithms_online import planner_online
from algorithms import a_star_apple
import numpy as np

In [5]:
param_grid = {
    'weight': np.linspace(0.2, 2, 5),
    'apple_bonus': np.linspace(0.5, 5, 5)
}

a_star_apple_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                                       online=True, planner_func=a_star_apple)

Benchmarking: 100%|██████████| 250/250 [03:19<00:00,  1.25it/s]


In [6]:
a_star_online_avg = a_star_apple_df.groupby(['weight', 'apple_bonus']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
a_star_online_avg

Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate
0,0.2,0.5,2.625,68.3,2.1,1.0
1,0.2,1.625,4.454,77.1,4.0,1.0
2,0.2,2.75,4.433,82.2,4.0,1.0
3,0.2,3.875,4.524,83.1,4.1,1.0
4,0.2,5.0,4.792,88.1,4.4,1.0
5,0.65,0.5,2.18,73.6,1.7,1.0
6,0.65,1.625,4.394,71.3,3.9,1.0
7,0.65,2.75,4.249,79.0,3.8,1.0
8,0.65,3.875,4.352,80.4,3.9,1.0
9,0.65,5.0,4.349,81.2,3.9,1.0


In [7]:
a_star_apple_df_complex = benchmark_simulation(complex_env_generator, planner_online, seeds, param_grid,
                                               online=True, planner_func=a_star_apple)
a_star_online_complex_avg = a_star_apple_df_complex.groupby(['weight', 'apple_bonus']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
a_star_online_complex_avg

Benchmarking: 100%|██████████| 250/250 [06:59<00:00,  1.68s/it]


Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate
0,0.2,0.5,1.391,127.1,1.4,1.0
1,0.2,1.625,2.986,134.3,3.1,1.0
2,0.2,2.75,3.159,139.0,3.3,1.0
3,0.2,3.875,2.788,116.8,2.7,1.0
4,0.2,5.0,3.328,135.9,3.4,1.0
5,0.65,0.5,1.177,141.0,1.3,1.0
6,0.65,1.625,2.394,125.7,2.4,1.0
7,0.65,2.75,2.936,123.7,2.9,1.0
8,0.65,3.875,2.787,125.2,2.8,1.0
9,0.65,5.0,2.634,122.9,2.6,1.0


## Monte Carlo Tree Search Online Algorithm

In [8]:
from MCTS import mcts

param_grid = {
    'C': np.linspace(1.0, 5.0, 5),
}

mcts_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                               online=True, planner_func=mcts)


Benchmarking: 100%|██████████| 50/50 [02:44<00:00,  3.30s/it]


In [9]:
mcts_online_avg = mcts_df.groupby(['C']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
mcts_online_avg

Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate
0,1.0,4.426,108.8,4.2,1.0
1,2.0,4.225,80.0,3.8,1.0
2,3.0,4.78,74.0,4.3,1.0
3,4.0,4.574,77.7,4.1,1.0
4,5.0,4.357,78.1,3.9,1.0


In [10]:
mcts_online_complex_df = benchmark_simulation(complex_env_generator, planner_online, seeds, param_grid,
                                              online=True, planner_func=mcts)
mcts_online_complex_avg = mcts_online_complex_df.groupby(['C']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
mcts_online_complex_avg

Benchmarking: 100%|██████████| 50/50 [06:32<00:00,  7.84s/it]


Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate
0,1.0,2.757,190.9,3.3,1.0
1,2.0,2.814,133.3,2.9,1.0
2,3.0,2.406,109.9,2.2,1.0
3,4.0,2.832,155.7,3.2,1.0
4,5.0,2.458,137.8,2.5,1.0


## Greedy Best First Search Online Algorithm

## Potential Fields Online Algorithm

In [11]:
from algorithms import potential_field_path

potential_field_df = benchmark_simulation(simple_env_generator, planner_online, seeds, {},
                                          online=True, planner_func=potential_field_path)

potential_field_online_avg = potential_field_df.groupby(['success']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
potential_field_online_avg

Benchmarking: 100%|██████████| 10/10 [00:13<00:00,  1.37s/it]


Unnamed: 0,success,avg_reward,avg_path_length,avg_apples,success_rate
0,True,4.067,129.9,4.1,1.0


## Beam Search Online Algorithm