In [1]:
from simulator import *

# Fixed list of random seeds handed to every benchmark_simulation call in this
# notebook, so that all planners are compared on the same set of seeds.
# (Presumably one episode is run per seed and parameter combination -- the
# progress-bar totals are consistent with that; confirm in benchmark_simulation.)
seeds = [0, 101, 202, 303, 404, 505, 606, 707, 808, 909]

# Simple Maze Environment


In [2]:
# ASCII layout of the simple maze; it is handed to make_map inside
# simple_env_generator.
# Presumably '|' marks walls, '.' walkable floor and blanks are outside the
# level -- confirm against make_map. The whitespace is significant, do not
# re-indent.
simple_maze = """
||||||||||||||||||||||
|....................|
|..|||||.....|..|....|
|..|   |...||||||||..|
|..||| |........|....|
|.....| |||||||.|||..|
|.....|       |.|....|
|.....|       |......|
|||||||  ||||||......|
         |.......|...|
         |.......|||.|
         |...........|
         |||||||||||||
"""


def simple_env_generator(seed):
    """Create a new environment on the simple maze for the given seed.

    The map is built by ``make_map`` from ``simple_maze`` with
    ``premapped=False``, the start at ``(3, 6)`` and the stairs at
    ``(10, 10)``; ``seed`` is forwarded to ``make_map``. The result is wrapped
    by ``create_env`` with ``apple_reward=1`` and ``penalty_time=-0.01``.

    Args:
        seed: Seed forwarded unchanged to ``make_map``.

    Returns:
        Whatever ``create_env`` returns (used below via ``reset()`` and
        ``render()``).
    """
    maze_map = make_map(
        simple_maze,
        5,  # second positional argument of make_map; presumably the number of apples -- TODO confirm
        premapped=False,
        start=(3, 6),
        stairs=(10, 10),
        seed=seed,
    )
    return create_env(maze_map, apple_reward=1, penalty_time=-0.01)


# Smoke test: build the simple environment with seed 0, reset it and render
# its initial state.
simple_env = simple_env_generator(0)
simple_env.reset()
simple_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Complex Maze Environment

In [3]:
# ASCII layout of the complex maze; it is handed to make_map inside
# complex_env_generator.
# Presumably '|' marks walls, '.' walkable floor and blanks are outside the
# level -- confirm against make_map. The whitespace is significant, do not
# re-indent.
complex_maze = """
               |||||||||||||||||
 ||||||||||    |...............|
 |........|    |............|..|
 |........|    |............|..|
 ||||||...|    |..|||||..||||..|
      |...|    |..|   |........|
      |...|    |..|   |........|  ||||||
|||||||...||||||..|   |.....||||  |....|
|.................|   |.....|     |....|
|...||||....|..|..|   |.....|||||||....|
|....| |..|||||||.|   |................|
|....| |.......|..|   |................|
|....| |||||||.||.|   |....||||||||....|
|....|       |.|..|   |....|      |....|
|....|       |....|   |....||||   |....|
||||||  ||||||....|   |.......|   ||||||
        |......|..|   |.......|
        |......||.|   |.......|
        |.........|   |||||||||
        |||||||||||
"""


def complex_env_generator(seed):
    """Create a new environment on the complex maze for the given seed.

    The map is built by ``make_map`` from ``complex_maze`` with
    ``premapped=False``, the start at ``(3, 6)`` and the stairs at
    ``(37, 13)``; ``seed`` is forwarded to ``make_map``. The result is wrapped
    by ``create_env`` with ``apple_reward=1`` and ``penalty_time=-0.01``.

    NOTE(review): ``start=(3, 6)`` is the same value used for the simple maze.
    If coordinates are (column, row) counted from the first drawn row of
    ``complex_maze``, that cell looks blank rather than floor -- confirm how
    ``make_map`` interprets ``start`` for this layout.

    Args:
        seed: Seed forwarded unchanged to ``make_map``.

    Returns:
        Whatever ``create_env`` returns (used below via ``reset()`` and
        ``render()``).
    """
    maze_map = make_map(
        complex_maze,
        5,  # second positional argument of make_map; presumably the number of apples -- TODO confirm
        premapped=False,
        start=(3, 6),
        stairs=(37, 13),
        seed=seed,
    )
    return create_env(maze_map, apple_reward=1, penalty_time=-0.01)


# Smoke test: build the complex environment with seed 0, reset it and render
# its initial state.
complex_env = complex_env_generator(0)
complex_env.reset()
complex_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Benchmarking Online Algorithms

The simulation code has been adapted so that it no longer relies on a fixed, fully observable map; instead, it generates the path dynamically from the current state of the environment. This makes the pathfinding algorithms more flexible and adaptable, but comes with a performance cost because the path must be recomputed at each step.

As the stairs won't be visible until the agent reaches the end of the maze, we will use a Frontier Search algorithm to choose the target position while exploring the environment. Once again, we will use the same algorithms as in the offline benchmark to plan a path to the chosen target, to see which one manages to find the stairs first and collect the most apples along the way.



## A* Online Algorithm

In [4]:
from algorithms_online import planner_online
from algorithms import a_star_apple
import numpy as np

In [5]:
# A* hyper-parameter grid: 5 evenly spaced weights x 5 evenly spaced apple bonuses.
param_grid = dict(
    weight=np.linspace(0.2, 2, 5),
    apple_bonus=np.linspace(0.5, 5, 5),
)

# Benchmark the online planner with A* as path planner on the simple maze.
a_star_apple_df = benchmark_simulation(
    simple_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=a_star_apple,
)

Benchmarking: 100%|██████████| 250/250 [01:59<00:00,  2.09it/s]


In [6]:
# Average every metric over the runs sharing the same (weight, apple_bonus).
a_star_online_avg = (
    a_star_apple_df
    .groupby(['weight', 'apple_bonus'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
a_star_online_avg

Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,0.2,0.5,2.803,60.3,2.2,1.0,0.085602
1,0.2,1.625,4.471,76.0,4.0,1.0,0.086185
2,0.2,2.75,4.722,83.2,4.3,1.0,0.093518
3,0.2,3.875,4.72,86.6,4.3,1.0,0.090919
4,0.2,5.0,4.822,84.8,4.4,1.0,0.084903
5,0.65,0.5,2.097,61.7,1.5,1.0,0.083403
6,0.65,1.625,4.019,69.8,3.5,1.0,0.089653
7,0.65,2.75,4.749,80.2,4.3,1.0,0.093715
8,0.65,3.875,4.737,82.0,4.3,1.0,0.087287
9,0.65,5.0,4.638,82.8,4.2,1.0,0.085739


In [7]:
# Same A* benchmark on the complex maze; reuses the param_grid defined in the
# A* cell above.
a_star_apple_df_complex = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=a_star_apple,
)

# Average every metric over the runs sharing the same (weight, apple_bonus).
a_star_online_complex_avg = (
    a_star_apple_df_complex
    .groupby(['weight', 'apple_bonus'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
a_star_online_complex_avg

Benchmarking: 100%|██████████| 250/250 [03:26<00:00,  1.21it/s]


Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,0.2,0.5,1.834,90.1,1.5,1.0,0.197604
1,0.2,1.625,2.737,98.6,2.5,1.0,0.210494
2,0.2,2.75,3.684,139.2,3.8,1.0,0.235863
3,0.2,3.875,3.304,128.4,3.3,1.0,0.310052
4,0.2,5.0,3.17,135.6,3.2,1.0,0.248498
5,0.65,0.5,0.929,98.4,0.7,1.0,0.181776
6,0.65,1.625,1.87,70.1,1.4,1.0,0.142118
7,0.65,2.75,2.888,131.2,2.9,1.0,0.307098
8,0.65,3.875,2.639,98.9,2.4,1.0,0.223625
9,0.65,5.0,2.104,77.3,1.7,1.0,0.18256


## Monte Carlo Tree Search Online Algorithm

In [8]:
from MCTS import mcts

# MCTS hyper-parameter grid: five evenly spaced values of C between 1 and 5.
param_grid = dict(C=np.linspace(1.0, 5.0, 5))

# Benchmark the online planner with MCTS as path planner on the simple maze.
mcts_df = benchmark_simulation(
    simple_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=mcts,
)


Benchmarking: 100%|██████████| 50/50 [01:25<00:00,  1.71s/it]


In [9]:
# Average every metric over the runs sharing the same C.
mcts_online_avg = (
    mcts_df
    .groupby(['C'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
mcts_online_avg

Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,1.0,3.596,89.9,3.2,1.0,2.288402
1,2.0,4.145,79.0,3.7,1.0,1.287095
2,3.0,4.954,77.1,4.5,1.0,0.872716
3,4.0,3.626,71.0,3.1,1.0,1.035203
4,5.0,4.203,74.7,3.7,1.0,0.879985


In [10]:
# Same MCTS benchmark on the complex maze; reuses the param_grid (values of C)
# defined in the MCTS cell above.
mcts_online_complex_df = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=mcts,
)

# Average every metric over the runs sharing the same C.
mcts_online_complex_avg = (
    mcts_online_complex_df
    .groupby(['C'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
mcts_online_complex_avg

Benchmarking: 100%|██████████| 50/50 [03:28<00:00,  4.17s/it]


Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,1.0,2.168,153.0,2.4,1.0,4.827968
1,2.0,2.619,118.5,2.5,1.0,2.50301
2,3.0,2.729,219.1,3.7,1.0,5.188801
3,4.0,2.076,97.9,1.8,1.0,2.191206
4,5.0,2.277,118.7,2.2,1.0,2.475073


## Greedy Best First Search Online Algorithm

In [11]:
from utils import cached_bfs, manhattan_distance
from algorithms import greedy_best_first_search

# Greedy best-first search is benchmarked once per heuristic function.
param_grid = dict(heuristic=[cached_bfs, manhattan_distance])

# Benchmark the online planner with greedy best-first search on the simple maze.
simple_greedy_df = benchmark_simulation(
    simple_env_generator,
    planner_online,
    seeds,
    param_grid,
    planner_func=greedy_best_first_search,
    online=True,
)

Benchmarking: 100%|██████████| 20/20 [00:10<00:00,  1.97it/s]


In [12]:
# Replace each heuristic function object by its name so the column can be
# grouped on and read easily. Values that are not callable (e.g. names already
# converted by a previous run of this cell) are passed through str() unchanged,
# which keeps the cell safe to re-run.
simple_greedy_df['heuristic'] = simple_greedy_df['heuristic'].apply(
    lambda fn: fn.__name__ if callable(fn) else str(fn)
)

# Average every metric over the runs sharing the same heuristic.
simple_greedy_df_avg = (
    simple_greedy_df
    .groupby(['heuristic'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
simple_greedy_df_avg

Unnamed: 0,heuristic,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,cached_bfs,4.939,79.9,4.5,1.0,0.094018
1,manhattan_distance,5.351,81.9,4.9,1.0,0.079539


In [13]:
# Same greedy best-first benchmark on the complex maze; reuses the param_grid
# (heuristic functions) defined in the greedy cell above.
complex_greedy_df = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    planner_func=greedy_best_first_search,
    online=True,
)

# Replace each heuristic function object by its name; non-callable values are
# passed through str().
complex_greedy_df['heuristic'] = complex_greedy_df['heuristic'].apply(
    lambda fn: fn.__name__ if callable(fn) else str(fn)
)

# Average every metric over the runs sharing the same heuristic.
complex_greedy_df_avg = (
    complex_greedy_df
    .groupby(['heuristic'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
complex_greedy_df_avg

Benchmarking: 100%|██████████| 20/20 [00:21<00:00,  1.10s/it]


Unnamed: 0,heuristic,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,cached_bfs,3.911,139.7,4.0,1.0,0.356862
1,manhattan_distance,3.738,148.7,3.9,1.0,0.324647


## Potential Fields Online Algorithm

In [14]:
from algorithms import potential_field_path

# Potential-field grid: 5 evenly spaced apple weights x 2 potential modalities.
param_grid = dict(
    weight_apple=np.linspace(0.1, 0.3, 5),
    modality_potential=['max', 'sum'],
)

# Benchmark the online planner with potential fields on the simple maze.
potential_field_df = benchmark_simulation(
    simple_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=potential_field_path,
)

# Average every metric over the runs sharing the same
# (weight_apple, modality_potential).
potential_field_online_avg = (
    potential_field_df
    .groupby(['weight_apple', 'modality_potential'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)
potential_field_online_avg

Benchmarking: 100%|██████████| 100/100 [01:09<00:00,  1.45it/s]


Unnamed: 0,weight_apple,modality_potential,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,0.1,max,4.674,130.9,4.7,1.0,0.080146
1,0.1,sum,2.753,102.9,2.5,1.0,0.084821
2,0.15,max,4.776,131.1,4.8,1.0,0.082731
3,0.15,sum,2.626,106.0,2.4,1.0,0.087143
4,0.2,max,4.776,131.1,4.8,1.0,0.082698
5,0.2,sum,2.755,102.7,2.5,1.0,0.086363
6,0.25,max,4.776,131.1,4.8,1.0,0.083283
7,0.25,sum,2.726,104.9,2.5,1.0,0.086713
8,0.3,max,4.562,131.1,4.6,1.0,0.080948
9,0.3,sum,2.631,104.3,2.4,1.0,0.084889


In [15]:
# Same potential-field benchmark on the complex maze; reuses the param_grid
# defined in the potential-field cell above.
potential_field_df_complex = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=potential_field_path,
)

# Average every metric over the runs sharing the same
# (weight_apple, modality_potential).
potential_field_online_avg_complex = (
    potential_field_df_complex
    .groupby(['weight_apple', 'modality_potential'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        success_rate=('success', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)

potential_field_online_avg_complex

Benchmarking: 100%|██████████| 100/100 [01:33<00:00,  1.07it/s]


Unnamed: 0,weight_apple,modality_potential,avg_reward,avg_path_length,avg_apples,success_rate,avg_planning_time
0,0.1,max,3.345,147.5,3.6,1.0,0.163131
1,0.1,sum,1.144,332.4,1.5,1.0,0.233817
2,0.15,max,2.666,228.6,2.8,1.0,0.10954
3,0.15,sum,1.072,127.0,1.1,1.0,0.144817
4,0.2,max,2.97,172.6,3.5,1.0,0.21258
5,0.2,sum,1.412,90.4,1.1,1.0,0.119142
6,0.25,max,3.155,158.3,3.5,1.0,0.156565
7,0.25,sum,0.537,127.2,0.8,1.0,0.153086
8,0.3,max,2.883,254.6,3.9,1.0,0.241713
9,0.3,sum,1.409,139.4,1.6,1.0,0.234088


## Beam Search Online Algorithm

In [16]:
from algorithms import beam_search_apple
import numpy as np

# Beam-search hyper-parameter grid (np.arange excludes the stop value).
param_grid = {
    'beam_width': np.arange(4, 11),  # from 4 to 10
    'apple_reward': np.arange(3, 11)  # from 3 to 10
}

# Benchmark the online planner with beam search on the simple maze, then
# average every metric over the runs sharing the same (beam_width, apple_reward).
beam_online_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                                      online=True, planner_func=beam_search_apple)
beam_online_df_mean = beam_online_df.groupby(['beam_width', 'apple_reward']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    success_rate=('success', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    avg_planning_time = ('planning_time','mean')
).reset_index()

# display the averaged results for every combination (no filtering on success rate is applied here)
beam_online_df_mean

Benchmarking: 100%|██████████| 560/560 [05:40<00:00,  1.64it/s]


Unnamed: 0,beam_width,apple_reward,avg_reward,avg_path_length,success_rate,avg_apples,avg_planning_time
0,4,3,3.851,67.3,1.0,3.3,0.323101
1,4,4,3.935,69.5,1.0,3.4,0.234669
2,4,5,4.232,69.3,1.0,3.7,0.195347
3,4,6,4.229,70.0,1.0,3.7,0.228015
4,4,7,4.214,73.1,1.0,3.7,0.220451
5,4,8,4.282,73.7,1.0,3.8,0.202555
6,4,9,4.38,75.0,1.0,3.9,0.199658
7,4,10,4.384,74.5,1.0,3.9,0.200992
8,5,3,3.828,70.1,1.0,3.3,0.328996
9,5,4,4.04,68.8,1.0,3.5,0.249487


In [17]:
# Smaller beam-search grid for the complex maze (np.arange excludes the stop value).
param_grid = {
    'beam_width': np.arange(4, 9),  # from 4 to 8
    'apple_reward': np.arange(3, 7)  # from 3 to 6
}

# Benchmark the online planner with beam search on the complex maze.
beam_online_df_complex = benchmark_simulation(complex_env_generator, planner_online, seeds, param_grid,
                                              online=True, planner_func=beam_search_apple)

Benchmarking: 100%|██████████| 200/200 [09:00<00:00,  2.70s/it]


In [18]:
# Average every metric over the runs sharing the same (beam_width, apple_reward).
beam_online_df_complex_mean = (
    beam_online_df_complex
    .groupby(['beam_width', 'apple_reward'])
    .agg(
        avg_reward=('reward', 'mean'),
        avg_path_length=('path_length', 'mean'),
        success_rate=('success', 'mean'),
        avg_apples=('apples_eaten', 'mean'),
        avg_planning_time=('planning_time', 'mean'),
    )
    .reset_index()
)

beam_online_df_complex_mean

Unnamed: 0,beam_width,apple_reward,avg_reward,avg_path_length,success_rate,avg_apples,avg_planning_time
0,4,3,1.741,99.6,1.0,1.5,1.30954
1,4,4,2.033,122.4,1.0,2.0,2.061214
2,4,5,1.505,99.9,1.0,1.3,1.764506
3,4,6,2.636,155.5,1.0,2.9,1.269131
4,5,3,1.702,104.2,1.0,1.5,2.437376
5,5,4,2.072,107.8,1.0,1.9,1.929474
6,5,5,2.364,115.2,1.0,2.3,1.521359
7,5,6,2.099,136.3,1.0,2.2,1.624101
8,6,3,1.697,113.1,1.0,1.6,2.935396
9,6,4,1.912,110.3,1.0,1.8,1.299987
