In [1]:
from simulator import *

# Seeds passed to every benchmark_simulation call in this notebook.
# NOTE(review): presumably each seed is forwarded to the env generator's
# `seed` argument, giving one run per seed per parameter combination --
# confirm in simulator.benchmark_simulation.
seeds = [0, 101, 202, 303, 404, 505, 606, 707, 808, 909]

# Simple Maze Environment


In [2]:
# ASCII layout of the small maze; simple_env_generator passes it to make_map().
# NOTE(review): `|` looks like a wall, `.` like walkable floor and blanks like
# empty space -- confirm against simulator.make_map.
simple_maze = """
||||||||||||||||||||||
|....................|
|..|||||.....|..|....|
|..|   |...||||||||..|
|..||| |........|....|
|.....| |||||||.|||..|
|.....|       |.|....|
|.....|       |......|
|||||||  ||||||......|
         |.......|...|
         |.......|||.|
         |...........|
         |||||||||||||
"""


def simple_env_generator(seed):
    """Build an environment on the simple maze for the given seed.

    The map is created from ``simple_maze`` with ``premapped=False``, a start
    at ``(3, 6)`` and stairs at ``(10, 10)``; the environment is then created
    with ``apple_reward=1`` and ``penalty_time=-0.01``.

    NOTE(review): the positional ``5`` is forwarded to ``make_map`` as-is;
    it is presumably the number of apples -- confirm in ``simulator``.

    Args:
        seed: Value forwarded to ``make_map`` as its ``seed`` argument.

    Returns:
        Whatever ``create_env`` returns for the generated map.
    """
    maze_map = make_map(
        simple_maze,
        5,
        premapped=False,
        start=(3, 6),
        stairs=(10, 10),
        seed=seed,
    )
    return create_env(maze_map, apple_reward=1, penalty_time=-0.01)


# Smoke test: build one simple-maze environment with seed 0, reset it and
# render its initial state. render() stays last so its output is what the
# cell shows.
simple_env = simple_env_generator(0)
simple_env.reset()
simple_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Complex Maze Environment

In [3]:
# ASCII layout of the larger maze; complex_env_generator passes it to make_map().
# NOTE(review): `|` looks like a wall, `.` like walkable floor and blanks like
# empty space -- confirm against simulator.make_map.
complex_maze = """
               |||||||||||||||||
 ||||||||||    |...............|
 |........|    |............|..|
 |........|    |............|..|
 ||||||...|    |..|||||..||||..|
      |...|    |..|   |........|
      |...|    |..|   |........|  ||||||
|||||||...||||||..|   |.....||||  |....|
|.................|   |.....|     |....|
|...||||....|..|..|   |.....|||||||....|
|....| |..|||||||.|   |................|
|....| |.......|..|   |................|
|....| |||||||.||.|   |....||||||||....|
|....|       |.|..|   |....|      |....|
|....|       |....|   |....||||   |....|
||||||  ||||||....|   |.......|   ||||||
        |......|..|   |.......|
        |......||.|   |.......|
        |.........|   |||||||||
        |||||||||||
"""


def complex_env_generator(seed):
    """Build an environment on the complex maze for the given seed.

    The map is created from ``complex_maze`` with ``premapped=False``, a start
    at ``(3, 6)`` and stairs at ``(37, 13)``; the environment is then created
    with ``apple_reward=1`` and ``penalty_time=-0.01``.

    NOTE(review): the positional ``5`` is forwarded to ``make_map`` as-is;
    it is presumably the number of apples -- confirm in ``simulator``.

    Args:
        seed: Value forwarded to ``make_map`` as its ``seed`` argument.

    Returns:
        Whatever ``create_env`` returns for the generated map.
    """
    maze_map = make_map(
        complex_maze,
        5,
        premapped=False,
        start=(3, 6),
        stairs=(37, 13),
        seed=seed,
    )
    return create_env(maze_map, apple_reward=1, penalty_time=-0.01)


# Smoke test: build one complex-maze environment with seed 0, reset it and
# render its initial state. render() stays last so its output is what the
# cell shows.
complex_env = complex_env_generator(0)
complex_env.reset()
complex_env.render()


[0;37mH[0;37me[0;37ml[0;37ml[0;37mo[0;30m [0;37mA[0;37mg[0;37me[0;37mn[0;37mt[0;37m,[0;30m [0;37mw[0;37me[0;37ml[0;37mc[0;37mo[0;37mm[0;37me[0;30m [0;37mt[0;37mo[0;30m [0;37mN[0;37me[0;37mt[0;37mH[0;37ma[0;37mc[0;37mk[0;37m![0;30m [0;30m [0;37mY[0;37mo[0;37mu[0;30m [0;37ma[0;37mr[0;37me[0;30m [0;37ma[0;30m [0;37mn[0;37me[0;37mu[0;37mt[0;37mr[0;37ma[0;37ml[0;30m [0;37mh[0;37mu[0;37mm[0;37ma[0;37mn[0;30m [0;37mC[0;37ma[0;37mv[0;37me[0;37mm[0;37ma[0;37mn[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

# Benchmarking Online Algorithms

The simulation code has been adapted so that it no longer relies on a fixed, fully observable map; instead, the path is generated dynamically from the current state of the environment. This makes the pathfinding algorithms more flexible and adaptable, but it comes with a performance cost because the path must be recomputed at each step.

As the stairs won't be visible until the agent reaches the end of the maze, we will use a Frontier Search algorithm to choose the target position while exploring the environment. Once again, we will use the same algorithms as in the offline benchmark to plan a path to the chosen target, to see which one manages to find the stairs first and collect the most apples along the way.



## A* Online Algorithm

In [4]:
from algorithms_online import planner_online
from algorithms import a_star_apple
import numpy as np

In [5]:
# A* parameter grid: 5 evenly spaced values per axis
# (weight in [0.2, 2], apple_bonus in [0.5, 5]), i.e. 25 combinations.
param_grid = {
    'weight': np.linspace(0.2, 2, 5),
    'apple_bonus': np.linspace(0.5, 5, 5)
}

# Benchmark planner_online with a_star_apple as planner_func on the simple
# maze (the recorded progress bar shows 250 runs for this grid and seed list).
a_star_apple_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                                       online=True, planner_func=a_star_apple)

Benchmarking: 100%|██████████| 250/250 [03:19<00:00,  1.25it/s]


In [6]:
# Collapse the per-run rows into one row per (weight, apple_bonus) pair by
# taking the mean of each metric; the bare name at the end displays the table.
a_star_online_avg = (
    a_star_apple_df
    .groupby(['weight', 'apple_bonus'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)
a_star_online_avg

Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate
0,0.2,0.5,2.625,68.3,2.1,1.0
1,0.2,1.625,4.454,77.1,4.0,1.0
2,0.2,2.75,4.433,82.2,4.0,1.0
3,0.2,3.875,4.524,83.1,4.1,1.0
4,0.2,5.0,4.792,88.1,4.4,1.0
5,0.65,0.5,2.18,73.6,1.7,1.0
6,0.65,1.625,4.394,71.3,3.9,1.0
7,0.65,2.75,4.249,79.0,3.8,1.0
8,0.65,3.875,4.352,80.4,3.9,1.0
9,0.65,5.0,4.349,81.2,3.9,1.0


In [7]:
# A* benchmark on the complex maze. Uses whatever `param_grid` is currently
# bound (the A* grid when the cells are run top to bottom).
a_star_apple_df_complex = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=a_star_apple,
)

# Mean of each metric per (weight, apple_bonus) pair.
a_star_online_complex_avg = (
    a_star_apple_df_complex
    .groupby(['weight', 'apple_bonus'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)
a_star_online_complex_avg

Benchmarking: 100%|██████████| 250/250 [06:59<00:00,  1.68s/it]


Unnamed: 0,weight,apple_bonus,avg_reward,avg_path_length,avg_apples,success_rate
0,0.2,0.5,1.391,127.1,1.4,1.0
1,0.2,1.625,2.986,134.3,3.1,1.0
2,0.2,2.75,3.159,139.0,3.3,1.0
3,0.2,3.875,2.788,116.8,2.7,1.0
4,0.2,5.0,3.328,135.9,3.4,1.0
5,0.65,0.5,1.177,141.0,1.3,1.0
6,0.65,1.625,2.394,125.7,2.4,1.0
7,0.65,2.75,2.936,123.7,2.9,1.0
8,0.65,3.875,2.787,125.2,2.8,1.0
9,0.65,5.0,2.634,122.9,2.6,1.0


## Monte Carlo Tree Search Online Algorithm

In [8]:
from MCTS import mcts

# MCTS parameter grid: `C` takes the values 1.0, 2.0, 3.0, 4.0, 5.0.
# NOTE(review): C is presumably the exploration constant of the tree
# policy -- confirm in MCTS.mcts.
# This rebinds `param_grid`; the complex-maze MCTS cell reads the same name.
param_grid = {
    'C': np.linspace(1.0, 5.0, 5),
}

# Benchmark planner_online with mcts as planner_func on the simple maze.
mcts_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                               online=True, planner_func=mcts)


Benchmarking: 100%|██████████| 50/50 [02:44<00:00,  3.30s/it]


In [9]:
# Mean of each metric per value of C; the bare name at the end displays the table.
mcts_online_avg = (
    mcts_df
    .groupby(['C'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)
mcts_online_avg

Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate
0,1.0,4.426,108.8,4.2,1.0
1,2.0,4.225,80.0,3.8,1.0
2,3.0,4.78,74.0,4.3,1.0
3,4.0,4.574,77.7,4.1,1.0
4,5.0,4.357,78.1,3.9,1.0


In [10]:
# MCTS benchmark on the complex maze. Uses whatever `param_grid` is currently
# bound (the `C` grid when the cells are run top to bottom).
mcts_online_complex_df = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=mcts,
)

# Mean of each metric per value of C.
mcts_online_complex_avg = (
    mcts_online_complex_df
    .groupby(['C'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)
mcts_online_complex_avg

Benchmarking: 100%|██████████| 50/50 [06:32<00:00,  7.84s/it]


Unnamed: 0,C,avg_reward,avg_path_length,avg_apples,success_rate
0,1.0,2.757,190.9,3.3,1.0
1,2.0,2.814,133.3,2.9,1.0
2,3.0,2.406,109.9,2.2,1.0
3,4.0,2.832,155.7,3.2,1.0
4,5.0,2.458,137.8,2.5,1.0


## Greedy Best First Search Online Algorithm

In [18]:
from utils import cached_bfs, manhattan_distance
from algorithms import greedy_best_first_search

# Grid comparing two heuristics for greedy best-first search. The grid holds
# the function objects themselves; the following cell converts the resulting
# `heuristic` column to function names before grouping.
# This rebinds `param_grid`; the complex-maze greedy cell reads the same name.
param_grid = {
    'heuristic': [cached_bfs, manhattan_distance],
}

# Benchmark planner_online with greedy_best_first_search as planner_func on
# the simple maze.
simple_greedy_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                                        planner_func=greedy_best_first_search, online=True)


Benchmarking:   0%|          | 0/20 [00:00<?, ?it/s][A
Benchmarking:   5%|▌         | 1/20 [00:00<00:13,  1.39it/s][A
Benchmarking:  10%|█         | 2/20 [00:01<00:13,  1.30it/s][A
Benchmarking:  15%|█▌        | 3/20 [00:02<00:13,  1.25it/s][A
Benchmarking:  20%|██        | 4/20 [00:03<00:12,  1.24it/s][A
Benchmarking:  25%|██▌       | 5/20 [00:04<00:13,  1.11it/s][A
Benchmarking:  30%|███       | 6/20 [00:05<00:12,  1.09it/s][A
Benchmarking:  35%|███▌      | 7/20 [00:06<00:11,  1.12it/s][A
Benchmarking:  40%|████      | 8/20 [00:06<00:10,  1.17it/s][A
Benchmarking:  45%|████▌     | 9/20 [00:07<00:09,  1.11it/s][A
Benchmarking:  50%|█████     | 10/20 [00:08<00:09,  1.08it/s][A
Benchmarking:  55%|█████▌    | 11/20 [00:09<00:07,  1.14it/s][A
Benchmarking:  60%|██████    | 12/20 [00:10<00:06,  1.16it/s][A
Benchmarking:  65%|██████▌   | 13/20 [00:11<00:05,  1.20it/s][A
Benchmarking:  70%|███████   | 14/20 [00:11<00:04,  1.23it/s][A
Benchmarking:  75%|███████▌  | 15/20 [00:1

In [19]:
# Convert the heuristic column from callables to their names for readability.
# getattr() with a str() fallback covers callables that carry no __name__
# (e.g. functools.partial objects), which would otherwise raise
# AttributeError. Values that are already strings pass through unchanged, so
# re-running this cell is harmless.
simple_greedy_df['heuristic'] = simple_greedy_df['heuristic'].apply(
    lambda h: getattr(h, '__name__', str(h)) if callable(h) else str(h)
)

# Mean of each metric per heuristic; the bare name at the end displays the table.
simple_greedy_df_avg = simple_greedy_df.groupby(['heuristic']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
simple_greedy_df_avg

Unnamed: 0,heuristic,avg_reward,avg_path_length,avg_apples,success_rate
0,cached_bfs,5.066,77.3,4.6,1.0
1,manhattan_distance,5.261,81.6,4.8,1.0


In [20]:
# Greedy best-first benchmark on the complex maze. Uses whatever `param_grid`
# is currently bound (the heuristic grid when the cells are run top to bottom).
complex_greedy_df = benchmark_simulation(complex_env_generator, planner_online, seeds, param_grid,
                                         planner_func=greedy_best_first_search, online=True)

# Convert the heuristic column from callables to their names for readability.
# getattr() with a str() fallback covers callables that carry no __name__
# (e.g. functools.partial objects), which would otherwise raise AttributeError.
complex_greedy_df['heuristic'] = complex_greedy_df['heuristic'].apply(
    lambda h: getattr(h, '__name__', str(h)) if callable(h) else str(h)
)

# Mean of each metric per heuristic; the bare name at the end displays the table.
complex_greedy_df_avg = complex_greedy_df.groupby(['heuristic']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    avg_apples=('apples_eaten', 'mean'),
    success_rate=('success', 'mean')
).reset_index()
complex_greedy_df_avg


Benchmarking:   0%|          | 0/20 [00:00<?, ?it/s][A
Benchmarking:   5%|▌         | 1/20 [00:00<00:09,  2.00it/s][A
Benchmarking:  10%|█         | 2/20 [00:02<00:21,  1.18s/it][A
Benchmarking:  15%|█▌        | 3/20 [00:02<00:12,  1.34it/s][A
Benchmarking:  20%|██        | 4/20 [00:03<00:12,  1.32it/s][A
Benchmarking:  25%|██▌       | 5/20 [00:04<00:15,  1.02s/it][A
Benchmarking:  30%|███       | 6/20 [00:05<00:12,  1.10it/s][A
Benchmarking:  35%|███▌      | 7/20 [00:05<00:08,  1.50it/s][A
Benchmarking:  40%|████      | 8/20 [00:07<00:13,  1.09s/it][A
Benchmarking:  45%|████▌     | 9/20 [00:09<00:15,  1.42s/it][A
Benchmarking:  50%|█████     | 10/20 [00:12<00:17,  1.77s/it][A
Benchmarking:  55%|█████▌    | 11/20 [00:12<00:11,  1.26s/it][A
Benchmarking:  60%|██████    | 12/20 [00:13<00:10,  1.32s/it][A
Benchmarking:  65%|██████▌   | 13/20 [00:14<00:07,  1.14s/it][A
Benchmarking:  70%|███████   | 14/20 [00:16<00:07,  1.26s/it][A
Benchmarking:  75%|███████▌  | 15/20 [00:1

Unnamed: 0,heuristic,avg_reward,avg_path_length,avg_apples,success_rate
0,cached_bfs,3.383,83.1,3.0,1.0
1,manhattan_distance,4.012,130.9,4.0,1.0


## Potential Fields Online Algorithm

In [5]:
from algorithms import potential_field_path

# Potential-field grid: 5 evenly spaced apple weights in [0.1, 0.3] crossed
# with the two `modality_potential` options.
param_grid = dict(
    weight_apple=np.linspace(0.1, 0.3, 5),
    modality_potential=['max', 'sum'],
)

# Benchmark planner_online with potential_field_path as planner_func on the
# simple maze.
potential_field_df = benchmark_simulation(
    simple_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=potential_field_path,
)

# Mean of each metric per (weight_apple, modality_potential) pair.
potential_field_online_avg = (
    potential_field_df
    .groupby(['weight_apple', 'modality_potential'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)
potential_field_online_avg

Benchmarking: 100%|██████████| 100/100 [01:57<00:00,  1.18s/it]


Unnamed: 0,weight_apple,modality_potential,avg_reward,avg_path_length,avg_apples,success_rate
0,0.1,max,4.797,129.0,4.8,1.0
1,0.1,sum,2.784,109.7,2.6,1.0
2,0.15,max,4.776,131.1,4.8,1.0
3,0.15,sum,2.755,102.7,2.5,1.0
4,0.2,max,4.674,131.1,4.7,1.0
5,0.2,sum,2.648,103.3,2.4,1.0
6,0.25,max,4.791,133.9,4.8,1.0
7,0.25,sum,2.541,102.9,2.3,1.0
8,0.3,max,4.672,131.1,4.7,1.0
9,0.3,sum,2.524,104.3,2.3,1.0


In [6]:
# Potential-field benchmark on the complex maze. Uses whatever `param_grid` is
# currently bound (the potential-field grid when the cells are run top to bottom).
potential_field_df_complex = benchmark_simulation(
    complex_env_generator,
    planner_online,
    seeds,
    param_grid,
    online=True,
    planner_func=potential_field_path,
)

# Mean of each metric per (weight_apple, modality_potential) pair.
potential_field_online_avg_complex = (
    potential_field_df_complex
    .groupby(['weight_apple', 'modality_potential'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
        'success_rate': ('success', 'mean'),
    })
    .reset_index()
)

potential_field_online_avg_complex

Benchmarking: 100%|██████████| 100/100 [02:35<00:00,  1.55s/it]


Unnamed: 0,weight_apple,modality_potential,avg_reward,avg_path_length,avg_apples,success_rate
0,0.1,max,2.836,256.5,2.9,1.0
1,0.1,sum,1.383,140.5,1.5,1.0
2,0.15,max,2.509,177.0,3.0,1.0
3,0.15,sum,1.243,166.2,1.4,1.0
4,0.2,max,3.581,141.6,3.7,1.0
5,0.2,sum,1.269,99.3,1.1,1.0
6,0.25,max,3.07,195.0,3.7,1.0
7,0.25,sum,1.482,205.7,2.0,1.0
8,0.3,max,3.194,246.9,4.1,1.0
9,0.3,sum,1.101,96.7,1.0,1.0


## Beam Search Online Algorithm

In [9]:
from algorithms import beam_search_apple
import numpy as np

# Beam-search grid: 7 beam widths x 8 apple rewards = 56 combinations.
# NOTE(review): `apple_reward` here is a grid parameter, presumably consumed by
# beam_search_apple; it looks distinct from the `apple_reward=1` that the env
# generators pass to create_env -- confirm.
param_grid = {
    'beam_width': np.arange(4, 11),  # 4, 5, ..., 10 (stop is exclusive)
    'apple_reward': np.arange(3, 11)  # 3, 4, ..., 10 (stop is exclusive)
}

# Benchmark planner_online with beam_search_apple as planner_func on the
# simple maze, then average the metrics per (beam_width, apple_reward) pair.
# NOTE(review): unlike the other summaries in this notebook, `avg_apples`
# is not aggregated here.
beam_online_df = benchmark_simulation(simple_env_generator, planner_online, seeds, param_grid,
                                      online=True, planner_func=beam_search_apple)
beam_online_df_mean = beam_online_df.groupby(['beam_width', 'apple_reward']).agg(
    avg_reward=('reward', 'mean'),
    avg_path_length=('path_length', 'mean'),
    success_rate=('success', 'mean')
).reset_index()

# Display the full per-configuration summary (no success-rate filter is applied).
beam_online_df_mean



Benchmarking:   0%|          | 0/560 [00:00<?, ?it/s][A[A

Benchmarking:   0%|          | 1/560 [00:02<24:38,  2.64s/it][A[A

Benchmarking:   0%|          | 2/560 [00:03<14:44,  1.59s/it][A[A

Benchmarking:   1%|          | 3/560 [00:04<11:25,  1.23s/it][A[A

Benchmarking:   1%|          | 4/560 [00:05<09:39,  1.04s/it][A[A

Benchmarking:   1%|          | 5/560 [00:05<08:46,  1.05it/s][A[A

Benchmarking:   1%|          | 6/560 [00:06<08:16,  1.11it/s][A[A

Benchmarking:   1%|▏         | 7/560 [00:07<08:02,  1.15it/s][A[A

Benchmarking:   1%|▏         | 8/560 [00:08<07:49,  1.18it/s][A[A

Benchmarking:   2%|▏         | 9/560 [00:09<07:40,  1.20it/s][A[A

Benchmarking:   2%|▏         | 10/560 [00:09<07:30,  1.22it/s][A[A

Benchmarking:   2%|▏         | 11/560 [00:10<07:31,  1.22it/s][A[A

Benchmarking:   2%|▏         | 12/560 [00:11<08:02,  1.13it/s][A[A

Benchmarking:   2%|▏         | 13/560 [00:12<07:53,  1.16it/s][A[A

Benchmarking:   2%|▎         | 14/56

     seed  beam_width  apple_reward  reward  path_length  planning_time  \
0       0           4             3    4.45           78       0.233817   
1       0           4             4    4.45           78       0.197977   
2       0           4             5    4.45           78       0.199502   
3       0           4             6    4.45           78       0.178664   
4       0           4             7    4.45           78       0.182945   
..    ...         ...           ...     ...          ...            ...   
555   909          10             6    3.66           53       0.186555   
556   909          10             7    3.66           53       0.192087   
557   909          10             8    3.66           53       0.189420   
558   909          10             9    3.66           53       0.189263   
559   909          10            10    3.66           53       0.186118   

     apples_eaten  success  end_status  is_ascended  
0               4     True           2       

Unnamed: 0,beam_width,apple_reward,avg_reward,avg_path_length,success_rate
0,4,3,3.636,78.0,1.0
1,4,4,3.827,79.3,1.0
2,4,5,3.916,80.0,1.0
3,4,6,4.014,80.2,1.0
4,4,7,4.381,85.0,1.0
5,4,8,4.386,84.1,1.0
6,4,9,4.386,84.1,1.0
7,4,10,4.386,84.1,1.0
8,5,3,3.636,78.0,1.0
9,5,4,3.827,79.3,1.0


In [None]:
# Reduced beam-search grid for the complex maze: 5 beam widths x 4 apple
# rewards = 20 combinations.
param_grid = {
    'beam_width': np.arange(4, 9),  # 4, 5, ..., 8 (stop is exclusive)
    'apple_reward': np.arange(3, 7)  # 3, 4, 5, 6 (stop is exclusive)
}

# Benchmark planner_online with beam_search_apple as planner_func on the
# complex maze.
beam_online_df_complex = benchmark_simulation(complex_env_generator, planner_online, seeds, param_grid,
                                              online=True, planner_func=beam_search_apple)

In [20]:
# Mean of each metric per (beam_width, apple_reward) pair on the complex maze.
# The aggregation order below fixes the column order of the displayed table.
beam_online_df_complex_mean = (
    beam_online_df_complex
    .groupby(['beam_width', 'apple_reward'])
    .agg(**{
        'avg_reward': ('reward', 'mean'),
        'avg_path_length': ('path_length', 'mean'),
        'success_rate': ('success', 'mean'),
        'avg_apples': ('apples_eaten', 'mean'),
    })
    .reset_index()
)

beam_online_df_complex_mean

Unnamed: 0,beam_width,apple_reward,avg_reward,avg_path_length,success_rate,avg_apples
0,4,3,1.526,143.8,1.0,1.7
1,4,4,1.56,113.9,1.0,1.5
2,4,5,2.158,128.1,1.0,2.2
3,4,6,1.947,136.3,1.0,2.1
4,5,3,1.762,118.5,1.0,1.7
5,5,4,1.596,125.8,1.0,1.6
6,5,5,1.842,123.9,1.0,1.9
7,5,6,2.135,132.9,1.0,2.2
8,6,3,1.788,137.1,1.0,1.9
9,6,4,2.29,157.2,1.0,2.6
