# Quantum Nematode Simulation Notebook
This notebook runs a simulation using the quantum nematode agent.

In [1]:
"""Imports."""
import logging

from quantumnematode.agent import QuantumNematodeAgent
from quantumnematode.logging_config import logger

# Suppress logs from Qiskit
logging.getLogger("qiskit").setLevel(logging.WARNING)

In [6]:
"""Config."""
runs = 3
maze_grid_size = 5
max_steps = 100
show_last_frame_only = False
log_level = logging.INFO

In [None]:
"""Run simulation."""
logger.setLevel(log_level)
agent = QuantumNematodeAgent(maze_grid_size=maze_grid_size)

all_results = []
for run in range(runs):
    print(f"Run {run + 1}/{runs}")  # noqa: T201
    path = agent.run_episode(max_steps=max_steps, show_last_frame_only=show_last_frame_only)

    steps = len(path)
    all_results.append((run + 1, steps, path))

    if run < runs - 1:
        agent.reset_environment()

2025-04-19 17:40:54,064 - INFO - Step 1: Action=up, Reward=-0.1
2025-04-19 17:40:54,123 - INFO - Step 2: Action=left, Reward=-0.1
2025-04-19 17:40:54,179 - INFO - Step 3: Action=down, Reward=-0.1


Run 1/3
. . . . *
. . . . .
. @ . . .
. O . . .
. . . . .

. . . . *
. . . . .
@ O . . .
. O . . .
. . . . .

. . . . *
. . . . .
O O . . .
@ O . . .
. . . . .



2025-04-19 17:40:54,244 - INFO - Step 4: Action=right, Reward=-0.1
2025-04-19 17:40:54,305 - INFO - Step 5: Action=down, Reward=-0.1
2025-04-19 17:40:54,364 - INFO - Step 6: Action=right, Reward=-0.1
2025-04-19 17:40:54,421 - INFO - Step 7: Action=up, Reward=-0.1


. . . . *
. . . . .
O O . . .
@ O . . .
. . . . .

. . . . *
. . . . .
O O . . .
O O . . .
@ . . . .

. . . . *
. . . . .
O O . . .
O . . . .
O @ . . .

. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .



2025-04-19 17:40:54,482 - INFO - Step 8: Action=left, Reward=-0.1
2025-04-19 17:40:54,541 - INFO - Step 9: Action=left, Reward=-0.1
2025-04-19 17:40:54,598 - INFO - Step 10: Action=left, Reward=-0.1
2025-04-19 17:40:54,659 - INFO - Step 11: Action=down, Reward=-0.1


. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .

. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .

. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .

. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .



2025-04-19 17:40:54,722 - INFO - Step 12: Action=left, Reward=-0.1
2025-04-19 17:40:54,784 - INFO - Step 13: Action=down, Reward=-0.1
2025-04-19 17:40:54,839 - INFO - Step 14: Action=up, Reward=-0.1
2025-04-19 17:40:54,896 - INFO - Step 15: Action=down, Reward=-0.1


. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .

. . . . *
. . . . .
O . . . .
O @ . . .
O O . . .

. . . . *
. . . . .
. @ . . .
O O . . .
O O . . .

. . . . *
. . . . .
. @ . . .
O O . . .
O O . . .



2025-04-19 17:40:54,960 - INFO - Step 16: Action=down, Reward=-0.1
2025-04-19 17:40:55,021 - INFO - Step 17: Action=right, Reward=-0.1
2025-04-19 17:40:55,080 - INFO - Step 18: Action=right, Reward=-0.1
2025-04-19 17:40:55,148 - INFO - Step 19: Action=left, Reward=-0.1


. . . . *
. . . . .
. @ . . .
O O . . .
O O . . .

. . . . *
. . . . .
. O @ . .
. O . . .
O O . . .

. . . . *
. . . . .
. O O @ .
. O . . .
. O . . .

. . . . *
. . . . .
. O O @ .
. O . . .
. O . . .



2025-04-19 17:40:55,209 - INFO - Step 20: Action=left, Reward=-0.1
2025-04-19 17:40:55,270 - INFO - Step 21: Action=left, Reward=-0.1
2025-04-19 17:40:55,328 - INFO - Step 22: Action=left, Reward=-0.1
2025-04-19 17:40:55,386 - INFO - Step 23: Action=down, Reward=-0.1


. . . . *
. . . . .
. O O @ .
. O . . .
. O . . .

. . . . *
. . . . .
. O O @ .
. O . . .
. O . . .

. . . . *
. . . . .
. O O @ .
. O . . .
. O . . .

. . . . *
. . . . .
. O O O .
. O . @ .
. . . . .



2025-04-19 17:40:55,448 - INFO - Step 24: Action=up, Reward=-0.1
2025-04-19 17:40:55,516 - INFO - Step 25: Action=up, Reward=-0.1
2025-04-19 17:40:55,575 - INFO - Step 26: Action=right, Reward=-0.1
2025-04-19 17:40:55,634 - INFO - Step 27: Action=up, Reward=-0.1


. . . . *
. . . . .
. O O O .
. O . @ .
. . . . .

. . . . *
. . . . .
. O O O .
. O . @ .
. . . . .

. . . . *
. . . . .
. O O O .
. . . O @
. . . . .

. . . . *
. . . . .
. . O O @
. . . O O
. . . . .



2025-04-19 17:40:55,692 - INFO - Step 28: Action=up, Reward=-0.1
2025-04-19 17:40:55,751 - INFO - Step 29: Action=left, Reward=-0.1
2025-04-19 17:40:55,809 - INFO - Step 30: Action=right, Reward=-0.1
2025-04-19 17:40:55,866 - INFO - Step 31: Action=right, Reward=-0.1


. . . . *
. . . . @
. . . O O
. . . O O
. . . . .

. . . . *
. . . @ O
. . . . O
. . . O O
. . . . .

. . . . *
. . . @ O
. . . . O
. . . O O
. . . . .

. . . . *
. . . @ O
. . . . O
. . . O O
. . . . .



2025-04-19 17:40:55,924 - INFO - Step 32: Action=down, Reward=-0.1
2025-04-19 17:40:55,982 - INFO - Step 33: Action=right, Reward=-0.1
2025-04-19 17:40:56,042 - INFO - Step 34: Action=down, Reward=-0.1


. . . . *
. . . O O
. . . @ O
. . . . O
. . . . .

. . . . *
. . . O O
. . . @ O
. . . . O
. . . . .

. . . . *
. . . O O
. . . O O
. . . @ .
. . . . .



2025-04-19 17:40:56,128 - INFO - Step 35: Action=down, Reward=-0.1
2025-04-19 17:40:56,269 - INFO - Step 36: Action=unknown, Reward=-0.1
2025-04-19 17:40:56,325 - INFO - Step 37: Action=right, Reward=-0.1


. . . . *
. . . O O
. . . O .
. . . O .
. . . @ .

. . . . *
. . . O O
. . . O .
. . . O .
. . . @ .

. . . . *
. . . O .
. . . O .
. . . O .
. . . O @



2025-04-19 17:40:56,380 - INFO - Step 38: Action=unknown, Reward=-0.1
2025-04-19 17:40:56,438 - INFO - Step 39: Action=unknown, Reward=-0.1
2025-04-19 17:40:56,498 - INFO - Step 40: Action=unknown, Reward=-0.1
2025-04-19 17:40:56,565 - INFO - Step 41: Action=left, Reward=-0.1


. . . . *
. . . O .
. . . O .
. . . O .
. . . O @

. . . . *
. . . O .
. . . O .
. . . O .
. . . O @

. . . . *
. . . O .
. . . O .
. . . O .
. . . O @

. . . . *
. . . O .
. . . O .
. . . O .
. . . O @



2025-04-19 17:40:56,628 - INFO - Step 42: Action=up, Reward=-0.1
2025-04-19 17:40:56,689 - INFO - Step 43: Action=up, Reward=-0.1
2025-04-19 17:40:56,745 - INFO - Step 44: Action=left, Reward=-0.1
2025-04-19 17:40:56,803 - INFO - Step 45: Action=left, Reward=-0.1


. . . . *
. . . . .
. . . O .
. . . O @
. . . O O

. . . . *
. . . . .
. . . . @
. . . O O
. . . O O

. . . . *
. . . . .
. . . @ O
. . . . O
. . . O O

. . . . *
. . . . .
. . @ O O
. . . . O
. . . . O



2025-04-19 17:40:56,864 - INFO - Step 46: Action=left, Reward=-0.1
2025-04-19 17:40:56,927 - INFO - Step 47: Action=right, Reward=-0.1
2025-04-19 17:40:56,988 - INFO - Step 48: Action=down, Reward=-0.1
2025-04-19 17:40:57,050 - INFO - Step 49: Action=down, Reward=-0.1


. . . . *
. . . . .
. @ O O O
. . . . O
. . . . .

. . . . *
. . . . .
. @ O O O
. . . . O
. . . . .

. . . . *
. . . . .
. O O O O
. @ . . .
. . . . .

. . . . *
. . . . .
. O O O .
. O . . .
. @ . . .



2025-04-19 17:40:57,128 - INFO - Step 50: Action=unknown, Reward=-0.1
2025-04-19 17:40:57,190 - INFO - Step 51: Action=unknown, Reward=-0.1
2025-04-19 17:40:57,250 - INFO - Step 52: Action=up, Reward=-0.1
2025-04-19 17:40:57,312 - INFO - Step 53: Action=up, Reward=-0.1


. . . . *
. . . . .
. O O O .
. O . . .
. @ . . .

. . . . *
. . . . .
. O O O .
. O . . .
. @ . . .

. . . . *
. . . . .
. O O O .
. O . . .
. @ . . .

. . . . *
. . . . .
. O O O .
. O . . .
. @ . . .



2025-04-19 17:40:57,371 - INFO - Step 54: Action=left, Reward=-0.1
2025-04-19 17:40:57,430 - INFO - Step 55: Action=unknown, Reward=-0.1
2025-04-19 17:40:57,487 - INFO - Step 56: Action=unknown, Reward=-0.1
2025-04-19 17:40:57,546 - INFO - Step 57: Action=unknown, Reward=-0.1


. . . . *
. . . . .
. O O . .
. O . . .
@ O . . .

. . . . *
. . . . .
. O O . .
. O . . .
@ O . . .

. . . . *
. . . . .
. O O . .
. O . . .
@ O . . .

. . . . *
. . . . .
. O O . .
. O . . .
@ O . . .



2025-04-19 17:40:57,726 - INFO - Step 58: Action=up, Reward=-0.1
2025-04-19 17:40:57,782 - INFO - Step 59: Action=right, Reward=-0.1
2025-04-19 17:40:57,837 - INFO - Step 60: Action=down, Reward=-0.1
2025-04-19 17:40:57,893 - INFO - Step 61: Action=right, Reward=-0.1


. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .



2025-04-19 17:40:57,951 - INFO - Step 62: Action=right, Reward=-0.1
2025-04-19 17:40:58,006 - INFO - Step 63: Action=right, Reward=-0.1
2025-04-19 17:40:58,062 - INFO - Step 64: Action=right, Reward=-0.1
2025-04-19 17:40:58,118 - INFO - Step 65: Action=up, Reward=-0.1


. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
. O . . .
@ O . . .
O O . . .

. . . . *
. . . . .
@ . . . .
O O . . .
O O . . .



2025-04-19 17:40:58,175 - INFO - Step 66: Action=up, Reward=-0.1
2025-04-19 17:40:58,231 - INFO - Step 67: Action=up, Reward=-0.1
2025-04-19 17:40:58,285 - INFO - Step 68: Action=unknown, Reward=-0.1
2025-04-19 17:40:58,341 - INFO - Step 69: Action=unknown, Reward=-0.1


. . . . *
@ . . . .
O . . . .
O . . . .
O O . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .



2025-04-19 17:40:58,397 - INFO - Step 70: Action=unknown, Reward=-0.1
2025-04-19 17:40:58,456 - INFO - Step 71: Action=unknown, Reward=-0.1
2025-04-19 17:40:58,514 - INFO - Step 72: Action=unknown, Reward=-0.1
2025-04-19 17:40:58,576 - INFO - Step 73: Action=unknown, Reward=-0.1


@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .

@ . . . *
O . . . .
O . . . .
O . . . .
O . . . .



2025-04-19 17:40:58,637 - INFO - Step 74: Action=right, Reward=-0.1
2025-04-19 17:40:58,706 - INFO - Step 75: Action=down, Reward=-0.1
2025-04-19 17:40:58,762 - INFO - Step 76: Action=down, Reward=-0.1
2025-04-19 17:40:58,820 - INFO - Step 77: Action=right, Reward=-0.1


O @ . . *
O . . . .
O . . . .
O . . . .
. . . . .

O O . . *
O @ . . .
O . . . .
. . . . .
. . . . .

O O . . *
O O . . .
. @ . . .
. . . . .
. . . . .

O O . . *
. O . . .
. O @ . .
. . . . .
. . . . .



2025-04-19 17:40:58,877 - INFO - Step 78: Action=down, Reward=-0.1
2025-04-19 17:40:58,939 - INFO - Step 79: Action=right, Reward=-0.1
2025-04-19 17:40:58,996 - INFO - Step 80: Action=down, Reward=-0.1
2025-04-19 17:40:59,056 - INFO - Step 81: Action=right, Reward=-0.1


. O . . *
. O . . .
. O O . .
. . @ . .
. . . . .

. . . . *
. O . . .
. O O . .
. . O @ .
. . . . .

. . . . *
. . . . .
. O O . .
. . O O .
. . . @ .

. . . . *
. . . . .
. . O . .
. . O O .
. . . O @



2025-04-19 17:40:59,119 - INFO - Step 82: Action=unknown, Reward=-0.1
2025-04-19 17:40:59,178 - INFO - Step 83: Action=unknown, Reward=-0.1
2025-04-19 17:40:59,235 - INFO - Step 84: Action=left, Reward=-0.1
2025-04-19 17:40:59,291 - INFO - Step 85: Action=left, Reward=-0.1


. . . . *
. . . . .
. . O . .
. . O O .
. . . O @

. . . . *
. . . . .
. . O . .
. . O O .
. . . O @

. . . . *
. . . . .
. . O . .
. . O O .
. . . O @

. . . . *
. . . . .
. . O . .
. . O O .
. . . O @



2025-04-19 17:40:59,359 - INFO - Step 86: Action=up, Reward=-0.1
2025-04-19 17:40:59,420 - INFO - Step 87: Action=left, Reward=-0.1
2025-04-19 17:40:59,480 - INFO - Step 88: Action=up, Reward=-0.1
2025-04-19 17:40:59,543 - INFO - Step 89: Action=down, Reward=-0.1


. . . . *
. . . . .
. . . . .
. . O O @
. . . O O

. . . . *
. . . . .
. . . . .
. . O O @
. . . O O

. . . . *
. . . . .
. . . . @
. . . O O
. . . O O

. . . . *
. . . . .
. . . . @
. . . O O
. . . O O



2025-04-19 17:40:59,606 - INFO - Step 90: Action=unknown, Reward=-0.1
2025-04-19 17:40:59,667 - INFO - Step 91: Action=down, Reward=-0.1
2025-04-19 17:40:59,733 - INFO - Step 92: Action=unknown, Reward=-0.1
2025-04-19 17:40:59,793 - INFO - Step 93: Action=up, Reward=-0.1


. . . . *
. . . . .
. . . . @
. . . O O
. . . O O

. . . . *
. . . . .
. . . . @
. . . O O
. . . O O

. . . . *
. . . . .
. . . . @
. . . O O
. . . O O

. . . . *
. . . . @
. . . . O
. . . . O
. . . O O



2025-04-19 17:40:59,854 - INFO - Step 94: Action=left, Reward=-0.1
2025-04-19 17:40:59,912 - INFO - Step 95: Action=left, Reward=-0.1
2025-04-19 17:40:59,973 - INFO - Step 96: Action=right, Reward=-0.1
2025-04-19 17:41:00,031 - INFO - Step 97: Action=right, Reward=-0.1


. . . . *
. . . @ O
. . . . O
. . . . O
. . . . O

. . . . *
. . @ O O
. . . . O
. . . . O
. . . . .

. . . . *
. . @ O O
. . . . O
. . . . O
. . . . .

. . . . *
. . @ O O
. . . . O
. . . . O
. . . . .



2025-04-19 17:41:00,093 - INFO - Step 98: Action=right, Reward=-0.1
2025-04-19 17:41:00,156 - INFO - Step 99: Action=left, Reward=-0.1
2025-04-19 17:41:00,215 - INFO - Step 100: Action=left, Reward=-0.1
2025-04-19 17:41:00,215 - INFO - Environment reset. Retaining learned data.
2025-04-19 17:41:00,276 - INFO - Step 1: Action=left, Reward=-0.1


. . . . *
. . @ O O
. . . . O
. . . . O
. . . . .

. . . . *
. @ O O O
. . . . O
. . . . .
. . . . .

. . . . *
@ O O O O
. . . . .
. . . . .
. . . . .

Run 2/3
. . . . *
. . . . .
. . . . .
@ O . . .
. . . . .



2025-04-19 17:41:00,338 - INFO - Step 2: Action=down, Reward=-0.1
2025-04-19 17:41:00,397 - INFO - Step 3: Action=unknown, Reward=-0.1
2025-04-19 17:41:00,455 - INFO - Step 4: Action=right, Reward=-0.1
2025-04-19 17:41:00,514 - INFO - Step 5: Action=left, Reward=-0.1


. . . . *
. . . . .
. . . . .
O O . . .
@ . . . .

. . . . *
. . . . .
. . . . .
O O . . .
@ . . . .

. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .

. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .



2025-04-19 17:41:00,577 - INFO - Step 6: Action=left, Reward=-0.1
2025-04-19 17:41:00,638 - INFO - Step 7: Action=left, Reward=-0.1
2025-04-19 17:41:00,698 - INFO - Step 8: Action=left, Reward=-0.1
2025-04-19 17:41:00,759 - INFO - Step 9: Action=up, Reward=-0.1


. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .

. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .

. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .

. . . . *
. . . . .
. . . . .
O O . . .
O @ . . .



2025-04-19 17:41:00,821 - INFO - Step 10: Action=right, Reward=-0.1
2025-04-19 17:41:00,885 - INFO - Step 11: Action=left, Reward=-0.1
2025-04-19 17:41:00,943 - INFO - Step 12: Action=up, Reward=-0.1
2025-04-19 17:41:01,003 - INFO - Step 13: Action=left, Reward=-0.1


. . . . *
. . . . .
. . . . .
O O . . .
O O @ . .

. . . . *
. . . . .
. . . . .
O O . . .
O O @ . .

. . . . *
. . . . .
. . . . .
O . @ . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .



2025-04-19 17:41:01,126 - INFO - Step 14: Action=down, Reward=-0.1
2025-04-19 17:41:01,193 - INFO - Step 15: Action=down, Reward=-0.1
2025-04-19 17:41:01,251 - INFO - Step 16: Action=right, Reward=-0.1
2025-04-19 17:41:01,308 - INFO - Step 17: Action=down, Reward=-0.1


. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .



2025-04-19 17:41:01,372 - INFO - Step 18: Action=right, Reward=-0.1
2025-04-19 17:41:01,432 - INFO - Step 19: Action=down, Reward=-0.1
2025-04-19 17:41:01,491 - INFO - Step 20: Action=down, Reward=-0.1
2025-04-19 17:41:01,551 - INFO - Step 21: Action=down, Reward=-0.1


. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .



2025-04-19 17:41:01,620 - INFO - Step 22: Action=down, Reward=-0.1
2025-04-19 17:41:01,690 - INFO - Step 23: Action=down, Reward=-0.1
2025-04-19 17:41:01,746 - INFO - Step 24: Action=down, Reward=-0.1
2025-04-19 17:41:01,801 - INFO - Step 25: Action=down, Reward=-0.1


. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .



2025-04-19 17:41:01,860 - INFO - Step 26: Action=down, Reward=-0.1
2025-04-19 17:41:01,920 - INFO - Step 27: Action=right, Reward=-0.1
2025-04-19 17:41:01,985 - INFO - Step 28: Action=down, Reward=-0.1
2025-04-19 17:41:02,047 - INFO - Step 29: Action=down, Reward=-0.1


. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .

. . . . *
. . . . .
. . . . .
. @ O . .
O O O . .



2025-04-19 17:41:02,112 - INFO - Step 30: Action=up, Reward=-0.1
2025-04-19 17:41:02,175 - INFO - Step 31: Action=left, Reward=-0.1
2025-04-19 17:41:02,235 - INFO - Step 32: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,294 - INFO - Step 33: Action=up, Reward=-0.1


. . . . *
. . . . .
. @ . . .
. O O . .
. O O . .

. . . . *
. . . . .
@ O . . .
. O O . .
. . O . .

. . . . *
. . . . .
@ O . . .
. O O . .
. . O . .

. . . . *
@ . . . .
O O . . .
. O O . .
. . . . .



2025-04-19 17:41:02,352 - INFO - Step 34: Action=up, Reward=-0.1
2025-04-19 17:41:02,412 - INFO - Step 35: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,470 - INFO - Step 36: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,527 - INFO - Step 37: Action=unknown, Reward=-0.1


@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .



2025-04-19 17:41:02,588 - INFO - Step 38: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,650 - INFO - Step 39: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,739 - INFO - Step 40: Action=unknown, Reward=-0.1


@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .



2025-04-19 17:41:02,798 - INFO - Step 41: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,919 - INFO - Step 42: Action=unknown, Reward=-0.1
2025-04-19 17:41:02,977 - INFO - Step 43: Action=unknown, Reward=-0.1


@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .



2025-04-19 17:41:03,044 - INFO - Step 44: Action=unknown, Reward=-0.1
2025-04-19 17:41:03,114 - INFO - Step 45: Action=right, Reward=-0.1
2025-04-19 17:41:03,170 - INFO - Step 46: Action=right, Reward=-0.1
2025-04-19 17:41:03,225 - INFO - Step 47: Action=unknown, Reward=-0.1


@ . . . *
O . . . .
O O . . .
. O . . .
. . . . .

O @ . . *
O . . . .
O O . . .
. . . . .
. . . . .

O O @ . *
O . . . .
O . . . .
. . . . .
. . . . .

O O @ . *
O . . . .
O . . . .
. . . . .
. . . . .



2025-04-19 17:41:03,281 - INFO - Step 48: Action=unknown, Reward=-0.1
2025-04-19 17:41:03,343 - INFO - Step 49: Action=left, Reward=-0.1
2025-04-19 17:41:03,408 - INFO - Step 50: Action=down, Reward=-0.1
2025-04-19 17:41:03,475 - INFO - Step 51: Action=right, Reward=-0.1


O O @ . *
O . . . .
O . . . .
. . . . .
. . . . .

O O @ . *
O . . . .
O . . . .
. . . . .
. . . . .

O O O . *
O . @ . .
. . . . .
. . . . .
. . . . .

O O O . *
. . O @ .
. . . . .
. . . . .
. . . . .



2025-04-19 17:41:03,533 - INFO - Step 52: Action=right, Reward=-0.1
2025-04-19 17:41:03,588 - INFO - Step 53: Action=left, Reward=-0.1
2025-04-19 17:41:03,646 - INFO - Step 54: Action=up, Reward=10
2025-04-19 17:41:03,646 - INFO - Environment reset. Retaining learned data.
2025-04-19 17:41:03,703 - INFO - Step 1: Action=down, Reward=-0.1


. O O . *
. . O O @
. . . . .
. . . . .
. . . . .

. O O . *
. . O O @
. . . . .
. . . . .
. . . . .

. . O . @
. . O O O
. . . . .
. . . . .
. . . . .

Run 3/3
. . . . *
. . . . .
. . . . .
. O . . .
. @ . . .



2025-04-19 17:41:03,778 - INFO - Step 2: Action=unknown, Reward=-0.1
2025-04-19 17:41:03,842 - INFO - Step 3: Action=unknown, Reward=-0.1
2025-04-19 17:41:03,902 - INFO - Step 4: Action=unknown, Reward=-0.1
2025-04-19 17:41:03,962 - INFO - Step 5: Action=right, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. @ . . .

. . . . *
. . . . .
. . . . .
. O . . .
. @ . . .

. . . . *
. . . . .
. . . . .
. O . . .
. @ . . .

. . . . *
. . . . .
. . . . .
. O . . .
. O @ . .



2025-04-19 17:41:04,054 - INFO - Step 6: Action=right, Reward=-0.1
2025-04-19 17:41:04,114 - INFO - Step 7: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,175 - INFO - Step 8: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,235 - INFO - Step 9: Action=right, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O @ .

. . . . *
. . . . .
. . . . .
. O . . .
. O O @ .

. . . . *
. . . . .
. . . . .
. O . . .
. O O @ .

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @



2025-04-19 17:41:04,300 - INFO - Step 10: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,362 - INFO - Step 11: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,424 - INFO - Step 12: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,483 - INFO - Step 13: Action=unknown, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @



2025-04-19 17:41:04,541 - INFO - Step 14: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,603 - INFO - Step 15: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,662 - INFO - Step 16: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,724 - INFO - Step 17: Action=unknown, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @



2025-04-19 17:41:04,794 - INFO - Step 18: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,859 - INFO - Step 19: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,920 - INFO - Step 20: Action=unknown, Reward=-0.1
2025-04-19 17:41:04,988 - INFO - Step 21: Action=unknown, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @



2025-04-19 17:41:05,058 - INFO - Step 22: Action=unknown, Reward=-0.1
2025-04-19 17:41:05,118 - INFO - Step 23: Action=unknown, Reward=-0.1
2025-04-19 17:41:05,180 - INFO - Step 24: Action=unknown, Reward=-0.1
2025-04-19 17:41:05,237 - INFO - Step 25: Action=unknown, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. O . . .
. O O O @



2025-04-19 17:41:05,294 - INFO - Step 26: Action=left, Reward=-0.1
2025-04-19 17:41:05,351 - INFO - Step 27: Action=up, Reward=-0.1
2025-04-19 17:41:05,418 - INFO - Step 28: Action=up, Reward=-0.1
2025-04-19 17:41:05,474 - INFO - Step 29: Action=up, Reward=-0.1


. . . . *
. . . . .
. . . . .
. O . . .
. O O O @

. . . . *
. . . . .
. . . . .
. . . . @
. O O O O

. . . . *
. . . . .
. . . . @
. . . . O
. . O O O

. . . . *
. . . . @
. . . . O
. . . . O
. . . O O



2025-04-19 17:41:05,530 - INFO - Step 30: Action=up, Reward=10


. . . . @
. . . . O
. . . . O
. . . . O
. . . . O



In [None]:
"""Output results."""
print("\nResults:")  # noqa: T201
for run, steps, path in all_results:
    print(f"Run {run}: {steps} steps")  # noqa: T201
    print("Path:", path)  # noqa: T201

print("\nSummary:")  # noqa: T201
average_steps = sum(steps for _, steps, _ in all_results) / runs
print(f"Average steps: {average_steps:.2f}")  # noqa: T201
improvement_rate = (all_results[0][1] - all_results[-1][1]) / all_results[0][1] * 100
print(f"Improvement rate: {improvement_rate:.2f}%")  # noqa: T201
print(f"Total runs: {runs}")  # noqa: T201
print(f"Grid size: {maze_grid_size}x{maze_grid_size}")  # noqa: T201
print(f"Max steps: {max_steps}")  # noqa: T201


Results:
Run 1: 101 steps
Path: [(1, 1), (1, 2), (0, 2), (0, 1), (0, 1), (0, 0), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 2), (1, 2), (1, 2), (2, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 2), (3, 1), (3, 1), (3, 1), (4, 1), (4, 2), (4, 3), (3, 3), (3, 3), (3, 3), (3, 2), (3, 2), (3, 1), (3, 0), (3, 0), (4, 0), (4, 0), (4, 0), (4, 0), (4, 0), (4, 1), (4, 2), (3, 2), (2, 2), (1, 2), (1, 2), (1, 1), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 2), (0, 3), (0, 4), (0, 4), (0, 4), (0, 4), (0, 4), (0, 4), (0, 4), (1, 4), (1, 3), (1, 2), (2, 2), (2, 1), (3, 1), (3, 0), (4, 0), (4, 0), (4, 0), (4, 0), (4, 0), (4, 1), (4, 1), (4, 2), (4, 2), (4, 2), (4, 2), (4, 2), (4, 3), (3, 3), (2, 3), (2, 3), (2, 3), (2, 3), (1, 3), (0, 3)]
Run 2: 55 steps
Path: [(1, 1), (0, 1), (0, 0), (0, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (2, 0), (2, 0), (2, 1), (1, 1), (1, 1), (1, 1), (1, 1),