# Escaping the maze
In this notebook we will cover the basics of a reinforcement learning (RL) environment.

Specifically, we will cover the observation, action, and state spaces, using a maze as our running example.

In [None]:
import numpy as np

from qgym.environment import Environment
from qgym.rewarder import Rewarder

## Map of the maze

Our maze will have 4 different field types.

- `S`: start position
- `F`: a free field
- `W`: a wall
- `G`: the goal

In [None]:
# One string per maze row, one character per field (S, F, W or G).
maze_map_4x4 = [
    "SFFF",  # row 0: start field in the first column
    "FWFW",  # row 1
    "FFFW",  # row 2
    "WFFG",  # row 3: goal field in the last column
]

The agent can choose between 4 actions, each encoded as an integer:

- `0`: UP
- `1`: RIGHT
- `2`: DOWN
- `3`: LEFT

In [None]:
class MazeRewarder(Rewarder):
    """Placeholder rewarder for the maze environment; no reward logic is defined yet."""

    # TODO: implement this

In [None]:
class Maze(Environment):
    """Grid maze in which an agent has to walk from the start field to the goal.

    The maze is described by equally long strings (one per row) made of the
    characters ``S`` (start), ``F`` (free), ``W`` (wall) and ``G`` (goal).
    The agent position is kept in the state as a ``(row, col)`` tuple and is
    exposed as an observation through the row-major index ``row * ncols + col``.
    """

    def __init__(self, maze_map):
        """Build the maze from its map.

        :param maze_map: Sequence of equally long strings, one string per row of
            the maze and one character per field.
        :raises ValueError: If the map is empty, if its rows differ in length, or
            if it contains no start field ``S``.
        """
        if len(maze_map) == 0 or len(maze_map[0]) == 0:
            raise ValueError("maze_map must contain at least one non-empty row")
        if any(len(row) != len(maze_map[0]) for row in maze_map):
            raise ValueError("all rows of maze_map must have the same length")

        self.nrows = len(maze_map)
        self.ncols = len(maze_map[0])

        # dtype "c" splits every row string into single-byte characters, giving
        # an (nrows, ncols) array that can be compared elementwise with b"S".
        self.maze_map = np.asarray(maze_map, dtype="c")

        # Uniform distribution over all start fields, flattened in row-major
        # order so that index i corresponds to pos_to_rowcol(i).
        start_mask = (self.maze_map == b"S").ravel().astype("float64")
        if start_mask.sum() == 0:
            raise ValueError("maze_map must contain at least one start field 'S'")
        self.start_position_distribution = start_mask / start_mask.sum()

        self.action_space = None
        self.observation_space = None
        self.metadata = None  # can we skip this?
        self._state = {"position": None}
        self._rewarder = None

    def rowcol_to_pos(self, row, col):
        """Convert a ``(row, col)`` coordinate into its row-major flat index.

        :param row: Row index of the field.
        :param col: Column index of the field.
        :return: Flat index ``row * ncols + col``.
        """
        return row * self.ncols + col

    def pos_to_rowcol(self, pos):
        """Convert a row-major flat index back into a ``(row, col)`` coordinate.

        :param pos: Flat index as produced by :meth:`rowcol_to_pos`.
        :return: Tuple ``(row, col)`` of plain Python integers.
        """
        # int() also accepts the NumPy integer returned by rng.choice.
        return divmod(int(pos), self.ncols)

    def reset(self, *, seed=None, return_info=False):
        """Place the agent on a randomly drawn start field and reset the episode.

        :param seed: Seed forwarded to the parent ``reset``.
        :param return_info: Flag forwarded to the parent ``reset``.
        :return: Whatever the parent ``reset`` returns.
        """
        # NOTE(review): the start field is drawn before ``seed`` is handed to the
        # parent class, so the seed presumably only affects later draws - confirm
        # against the Environment base class.
        start_position = self.rng.choice(
            self.nrows * self.ncols, p=self.start_position_distribution
        )
        self._state["position"] = self.pos_to_rowcol(start_position)

        return super().reset(seed=seed, return_info=return_info)

    def _update_state(self, action):
        """Apply ``action`` to the current state (not implemented yet)."""
        # todo: implement this
        pass

    def _obtain_observation(self):
        """Return the current agent position as a row-major flat index."""
        row, col = self._state["position"]
        return self.rowcol_to_pos(row, col)

    def _is_done(self):
        """Return ``True`` when the agent stands on the goal field ``G``."""
        row, col = self._state["position"]
        return bool(self.maze_map[row, col] == b"G")

    def _obtain_info(self):
        """Return additional episode information (currently always empty)."""
        return {}

In [None]:
# todo: run script