In [6]:
from enum import Enum
from envs.grid import Grid
from envs.constants import SQUARE_SIZE

# Fire Evacuation Planner MDP
This agent will implement a classic MDP with states, rewards and transition models.
Extending the MDP to our use case could include:
 - Fire Spread algorithm:
   - Episode ends if stepping in fire state
   - Firefighter (MDP agent) receives a reward for stepping onto tiles with people who need to be rescued
   - Generate an environment that includes more sophisticated properties - e.g. walls placed on the grid, doors and so on...
   - Default reward could be something like -0.04 to encourage efficiency
   - Pass arguments to the grid when defining the base environment (walls, starting fire, people)

## Compare Reinforcement Learning Methods (Q-learning, SARSA) to Classical Methods (Policy iteration, value iteration, linear programming)
The separate models will aim to answer whether classical or RL-based methods are better suited for a simulation of a real-world fire hazard on a building floor.

# Possible challenges of a classical MDP implementation
Since we are dealing with a classical MDP, we need to make sure that all processes are Markovian - the next state and reward depend only on the current state and the action taken.

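If we encode the fire in a way that it spreads independently of the agent's state, then the next state and reward would depend on information that is not part of the current state, which breaks the Markov property - our agent would be acting in a non-MDP way.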

 - One way to solve this would be to include the fire status of every grid cell in the state, which can quickly turn into a lot of calculations and states even for a simple grid.

For small grids, such as a 3x4 example, this would already be a challenge (12 cells give up to $2^{12} = 4096$ fire configurations for each agent position); for bigger ones, Reinforcement Learning would almost definitely need to be adopted in order to manage the changing environment.

In [7]:
class FireEvacuationAgentMDP:
    """Deterministic grid-world agent for the fire-evacuation MDP.

    A state is an ``(x, y)`` tuple naming a tile for which
    ``grid.tiles[x][y].is_traversable`` is truthy. The grid is treated as
    square: both ``rows`` and ``cols`` are taken from ``grid.size``.

    A move that would leave the grid or enter a non-traversable tile leaves
    the agent where it is.
    """

    # Displacement (dx, dy) for every action. Insertion order defines the
    # order of ``self.actions``. Note that 'up' *increases* y.
    _MOVES = {
        'up': (0, 1),
        'left': (-1, 0),
        'right': (1, 0),
        'down': (0, -1),
    }

    def __init__(self, start_state, grid: "Grid"):
        """Enumerate the traversable states and place the agent.

        Args:
            start_state: Requested ``(x, y)`` start tile. If it is not one of
                the traversable states, the first traversable state is used
                instead; if the grid has none at all, ``(0, 0)`` is used.
            grid: Object exposing ``size`` and ``tiles[x][y].is_traversable``.
        """
        self.grid = grid
        self.rows = grid.size
        self.cols = grid.size
        self.actions = list(self._MOVES)

        # Row-major enumeration: y is the outer loop, x the inner one.
        self.possible_states = [
            (x, y)
            for y in range(self.rows)
            for x in range(self.cols)
            if self.grid.tiles[x][y].is_traversable
        ]

        if start_state in self.possible_states:
            chosen = start_state
        elif self.possible_states:
            chosen = self.possible_states[0]
        else:
            # Fallback, should ideally not happen if grid is valid
            chosen = (0, 0)
        self.start_state = self.current_state = chosen

    def _is_traversable(self, state) -> bool:
        """Return True when ``state`` lies on the grid and can be entered."""
        x, y = state
        # Bounds are checked first so that the tile lookup never sees a
        # negative index (which Python would silently wrap around).
        return (
            0 <= x < self.cols
            and 0 <= y < self.rows
            and bool(self.grid.tiles[x][y].is_traversable)
        )

    def reset(self):
        """Move the agent back to the start state and return that state."""
        self.current_state = self.start_state
        return self.current_state

    def step(self, action):
        """Apply ``action`` and return the resulting state.

        The agent stays in place when the target tile is outside the grid or
        not traversable.

        Raises:
            ValueError: If ``action`` is not one of ``self.actions``.
        """
        if action not in self.actions:
            raise ValueError("Invalid action")

        dx, dy = self._MOVES[action]
        x, y = self.current_state
        candidate = (x + dx, y + dy)

        if self._is_traversable(candidate):
            self.current_state = candidate
        # Otherwise: stay in place (hit a wall, an obstacle or the grid edge).
        return self.current_state

    def get_possible_states(self):
        """Return the list of traversable ``(x, y)`` states."""
        return self.possible_states

    def __str__(self):
        return f"Agent is at state {self.current_state}"


In [11]:
# Build the demo grid and pick the agent's start tile: the first traversable
# tile in row-major order (y outer, x inner), which is the same order
# FireEvacuationAgentMDP uses when it enumerates its states.
grid_instance = Grid(size=5, tile_size=SQUARE_SIZE)
initial_agent_pos = (0, 0)

actual_start_state = next(
    (
        (x, y)
        for y in range(grid_instance.size)
        for x in range(grid_instance.size)
        if grid_instance.tiles[x][y].is_traversable
    ),
    None,
)
found_traversable = actual_start_state is not None

if not found_traversable:
    print("Warning: No traversable states found in the grid!")
    # Keep the name bound so the following cells do not fail with NameError.
    actual_start_state = initial_agent_pos

In [12]:
# Create the agent on the demo grid, starting from the tile chosen above.
mdp = FireEvacuationAgentMDP(
    grid=grid_instance,
    start_state=actual_start_state,
)

In [13]:
# print() is used on purpose: the class defines __str__ but no __repr__, so a
# bare `mdp` expression would show the default object repr instead.
print(mdp)

Agent is at state (0, 0)


In [14]:
# Smoke-test the movement rules: apply each move in turn and show where the
# agent ends up after it.
for action in ('right', 'down', 'left', 'up'):
    mdp.step(action)
    print(mdp)

Agent is at state (1, 0)
Agent is at state (1, 0)
Agent is at state (0, 0)
Agent is at state (0, 1)
