# Miniproject 1

In [None]:
!pip install pyperplan
!pip install sympy


## Imports and Utilities
**Note**: these imports and functions are available in catsoop. You do not need to copy them in.

In [None]:
import os
import time
import copy
import numpy as np
import pdb
from sympy import Symbol, And, Or, satisfiable
from pyperplan.pddl.parser import Parser
from pyperplan import grounding, planner
import tempfile

class State:
    """Snapshot of a search-and-rescue world.

    Attributes:
      robot: A (row, col) tuple, the robot's loc.
      hospital: A (row, col) tuple, the hospital's loc.
      carrying: The str name of the person being carried,
        or None, if no person is being carried.
      people: A dict mapping str people names to (row, col)
        locs. If a person is being carried, they do not
        appear in this dict.
      state_map: A numpy array of str 'C', 'F', 'S', and 'W',
        where 'C' represents free space, 'F' represents fire,
        'S' represents smoke, and 'W' represents an obstacle (wall).
        `render` additionally draws cells equal to 'U' as "??".
        The robot may safely enter any cell that is clear ('C')
        or contains smoke ('S').
    """
    def __init__(self, robot=None, hospital=None,
                 carrying=None, people=None, state_map=None):
        """Build a state; every argument left as None takes a default.

        The defaults describe a 7x7 map with the robot in the top-left
        corner, the hospital in the bottom-right corner and four people.
        """
        # np.str_ is the canonical NumPy unicode scalar type; the old
        # alias np.str0 was removed in NumPy 2.0.
        default_state_map = np.array([
            ['C', 'C', 'C', 'C', 'C', 'C', 'C'],
            ['C', 'W', 'W', 'C', 'C', 'W', 'W'],
            ['C', 'C', 'C', 'C', 'C', 'C', 'C'],
            ['C', 'C', 'W', 'C', 'C', 'C', 'C'],
            ['C', 'C', 'W', 'C', 'W', 'C', 'C'],
            ['C', 'C', 'C', 'C', 'C', 'W', 'C'],
            ['C', 'W', 'C', 'C', 'W', 'C', 'C']
        ], dtype=np.str_)
        default_robot = (0, 0)  # top left corner
        default_hospital = (6, 6)  # bottom right corner
        default_people = {
            "p1": (4, 0),
            "p2": (6, 0),
            "p3": (0, 6),
            "p4": (3, 3)
        }
        self.state_map = state_map if state_map is not None else default_state_map
        self.robot = robot if robot is not None else default_robot
        self.hospital = hospital if hospital is not None else default_hospital
        # The default for `carrying` is None, so the argument is used as given.
        self.carrying = carrying
        self.people = people if people is not None else default_people

    def get_safe_grid(self):
        """Return a boolean grid marking the cells the robot may enter.

        Returns:
          safe_grid: A boolean numpy array with the shape of `state_map`;
            True where the cell is clear ('C') or smoke ('S').
        """
        safe_grid = np.logical_or(
            self.state_map == "C",
            self.state_map == "S")
        return safe_grid

    def render(self, msg=None):
        """Print an ASCII picture of the state.

        Args:
          msg: Optional text printed before the picture.
        """
        height, width = self.state_map.shape
        state_arr = np.full((height, width), "  ", dtype=object)
        state_arr[self.state_map == 'W'] = "##"
        state_arr[self.state_map == 'F'] = "XX"
        state_arr[self.state_map == 'S'] = "||"
        state_arr[self.state_map == 'U'] = "??"
        state_arr[self.hospital] = "Ho"
        state_arr[self.robot] = "Ro"
        # Draw the people not at the hospital
        for person, loc in self.people.items():
            if loc == self.hospital:
                continue
            elif loc == self.robot:
                # Robot and person share the cell: show "R" + last char of the name.
                person = "R" + person[-1]
            state_arr[loc] = person
        # Add padding: a one-cell "##" border around the map
        padded_state_arr = np.full((height + 2, width + 2), "##", dtype=object)
        padded_state_arr[1:-1, 1:-1] = state_arr
        state_arr = padded_state_arr
        carrying_str = f"Carrying: {self.carrying}"
        # Print
        if msg:
            print(msg)
        for row in state_arr:
            print(''.join(row))
        print(carrying_str)
        print()

    def copy(self):
        """Return a copy whose map array and people dict are independent."""
        state_copy = copy.copy(self)
        state_copy.state_map = self.state_map.copy()   # copy the numpy array
        state_copy.people = self.people.copy()
        return state_copy

class SearchAndRescueProblem:
    """Defines a search and rescue (SAR) problem.

    In search and rescue, a robot must navigate to, pick up, and
    drop off people that are in need of help.

    Actions are strs. The following actions are defined:
      "up" / "down" / "left" / "right" : Moves the robot. The
        robot cannot move into obstacles or off the map.
      "pickup-{person}": If the robot is at the person, and if
        the robot is not already carrying someone, picks them up.
      "dropoff": If the robot is carrying a person, they are
        dropped off at the robot's current location.
      "look...": later we'll allow these actions, but they
        have no effect on the state.

    This structure serves as a container for a transition model
    "get_next_state(state, action)", an observation model "get_observation(state)"
    and an action model "get_legal_actions(state)"

    Example usage:
      problem = SearchAndRescueProblem()
      state = State()
      state.render()
      action = "down"
      next_state, valid = problem.get_next_state(state, action)
      next_state.render()
    """
    def __init__(self):
        # (row delta, col delta) applied to the robot for each move action.
        self.action_deltas = {
            "up": (-1, 0),
            "down": (1, 0),
            "left": (0, -1),
            "right": (0, 1),
            }

    @staticmethod
    def is_valid_location(loc_r, loc_c, state, verbose=False):
        """Return True if (loc_r, loc_c) is on the map and safe to enter.

        Args:
          loc_r: An int row index.
          loc_c: An int column index.
          state: A State; its state_map and get_safe_grid() are consulted.
          verbose: If True, print a warning explaining a rejection.
        """
        if not (0 <= loc_r < state.state_map.shape[0] and
                0 <= loc_c < state.state_map.shape[1]):
            if verbose:
                print("WARNING: attempted to move out of bounds, action has no effect.")
            return False
        if not state.get_safe_grid()[loc_r, loc_c]:
            if verbose:
                print("WARNING: attempted to move into an obstacle/unsafe region, action has no effect.")
            return False
        return True

    @staticmethod
    def get_legal_actions(state):
        """Return the action strs recognized in `state`.

        These are the four moves, "dropoff", and one "pickup-{person}"
        for every person currently in state.people.
        """
        legal_actions = ["up", "down", "left", "right", "dropoff"]
        for person in state.people:
            legal_actions.append(f"pickup-{person}")
        return legal_actions

    def get_next_state(self, state, action, verbose=False):
        """Apply `action` to `state`.

        Args:
          state: A State. It is never modified; a copy is returned when
            the action takes effect.
          action: A str action, see the class docstring.
          verbose: If True, print a warning when the action has no effect.

        Returns:
          (next_state, valid): `valid` is False when the action has no
            effect, in which case `next_state` is the input `state`.

        Raises:
          ValueError: if `action` is neither a legal action in `state`
            nor a "look..." action.
        """
        legal_actions = self.get_legal_actions(state)
        if action not in legal_actions and not action.startswith('look'):
            raise ValueError(f"Unrecognized action {action}. Actions must be one of: {legal_actions}")

        if action in ["up", "down", "left", "right"]:
            dr, dc = self.action_deltas[action]
            r, c = state.robot
            if not self.is_valid_location(r + dr, c + dc, state, verbose=verbose):
                if verbose:
                    print(f"Action {action} is invalid in {state}.")
                return state, False
            new_state = state.copy()
            new_state.robot = (r + dr, c + dc)
            return new_state, True

        elif action.startswith("pickup"):
            # Split only on the first "-" so that person names which
            # themselves contain a hyphen are recovered in full.
            person = action.split("-", 1)[1]
            if state.carrying is not None:
                if verbose:
                    print("WARNING: attempted to pick up a person while already carrying someone, action has no effect.")
                return state, False
            if person not in state.people or (state.people[person] != state.robot):
                if verbose:
                    print("WARNING: attempted to pick up a person not at the robot location, action has no effect.")
                return state, False
            new_state = state.copy()
            del new_state.people[person]
            new_state.carrying = person
            return new_state, True

        elif action == "dropoff":
            if state.carrying is None:
                if verbose:
                    print("WARNING: attempted to dropoff while not carrying anyone, action has no effect.")
                return state, False
            person = state.carrying
            new_state = state.copy()
            new_state.carrying = None
            new_state.people[person] = state.robot
            return new_state, True

        elif action.startswith('look'):
            # Look actions leave the state untouched but count as valid.
            return state, True

        else:
            # Defensive: every action accepted by the check above is handled.
            raise KeyError

    def get_observation(self, state):
        """Return the map entries at the robot and its adjacent non-wall cells.

        Returns:
          observation: A dict mapping (row, col) to the state_map entry
            there, for the robot's own cell and each in-bounds neighbor
            (up/down/left/right) that is not a wall ('W').
        """
        height, width = state.state_map.shape
        deltas = self.action_deltas
        r, c = state.robot
        observation = {(r, c): state.state_map[r, c]}
        for dr, dc in deltas.values():
            nr = r + dr
            nc = c + dc
            if not (0 <= nr < height and 0 <= nc < width):
                continue
            if state.state_map[nr, nc] == "W":
                continue
            observation[(nr, nc)] = state.state_map[nr, nc]
        return observation

def execute_plan(problem, plan, state):
    """Run every action of `plan` from `state`, rendering as it goes.

    The state is rendered before each action and once more at the end.
    Each action must be valid according to problem.get_next_state.

    Returns:
      The state reached after the last action.
    """
    current = state
    for planned_action in plan:
        current.render(msg=f'execute_plan: {planned_action}')
        # Advance the world by one action.
        current, succeeded = problem.get_next_state(current, planned_action)
        assert succeeded, 'Attempted to execute invalid action'
    current.render(msg='execute_plan: Final state')
    return current

def agent_loop(problem, initial_state, policy, initial_belief,
               max_steps = 200):
    """Alternate between acting in the world and updating the belief.

    The belief is first updated with an initial observation. Then, for at
    most `max_steps` iterations, the policy picks an action from the
    belief, the action is executed, and the belief is updated with the
    action and the new observation. The loop stops early when the policy
    returns '*Success*' or '*Failure*'.

    Returns:
      (outcome, state, belief): outcome is whatever terminal signal the
        policy returned, or '*Failure*' if the step budget ran out.
    """
    terminal_signals = ('*Success*', '*Failure*')

    world = initial_state
    world.render(msg='initial state')
    current_belief = initial_belief
    current_belief.render(msg='initial belief')

    # Fold the very first observation into the belief (no action yet).
    first_obs = problem.get_observation(world)
    print('Initial observation', first_obs)
    current_belief = current_belief.update(problem, first_obs)
    current_belief.render(msg='new belief')

    for step_idx in range(max_steps):
        chosen = policy(current_belief)
        if chosen in terminal_signals:
            print('Terminate with', chosen)
            return chosen, world, current_belief
        # Execute the action in the real world.
        world, ok = problem.get_next_state(world, chosen)
        assert ok, 'Attempted to execute invalid action'
        # Sense the cells around the robot, then update the belief with
        # the action taken and what was seen.
        seen = problem.get_observation(world)
        current_belief = current_belief.update(problem, seen, chosen)
        print('agent_loop: step', step_idx, 'action', chosen, 'observation', seen)
        world.render(msg='new state')
        current_belief.render(msg='new belief')
    return '*Failure*', world, current_belief

def get_num_delivered(state):
    """Count the people whose location equals the hospital location.

    People missing from state.people are not counted.
    """
    return sum(
        1 for person_loc in state.people.values()
        if person_loc == state.hospital
    )

def execute_count_num_delivered(problem, state, plan):
    """Run a search-and-rescue plan and report how many people were delivered.

    Args:
      problem: A SearchAndRescueProblem
      state: The State the plan is executed from.
      plan: A list of action strs, see SearchAndRescueProblem.

    Returns:
      num_delivered: int, the number of people located at the hospital
        in the state reached after executing the plan.
    """
    final_state = execute_plan(problem=problem, plan=plan, state=state)
    num_delivered = get_num_delivered(final_state)
    return num_delivered


def run_planning(domain_pddl_str, problem_pddl_str, search_alg_name,
                 heuristic=None):
    """Plan a sequence of actions to solve the given PDDL problem.

    This function is a lightweight wrapper around pyperplan. The two PDDL
    strings are written to temporary files for pyperplan's Parser; the
    files are removed again even if parsing fails.

    Args:
      domain_pddl_str: A str, the contents of a domain.pddl file.
      problem_pddl_str: A str, the contents of a problem.pddl file.
      search_alg_name: A str, the name of a search algorithm in
        pyperplan. Options: astar, wastar, gbf, bfs, ehs, ids, sat.
      heuristic: A str or a pyperplan `Heuristic` class.
        A str, the name of a heuristic in pyperplan.
          Options: blind, hadd, hmax, hsa, hff, lmcut, landmark.
        A pyperplan `Heuristic` class.
          See: https://github.com/aibasel/pyperplan/blob/main/doc/documentation.md#implementing-new-heuristics
        If None, the search algorithm is called without a heuristic.

    Returns:
      plan: A list of actions; each action is a pyperplan Operator.
    """
    # Parsing the PDDL
    domain_path = None
    problem_path = None
    try:
        # delete=False keeps the files on disk after the handles are
        # closed, so the Parser can reopen them by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            domain_path = f.name
            f.write(domain_pddl_str)
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            problem_path = f.name
            f.write(problem_pddl_str)
        parser = Parser(domain_path, problem_path)
        domain = parser.parse_domain()
        problem = parser.parse_problem(domain)
    finally:
        # Always clean up, including when writing or parsing raised.
        for path in (domain_path, problem_path):
            if path is not None:
                os.remove(path)

    # Ground the PDDL
    task = grounding.ground(problem)

    # Get the search alg
    search_alg = planner.SEARCHES[search_alg_name]

    if heuristic is None:
        return search_alg(task)

    if isinstance(heuristic, str):
        # Get the heuristic from pyperplan
        heuristic_initialized = planner.HEURISTICS[heuristic](task)
    else:
        # Use customized heuristic
        heuristic_initialized = heuristic(task)

    # Run planning
    return search_alg(task, heuristic_initialized)


# Test Cases
# All maps below are 6 rows x 5 columns. 'U' marks an unknown cell.
# NOTE(review): presumably the *_B* arrays are belief maps and the *_G0
# arrays the matching true maps passed to test_policy — confirm against callers.

# First problem
# P1_B0: every cell unknown.
P1_B0 = np.array([
    ["U", "U", "U", "U", "U"],
    ["U", "U", "U", "U", "U"],
    ["U", "U", "U", "U", "U"],
    ["U", "U", "U", "U", "U"],
    ["U", "U", "U", "U", "U"],
    ["U", "U", "U", "U", "U"]])

# P1_B1: top row, bottom row and left column known; the rest unknown.
P1_B1 = np.array([
    ["C", "S", "C", "C", "C"],
    ["S", "U", "U", "U", "U"],
    ["S", "U", "U", "U", "U"],
    ["S", "U", "U", "U", "U"],
    ["C", "U", "U", "U", "U"],
    ["C", "C", "C", "C", "C"]])

# P1_G0: fully specified map (no unknown cells).
P1_G0 = np.array([
    ["C", "S", "C", "C", "C"],
    ["S", "F", "S", "C", "C"],
    ["S", "F", "S", "S", "S"],
    ["S", "F", "F", "F", "F"],
    ["C", "S", "S", "S", "S"],
    ["C", "C", "C", "C", "C"]])

# Second problem
# P2_B1: like P1_B1, with three additional known clear cells in column 3.
P2_B1 = np.array([
    ["C", "S", "C", "C", "C"],
    ["S", "U", "U", "C", "U"],
    ["S", "U", "U", "C", "U"],
    ["S", "U", "U", "U", "U"],
    ["C", "U", "U", "C", "U"],
    ["C", "C", "C", "C", "C"]])

# P2_G0: fully specified map (no unknown cells).
P2_G0 = np.array([
    ["C", "S", "C", "C", "C"],
    ["S", "F", "S", "C", "C"],
    ["S", "F", "S", "C", "S"],
    ["S", "F", "F", "S", "F"],
    ["C", "S", "S", "C", "S"],
    ["C", "C", "C", "C", "C"]])

def test_policy(belief_map, true_map, problem, policy):
    """
        Run a policy on a SearchAndRescue problem via agent_loop.

        The robot starts in the top-right corner, the hospital is in the
        bottom-right corner, and a single person 'pp' stands one cell to
        the left of the hospital.

        Args:
            belief_map: A numpy array specifying the belief map
            true_map:   A numpy array specifying the state map
            problem:    A SearchAndRescueProblem instance
            policy:     A policy returned by a policy making fn.
                        e.g. make_planner_policy(problem, planner)

        Returns:
            Whatever agent_loop returns for this setup.
    """
    n_rows, n_cols = true_map.shape
    last_row = n_rows - 1
    last_col = n_cols - 1
    # Layout shared by the environment state and the initial belief.
    scenario = dict(
        robot=(0, last_col),
        hospital=(last_row, last_col),
        people={'pp': (last_row, last_col - 1)},  # Peter Parker
        carrying=None,
    )
    # Environment state: uses the true map.
    env_state = State(state_map=true_map, **scenario)
    # Initial belief: same layout, but with the supplied belief map.
    b0 = BeliefState(state_map=belief_map, **scenario)
    # Do it
    return agent_loop(problem, env_state, policy, b0)



## Search and Rescue Warmup 1


### Question
Find the robot location in a SearchAndRescue State.

For reference, our solution is **2** line(s) of code.

In [None]:
def sar_warmup1(sar_state):
  """Find the robot location in a SearchAndRescue State.

  Exercise placeholder: raises NotImplementedError until implemented.

  Args:
    sar_state: A State

  Returns:
    robot_loc: A tuple of ints (row, col) representing the robot location.
  """
  raise NotImplementedError("Implement me!")

### Tests

In [None]:
def sar_warmup_test1():
    """The default State must report the robot at the top-left corner."""
    expected_loc = (0, 0)
    assert sar_warmup1(State()) == expected_loc

sar_warmup_test1()

print('Tests passed.')

## Search and Rescue Warmup 2


### Question
Check if a row and col have an obstacle in a SearchAndRescue State.

For reference, our solution is **2** line(s) of code.

In [None]:
def sar_warmup2(sar_state, row, col):
  """Check if a row and col have an obstacle in a SearchAndRescue State.

  Exercise placeholder: raises NotImplementedError until implemented.

  Args:
    sar_state: A SearchAndRescue State.
    row: An int.
    col: An int.

  Returns:
    has_obstacle: True if (row, col) has an obstacle (wall) in sar_state.
  """
  raise NotImplementedError("Implement me!")

### Tests

In [None]:
def sar_warmup_test2():
    """Probe two free cells and two wall cells of the default map."""
    state = State()
    expectations = [
        ((0, 0), False),
        ((0, 1), False),
        ((1, 1), True),
        ((1, 2), True),
    ]
    for (row, col), has_wall in expectations:
        assert sar_warmup2(state, row, col) == has_wall

sar_warmup_test2()

print('Tests passed.')

## Search and Rescue Warmup 3


### Question
Hand-code a list of actions that will deliver person 'p1' (shown in the image above) to the hospital location.

For reference, our solution is **2** line(s) of code.

In [None]:
def sar_warmup3():
  """Hand-code a list of actions that will deliver person 'p1' to the hospital location.

  Exercise placeholder: raises NotImplementedError until implemented.

  Returns:
    actions: A list of str actions that will take person p1 to the hospital location.
  """
  raise NotImplementedError("Implement me!")

### Tests

In [None]:
def sar_warmup_test3():
    """Executing the hand-coded plan must leave p1 at the hospital (6, 6)."""
    final_state = execute_plan(SearchAndRescueProblem(), sar_warmup3(), State())
    assert final_state.people["p1"] == (6,6)

sar_warmup_test3()

print('Tests passed.')

## Inferring from observations


### Question
Write a program that takes a grid as input and infers unknown values.

Your program should output a new grid with all determinable unknown values replaced with the inferred value. If an unknown value cannot be determined, it should be left unknown.

**Your program should use sympy.**


For reference, our solution is **62** line(s) of code.

In [None]:
def infer_unknown_values(grid):
  """Fill in any unknown values in the grid that can be inferred.

  Exercise placeholder: raises NotImplementedError until implemented.

  Args:
    grid: A list of lists of "F", "U", "S", "W", or "C".

  Returns:
    inferred_grid: A copy of grid with some unknown values replaced.
      NOTE(review): the notebook's tests compare the result with `==`
      against nested lists, so a list of lists is presumably expected
      — confirm.

  Example:
    >>> grid = [
    ...   ["F", "U", "C"],
    ...   ["W", "C", "U"],
    ...   ["U", "U", "C"]
    ... ]
    >>> infer_unknown_values(grid)
    [['F', 'S', 'C'], ['W', 'C', 'C'], ['U', 'U', 'C']]
  """
  raise NotImplementedError("Implement me!")

### Tests

In [None]:

# NOTE(review): every check compares the result with `==` against nested
# lists, so infer_unknown_values presumably has to return plain lists — confirm.

# 1x2 grid: the expected output equals the input.
assert infer_unknown_values([["U", "F"]]) == [["U", "F"]]


# 3x3 grid containing a known fire cell.
assert infer_unknown_values([["F", "U", "C"], ["S", "C", "U"], ["U", "U", "C"]]) == [["F", "S", "C"], ["S", "C", "C"], ["U", "U", "C"]]


# 3x3 grid with no known fire cell; every unknown is expected to be resolved.
assert infer_unknown_values([["U", "C", "C"], ["S", "C", "U"], ["U", "U", "C"]]) == [["C", "C", "C"], ["S", "C", "C"], ["F", "S", "C"]]


# 3x4 grid; two cells are expected to stay unknown.
assert infer_unknown_values([["U", "S", "C", "U"], ["U", "U", "C", "U"], ["U", "S", "C", "U"]]) == [["F", "S", "C", "C"], ["U", "U", "C", "C"], ["F", "S", "C", "C"]]


# 8x8 grid that is mostly unknown.
assert infer_unknown_values([["U", "U", "C", "U", "U", "U", "U", "U"], ["C", "U", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "C", "C"], ["U", "U", "U", "U", "U", "U", "C", "C"], ["U", "C", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "F", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "U", "U"]]) == [["C", "C", "C", "U", "U", "U", "U", "U"], ["C", "U", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "C", "C"], ["U", "U", "U", "U", "U", "U", "C", "C"], ["U", "C", "U", "U", "U", "U", "U", "U"], ["U", "U", "U", "F", "U", "U", "U", "U"], ["U", "U", "U", "U", "U", "U", "U", "U"]]


# 7x7 grid with walls and two known fire cells.
assert infer_unknown_values([["C", "U", "C", "U", "U", "C", "U"], ["U", "W", "W", "U", "C", "W", "W"], ["U", "F", "U", "U", "U", "F", "U"], ["C", "S", "W", "C", "U", "U", "U"], ["U", "U", "W", "U", "W", "U", "U"], ["C", "C", "U", "C", "U", "W", "U"], ["U", "W", "C", "U", "W", "U", "C"]]) == [["C", "C", "C", "C", "C", "C", "C"], ["C", "W", "W", "C", "C", "W", "W"], ["S", "F", "U", "U", "S", "F", "U"], ["C", "S", "W", "C", "U", "U", "U"], ["C", "C", "W", "C", "W", "U", "U"], ["C", "C", "C", "C", "C", "W", "U"], ["C", "W", "C", "C", "W", "C", "C"]]


# 7x7 grid with walls and four known fire cells.
assert infer_unknown_values([["C", "U", "C", "U", "U", "C", "U"], ["U", "W", "W", "U", "C", "W", "W"], ["U", "F", "U", "U", "U", "F", "U"], ["C", "S", "W", "C", "U", "F", "U"], ["U", "U", "W", "U", "W", "U", "U"], ["C", "C", "U", "C", "U", "W", "F"], ["U", "W", "C", "U", "W", "U", "U"]]) == [["C", "C", "C", "C", "C", "C", "C"], ["C", "W", "W", "C", "C", "W", "W"], ["S", "F", "U", "U", "S", "F", "U"], ["C", "S", "W", "C", "S", "F", "U"], ["C", "C", "W", "C", "W", "U", "U"], ["C", "C", "C", "C", "C", "W", "F"], ["C", "W", "C", "C", "W", "U", "U"]]
print('Tests passed.')

## Belief update


### Question
Finish the implementation of the update method for BeliefState.


For reference, our solution is **41** line(s) of code.

In addition to all of the utilities defined at the top of the colab notebook, the following functions are available in this question environment: `infer_unknown_values`. You may not need to use all of them.

In [None]:
class BeliefState(State):
    """A State whose map may contain unknown ('U') cells.

    Accepts the same keyword arguments as State. When no state_map is
    supplied, the map defaults to the default layout's walls with every
    other cell unknown.
    """
    def __init__(self,  **kwargs):
        super().__init__(**kwargs)
        # Treat an explicit state_map=None like an omitted one, matching
        # how State itself interprets None as "use the default".
        if kwargs.get("state_map") is None:
            # np.str_ replaces the alias np.str0, removed in NumPy 2.0.
            self.state_map = np.array([
                ['U', 'U', 'U', 'U', 'U', 'U', 'U'],
                ['U', 'W', 'W', 'U', 'U', 'W', 'W'],
                ['U', 'U', 'U', 'U', 'U', 'U', 'U'],
                ['U', 'U', 'W', 'U', 'U', 'U', 'U'],
                ['U', 'U', 'W', 'U', 'W', 'U', 'U'],
                ['U', 'U', 'U', 'U', 'U', 'W', 'U'],
                ['U', 'W', 'U', 'U', 'W', 'U', 'U']
            ], dtype=np.str_)

    def update(self, problem, obs, action=None):
        """
        Exercise placeholder: raises NotImplementedError until implemented.

        problem: SearchAndRescueProblem instance
        obs: {loc: entry, loc: entry,...}
        action: string or None

        # <<< TODO: >>>
            1. Do transition from action (if any)
            2. Update from observation
            3. Do inference
        """
        raise NotImplementedError('Implement me!')

    def get_optimistic_state(self):
        """Returns a copy of the belief with a completed map in
        which Unknowns are assumed to be Clear."""
        new_state = self.copy()
        # The copy owns its own map array, so this does not touch self.
        new_state.state_map[self.state_map == 'U'] = 'C'
        return new_state

    def get_careful_state(self):
        """ Returns a copy of the belief.  Unknown states will not be treated as safe,
        see get_safe_grid."""
        return self.copy()

### Tests

In [None]:
def beliefupdate_test1():
    """Update an all-unknown belief with the initial observation only."""
    true_rows = ["CSCCC", "SFSCC", "SFSSS", "SFFFF", "CSSSS", "CCCCC"]
    state_map = np.array([list(row) for row in true_rows])
    # Belief starts with every cell unknown.
    beliefstate_map = np.full((6, 5), "U")
    problem = SearchAndRescueProblem()
    state = State(state_map=state_map)
    bel = BeliefState(state_map=beliefstate_map)
    new_bel = bel.update(problem, problem.get_observation(state))
    assert new_bel.robot == (0, 0)
    expected_rows = ["CSUUU", "SUUUU", "UUUUU", "UUUUU", "UUUUU", "UUUUU"]
    assert new_bel.state_map.tolist() == [list(row) for row in expected_rows]

beliefupdate_test1()


def beliefupdate_test2():
    """Update the belief after moving 'down' from the start cell."""
    true_rows = ["CSCCC", "SFSCC", "SFSSS", "SFFFF", "CSSSS", "CCCCC"]
    state_map = np.array([list(row) for row in true_rows])
    # Belief is unknown everywhere except a known smoke cell at (1, 0).
    beliefstate_map = np.full((6, 5), "U")
    beliefstate_map[1, 0] = "S"
    problem = SearchAndRescueProblem()
    state = State(state_map=state_map)
    bel = BeliefState(state_map=beliefstate_map)

    moved_state, _ = problem.get_next_state(state, 'down')
    new_bel = bel.update(problem, problem.get_observation(moved_state), 'down')
    assert new_bel.robot == (1,0)
    expected_rows = ["CSUUU", "SFUUU", "SUUUU", "UUUUU", "UUUUU", "UUUUU"]
    assert new_bel.state_map.tolist() == [list(row) for row in expected_rows]

beliefupdate_test2()

print('Tests passed.')

## Greedy policy


### Question
Implement the greedy policy function described at the start of the problem.


For reference, our solution is **22** line(s) of code.

In addition to all of the utilities defined at the top of the colab notebook, the following functions are available in this question environment: `BeliefState`, `infer_unknown_values`. You may not need to use all of them.

In [None]:
def make_greedy_policy(problem):
    """Build and return a greedy policy function for `problem`.

    The returned function takes a belief and returns an action.
    """
    def policy(belief):
        """Returns an action, or a terminal signal.

        agent_loop stops when the policy returns '*Success*' or '*Failure*'.
        Exercise placeholder: raises NotImplementedError until implemented.
        """
        # TODO: complete
        raise NotImplementedError('Implement me!')
    # return the policy function
    return policy

### Tests

In [None]:
def policy_test1():
    """On an all-clear map the greedy policy must reach the hospital."""
    problem = SearchAndRescueProblem()
    policy = make_greedy_policy(problem)

    # Empty map: overwrite every cell of both maps with 'C'.
    state = State()
    state.state_map[...] = 'C'
    bel = BeliefState()
    bel.state_map[...] = 'C'

    outcome, final_state, final_bel = agent_loop(problem, state, policy, bel)
    print('Final robot location', final_state.robot)
    assert outcome == '*Success*' and final_state.robot == final_state.hospital

policy_test1()


def policy_test2():
    """On the default map the robot must end within distance 12 of the hospital."""
    problem = SearchAndRescueProblem()

    # Use default map
    state = State()
    bel = BeliefState()

    policy = make_greedy_policy(problem)
    outcome, final_state, final_bel = agent_loop(problem, state, policy, bel)
    # Manhattan distance between the robot and the hospital.
    distance = sum(
        abs(h - p) for h, p in zip(final_state.hospital, final_state.robot)
    )
    print('Final robot location', final_state.robot)
    print('Final distance =', distance)
    assert distance < 12

policy_test2()

print('Tests passed.')

## Search and Rescue PDDL Planner


### Question
Make a planner to solve a SearchAndRescueProblem.

The core function in this planner class is 'get_plan'
This function does the following:
1. Create PDDL domain and problem strings for search and rescue. The operators should work for any grid size, obstacles, people locations, and hospital location.
2. Invoke `run_planning` using the given `search_algo` search algorithm with the `heuristic` heuristic.
3. Convert the output of run_planning (pyperplan Operators) into actions that can be executed, via `execute_plan`.
For reference, 'get_plan' takes ~1-2 seconds to run with our implementation if using 'gbf' search and 'hff' heuristic.
To get credit on catsoop, make sure that your function finishes in <10 seconds.

**Notes**:
* In this problem, you will need to construct somewhat complicated strings.  We *strongly* encourage you to read about [Python-3 f-strings](https://www.digitalocean.com/community/tutorials/how-to-use-f-strings-to-create-strings-in-python-3) which make this process much easier than the alternatives.
* You may find `state.render()` useful for debugging.
* We also highly recommend printing out the domain and problem after they have been created, and copying them into [editor.planning.domains](http://editor.planning.domains) to check whether it's possible to find a plan. This editor can be helpful for syntax checking.
* The image in catsoop with the robot and the bears is a faithful depiction of the initial state. For example, the initial locations of the people are: "p1": (4, 0), "p2": (6, 0), "p3": (0, 6), "p4": (3, 3).
* One part of this problem that may be initially counterintuitive is the way that we'll represent locations in PDDL.
In the problem, a location is a tuple of integers. PDDL does not support such representations -- everything needs to be just an object with a string name.
So to represent a location like (3, 5), we will make a string "l3-5" (where the first character there is a lowercase L), and we'll create an object
with that name, of type "location". We will also need a way to encode the fact that the robot can only move between adjacent locations in the grid.
In Python, we can compare the numeric values of locations like (3, 5) and (3, 6) to see if they are neighbors.
But in PDDL, all we have are the objects with string names, and we need to encode everything in terms of predicates.
So, we will create a predicate `(conn ?v0 - location ?v1 - location ?v2 - direction)`, which says that location `?v0` is connected to location `?v1`
in direction `?v2`. For example, `(conn l3-5 l3-6 right)` might appear in the initial state. We can then use these `conn` predicates in
the preconditions of a `move` operator to encode the fact that the robot can only move between adjacent locations.
* We do not recommend modelling the hospital explicitly with special objects / types / predicates. Instead, the goal should be to deliver all people to the hospital, that is, `l6-6`.
In words, the goal should be "person1 is at l6-6 and person2 is at l6-6 and person3 is at l6-6 and person4 is at l6-6."
  

For reference, our solution is **173** line(s) of code.

In [None]:
class SearchAndRescuePlanner:
    """A planner for a search and rescue problem.

    The core function in this class is 'get_plan'
    This function does the following:
        1. Create PDDL domain and problem strings for search and rescue. The operators should work for any grid size, obstacles, people locations, and hospital location.
        2. Invoke `run_planning` using the given `search_algo` search algorithm with the `heuristic` heuristic.
        3. Convert the output of run_planning (pyperplan Operators) into actions
           that can be given to the SearchAndRescueProblem.

    Example Usage:
        problem = SearchAndRescueProblem()
        state = State()

        planner = SearchAndRescuePlanner(search_algo='astar', heuristic='lmcut')
        plan, plan_time = planner.get_plan(state)
        state = execute_plan(problem, plan, state)

    'get_plan' Returns:
        plan: A list of actions; each action is a str, see SearchAndRescueProblem.
        plan_time: Total planning time (sec) used for plan searching.

    For reference, 'get_plan' takes ~1-2 seconds to run with our implementation if using 'gbf' search and 'lmcut' heuristic.
    """

    def __init__(self, search_algo='astar', heuristic='lmcut'):
        """Store the search algorithm and heuristic names later passed to `run_planning`."""
        self.search_algo = search_algo
        self.heuristic = heuristic

    def generate_domain_pddl(self, domain_name, added_operators='', added_predicates=''):
        """Build the PDDL domain string.

        The domain declares the types `person`, `location` and `direction`,
        the four direction constants, the predicates listed in
        `predicates_str`, and the `move-robot`, `pickup-person` and
        `dropoff-person` actions.

        Args:
            domain_name: Name inserted into the `(define (domain ...))` header.
            added_operators: Extra PDDL action text appended after the
                built-in operators.
            added_predicates: Extra PDDL predicate declarations appended
                after the built-in predicates.

        Returns:
            The complete domain definition as a str.
        """
        # <<< TODO: fill in missing parts in the PDDL domain below >>>
        predicates_str = """(conn ?v0 - location ?v1 - location ?v2 - direction)
        (is-clear ?v0 - location)
        ; TODO: write more here"""

        # <<< TODO: fill in missing parts in the PDDL domain below >>>
        operators_str = """(:action move-robot
    :parameters (?from - location ?to - location ?dir - direction)
    :precondition (and
      (conn ?from ?to ?dir)
      ; TODO: write more here
    )
    :effect (and
      ; TODO: write more here
    )
  )
  (:action pickup-person
    :parameters (?person - person ?loc - location)
    :precondition (and
      ; TODO: write more here
    )
    :effect (and
      ; TODO: write more here
    )
  )
  (:action dropoff-person
    :parameters (?person - person ?loc - location)
    :precondition (and
      ; TODO: write more here
    )
    :effect (and
      ; TODO: write more here
    )
  )"""

        domain_pddl = f"""(define (domain {domain_name})
    (:requirements :typing)
    (:types person location direction)
    (:constants
      down - direction
      left - direction
      right - direction
      up - direction
    )
    (:predicates
      {predicates_str}
      {added_predicates}
    )
    {operators_str}
    {added_operators}
)"""
        return domain_pddl

    def get_plan(self, state):
        """Plan from `state` and translate the result into action strings.

        Args:
            state: The State to plan from.

        Returns:
            A `(plan, plan_time)` tuple. `plan` is a list of action strs, or
            None when `run_planning` returns None; `plan_time` is the number
            of seconds spent inside `run_planning`.
        """
        search_algo, heuristic = self.search_algo, self.heuristic
        domain_name, added_predicate, added_operator = self.update_pddl_domain()
        domain_pddl = self.generate_domain_pddl(domain_name, added_operators=added_operator,
                                                added_predicates=added_predicate)
        # Create objects str
        obj_str = self.get_obj_strs(state)

        # Create init str
        init_str = self.get_init_strs(state)

        # Create goal str
        goal_str = self.get_goal_strs(state)

        problem_pddl = f"""(define (problem searchandrescue) (:domain {domain_name})
      (:objects
      {obj_str}
      )
      (:init 
      {init_str}
      )
      (:goal (and {goal_str}))
    )"""

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        plan = run_planning(domain_pddl, problem_pddl, search_algo, heuristic)
        time_elapsed = time.perf_counter() - start_time
        if plan is None:
            print("Failed to find a plan.")
            return None, time_elapsed

        # Convert operators to actions
        actions = self.parse_plan(plan)
        return actions, time_elapsed

    def get_obj_strs(self, state):
        """Return the space-separated PDDL object declarations for `state`.

        Every person in `state.people` is declared, followed by the carried
        person (who does not appear in `state.people`), if any.
        """
        height, width = state.state_map.shape
        objects_strs = [f"{person} - person" for person in state.people]
        # <<< TODO: add object strs for locations >>>
        if state.carrying is not None:
            objects_strs.append(f"{state.carrying} - person")
        objects_str = " ".join(objects_strs)
        return objects_str

    def get_init_strs(self, state):
        """Return the space-separated PDDL atoms describing the initial state.

        Currently emits one `(conn from to direction)` atom for every ordered
        pair of in-bounds adjacent grid cells; the remaining atoms are left
        as TODOs.
        """
        height, width = state.state_map.shape
        robot_r, robot_c = state.robot
        init_strs = []

        deltas = {
            "up": (-1, 0),
            "down": (1, 0),
            "left": (0, -1),
            "right": (0, 1),
        }
        for r in range(height):
            for c in range(width):
                # Here we're going to add one (conn ...) atom for every pair
                # of adjacent locations.
                for direction, (dr, dc) in deltas.items():
                    if not (0 <= r + dr < height and 0 <= c + dc < width):
                        continue
                    # For example, if r == 0, c == 0, dr == 0, dc == 1, then
                    # this line adds the atom (conn l0-0 l0-1 right).
                    init_strs.append(f"(conn l{r}-{c} l{r + dr}-{c + dc} {direction})")
                # <<< TODO: add more init strs >>>

        # <<< TODO: add more init strs >>>
        if state.carrying is not None:
            # <<< TODO: add init strs >>>
            pass
        else:
            # <<< TODO: add init strs >>>
            pass
        init_str = " ".join(init_strs)
        return init_str

    def get_goal_strs(self, state):
        """Return the space-separated PDDL goal atoms for `state`.

        The goal atoms themselves are still TODOs, so this currently returns
        an empty string.
        """
        goal_strs = []
        hospital_r, hospital_c = state.hospital
        # <<< TODO: add goal strs >>>
        if state.carrying is not None:
            # <<< TODO: add goal strs >>>
            pass
        goal_str = " ".join(goal_strs)
        return goal_str

    def update_pddl_domain(self):
        """Return `(domain_name, added_predicate, added_operator)` for `get_plan`.

        Both additions are empty here; `get_plan` forwards them to
        `generate_domain_pddl`.
        """
        domain_name = 'searchandrescue'
        added_predicate = ''
        added_operator = ''
        return domain_name, added_predicate, added_operator

    def parse_plan(self, plan):
        """Convert planner operators into SearchAndRescueProblem action strs.

        Assumes each `op.name` looks like "(move-robot l0-0 l0-1 right)",
        i.e. space-separated tokens wrapped in parentheses -- TODO confirm
        against pyperplan.

        Args:
            plan: An iterable of operators, each with a str `name` attribute.

        Returns:
            A list of action strs: the direction for a move, "pickup-<person>"
            for a pickup, and "dropoff" for a dropoff.

        Raises:
            AssertionError: If an operator is not one of the three known kinds.
        """
        actions = []
        for op in plan:
            if "move-robot" in op.name:
                # Drop the final character, then take the last token (the direction).
                _, direction = op.name[:-1].rsplit(" ", 1)
                action = direction
            elif "pickup-person" in op.name:
                _, person, _ = op.name.split(" ")
                action = f"pickup-{person}"
            else:
                assert "dropoff-person" in op.name, f"Unrecognized operator: {op.name}"
                action = "dropoff"
            actions.append(action)
        return actions

### Tests

In [None]:
def sar_test():
    """Plan from the default State and check that all four people are delivered."""
    sar_problem = SearchAndRescueProblem()
    sar_planner = SearchAndRescuePlanner(search_algo="gbf", heuristic="hff")
    initial_state = State()
    # Only the plan matters here; the planning time is ignored.
    plan, _plan_time = sar_planner.get_plan(initial_state)
    num_delivered = execute_count_num_delivered(
        problem=sar_problem,
        state=initial_state,
        plan=plan,
    )
    assert num_delivered == 4

sar_test()

print('Tests passed.')

## Plan Policy


### Question
Write a function that returns a policy function that follows a plan.


For reference, our solution is **49** line(s) of code.

In addition to all of the utilities defined at the top of the colab notebook, the following functions are available in this question environment: `SearchAndRescuePlanner`, `infer_unknown_values`. You may not need to use all of them.

In [None]:
def make_planner_policy(problem, planner):
    """Create a policy function that follows a plan.

    Args:
        problem: Presumably the SearchAndRescueProblem being solved -- not
            used by this stub; confirm when implementing.
        planner: Presumably a callable mapping a state to a plan -- not used
            by this stub; confirm when implementing.

    Returns:
        A `policy(belief)` function. Until the TODO is completed it raises
        NotImplementedError when called.
    """
    # Memory kept between policy calls: the plan and the step we're on.
    status = dict(plan=None, step=None)

    def policy(belief):
        """Return an action string, or '*Failure*' or '*Success*'."""
        # TODO: complete
        raise NotImplementedError('Implement me!')

    return policy

### Tests

In [None]:
def sar_policy_test():
    """Run the plan-following policy from the default State and check four deliveries."""
    sar_problem = SearchAndRescueProblem()
    base_planner = SearchAndRescuePlanner(search_algo="gbf", heuristic="hff")

    def plan_only(state):
        # Adapter: return just the plan, discarding the planning time.
        plan, _plan_time = base_planner.get_plan(state)
        return plan

    policy = make_planner_policy(sar_problem, plan_only)
    initial_state = State()
    # Observable: the belief is built from the true state map.
    initial_belief = BeliefState(state_map=initial_state.state_map)
    _outcome, final_state, _final_belief = agent_loop(
        sar_problem, initial_state, policy, initial_belief
    )
    assert get_num_delivered(final_state) == 4

sar_policy_test()

print('Tests passed.')