# 6.882 HW 1.3 Code Part 1

See the problem set handout for instructions and deliverables.

See HW1.1 Starter Code for dependency installation instructions.

In [10]:
# Install dependencies (run this once ever 12 hours)
!pip install --upgrade git+https://github.com/tomsilver/pddlgym # Install most recent PDDLGym (must be from source!)
!pip install tabulate
!pip install pyperplan

Collecting git+https://github.com/tomsilver/pddlgym
  Cloning https://github.com/tomsilver/pddlgym to /private/var/folders/2y/1l8njzj51qd7b7jnwzz5v2bm0000gn/T/pip-req-build-d_sk6bwo
Building wheels for collected packages: pddlgym
  Building wheel for pddlgym (setup.py) ... [?25ldone
[?25h  Created wheel for pddlgym: filename=pddlgym-0.0.2-py3-none-any.whl size=5114477 sha256=e0a2e3c49062ac5737f7569ae0d1f304199c1e0890a315c9ced0e1fb8bb7bbe0
  Stored in directory: /private/var/folders/2y/1l8njzj51qd7b7jnwzz5v2bm0000gn/T/pip-ephem-wheel-cache-qby4m2dn/wheels/70/00/da/84f1ea25112e85e8e4218a6905deae211edd977267c483f457
Successfully built pddlgym
Installing collected packages: pddlgym
  Attempting uninstall: pddlgym
    Found existing installation: pddlgym 0.0.2
    Uninstalling pddlgym-0.0.2:
      Successfully uninstalled pddlgym-0.0.2
Successfully installed pddlgym-0.0.2
You should consider upgrading via the '/Users/tom/.pyenv/versions/3.6.7/bin/python3.6 -m pip install --upgrade pip' co

In [11]:
from collections import namedtuple, defaultdict, deque
from itertools import count, product
from tabulate import tabulate
import abc
import copy
import numpy as np
import heapq as hq
import pddlgym
from pddlgym.structs import Predicate, State, Type, LiteralConjunction
from pddlgym.parser import PDDLProblemParser
import functools
import tempfile
import os
import pyperplan
import time

### Classes
First we define some convenient abstract classes for Approach, Planner, Heuristic, and Featurizer.

In [12]:
class Approach:
    """Generic approach for learning and behaving in a domain.
    """
    @abc.abstractmethod
    def set_actions(self, actions):
        """Tell the approach what actions are available in the domain
        
        Parameters
        ----------
        actions : [ Any ]
            For a continuous action space, this would not work! If you are
            curious how one might handle actions more generally, see
            https://gym.openai.com/docs/#spaces.
        """
        raise NotImplementedError("Override me!")
    
    @abc.abstractmethod
    def reset(self, state):
        """Tell the approach to prepare to take actions from the given initial state.
        
        Parameters
        ----------
        state : pddlgym.State
            Note that the state contains the goal (state.goal).
            
        Returns
        -------
        info : dict
            Any logging or debugging info can go here.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def step(self, state):
        """Ask the approach for an action to take given the input state.
        Assume that the action will be subsequently executed in the environment.
        
        Parameters
        ----------
        state : pddlgym.State
            Note that the state contains the goal (state.goal).
        
        Returns
        -------
        action : Any
        info : dict
            Any logging or debugging info can go here.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def seed(self, seed):
        """Optionally set a random seed
        """
        raise NotImplementedError("Override me!")
        
    @abc.abstractmethod
    def train(self, env):
        """Some approaches learn. Others will do nothing for training.
        
        Parameters
        ----------
        env : pddlgym.PDDLEnv
            A training environment that encapsulates training problems.
        """
        raise NotImplementedError("Override me!")
        

class Planner:
    """Generic class for planning
    """
    @abc.abstractmethod
    def __call__(self, state):
        """Make a plan given the state.

        Parameters
        ----------
        state : pddlgym.State
            Note that the state contains the goal (state.goal).
        
        Returns
        -------
        actions : [ Any ]
            The plan
        info : dict
            Any logging or debugging info can go here.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def set_actions(self, actions):
        """Tell the planner what actions are available in the domain
        
        Parameters
        ----------
        actions : [ Any ]
        """
        raise NotImplementedError("Override me!")
        

class Heuristic:
    """Generic class for heuristics
    """
    @abc.abstractmethod
    def __call__(self, node):
        """Return a heuristic value (estimated cost-to-go) given a search node.
        
        Parameters
        ----------
        node : AStar.Node

        Returns
        -------
        heuristic : float
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def set_actions(self, actions):
        """Tell the planner what actions are available in the domain
        
        Parameters
        ----------
        actions : [ Any ]
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def train(self, env):
        """Some heuristics are learnable. Others will do nothing for training.
        
        Parameters
        ----------
        env : pddlgym.PDDLEnv
            A training environment that encapsulates training problems.
        """
        raise NotImplementedError("Override me!")

In [13]:
class SearchApproach(Approach):
    """Make a plan and follow it
    """
    def __init__(self, planner, heuristic=None):
        self._planner = planner
        self._heuristic = heuristic
        self._actions = None
        self._plan = []
        self._rng = None

    def set_actions(self, actions):
        self._actions = actions
        self._planner.set_actions(actions)

    def reset(self, obs):
        self._plan, info = self._planner(obs)
        return info

    def step(self, obs):
        if not self._plan:
            print("Warning: step was called without a plan. Defaulting to random action.")
            return self._rng.choice(self._actions)
        return self._plan.pop(0)

    def seed(self, seed):
        self._rng = np.random.RandomState(seed)
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.seed(seed)
        
    def train(self, env):
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.train(env)

In [14]:
class AStar(Planner):
    """Planning with A* search
    """
    
    Node = namedtuple("Node", ["state", "parent", "action", "g"])

    def __init__(self, successor_fn, check_goal_fn, heuristic=None, timeout=100):
        self._get_successor_state = successor_fn
        self._check_goal = check_goal_fn
        self._heuristic = heuristic or (lambda s : 0)
        self._timeout = timeout
        self._actions = None
        
    def __call__(self, state, verbose=True):
        return self._get_plan(state, verbose=verbose)

    def set_actions(self, actions):
        self._actions = actions
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.set_actions(actions)

    def _get_plan(self, state, verbose=True):
        start_time = time.time()
        queue = []
        state_to_best_g = defaultdict(lambda : float("inf"))
        tiebreak = count()

        root_node = self.Node(state=state, parent=None, action=None, g=0)
        hq.heappush(queue, (self._get_priority(root_node), next(tiebreak), root_node))
        num_expansions = 0

        while len(queue) > 0 and (time.time() - start_time < self._timeout):
            _, _, node = hq.heappop(queue)
            # If we already found a better path here, don't bother
            if state_to_best_g[node.state] < node.g:
                continue
            # If the goal holds, return
            if self._check_goal(node.state):
                if verbose:
                    print("\nPlan found!")
                return self._finish_plan(node), {'node_expansions' : num_expansions}
            num_expansions += 1
            if verbose:
                print(f"Expanding node {num_expansions}", end='\r', flush=True)
            # Generate successors
            for action, child_state in self._get_successors(node.state):
                # If we already found a better path to child, don't bother
                if state_to_best_g[child_state] <= node.g+1:
                    continue
                # Add new node
                child_node = self.Node(state=child_state, parent=node, action=action, g=node.g+1)
                priority = self._get_priority(child_node)
                hq.heappush(queue, (priority, next(tiebreak), child_node))
                state_to_best_g[child_state] = child_node.g

        if verbose:
            print("Warning: planning failed.")
        return [], {'node_expansions' : num_expansions}
    
    def _get_successors(self, state):
        for action in self._actions:
            next_state = self._get_successor_state(state, action)
            yield action, next_state

    def _finish_plan(self, node):
        plan = []
        while node.parent is not None:
            plan.append(node.action)
            node = node.parent
        plan.reverse()
        return plan

    def _get_priority(self, node):
        h = self._heuristic(node)
        if isinstance(h, tuple):
            return (tuple(node.g + hi for hi in h), h)
        return (node.g + h, h)


class BestFirstSearch(AStar):
    """Planning with best-first search
    """

    def _get_priority(self, node):
        h = self._heuristic(node)
        return h

### Heuristics

In [15]:
class PyperplanHeuristic(Heuristic):
    """Don't worry about this -- it's just infrastructure connecting one library (PDDLGym) to another (Pyperplan)
    """
    def __init__(self, heuristic_name, domain):
        super().__init__()
        self._heuristic_name = heuristic_name
        self._domain = domain
        self._heuristic = None
        self._heuristic_goal = None
        self._actions = None

    def __call__(self, node):
        if node.state.goal != self._heuristic_goal:
            self._heuristic = self._initialize_heuristic(node.state)
            self._heuristic_goal = node.state.goal
        return self._heuristic(node.state)

    def set_actions(self, actions):
        self._actions = actions

    def train(self, env):
        pass
    
    def seed(self, seed):
        pass

    def _create_pyperplan_problem(self, state):
        try:
            problem_file = self._create_problem_file(state)
            parser = pyperplan.Parser(self._domain.domain_fname, problem_file)
            pyperplan_domain = parser.parse_domain()
            pyperplan_problem = parser.parse_problem(pyperplan_domain)
        finally:
            try:
                os.remove(problem_file)
            except FileNotFoundError:
                pass
        return pyperplan_problem

    def _create_problem_file(self, state):
        filename = tempfile.NamedTemporaryFile(delete=False).name
        lits = state.literals
        if not self._domain.operators_as_actions:
            lits |= set(self._actions)
        PDDLProblemParser.create_pddl_file(
            filename, state.objects-set(self._domain.constants), lits, 
            "myproblem", self._domain.domain_name, state.goal, fast_downward_order=True)
        return filename

    def _initialize_heuristic(self, state, cache_maxsize=10000):
        pyperplan_problem = self._create_pyperplan_problem(state)
        task = pyperplan.grounding.ground(pyperplan_problem)
        heuristic = pyperplan.HEURISTICS[self._heuristic_name](task)

        @functools.lru_cache(cache_maxsize)
        def _call_heuristic(state):
            state = frozenset({lit.pddl_str() for lit in state.literals})
            state &= task.facts
            node = pyperplan.search.searchspace.make_root_node(state)
            h = heuristic(node)
            return h

        return _call_heuristic


### Registering approaches

In [16]:
def get_approach(name, env, planning_timeout=10):
    """Put new approaches here!
    """
    if name == "astar_uniform":
        planner = AStar(env.get_successor_state, env.check_goal, timeout=planning_timeout)
        return SearchApproach(planner=planner)
    
    if name == "astar_hmax":
        heuristic = PyperplanHeuristic("hmax", domain=env.domain)
        planner = AStar(env.get_successor_state, env.check_goal, heuristic=heuristic, timeout=planning_timeout)
        return SearchApproach(planner=planner)
    
    if name == "astar_hff":
        heuristic = PyperplanHeuristic("hff", domain=env.domain)
        planner = AStar(env.get_successor_state, env.check_goal, heuristic=heuristic, timeout=planning_timeout)
        return SearchApproach(planner=planner)
    
    if name == "astar_hadd":
        heuristic = PyperplanHeuristic("hadd", domain=env.domain)
        planner = AStar(env.get_successor_state, env.check_goal, heuristic=heuristic, timeout=planning_timeout)
        return SearchApproach(planner=planner)

    raise Exception(f"Unrecognized approach: {name}")

# Add your approach names here
approaches = [
    "astar_uniform",
    "astar_hmax",
    "astar_hff",
    "astar_hadd",
]

### Evaluation Pipeline
Here's all the code that you should need to evaluate your approaches.

In [17]:
def run_single_test(test_env, problem_idx, model, max_horizon=250, max_duration=10):
    print(f"Running test problem {problem_idx} in environment {test_env.spec.id}")
    test_env.fix_problem_index(problem_idx)
    start_time = time.time()
    obs, info = test_env.reset()
    model_info = model.reset(obs)
    node_expansions = model_info.get('node_expansions', 0)
    num_steps = 0
    success = False
    for t in range(max_horizon):
        if time.time() - start_time > max_duration:
            break
        print(".", end='', flush=True)
        act = model.step(obs)
        obs, reward, done, info = test_env.step(act)
        num_steps += 1
        if done:
            assert reward == 1
            success = True
            break
    duration = time.time() - start_time
    print(f" final duration: {duration} with num steps {num_steps} and success={success}.")
    return duration, num_steps, node_expansions, success

def run_single_experiment(model, train_env, test_env, seed=0):
    # Initialize
    test_env.reset()
    actions = test_env.get_possible_actions()
    model.set_actions(actions)
    model.seed(seed)
    
    # Training
    training_start_time = time.time()
    model.train(train_env)
    train_duration = time.time() - training_start_time
    train_durations = [train_duration] * len(test_env.problems) # for result reporting convenience

    # Test time
    test_durations = [] # seconds, one per problem
    test_num_steps = [] # integers
    test_node_expansions = [] # integers
    test_successes = [] # boolean, True if successful
    
    for problem_idx in range(len(test_env.problems)):
        duration, num_steps, node_expansions, success = \
            run_single_test(test_env, problem_idx, model)
        test_durations.append(duration)
        test_num_steps.append(num_steps)
        test_node_expansions.append(node_expansions)
        test_successes.append(success)

    return train_durations, test_durations, test_num_steps, test_node_expansions, test_successes

### Here's where the action happens

In [18]:
levels = list(range(1, 7))

all_results = {}
for level in levels:
    all_results[level] = {}
    train_env = pddlgym.make(f"PDDLSearchAndRescueLevel{level}-v0")
    test_env = pddlgym.make(f"PDDLSearchAndRescueLevel{level}Test-v0")
    for approach in approaches:
        all_results[level][approach] = []
        model = get_approach(approach, test_env)
        results = run_single_experiment(model, train_env, test_env)
        for (train_dur, dur, num_steps, num_nodes, succ) in zip(*results):
            all_results[level][approach].append((train_dur, dur, num_steps, num_nodes, succ))

Running test problem 0 in environment PDDLSearchAndRescueLevel1Test-v0
Expanding node 148
Plan found!
......... final duration: 0.3566441535949707 with num steps 9 and success=True.
Running test problem 1 in environment PDDLSearchAndRescueLevel1Test-v0
Expanding node 148
Plan found!
.......... final duration: 0.37461233139038086 with num steps 10 and success=True.
Running test problem 2 in environment PDDLSearchAndRescueLevel1Test-v0
Expanding node 148
Plan found!
............. final duration: 0.4298849105834961 with num steps 13 and success=True.
Running test problem 3 in environment PDDLSearchAndRescueLevel1Test-v0
Expanding node 148
Plan found!
............... final duration: 0.3606128692626953 with num steps 15 and success=True.
Running test problem 4 in environment PDDLSearchAndRescueLevel1Test-v0
Expanding node 148
Plan found!
........... final duration: 0.38443708419799805 with num steps 11 and success=True.
Running test problem 5 in environment PDDLSearchAndRescueLevel1Test-v0


Expanding node 330
Plan found!
........... final duration: 0.7992801666259766 with num steps 11 and success=True.
Running test problem 5 in environment PDDLSearchAndRescueLevel2Test-v0
Expanding node 45
Plan found!
........... final duration: 0.1041100025177002 with num steps 11 and success=True.
Running test problem 6 in environment PDDLSearchAndRescueLevel2Test-v0
Expanding node 408
Plan found!
.............. final duration: 0.9119560718536377 with num steps 14 and success=True.
Running test problem 7 in environment PDDLSearchAndRescueLevel2Test-v0
Expanding node 408
Plan found!
........... final duration: 0.8951449394226074 with num steps 11 and success=True.
Running test problem 8 in environment PDDLSearchAndRescueLevel2Test-v0
Expanding node 337
Plan found!
................ final duration: 0.7759590148925781 with num steps 16 and success=True.
Running test problem 9 in environment PDDLSearchAndRescueLevel2Test-v0
Expanding node 96
Plan found!
............. final duration: 0.234170

Expanding node 109
Plan found!
.......... final duration: 0.2550048828125 with num steps 10 and success=True.
Running test problem 9 in environment PDDLSearchAndRescueLevel3Test-v0
Expanding node 194
Plan found!
............ final duration: 0.5120339393615723 with num steps 12 and success=True.
Running test problem 0 in environment PDDLSearchAndRescueLevel3Test-v0
Expanding node 33
Plan found!
.............. final duration: 0.17821192741394043 with num steps 14 and success=True.
Running test problem 1 in environment PDDLSearchAndRescueLevel3Test-v0
Expanding node 9
Plan found!
....... final duration: 0.08170175552368164 with num steps 7 and success=True.
Running test problem 2 in environment PDDLSearchAndRescueLevel3Test-v0
Expanding node 17
Plan found!
........... final duration: 0.11825275421142578 with num steps 11 and success=True.
Running test problem 3 in environment PDDLSearchAndRescueLevel3Test-v0
Expanding node 22
Plan found!
........... final duration: 0.21304774284362793 wit

Expanding node 24
Plan found!
............ final duration: 0.17058610916137695 with num steps 12 and success=True.
Running test problem 4 in environment PDDLSearchAndRescueLevel4Test-v0
Expanding node 18
Plan found!
........... final duration: 0.12986111640930176 with num steps 11 and success=True.
Running test problem 5 in environment PDDLSearchAndRescueLevel4Test-v0
Expanding node 16
Plan found!
......... final duration: 0.11734414100646973 with num steps 9 and success=True.
Running test problem 6 in environment PDDLSearchAndRescueLevel4Test-v0
Expanding node 217
Plan found!
............... final duration: 1.077937126159668 with num steps 15 and success=True.
Running test problem 7 in environment PDDLSearchAndRescueLevel4Test-v0
Expanding node 19
Plan found!
......... final duration: 0.1422111988067627 with num steps 9 and success=True.
Running test problem 8 in environment PDDLSearchAndRescueLevel4Test-v0
Expanding node 124
Plan found!
............. final duration: 0.694006919860839

Expanding node 42
Plan found!
............ final duration: 0.2644517421722412 with num steps 12 and success=True.
Running test problem 0 in environment PDDLSearchAndRescueLevel5Test-v0
 final duration: 10.0016930103302 with num steps 0 and success=False.
Running test problem 1 in environment PDDLSearchAndRescueLevel5Test-v0
Expanding node 82
Plan found!
.................. final duration: 0.535179853439331 with num steps 18 and success=True.
Running test problem 2 in environment PDDLSearchAndRescueLevel5Test-v0
Expanding node 285
Plan found!
...................... final duration: 1.6433520317077637 with num steps 22 and success=True.
Running test problem 3 in environment PDDLSearchAndRescueLevel5Test-v0
 final duration: 10.00816011428833 with num steps 0 and success=False.
Running test problem 4 in environment PDDLSearchAndRescueLevel5Test-v0
Expanding node 203
Plan found!
....................... final duration: 1.132775068283081 with num steps 23 and success=True.
Running test problem 

 final duration: 10.005168914794922 with num steps 0 and success=False.
Running test problem 5 in environment PDDLSearchAndRescueLevel6Test-v0
Expanding node 187
Plan found!
.................. final duration: 1.1804261207580566 with num steps 18 and success=True.
Running test problem 6 in environment PDDLSearchAndRescueLevel6Test-v0
Expanding node 250
Plan found!
........................ final duration: 1.4929132461547852 with num steps 24 and success=True.
Running test problem 7 in environment PDDLSearchAndRescueLevel6Test-v0
 final duration: 10.005917072296143 with num steps 0 and success=False.
Running test problem 8 in environment PDDLSearchAndRescueLevel6Test-v0
Expanding node 246
Plan found!
......................... final duration: 1.409769058227539 with num steps 25 and success=True.
Running test problem 9 in environment PDDLSearchAndRescueLevel6Test-v0
Expanding node 245
Plan found!
................. final duration: 1.3990800380706787 with num steps 17 and success=True.
Runnin

In [20]:
columns = ["Approach", "Train Time", "Duration", "Num Steps", "Num Nodes", "Successes"]

for level in sorted(all_results):
    print(f"\n### LEVEL {level} ###")
    mean_table = [(a, ) + tuple(np.mean(all_results[level][a], axis=0)) for a in sorted(all_results[level])]
    std_table = [(a, ) + tuple(np.std(all_results[level][a], axis=0)) for a in sorted(all_results[level])]
    print("\n# Means #")
    print(tabulate(mean_table, headers=columns))
#     print("\n# Standard Deviations #")
#     print(tabulate(std_table, headers=columns))


### LEVEL 1 ###

# Means #
Approach         Train Time    Duration    Num Steps    Num Nodes    Successes
-------------  ------------  ----------  -----------  -----------  -----------
astar_hadd      2.14577e-06   0.0551833         11.6         13.8            1
astar_hff       1.90735e-06   0.0810943         11.6         14.6            1
astar_hmax      9.53674e-07   0.100419          11.6         21.6            1
astar_uniform   9.53674e-07   0.395004          11.6        148              1

### LEVEL 2 ###

# Means #
Approach         Train Time    Duration    Num Steps    Num Nodes    Successes
-------------  ------------  ----------  -----------  -----------  -----------
astar_hadd      2.14577e-06   0.0734372         13.1         16.9            1
astar_hff       1.19209e-06   0.0769238         13.1         17              1
astar_hmax      1.90735e-06   0.105282          12.9         25.9            1
astar_uniform   1.19209e-06   0.542052          12.9        227.3          