# 6.882 HW 1.2 Starter Code

See the problem set handout for instructions and deliverables.

See HW1.1 Starter Code for dependency installation instructions.

In [None]:
# Install dependencies (run this once every 12 hours)
!pip install --upgrade git+https://github.com/tomsilver/pddlgym # Install most recent PDDLGym (must be from source!)
!pip install tabulate

In [None]:
from collections import namedtuple, defaultdict, deque
from itertools import count, product
from tabulate import tabulate
import abc
import copy
import numpy as np
import heapq as hq
import pddlgym
from pddlgym.structs import Predicate, State, Type, LiteralConjunction
import time
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

### Classes
First we define some convenient abstract classes for Approach, Planner, Heuristic, and Featurizer.

In [None]:
class Approach:
    """Generic approach for learning and behaving in a domain.

    The evaluation code in this file drives an approach in the following
    order: ``set_actions``, ``seed``, ``train``, and then, for every test
    problem, one ``reset`` followed by repeated ``step`` calls.

    The methods are marked with ``abc.abstractmethod``, but this class does
    not use ``abc.ABCMeta``. Nothing is therefore enforced at instantiation
    time; a method that was not overridden only fails when it is called.
    """
    @abc.abstractmethod
    def set_actions(self, actions):
        """Tell the approach what actions are available in the domain

        Parameters
        ----------
        actions : [ Any ]
            For a continuous action space, this would not work! If you are
            curious how one might handle actions more generally, see
            https://gym.openai.com/docs/#spaces.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def reset(self, state):
        """Tell the approach to prepare to take actions from the given initial state.

        Parameters
        ----------
        state : pddlgym.State
            Note that the state contains the goal (state.goal).

        Returns
        -------
        info : dict
            Any logging or debugging info can go here. The evaluation code
            reads the optional 'node_expansions' entry from it.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def step(self, state):
        """Ask the approach for an action to take given the input state.
        Assume that the action will be subsequently executed in the environment.

        Parameters
        ----------
        state : pddlgym.State
            Note that the state contains the goal (state.goal).

        Returns
        -------
        action : Any
            The action only. The evaluation loop passes the return value
            straight to the environment, so do not return a tuple with an
            extra info dict.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def seed(self, seed):
        """Optionally set a random seed
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def train(self, env):
        """Some approaches learn. Others will do nothing for training.

        Parameters
        ----------
        env : pddlgym.PDDLEnv
            A training environment that encapsulates training problems.
        """
        raise NotImplementedError("Override me!")
        

class Planner:
    """Interface for objects that turn a state into a sequence of actions.

    The ``abc.abstractmethod`` markers are informational: the class has no
    ``abc.ABCMeta`` metaclass, so an un-overridden method simply raises
    ``NotImplementedError`` when it is called.
    """
    @abc.abstractmethod
    def __call__(self, state):
        """Compute a plan that starts in ``state``.

        Parameters
        ----------
        state : pddlgym.State
            The start state; the goal travels with it as ``state.goal``.

        Returns
        -------
        actions : [ Any ]
            The actions to execute, in order.
        info : dict
            Free-form logging or debugging information.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def set_actions(self, actions):
        """Give the planner the set of actions it may plan with.

        Parameters
        ----------
        actions : [ Any ]
        """
        raise NotImplementedError("Override me!")
        

class Heuristic:
    """Generic class for heuristics

    The ``abc.abstractmethod`` markers are informational only (the class
    does not use ``abc.ABCMeta``); methods that are not overridden raise
    ``NotImplementedError`` when called, except ``seed`` which defaults to
    doing nothing.
    """
    @abc.abstractmethod
    def __call__(self, node):
        """Return a heuristic value (estimated cost-to-go) given a search node.

        Parameters
        ----------
        node : AStar.Node

        Returns
        -------
        heuristic : float
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def set_actions(self, actions):
        """Tell the heuristic what actions are available in the domain

        Parameters
        ----------
        actions : [ Any ]
        """
        raise NotImplementedError("Override me!")

    def seed(self, seed):
        """Optionally set a random seed.

        SearchApproach.seed calls this on every Heuristic instance, so the
        base class must provide it. The default does nothing, which is the
        right behavior for heuristics that use no randomness; override it
        in heuristics that do.

        Parameters
        ----------
        seed : int
        """
        return None

    @abc.abstractmethod
    def train(self, env):
        """Some heuristics are learnable. Others will do nothing for training.

        Parameters
        ----------
        env : pddlgym.PDDLEnv
            A training environment that encapsulates training problems.
        """
        raise NotImplementedError("Override me!")

        
class Featurizer:
    """Interface for mapping raw inputs to featurized inputs and back.

    The ``abc.abstractmethod`` markers are informational: without
    ``abc.ABCMeta`` nothing is enforced, and an un-overridden method raises
    ``NotImplementedError`` only when it is called.
    """
    @abc.abstractmethod
    def initialize(self, all_data):
        """Prepare the featurizer using a training dataset.

        Parameters
        ----------
        all_data : [ Any ]
            The raw training inputs.
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def apply(self, x):
        """Map one raw input to its featurized form.

        Parameters
        ----------
        x : Any
            A raw input

        Returns
        -------
        xhat : Any
            The featurized version of ``x``
        """
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def invert(self, xhat):
        """Map a featurized input back to the raw input it came from.

        Parameters
        ----------
        xhat : Any
            A featurized input

        Returns
        -------
        x : Any
            The corresponding raw input
        """
        raise NotImplementedError("Override me!")

### Non-learning approaches
Now we define some non-learning approaches: Random, AStar, and Greedy Best-First Search. These approaches do not need to make use of featurizers.

In [None]:
class RandomActions(Approach):
    """Baseline approach that ignores the state and acts at random.

    ``set_actions`` and ``seed`` must both be called before ``step``;
    until then the action list and the random generator are ``None``.
    """
    def __init__(self):
        # Both are filled in later by seed() and set_actions().
        self._rng = None
        self._actions = None

    def set_actions(self, actions):
        """Remember the actions to sample from."""
        self._actions = actions

    def reset(self, state):
        """Nothing to prepare; report an empty info dict."""
        return {}

    def step(self, state):
        """Return one action drawn with the seeded generator's ``choice``."""
        chosen = self._rng.choice(self._actions)
        return chosen

    def seed(self, seed):
        """Create the random generator used by ``step``."""
        self._rng = np.random.RandomState(seed)

    def train(self, env):
        """Random behavior needs no training."""
        pass

In [None]:
class SearchApproach(Approach):
    """Make a plan and follow it

    On ``reset`` the planner is asked for a complete plan from the initial
    state; each ``step`` then returns the next action of that plan. If the
    plan is empty (planning failed or the plan is used up), ``step`` falls
    back to a random action.

    Parameters
    ----------
    planner : Planner
        Called as ``planner(obs, heuristic=heuristic)`` and expected to
        return ``(plan, info)``.
    heuristic : Heuristic or callable or None
        Passed through to the planner. Only ``Heuristic`` instances are
        told about actions, seeded, and trained.
    """
    def __init__(self, planner, heuristic=None):
        self._planner = planner
        self._heuristic = heuristic
        self._actions = None
        self._plan = []
        self._rng = None

    def set_actions(self, actions):
        """Record the available actions and forward them to the planner and heuristic."""
        self._actions = actions
        self._planner.set_actions(actions)
        # The planner only learns about the heuristic when it is called, so
        # it cannot forward the actions itself; do it here instead.
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.set_actions(actions)

    def reset(self, obs):
        """Plan from ``obs`` and return the planner's info dict."""
        self._plan, info = self._planner(obs, heuristic=self._heuristic)
        return info

    def step(self, obs):
        """Return the next planned action, or a random one if no plan is left."""
        if not self._plan:
            print("Warning: step was called without a plan. Defaulting to random action.")
            return self._rng.choice(self._actions)
        return self._plan.pop(0)

    def seed(self, seed):
        """Seed the fallback random generator and, if it supports it, the heuristic."""
        self._rng = np.random.RandomState(seed)
        if isinstance(self._heuristic, Heuristic):
            # Not every heuristic is random; only seed those that offer it.
            seed_heuristic = getattr(self._heuristic, "seed", None)
            if callable(seed_heuristic):
                seed_heuristic(seed)

    def train(self, env):
        """Train the heuristic if it is a ``Heuristic``; otherwise do nothing."""
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.train(env)

In [None]:
class AStar(Planner):
    """Planning with A* search

    Every action costs 1, so a node's ``g`` is the number of actions on the
    path from the start state. States are used as dictionary keys and must
    therefore be hashable.

    Parameters
    ----------
    successor_fn : callable
        ``successor_fn(state, action)`` returns the next state.
    check_goal_fn : callable
        ``check_goal_fn(state)`` returns whether the goal holds in state.
    timeout : float
        Wall-clock budget in seconds for one call to the planner.
    """

    # Search node: the state, the node it was reached from, the action that
    # produced it, and the number of actions taken so far.
    Node = namedtuple("Node", ["state", "parent", "action", "g"])

    def __init__(self, successor_fn, check_goal_fn, timeout=100):
        self._get_successor_state = successor_fn
        self._check_goal = check_goal_fn
        self._heuristic = None
        self._timeout = timeout
        self._actions = None

    def __call__(self, state, heuristic=None, verbose=True):
        """Plan from ``state``.

        Parameters
        ----------
        state : pddlgym.State
        heuristic : callable or None
            Maps a ``Node`` to a number or a tuple of numbers. When it is
            None (or otherwise falsy) a constant-zero heuristic is used.
        verbose : bool
            Whether to print progress.

        Returns
        -------
        actions : [ Any ]
            The plan; empty if planning failed or the start is a goal state.
        info : dict
            Contains 'node_expansions'.
        """
        self._heuristic = heuristic or (lambda node : 0)
        return self._get_plan(state, verbose=verbose)

    def set_actions(self, actions):
        """Record the available actions (and forward them to a known Heuristic)."""
        self._actions = actions
        if isinstance(self._heuristic, Heuristic):
            self._heuristic.set_actions(actions)

    def _get_plan(self, state, verbose=True):
        """Run the search until a goal is popped, the queue empties, or time runs out."""
        start_time = time.time()
        queue = []
        state_to_best_g = defaultdict(lambda : float("inf"))
        tiebreak = count()

        root_node = self.Node(state=state, parent=None, action=None, g=0)
        # Record the start state's cost. Without this, any action that leads
        # back to the start state (for example one that has no effect)
        # re-queues it and the root gets expanded a second time.
        state_to_best_g[state] = 0
        hq.heappush(queue, (self._get_priority(root_node), next(tiebreak), root_node))
        num_expansions = 0

        while len(queue) > 0 and (time.time() - start_time < self._timeout):
            # The tiebreak counter keeps heapq from ever comparing nodes.
            _, _, node = hq.heappop(queue)
            # If we already found a better path here, don't bother
            if state_to_best_g[node.state] < node.g:
                continue
            # If the goal holds, return
            if self._check_goal(node.state):
                if verbose:
                    print("\nPlan found!")
                return self._finish_plan(node), {'node_expansions' : num_expansions}
            num_expansions += 1
            if verbose:
                print(f"Expanding node {num_expansions}", end='\r', flush=True)
            # Generate successors
            for action, child_state in self._get_successors(node.state):
                # If we already found a better path to child, don't bother
                if state_to_best_g[child_state] <= node.g+1:
                    continue
                # Add new node
                child_node = self.Node(state=child_state, parent=node, action=action, g=node.g+1)
                priority = self._get_priority(child_node)
                hq.heappush(queue, (priority, next(tiebreak), child_node))
                state_to_best_g[child_state] = child_node.g

        if verbose:
            print("Warning: planning failed.")
        return [], {'node_expansions' : num_expansions}

    def _get_successors(self, state):
        """Yield ``(action, next_state)`` for every known action."""
        for action in self._actions:
            next_state = self._get_successor_state(state, action)
            yield action, next_state

    def _finish_plan(self, node):
        """Collect the actions on the path from the root to ``node``."""
        plan = []
        while node.parent is not None:
            plan.append(node.action)
            node = node.parent
        plan.reverse()
        return plan

    def _get_priority(self, node):
        """Return the queue priority ``(g + h, h)``; ties on f favor smaller h.

        A tuple-valued heuristic is combined with g element-wise.
        """
        h = self._heuristic(node)
        if isinstance(h, tuple):
            return (tuple(node.g + hi for hi in h), h)
        return (node.g + h, h)


class BestFirstSearch(AStar):
    """Greedy variant of the A* planner: nodes are ordered by heuristic alone.
    """

    def _get_priority(self, node):
        """Return the heuristic value of ``node``; the path cost is ignored."""
        return self._heuristic(node)

### Featurizers
We start you off with some featurizers. If you are interested, you are welcome to implement others. But feel free to just treat these as black boxes too.

In [None]:
class TabularFeaturizer(Featurizer):
    """A tabular featurizer assigns a unique ID to each input.

    Inputs seen during ``initialize`` get the IDs 0..N-1 in sorted order.
    Any other input is mapped to the extra "unknown" ID N.

    Parameters
    ----------
    one_hot : bool
        If True, ``apply`` returns a float32 one-hot vector of length N+1
        (the last slot is the unknown ID) instead of the integer ID.
    """
    def __init__(self, one_hot=False):
        self._one_hot = one_hot
        self._x_to_idx = {}
        self._idx_to_x = {}
        self._num_features = 0
        self._unknown_idx = None
        self._initialized = False

    def initialize(self, all_data):
        """Build the ID tables from the distinct values in ``all_data``.

        The values must be hashable and mutually orderable. Calling this
        again replaces the previous tables.
        """
        # Start from empty tables so that re-initializing does not keep
        # entries from an earlier dataset.
        self._x_to_idx = {}
        self._idx_to_x = {}
        for i, x in enumerate(sorted(set(all_data))):
            self._x_to_idx[x] = i
            self._idx_to_x[i] = x
        # len() rather than max()+1 so that an empty dataset is not an error.
        self._num_features = len(self._idx_to_x)
        self._unknown_idx = self._num_features
        self._initialized = True
        print(f"Initialized {self._num_features} tabular features")

    def apply(self, x):
        """Return the ID of ``x`` (or its one-hot vector when ``one_hot`` is set)."""
        assert self._initialized, "Must call `initialize(all_data)` before `apply(datum)`."
        x_id = self._x_to_idx.get(x, self._unknown_idx)
        if self._one_hot:
            xhat = np.zeros(self._num_features + 1, dtype=np.float32)
            xhat[x_id] = True
            return xhat
        return x_id

    def invert(self, xhat):
        """Return the raw input for a featurized value produced by ``apply``.

        Raises
        ------
        KeyError
            If ``xhat`` encodes the unknown ID, which has no raw input.
        """
        if self._one_hot:
            assert sum(xhat) == 1
            # np.argwhere would give a 2-D array, which cannot be a dict key;
            # the position of the single 1 as a plain int is what is needed.
            idx = int(np.argmax(xhat))
        else:
            idx = xhat
        return self._idx_to_x[idx]
    

class PropositionalFeaturizer(Featurizer):
    """A propositional featurizer creates a boolean vector with one dimension per fact (Literal).

    Inputs may be ``State`` objects or plain collections of facts. For a
    ``State``, the goal literals are included as well, each rewritten to a
    predicate whose name is prefixed with "WANT" so that goal facts and
    state facts get separate dimensions.
    """
    def __init__(self):
        self._x_to_idx = {}
        self._idx_to_x = {}
        self._num_features = 0
        self._initialized = False

    @classmethod
    def _wrap_goal_literal(cls, x):
        """Return the "WANT"-prefixed counterpart of a predicate or literal."""
        if isinstance(x, Predicate):
            return Predicate("WANT"+x.name, x.arity, var_types=x.var_types,
                is_negative=x.is_negative, is_anti=x.is_anti)
        # Not a Predicate: treated as a literal, i.e. a predicate applied to
        # variables. Wrap the predicate and re-apply it to the same variables.
        new_predicate = cls._wrap_goal_literal(x.predicate)
        return new_predicate(*x.variables)

    def _preproc_pddl_state(self, X):
        """Turn a ``State`` into one set of facts; pass anything else through."""
        if isinstance(X, State):
            return X.literals | {self._wrap_goal_literal(x) for x in X.goal.literals}
        return X

    def initialize(self, all_data):
        """Assign one vector dimension to every distinct fact in ``all_data``.

        The facts must be hashable and mutually orderable. Calling this
        again replaces the previous tables.
        """
        all_props = { x for X in all_data for x in self._preproc_pddl_state(X) }
        # Start from empty tables so that re-initializing does not keep
        # entries from an earlier dataset.
        self._x_to_idx = {}
        self._idx_to_x = {}
        for i, x in enumerate(sorted(all_props)):
            self._x_to_idx[x] = i
            self._idx_to_x[i] = x
        # len() rather than max()+1 so that an empty dataset is not an error.
        self._num_features = len(self._idx_to_x)
        self._initialized = True
        print(f"Initialized {self._num_features} propositional features")

    def apply(self, X):
        """Return a float32 vector with a 1 for every known fact present in ``X``.

        Facts that were not seen during ``initialize`` are ignored.
        """
        assert self._initialized, "Must call `initialize(all_data)` before `apply(datum)`."
        X = self._preproc_pddl_state(X)
        vec = np.zeros(self._num_features, dtype=np.float32)
        for x in X:
            try:
                idx = self._x_to_idx[x]
            except KeyError:
                continue
            vec[idx] = 1
        return vec

    def invert(self, vec):
        """Return the set of facts whose dimensions are nonzero in ``vec``."""
        # np.argwhere yields one small array per hit, which cannot be used
        # as a dict key; flatnonzero gives the flat positions directly.
        return { self._idx_to_x[int(idx)] for idx in np.flatnonzero(vec) }
    

class SARStateFeaturizer(Featurizer):
    """Featurizer for Search And Rescue states only.

    Produces the dictionary-like state description from the previous
    homework, frozen into a sorted tuple of ``(key, value)`` pairs so that
    it is hashable. Use ``dict(...)`` on the result to get the mapping back.
    """
    def initialize(self, all_data):
        """No fitting is needed for this featurizer."""
        pass

    @classmethod
    def apply(cls, internal_state):
        """Convert a literal-based state into sorted ``(key, value)`` pairs.

        Keys are "carrying" (name of the carried person or None), "rescue"
        (frozenset of names taken from the goal literals), and one entry per
        object that has a location, mapping its name to ``(row, col)``.
        """
        # frozenset keeps the final tuple hashable
        to_rescue = frozenset(
            goal_lit.variables[0].name for goal_lit in internal_state.goal.literals
        )
        features = {"carrying": None, "rescue": to_rescue}
        for fact in internal_state.literals:
            pred_name = fact.predicate.name
            if pred_name.endswith("at"):
                # location fact: first variable is the object, second the cell
                features[fact.variables[0].name] = cls._loc_to_rc(fact.variables[1])
            if pred_name == "carrying":
                # the carried person is the second variable
                features["carrying"] = fact.variables[1].name
        # sorting gives a canonical order; the tuple is hashable
        return tuple(sorted(features.items()))

    @staticmethod
    def _loc_to_rc(loc_str):
        """Parse a location name of the form ``f<row>-<col>f`` into ``(row, col)``."""
        assert loc_str.startswith("f") and loc_str.endswith("f")
        row_text, col_text = loc_str[1:-1].split('-')
        return (int(row_text), int(col_text))
    

class SARMinimalStateFeaturizer(Featurizer):
    """Featurizer for Search And Rescue states only.

    Builds a flat float32 vector from the positions of the robot, the
    hospital, and every person; walls are left out. Each person contributes
    a carried flag, a position, and a needs-rescue flag. The whole vector,
    flags included, is divided by ``max_location`` and shifted by -0.5.
    """
    # for normalization
    max_location = 6

    def initialize(self, all_data):
        """No fitting is needed for this featurizer."""
        pass

    def apply(self, x):
        """Return the normalized feature vector for state ``x``."""
        features = dict(SARStateFeaturizer.apply(x))
        robot_rc = features["robot0"]
        carried = features["carrying"]
        # robot position first, then hospital position
        vec = [*robot_rc, *features["hospital0"]]
        # people that have a location entry, plus the carried person (if any)
        names = sorted({key for key in features if key.startswith("person")})
        if carried:
            names.append(carried)
            names.sort()
        for name in names:
            is_carried = carried == name
            # carried flag
            vec.append(1. if is_carried else 0.)
            # position: a carried person is wherever the robot is
            vec.extend(robot_rc if is_carried else features[name])
            # needs-rescue flag
            vec.append(float(name in features["rescue"]))
        # normalize
        arr = np.array(vec, dtype=np.float32)
        return (arr / self.max_location) - 0.5


### Approaches
Implement your own approaches.

In [None]:
class MyLearningApproach1(Approach):
    """TODO: implement me!

    Placeholder for your own learning approach. Every method, including
    the constructor, currently raises NotImplementedError.
    """
    def __init__(self):
        raise NotImplementedError("Implement me! You may want to add args or kwargs.")

    def set_actions(self, actions):
        """TODO: store the available actions."""
        raise NotImplementedError("Implement me!")

    def reset(self, state):
        """TODO: prepare for a new problem and return an info dict."""
        raise NotImplementedError("Implement me!")

    def step(self, obs):
        """TODO: return the action to take in ``obs``."""
        raise NotImplementedError("Implement me!")

    def seed(self, seed):
        """TODO: seed any randomness."""
        raise NotImplementedError("Implement me!")

    def train(self, env):
        """TODO: learn from the training environment."""
        raise NotImplementedError("Implement me!")

### Register your approaches
Give your approaches names in the block below.

In [None]:
def get_approach(name, env, planning_timeout=10):
    """Build the approach registered under ``name``. Put new approaches here!

    Parameters
    ----------
    name : str
        One of the registered approach names.
    env : pddlgym.PDDLEnv
        Supplies ``get_successor_state`` and ``check_goal`` for planners.
    planning_timeout : float
        Passed to the planner as its ``timeout``.

    Raises
    ------
    NotImplementedError
        For "my_learning_approach1", which is not hooked up yet.
    Exception
        For any name that is not registered.
    """
    registered = ("random", "astar_uniform", "my_learning_approach1")
    if name not in registered:
        raise Exception(f"Unrecognized approach: {name}")

    if name == "random":
        return RandomActions()

    if name == "astar_uniform":
        # no heuristic is given, so the search is uninformed
        search = AStar(env.get_successor_state, env.check_goal, timeout=planning_timeout)
        return SearchApproach(planner=search)

    # only "my_learning_approach1" is left
    raise NotImplementedError("Implement me!")

# Names of the approaches to evaluate; add yours here
approaches = [
    "random",
    "astar_uniform",
#     "my_learning_approach1",
]

### Evaluation Pipeline
Here's all the code that you should need to evaluate your approaches.

In [None]:
def run_single_test(test_env, problem_idx, model, max_horizon=250, max_duration=10):
    """Run one approach on one test problem and report how it went.

    Parameters
    ----------
    test_env : pddlgym.PDDLEnv
        The test environment; it is fixed to problem ``problem_idx``.
    problem_idx : int
    model : Approach
    max_horizon : int
        Maximum number of actions to execute.
    max_duration : float
        Wall-clock budget in seconds, measured from just before the
        environment reset and therefore including the time spent in
        ``model.reset`` (e.g. planning). It is checked before each step.

    Returns
    -------
    duration : float
        Seconds elapsed, including ``model.reset``.
    num_steps : int
        Number of actions executed.
    node_expansions : int
        The 'node_expansions' entry of the info returned by ``model.reset``,
        or 0 if there is none.
    success : bool
        Whether the environment reported ``done`` within the limits.
    """
    print(f"Running test problem {problem_idx} in environment {test_env.spec.id}")
    test_env.fix_problem_index(problem_idx)
    start_time = time.time()
    obs, _ = test_env.reset()
    # An approach whose reset() forgets to return the info dict yields None;
    # treat that the same as "no info" instead of crashing on .get().
    model_info = model.reset(obs) or {}
    node_expansions = model_info.get('node_expansions', 0)
    num_steps = 0
    success = False
    for _ in range(max_horizon):
        if time.time() - start_time > max_duration:
            break
        print(".", end='', flush=True)
        act = model.step(obs)
        obs, reward, done, _ = test_env.step(act)
        num_steps += 1
        if done:
            # Sanity check on the environment: an episode is expected to end
            # only by reaching the goal, which is rewarded with 1.
            assert reward == 1, f"episode ended with unexpected reward {reward}"
            success = True
            break
    duration = time.time() - start_time
    print(f" final duration: {duration} with num steps {num_steps} and success={success}.")
    return duration, num_steps, node_expansions, success

def run_single_experiment(model, train_env, test_env, seed=0):
    """Train one approach, then evaluate it on every test problem.

    Parameters
    ----------
    model : Approach
    train_env : pddlgym.PDDLEnv
        Handed to ``model.train``.
    test_env : pddlgym.PDDLEnv
        Supplies the action set and the test problems.
    seed : int

    Returns
    -------
    Five lists with one entry per test problem: training duration (the same
    value repeated), test duration in seconds, number of steps, number of
    node expansions, and success flag.
    """
    # Setup: the environment is reset before asking it for the actions.
    test_env.reset()
    model.set_actions(test_env.get_possible_actions())
    model.seed(seed)

    # Training, timed as a whole.
    train_started = time.time()
    model.train(train_env)
    train_duration = time.time() - train_started
    # Repeated once per problem so all five result lists line up.
    train_durations = [train_duration] * len(test_env.problems)

    # Testing: each outcome is (duration, num_steps, node_expansions, success).
    outcomes = [
        run_single_test(test_env, problem_idx, model)
        for problem_idx in range(len(test_env.problems))
    ]
    test_durations = [outcome[0] for outcome in outcomes]
    test_num_steps = [outcome[1] for outcome in outcomes]
    test_node_expansions = [outcome[2] for outcome in outcomes]
    test_successes = [outcome[3] for outcome in outcomes]

    return train_durations, test_durations, test_num_steps, test_node_expansions, test_successes

### Here's where the action happens
If you are impatient like me, you should feel free to change the levels list below to run only the first few levels, especially while you are developing your code. If you find that your approaches do not work at all after, e.g., level 3, then you do not need to run them on levels 4, 5, 6.


**Footnote:** Last week we used "SearchAndRescueLevel1-v0". This week we are using "PDDLSearchAndRescueLevel1-v0" instead. The difference is in the state representation. "SearchAndRescueLevel1-v0" used a dictionary-like state representation that is easy to read and interpret. "PDDLSearchAndRescueLevel1-v0" uses a state representation based on "Literals", which is a concept we will revisit in later weeks and you need not worry about now. If you want to recover the state representation from last week, you can use the `SARStateFeaturizer` above. Note that your code from last week will probably be much faster on the "PDDL" version of the environment because successor generation is faster.

In [None]:
levels = list(range(1, 7))

# all_results[level][approach] is a list with one tuple per test problem:
# (train duration, test duration, num steps, num node expansions, success)
all_results = {}
for level in levels:
    level_results = {}
    all_results[level] = level_results
    train_env = pddlgym.make(f"PDDLSearchAndRescueLevel{level}-v0")
    test_env = pddlgym.make(f"PDDLSearchAndRescueLevel{level}Test-v0")
    for approach in approaches:
        level_results[approach] = []
        model = get_approach(approach, test_env)
        results = run_single_experiment(model, train_env, test_env)
        # results holds five parallel lists; zip regroups them per problem
        level_results[approach].extend(zip(*results))

In [None]:
columns = ["Approach", "Train Time", "Duration", "Num Steps", "Num Nodes", "Successes"]

# Per level, print the mean and the standard deviation over test problems of
# every recorded quantity, with one row per approach.
for level in sorted(all_results):
    level_results = all_results[level]
    approach_names = sorted(level_results)
    print(f"\n### LEVEL {level} ###")
    mean_table = [(name, *np.mean(level_results[name], axis=0)) for name in approach_names]
    std_table = [(name, *np.std(level_results[name], axis=0)) for name in approach_names]
    print("\n# Means #")
    print(tabulate(mean_table, headers=columns))
    print("\n# Standard Deviations #")
    print(tabulate(std_table, headers=columns))