In [1]:
%pip install numpy scikit-learn wlplan tqdm pymdzcf==0.1.0

Note: you may need to restart the kernel to use updated packages.


To see how you can use `wlplan` for both training and search, see this [test](../../tests/train_eval_blocks_test.py). This notebook only contains the training part.

In [2]:
import os
import numpy as np
import pymimir
import wlplan
import time
from wlplan.data import Dataset, ProblemStates
from wlplan.feature_generation import get_feature_generator
from wlplan.planning import State, parse_domain, parse_problem
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct
from tqdm import tqdm

1. [The most work] Parse the PDDL domain and the training data into (state, optimal cost-to-go) pairs using a parser of your choice. Here I use `mimir` (via the `pymimir` package), but any other PDDL parser will do.

In [3]:
t = time.time()

# Input locations, relative to the notebook's working directory
DOMAIN_PDDL = "blocksworld/domain.pddl"
PROBLEM_DIR = "blocksworld/training"
PLAN_DIR = "blocksworld/training_plans"
PLAN_SUFFIX = ".plan"

# Initialise WLPlan domain
wlplan_domain = parse_domain(DOMAIN_PDDL)
predicates = wlplan_domain.predicates
name_to_predicate = {p.name: p for p in predicates}

# Parse the domain once with mimir; the schema lookup only depends on the
# domain, so it is built here instead of once per problem.
mimir_domain = pymimir.DomainParser(str(DOMAIN_PDDL)).parse()
name_to_schema = {s.name: s for s in mimir_domain.action_schemas}


def mimir_to_wlplan_state(mimir_state: pymimir.State) -> State:
    """Convert a mimir state into a WLPlan `State`.

    Every atom of `mimir_state` becomes a `wlplan.planning.Atom` whose
    predicate is looked up by name in `name_to_predicate` and whose objects
    are the names of the atom's terms.
    """
    return State(
        [
            wlplan.planning.Atom(
                predicate=name_to_predicate[atom.predicate.name],
                objects=[o.name for o in atom.terms],
            )
            for atom in mimir_state.get_atoms()
        ]
    )


def read_plan_actions(plan_file, mimir_problem):
    """Read a plan file and return its steps as a list of mimir actions.

    Each step is expected to look like `(schema arg1 arg2 ...)`. Blank lines
    and lines starting with `;` (comments) are skipped.

    Raises:
        KeyError: if a step names an action schema or object that is not part
            of the domain / problem.
    """
    name_to_object = {o.name: o for o in mimir_problem.objects}
    actions = []
    with open(plan_file, "r") as plan_handle:
        for line in plan_handle:
            step = line.strip()
            if not step or step.startswith(";"):
                continue
            # Drop the parentheses, then split on any run of whitespace
            toks = step.replace("(", "").replace(")", "").split()
            if not toks:
                continue
            schema = name_to_schema[toks[0]]
            args = [name_to_object[arg] for arg in toks[1:]]
            actions.append(pymimir.Action.new(mimir_problem, schema, args))
    return actions


wlplan_data = []
y = []

# Construct dataset: one ProblemStates entry per plan file
plan_names = sorted(
    name for name in os.listdir(PLAN_DIR) if name.endswith(PLAN_SUFFIX)
)
for plan_name in tqdm(plan_names):
    # The problem file shares the plan's base name, with a .pddl extension
    problem_pddl = os.path.join(PROBLEM_DIR, os.path.splitext(plan_name)[0] + ".pddl")
    plan_file = os.path.join(PLAN_DIR, plan_name)

    # Initialise WLPlan problem
    wlplan_problem = parse_problem(DOMAIN_PDDL, problem_pddl)

    # Parse problem and plan with mimir
    mimir_problem = pymimir.ProblemParser(str(problem_pddl)).parse(mimir_domain)
    mimir_state = mimir_problem.create_state(mimir_problem.initial)
    actions = read_plan_actions(plan_file, mimir_problem)

    # Collect plan trace states. The label of each state is the number of plan
    # steps still to be applied (assumes the plan is optimal with unit action
    # costs, so that this equals the optimal cost-to-go - TODO confirm).
    h_opt = len(actions)
    wlplan_states = [mimir_to_wlplan_state(mimir_state)]
    y.append(h_opt)
    for action in actions:
        h_opt -= 1
        mimir_state = action.apply(mimir_state)
        wlplan_states.append(mimir_to_wlplan_state(mimir_state))
        y.append(h_opt)

    problem_states = ProblemStates(problem=wlplan_problem, states=wlplan_states)
    wlplan_data.append(problem_states)

# This is what we need to feed into our feature generator below
dataset = Dataset(domain=wlplan_domain, data=wlplan_data)

t = time.time() - t
print(f"Time taken to parse data: {t:.2f} seconds")

100%|██████████| 56/56 [00:04<00:00, 12.79it/s]

Time taken to parse data: 4.43 seconds





2. Collect and generate features from the preprocessed data

In [4]:
# Keyword arguments for `get_feature_generator`, kept in one place so they are easy to tune
WLPLAN_CONFIG = dict(
    domain=wlplan_domain,
    iterations=4,
    feature_algorithm="wl",
    graph_representation="ilg",
)

In [5]:
start = time.time()

# Build the generator from the config, collect features over the dataset,
# then embed the same dataset into a float feature matrix.
feature_generator = get_feature_generator(**WLPLAN_CONFIG)
feature_generator.collect(dataset)
embeddings = feature_generator.embed(dataset)
X = np.array(embeddings).astype(float)

# Targets gathered during parsing, converted to a NumPy array
y = np.array(y)

t = time.time() - start

print(
    f"{X.shape=}",
    f"{y.shape=}",
    f"Feature generation took {t:.2f} seconds",
    sep="\n",
)

[Iteration 0]
Collecting.
[Iteration 1]
Collecting.
[Iteration 2]
Collecting.
[Iteration 3]
Collecting.
[Iteration 4]
Collecting.
[complete]
X.shape=(1348, 10445)
y.shape=(1348,)
Feature generation took 0.82 seconds


3. Train a Gaussian Process Regression model

In [6]:
start = time.time()

# Dot-product kernel with sigma_0 held fixed at 0
linear_kernel = DotProduct(sigma_0=0, sigma_0_bounds="fixed")
model = GaussianProcessRegressor(
    kernel=linear_kernel,
    alpha=1e-7,
    random_state=0,
).fit(X, y)

# Mean squared error on the training data itself (no held-out split here)
y_pred = model.predict(X)
residuals = y - y_pred
loss = np.mean(residuals ** 2)

t = time.time() - start

print(f"{loss=}", f"Training and prediction took {t:.2f} seconds", sep="\n")

loss=1.0568576234132687e-17
Training and prediction took 1.29 seconds
