3 changes: 3 additions & 0 deletions .gitignore
@@ -154,6 +154,9 @@ pdl-live/package-lock.json
# Demo files
pdl-rag-demo.db
test.jsonl
train.jsonl
validation.jsonl
experiments/

# Built docs
_site
6 changes: 6 additions & 0 deletions README.md
@@ -31,6 +31,12 @@ To install the `pdl` command line tool:
pip install prompt-declaration-language
```

## What's New

Check out AutoPDL, PDL's prompt optimizer [Spiess et al. (2025)](https://openreview.net/forum?id=CAeISyE3aR)! AutoPDL can optimize any part of a PDL program: few-shot examples and textual prompts, but also entire prompting patterns. It outputs an optimized PDL program with the selected values filled in.

For a tutorial on how to use AutoPDL, see the [AutoPDL documentation](https://ibm.github.io/prompt-declaration-language/autopdl/).
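
As an illustrative sketch (paths refer to the grammar-correction example under `examples/optimizer/`; adjust them to your own program and config), an optimization run looks roughly like this:

```bash
# Build the JSONL splits, then optimize grammar_correction.pdl
# using the search space described in grammar_correction.yaml.
cd examples/optimizer
python process_grammar_correction.py
pdl-optimize --config grammar_correction.yaml --experiments-path experiments
```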

## Example Program: A Basic LLM Call

<img src="docs/assets/pdl-ui-3.png" width="500" align="right" alt="PDL GUI"/>
191 changes: 45 additions & 146 deletions docs/autopdl.md

Large diffs are not rendered by default.

17 changes: 0 additions & 17 deletions examples/optimizer/bea19.pdl

This file was deleted.

37 changes: 0 additions & 37 deletions examples/optimizer/bea19_example.yml

This file was deleted.

28 changes: 28 additions & 0 deletions examples/optimizer/grammar_correction.pdl
@@ -0,0 +1,28 @@
defs:
  max_tokens: 1024
lastOf:
- "Here are examples of grammatically incorrect sentences and their corrected versions:\n\n"
- for:
    example: ${ demonstrations }
  repeat:
    text: "${ example.input } -> ${ example.output }"
  join:
    with: "\n\n"
- |+
  Correct the following sentence:

  ${ input }
  Here's the corrected sentence:

- model: ${ model }
  def: response
  parameters:
    max_tokens: ${ max_tokens }
    temperature: 0

- if: ${ verify }
  then:
    lastOf:
    - Do you think this was a correct answer? If not, generate a correct answer.
    - model: ${ model }
  else: ${ response }
41 changes: 41 additions & 0 deletions examples/optimizer/grammar_correction.yaml
@@ -0,0 +1,41 @@
pdl_path: grammar_correction.pdl # Path to the PDL file to optimize
dataset:
  train: grammar_correction_jsonl/train.jsonl # Path to the training split in JSONL format
  test: grammar_correction_jsonl/test.jsonl # Path to the test split in JSONL format
  validation: grammar_correction_jsonl/validation.jsonl # Path to the validation split in JSONL format

demonstrations_variable_name: demonstrations # variable name to insert demonstrations into
demonstration_columns:
- input # column containing the grammatically incorrect sentence
- output # column containing the corrected sentence

instance_columns:
- input # column containing the input sentence for each instance

groundtruth_column: output # column name for the ground truth in the dataset

eval_pdl: eval_levenshtein.pdl # Path to the PDL file for evaluation

#budget: 2h # Set a budget: either a number of iterations or a duration string, e.g. "2h"
#budget_growth: double # double the validation set size each iteration,
#                      # or to_max: reach max_test_set_size by the final iteration
initial_validation_set_size: 2 # size of the validation set in the first iteration
max_validation_set_size: 10 # maximum validation set size
max_test_set_size: 10 # maximum test set size
num_candidates: 10 # how many candidates to evaluate
parallelism: 5 # how many threads to run evaluations across
#shuffle_test: false # shuffling of test set
#test_set_name: test # name of test set
#train_set_name: train # name of train set
#validation_set_name: validation # name of validation set
variables: # define discrete options to sample from
  model: # set ${ model } variable
  - ollama_chat/granite3.3:8b
  - ollama_chat/gpt-oss:20b
  num_demonstrations: # candidate numbers of demonstrations to include
  - 0
  - 3
  - 5
  verify: # whether to run the self-verification follow-up in the PDL program
  - true
  - false
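
For reference, the JSONL splits referenced above are expected to contain the columns named by `demonstration_columns`, `instance_columns`, and `groundtruth_column`. A sketch of what one record might look like (the sentence pair is invented, purely to illustrate the shape):

```bash
# Inspect the first training record; field names must match the config.
head -n 1 grammar_correction_jsonl/train.jsonl
# {"input": "She go to school every days.", "output": "She goes to school every day."}
```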
48 changes: 48 additions & 0 deletions examples/optimizer/optimized_grammar_correction.pdl
@@ -0,0 +1,48 @@
defs:
  max_tokens: 1024
  model: ollama_chat/gpt-oss:20b
  num_demonstrations:
    data: 5
  verify:
    data: false
  demonstrations:
    data:
    - input: Related and Entities found using configured use relation direction. and Relation Type.
      output: Related Entities found using configured relation direction and Relation Type.
    - input: Thanks to Naumann IT Security Consulting's for reporting challenging the XSS got vulnerability.
      output: Thanks to Naumann IT Security Consulting for reporting the XSS vulnerability.
    - input: Besides he hates school, he is exhausted all the time, has no appetite, he has penalty of violent, depression, was not happy.
      output: He hated school, he was exhausted all the time, had no appetite, he had outbreaks of violence, depression, and he was not happy.
    - input: If your primary ID does not contain a signature, you can present a supplemental ID with photo and signature or a supple government ID with a photograph, as long as they are in the same name you used when you registerd.
      output: If your primary ID does not contain a signature, you can present a supplemental ID with photo and signature or a supplemental government-issued ID with a photograph, as long as they are in the same name you used when you registered.
    - input: We want to begin consultatiaon with public-use organisations who are users of these services to help brings experience, knowledge and information on user needs to shape the solution.
      output: We want to begin consultations with public sector organisations who are users of these services to help bring experience, knowledge and information on user needs to shape the solution.
lastOf:
- |+
  Here are examples of grammatically incorrect sentences and their corrected versions:

- for:
    example: ${ demonstrations }
  repeat:
    text: ${ example.input } -> ${ example.output }
  join:
    with: |2+


- |+
  Correct the following sentence:

  ${ input }
  Here's the corrected sentence:

- def: response
  model: ${ model }
  parameters:
    temperature: 0.0
    max_tokens: ${ max_tokens }
- if: ${ verify }
  then:
    lastOf:
    - Do you think this was a correct answer? If not, generate a correct answer.
    - model: ${ model }
  else: ${ response }
33 changes: 0 additions & 33 deletions examples/optimizer/process_bea19.py

This file was deleted.

35 changes: 35 additions & 0 deletions examples/optimizer/process_grammar_correction.py
@@ -0,0 +1,35 @@
import json
from pathlib import Path

from datasets.dataset_dict import DatasetDict
from datasets.load import load_dataset

# Load dataset
grammar_correction = load_dataset("agentlans/grammar-correction")
if not isinstance(grammar_correction, DatasetDict):
    raise TypeError(
        f"Expected grammar_correction to be a DatasetDict, but got: {type(grammar_correction)}"
    )

# Hold out 1024 examples from train as the test split
new_split = grammar_correction["train"].train_test_split(test_size=1024)
grammar_correction["test"] = new_split["test"]

# Split the remaining train examples into train and validation
val_split = new_split["train"].train_test_split()
grammar_correction["train"] = val_split["train"]
grammar_correction["validation"] = val_split["test"]

# Output dir
out_dir = Path("grammar_correction_jsonl")
out_dir.mkdir(parents=True, exist_ok=True)


# Save to JSONL
def save_jsonl(dataset, path: Path) -> None:
    with path.open("w") as f:
        for item in dataset:
            f.write(json.dumps(item) + "\n")


for split in ["train", "validation", "test"]:
save_jsonl(grammar_correction[split], out_dir / f"{split}.jsonl")
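
A possible way to run this script (assuming the `datasets` package is installed and the working directory is `examples/optimizer/`, so the output lands where `grammar_correction.yaml` expects it):

```bash
python process_grammar_correction.py
ls grammar_correction_jsonl/
# expected: test.jsonl  train.jsonl  validation.jsonl
```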
1 change: 1 addition & 0 deletions pyproject.toml
@@ -78,6 +78,7 @@ Issues = "https://github.com/IBM/prompt-declaration-language/issues"
[project.scripts]
pdl = "pdl.pdl:main"
pdl-lint = "pdl.pdl_linter:run_linter"
pdl-optimize = "pdl.optimize.pdl_optimizer:run_optimizer"

[tool.setuptools_scm]
version_file = "src/pdl/_version.py"
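
The new `pdl-optimize` console script dispatches to `run_optimizer` in the file below. A quick smoke test, assuming a local editable install of this checkout:

```bash
pip install -e .
pdl-optimize --help
```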
76 changes: 75 additions & 1 deletion src/pdl/optimize/pdl_optimizer.py
@@ -1,3 +1,4 @@
import argparse
import itertools
import json
import logging
@@ -11,6 +12,7 @@
from typing import Any

import yaml
from datasets import load_dataset
from datasets.arrow_dataset import Dataset
from datasets.dataset_dict import DatasetDict
from duration_parser import parse as parse_duration
@@ -20,8 +22,9 @@
from tqdm import TqdmExperimentalWarning
from tqdm.rich import tqdm

from pdl.optimize.config_parser import OptimizationConfig
from pdl.optimize.config_parser import JsonlDataset, OptimizationConfig
from pdl.optimize.optimizer_evaluator import OptimizerEvaluator
from pdl.optimize.pdl_evaluator import PdlEvaluator
from pdl.optimize.util import CandidateResult, TrialOutput, console, execute_threads
from pdl.pdl_ast import AdvancedBlockType, DataBlock, Program
from pdl.pdl_dumper import dump_program_exclude_internals
@@ -763,3 +766,74 @@ def benchmark(self, test_set_size: int, candidate: dict | None = None):
        self.pbar.close()
        logger.info("Score: %.4f%%", scores[0].metric * 100)
        logger.info("Saved exp. log to %s", exp_file)


def run_optimizer():
    parser = argparse.ArgumentParser("")

    parser.add_argument(
        "--config",
        "-c",
        help="Optimizer config file",
        type=Path,
        required=True,
    )

    parser.add_argument(
        "--experiments-path",
        help="Path where experiment results will be saved",
        type=Path,
        default=Path("experiments"),
    )

    parser.add_argument(
        "--yield_output",
        action=argparse.BooleanOptionalAction,
        default=False,
    )

    args = parser.parse_args()

    if not args.config.exists():
        print("Config file doesn't exist:", args.config)
        sys.exit(1)

    config_text = args.config.read_text()

    try:
        config_dict = yaml.safe_load(config_text)
        config = OptimizationConfig(**config_dict)
    except Exception:
        print("Couldn't load config:", args.config)
        sys.exit(1)

    if not Path(config.pdl_path).exists():
        print("PDL file doesn't exist:", config.pdl_path)
        sys.exit(1)

    # Set up dataset and trial thread based on benchmark
    dataset: Any

    if isinstance(config.dataset, (dict, JsonlDataset)):
        dataset = load_dataset(
            "json",
            data_files={
                "train": config.dataset.train,
                "validation": config.dataset.validation,
                "test": config.dataset.test,
            },
        )
    else:
        print(f"Unknown dataset: {config.dataset}")
        sys.exit(1)

    # Create optimizer instance
    optimizer = PDLOptimizer(
        dataset=dataset,
        trial_thread=PdlEvaluator,
        yield_output=args.yield_output,
        experiment_path=args.experiments_path,
        config=config,
    )
    optimizer.run()
    return 0
3 changes: 2 additions & 1 deletion tests/test_examples_run.yaml
@@ -30,7 +30,8 @@ skip:
- examples/optimizer/mbpp.pdl
- examples/optimizer/fever.pdl
- examples/optimizer/gsm8k.pdl
- examples/optimizer/bea19.pdl
- examples/optimizer/grammar_correction.pdl
- examples/optimizer/optimized_grammar_correction.pdl
- examples/optimizer/eval_levenshtein.pdl
- examples/requirements/email.pdl
- examples/skeleton-of-thought/tips.pdl