From 9e68408ce0e6328223c368e0511d525b381c4d82 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Tue, 9 Sep 2025 15:31:43 -0400 Subject: [PATCH 01/14] changes to requirements implementation Signed-off-by: Mandana Vaziri --- examples/requirements/email.pdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/requirements/email.pdl b/examples/requirements/email.pdl index c949b33d3..24575f986 100644 --- a/examples/requirements/email.pdl +++ b/examples/requirements/email.pdl @@ -12,7 +12,9 @@ defs: Does the following email end with Kind regards. Answer with a JSON object and a result field with value True or False only. Email: ${ response } parser: json - - ${ result.result } + - if: ${ result.result } + then: 0 + else: -1000000 fix: function: From 71e93c46a5d5865a10519165322850b756545e2c Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Tue, 9 Sep 2025 15:33:04 -0400 Subject: [PATCH 02/14] changes to requirements implementation Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 28a04260d..6329c5ac0 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -487,22 +487,23 @@ def process_advanced_block( # noqa:C901 requirements_satisfied = True for req in block.requirements: evalfn, _ = process_expr(scope, getattr(req, "evaluate"), loc) + requirement, _ = process_expr(scope, getattr(req, "description"), loc) evaluation = evalfn( - requirement=getattr(req, "description"), response=result + requirement=requirement, response=result ) - if evaluation.result() is False: + if evaluation.result() < -0.3: requirements_satisfied = False transfn, _ = process_expr( scope, getattr(req, "transformContext"), loc ) new_context = transfn( pdl_context=scope["pdl_context"], - requirement=getattr(req, "description"), + requirement=requirement, response=result, ) - scope = scope | {"pdl_context": 
new_context} + if trial_idx < max_retry: + scope = scope | {"pdl_context": new_context} if requirements_satisfied is False: - print("\nTrying again!") continue result = lazy_apply(id_with_set_first_use_nanos(block.pdl__timing), result) From a94417182ed7d5535d8baa5106c34f4a834ff419 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Tue, 9 Sep 2025 16:18:05 -0400 Subject: [PATCH 03/14] new example Signed-off-by: Mandana Vaziri --- examples/requirements/gsm8k.pdl | 101 ++++++++++++++++++++++++++++++++ src/pdl/pdl-schema.json | 6 +- src/pdl/pdl_ast.py | 4 +- 3 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 examples/requirements/gsm8k.pdl diff --git a/examples/requirements/gsm8k.pdl b/examples/requirements/gsm8k.pdl new file mode 100644 index 000000000..15060901b --- /dev/null +++ b/examples/requirements/gsm8k.pdl @@ -0,0 +1,101 @@ +defs: + model: ollama_chat/granite3.3:8b + llm_as_judge: ollama_chat/gpt-oss:20b + + problem: | + Carla is downloading a 200 GB file. Normally she can download 2 GB/minute, + but 40% of the way through the download, Windows forces a restart to install updates, + which takes 20 minutes. Then Carla has to restart the download from the beginning. + How load does it take to download the file? + + truth: "First find how many gigabytes are in 40% of the file: 200 GB * 40% = <<200*40*.01=80>>80 GB\nThen divide that number by the download rate to find the time until Windows restarts: 80 GB / 2 GB/minute = <<80/2=40>>40 minutes\nThen find the time to download the whole file after the restart: 200 GB / 2 GB/minute = <<200/2=100>>100 minutes\nThen add the time to download 40% of the file, to download the whole file, and to wait for Windows to update: 40 minutes + 100 minutes + 20 minutes = <<40+100+20=160>>160 minutes\n#### 160" + + extract_answer: + function: + solution: string + return: + lastOf: + - ${ solution } + - Extract the result from the above solution into a JSON object with field "answer" and a float as value. 
Remove any dollar signs or other symbols. + - model: ${ model } + parser: json + spec: { "answer": number } + + eval: + function: + requirement: string + response: string + return: + lastOf: + - model: ${ llm_as_judge } + def: evaluation + input: | + Is the following requirement satisfied in the solution below? Requirement: ${ requirement } + ${ response } + + Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. + parser: json + - lang: python + code: | + print(evaluation) + print(${ evaluation.result == false }) + print(requirement) + print(response) + result = "" + - if: ${ evaluation.result } + then: 0 + else: -1000000 + + + transform: + function: + requirement: string + response: string + return: + lastOf: + - model: ${ model } + input: | + The following requirement is not satisfied, what instruction can be added to get the correct answer? + Requirement: ${ requirement } + Answer with only the instruction. + - lang: python + code: | + print(${pdl_context}) + result = "" + - ${ pdl_context } + + + + solve: + function: + problem: string + return: + defs: + solution: + text: + - ${ problem } + - "\n\n" + - model: ${ model } + parameters: + temperature: 0.1 + requirements: + - description: "The solution to this problem should be correct. 
Problem: ${ problem }" + evaluate: ${ eval } + transformContext: ${ transform } + retry: 2 + answer_obj: + call: ${ extract_answer } + args: + solution: ${ solution } + pdl_context: [] + debug: + lang: python + code: | + print(answer_obj) + result = "" + data: ${ answer_obj.answer } + +call: ${ solve } +args: + problem: ${ problem } + pdl_context: [] diff --git a/src/pdl/pdl-schema.json b/src/pdl/pdl-schema.json index 872549646..b011633f7 100644 --- a/src/pdl/pdl-schema.json +++ b/src/pdl/pdl-schema.json @@ -4531,6 +4531,7 @@ "type": "null" } ], + "default": null, "title": "Evaluate" }, "transformContext": { @@ -4548,13 +4549,12 @@ "type": "null" } ], + "default": null, "title": "Transformcontext" } }, "required": [ - "description", - "evaluate", - "transformContext" + "description" ], "title": "RequirementType", "type": "object" diff --git a/src/pdl/pdl_ast.py b/src/pdl/pdl_ast.py index 48959da53..512a591eb 100644 --- a/src/pdl/pdl_ast.py +++ b/src/pdl/pdl_ast.py @@ -341,10 +341,10 @@ class RequirementType(BaseModel): description: ExpressionType """English description of the requirement""" - evaluate: Optional[ExpressionType["FunctionBlock"]] + evaluate: Optional[ExpressionType["FunctionBlock"]] = None """Evaluation function for the requirement""" - transformContext: Optional[ExpressionType["FunctionBlock"]] + transformContext: Optional[ExpressionType["FunctionBlock"]] = None """Function to transform the context for the requirement""" From aa97159ef9d9aeb257d4cc76617c0b3aff4c5d67 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 10 Sep 2025 11:25:44 -0400 Subject: [PATCH 04/14] wip Signed-off-by: Mandana Vaziri --- pyproject.toml | 2 +- src/pdl/pdl_interpreter.py | 25 +++++++++++++++++++++--- src/pdl/pdl_stdlib.pdl | 40 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 src/pdl/pdl_stdlib.pdl diff --git a/pyproject.toml b/pyproject.toml index b107dd749..242216fde 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -87,7 +87,7 @@ version_file = "src/pdl/_version.py" where = ["src"] [tool.setuptools.package-data] -pdl = ["pdl-schema.json"] +pdl = ["pdl-schema.json", "pdl_stdlib.pdl"] [tool.pyright] include = ["src", "tests", "examples", "docs"] diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 6329c5ac0..cd4d23d28 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -304,8 +304,19 @@ def process_prog( PDLRuntimeError: If the program raises an error. """ scope = empty_scope | scope + + # Process stdlib + stdlib_file = Path(__file__).parent / "pdl_stdlib.pdl" + stdlib, loc = parse_file(stdlib_file) + _, _, stdlib_scope, _ = process_block( + state.with_yield_background(False).with_yield_result(False), + scope, + stdlib.root, + loc, + ) + result, document, final_scope, trace = process_block( - state, scope, block=prog.root, loc=loc + state, stdlib_scope, block=prog.root, loc=loc ) return result, document, final_scope, trace @@ -486,15 +497,23 @@ def process_advanced_block( # noqa:C901 if block.requirements != []: requirements_satisfied = True for req in block.requirements: - evalfn, _ = process_expr(scope, getattr(req, "evaluate"), loc) + evaluate = getattr(req, "evaluate", None) + if evaluate is None: + evaluate = scope["__eval"] + evalfn: Any + evalfn, _ = process_expr(scope, evaluate, loc) requirement, _ = process_expr(scope, getattr(req, "description"), loc) evaluation = evalfn( requirement=requirement, response=result ) if evaluation.result() < -0.3: requirements_satisfied = False + transformContext = getattr(req, "transformContext", None) + if transformContext is None: + transformContext = scope["__transformContext"] + transfn: Any transfn, _ = process_expr( - scope, getattr(req, "transformContext"), loc + scope, transformContext, loc ) new_context = transfn( pdl_context=scope["pdl_context"], diff --git a/src/pdl/pdl_stdlib.pdl b/src/pdl/pdl_stdlib.pdl new file mode 100644 index 000000000..1bb588e65 ---
/dev/null +++ b/src/pdl/pdl_stdlib.pdl @@ -0,0 +1,40 @@ + +defs: + requirements: + object: + req_eval: + function: + requirement: string + response: string + llm_as_judge: {optional: string} + return: + lastOf: + - model: ${ llm_as_judge | default('ollama_chat/gpt-oss:20b') } + def: evaluation + input: | + Is the following requirement satisfied in the solution below? Requirement: ${ requirement } + ${ response } + + Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. + parser: json + - lang: python + code: | + print(evaluation) + result = "" + - if: ${ evaluation.result } + then: 0 + else: -1000000 + + req_transform: + function: + requirement: string + response: string + model: {optional: string} + return: + lastOf: + - model: ${ model | default('ollama_chat/granite3.3:8b') } + input: | + The following requirement is not satisfied, what instruction can be added to get the correct answer? + Requirement: ${ requirement } + Answer with only the instruction. + - ${ pdl_context } From 210c531d5d7ae08bd73260e11b5d09006e12b8fd Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 10 Sep 2025 12:55:24 -0400 Subject: [PATCH 05/14] stdlib for requirements functions Signed-off-by: Mandana Vaziri --- examples/requirements/gsm8k.pdl | 51 +-------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/examples/requirements/gsm8k.pdl b/examples/requirements/gsm8k.pdl index 15060901b..5d30673f7 100644 --- a/examples/requirements/gsm8k.pdl +++ b/examples/requirements/gsm8k.pdl @@ -8,8 +8,6 @@ defs: which takes 20 minutes. Then Carla has to restart the download from the beginning. How load does it take to download the file? 
- truth: "First find how many gigabytes are in 40% of the file: 200 GB * 40% = <<200*40*.01=80>>80 GB\nThen divide that number by the download rate to find the time until Windows restarts: 80 GB / 2 GB/minute = <<80/2=40>>40 minutes\nThen find the time to download the whole file after the restart: 200 GB / 2 GB/minute = <<200/2=100>>100 minutes\nThen add the time to download 40% of the file, to download the whole file, and to wait for Windows to update: 40 minutes + 100 minutes + 20 minutes = <<40+100+20=160>>160 minutes\n#### 160" - extract_answer: function: solution: string @@ -21,51 +19,6 @@ defs: parser: json spec: { "answer": number } - eval: - function: - requirement: string - response: string - return: - lastOf: - - model: ${ llm_as_judge } - def: evaluation - input: | - Is the following requirement satisfied in the solution below? Requirement: ${ requirement } - ${ response } - - Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. - parser: json - - lang: python - code: | - print(evaluation) - print(${ evaluation.result == false }) - print(requirement) - print(response) - result = "" - - if: ${ evaluation.result } - then: 0 - else: -1000000 - - - transform: - function: - requirement: string - response: string - return: - lastOf: - - model: ${ model } - input: | - The following requirement is not satisfied, what instruction can be added to get the correct answer? - Requirement: ${ requirement } - Answer with only the instruction. - - lang: python - code: | - print(${pdl_context}) - result = "" - - ${ pdl_context } - - - solve: function: problem: string @@ -80,9 +33,7 @@ defs: temperature: 0.1 requirements: - description: "The solution to this problem should be correct. 
Problem: ${ problem }" - evaluate: ${ eval } - transformContext: ${ transform } - retry: 2 + retry: 1 answer_obj: call: ${ extract_answer } args: From 1f6c7e408fb9858f94ac65cb9faa5779e2ae35b2 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 10 Sep 2025 13:05:44 -0400 Subject: [PATCH 06/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 35 +++++++++-------- src/pdl/pdl_stdlib.pdl | 74 ++++++++++++++++++------------------ tests/test_examples_run.yaml | 1 + 3 files changed, 57 insertions(+), 53 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index cd4d23d28..7a5f30b2d 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -308,13 +308,15 @@ def process_prog( # Process stdlib stdlib_file = Path(__file__).parent / "pdl_stdlib.pdl" stdlib, loc = parse_file(stdlib_file) - _, _, stdlib_scope, _ = process_block( + _, _, stdlib_dict, _ = process_block( state.with_yield_background(False).with_yield_result(False), - scope, + empty_scope, stdlib.root, loc, ) - + + stdlib_scope = scope | PdlDict({"stdlib": stdlib_dict}) + result, document, final_scope, trace = process_block( state, stdlib_scope, block=prog.root, loc=loc ) @@ -489,7 +491,7 @@ def process_advanced_block( # noqa:C901 max_retry = block.retry if block.retry else 0 trial_total = max_retry + 1 - for trial_idx in range(trial_total): + for trial_idx in range(trial_total): # pylint: disable=too-many-nested-blocks try: result, background, new_scope, trace = process_block_body( state, scope, block, loc @@ -498,23 +500,24 @@ def process_advanced_block( # noqa:C901 requirements_satisfied = True for req in block.requirements: evaluate = getattr(req, "evaluate", None) + stdlib_dict: Any = scope["stdlib"] if evaluate is None: - evaluate = scope["__eval"] + evaluate = stdlib_dict["requirements"]["evaluation"] evalfn: Any evalfn, _ = process_expr(scope, evaluate, loc) - requirement, _ = process_expr(scope, getattr(req, "description"), loc) - 
evaluation = evalfn( - requirement=requirement, response=result + requirement, _ = process_expr( + scope, getattr(req, "description"), loc ) - if evaluation.result() < -0.3: + evaluation = evalfn(requirement=requirement, response=result) + if evaluation.result() < -0.3: requirements_satisfied = False - transformContext = getattr(req, "transformContext", None) - if transformContext is None: - transformContext = scope["__transformContext"] + transform_context = getattr(req, "transformContext", None) + if transform_context is None: + transform_context = stdlib_dict["requirements"][ + "transformContext" + ] transfn: Any - transfn, _ = process_expr( - scope, transformContext, loc - ) + transfn, _ = process_expr(scope, transform_context, loc) new_context = transfn( pdl_context=scope["pdl_context"], requirement=requirement, @@ -688,7 +691,7 @@ def process_block_body( yield_result(result.result(), block.kind) if state.yield_background: yield_background(background) - case TextBlock(): # HERE + case TextBlock(): result, background, scope, trace = process_blocks_of( block, "text", diff --git a/src/pdl/pdl_stdlib.pdl b/src/pdl/pdl_stdlib.pdl index 1bb588e65..6fcd4eb8d 100644 --- a/src/pdl/pdl_stdlib.pdl +++ b/src/pdl/pdl_stdlib.pdl @@ -1,40 +1,40 @@ defs: - requirements: - object: - req_eval: - function: - requirement: string - response: string - llm_as_judge: {optional: string} - return: - lastOf: - - model: ${ llm_as_judge | default('ollama_chat/gpt-oss:20b') } - def: evaluation - input: | - Is the following requirement satisfied in the solution below? Requirement: ${ requirement } - ${ response } + requirements: + object: + evaluation: + function: + requirement: string + response: string + llm_as_judge: {optional: string} + return: + lastOf: + - model: ${ llm_as_judge | default('ollama_chat/gpt-oss:20b') } + def: evaluation + input: | + Is the following requirement satisfied in the solution below? 
Requirement: ${ requirement } + ${ response } - Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. - parser: json - - lang: python - code: | - print(evaluation) - result = "" - - if: ${ evaluation.result } - then: 0 - else: -1000000 - - req_transform: - function: - requirement: string - response: string - model: {optional: string} - return: - lastOf: - - model: ${ model | default('ollama_chat/granite3.3:8b') } - input: | - The following requirement is not satisfied, what instruction can be added to get the correct answer? - Requirement: ${ requirement } - Answer with only the instruction. - - ${ pdl_context } + Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. + parser: json + - lang: python + code: | + print(evaluation) + result = "" + - if: ${ evaluation.result } + then: 0 + else: -1000000 + + transformContext: + function: + requirement: string + response: string + model: {optional: string} + return: + lastOf: + - model: ${ model | default('ollama_chat/granite3.3:8b') } + input: | + The following requirement is not satisfied, what instruction can be added to get the correct answer? + Requirement: ${ requirement } + Answer with only the instruction. 
+ - ${ pdl_context } diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index 62f0c8fd4..3e2c4f1fe 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -36,6 +36,7 @@ skip: - examples/requirements/email.pdl - examples/skeleton-of-thought/tips.pdl - examples/tutorial/sdk/lib.pdl + - src/pdl/pdl_stdlib.pdl with_inputs: examples/tutorial/programs/chatbot.pdl: stdin: | From 26dfa5064f155dd34391f4abaa97d96ddc7e692e Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 17 Sep 2025 09:25:21 -0400 Subject: [PATCH 07/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_dumper.py | 6 ++++-- src/pdl/pdl_interpreter.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pdl/pdl_dumper.py b/src/pdl/pdl_dumper.py index 59b9885b9..8f7d1e0aa 100644 --- a/src/pdl/pdl_dumper.py +++ b/src/pdl/pdl_dumper.py @@ -413,8 +413,10 @@ def usage_to_dict(usage: PdlUsage) -> dict: def requirement_to_dict(req: RequirementType, json_compatible: bool) -> dict: d: dict = {} d["description"] = req.description - d["evaluate"] = expr_to_dict(req.evaluate, json_compatible) - d["transformContext"] = expr_to_dict(req.transformContext, json_compatible) + if req.evaluate is not None: + d["evaluate"] = expr_to_dict(req.evaluate, json_compatible) + if req.transformContext is not None: + d["transformContext"] = expr_to_dict(req.transformContext, json_compatible) return d diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 7a5f30b2d..6cd846e23 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -307,7 +307,7 @@ def process_prog( # Process stdlib stdlib_file = Path(__file__).parent / "pdl_stdlib.pdl" - stdlib, loc = parse_file(stdlib_file) + stdlib, _ = parse_file(stdlib_file) _, _, stdlib_dict, _ = process_block( state.with_yield_background(False).with_yield_result(False), empty_scope, From 398f269edf72555b63beff4be93fee1b1f0e7c81 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 
17 Sep 2025 14:54:31 -0400 Subject: [PATCH 08/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 6cd846e23..ead3472e1 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -462,7 +462,7 @@ def set_error_to_scope_for_retry( return scope -def process_advanced_block( # noqa:C901 +def process_advanced_block( # noqa: C901 state: InterpreterState, scope: ScopeType, block: AdvancedBlockType, @@ -484,7 +484,7 @@ def process_advanced_block( # noqa:C901 ) # Bind result variables here with empty values - result: PdlLazy[Any] = PdlConst(None) + result: PdlLazy[Any] = PdlConst(None) # noqa: C901 background: LazyMessages = DependentContext([]) new_scope: ScopeType = PdlDict({}) trace: AdvancedBlockType = EmptyBlock() From 86b313f5dd11206cf0bde0981331e3deae17ca25 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Thu, 18 Sep 2025 09:31:07 -0400 Subject: [PATCH 09/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 551d7398b..810f3579c 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -505,15 +505,6 @@ def process_advance_block_retry( state.yield_background and context_in_contribute(block) ) -<<<<<<< HEAD - # Bind result variables here with empty values - result: PdlLazy[Any] = PdlConst(None) # noqa: C901 - background: LazyMessages = DependentContext([]) - new_scope: ScopeType = PdlDict({}) - trace: AdvancedBlockType = EmptyBlock() - -======= ->>>>>>> main max_retry = block.retry if block.retry else 0 trial_total = max_retry + 1 for trial_idx in range(trial_total): # pylint: disable=too-many-nested-blocks From 79046718062315262a7f38b1f409d763b1e8c2d1 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Thu, 18 Sep 2025 13:16:16 -0400 Subject: 
[PATCH 10/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 810f3579c..a206c3237 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -484,7 +484,7 @@ def process_advanced_block( # noqa: C901 return result, background, new_scope, trace -def process_advance_block_retry( +def process_advance_block_retry( # noqa: C901 state: InterpreterState, scope: ScopeType, block: AdvancedBlockType, From 06f927c5e68711b4d66e2cfef98ea2a2ae482c00 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Thu, 18 Sep 2025 17:53:54 -0400 Subject: [PATCH 11/14] cleanup Signed-off-by: Mandana Vaziri --- tests/test_examples_run.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index 59bfb0ca9..405c06cf5 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -34,6 +34,7 @@ skip: - examples/optimizer/optimized_grammar_correction.pdl - examples/optimizer/eval_levenshtein.pdl - examples/requirements/email.pdl + - examples/requirements/gsm8k.pdl - examples/skeleton-of-thought/tips.pdl - examples/tutorial/sdk/lib.pdl - src/pdl/pdl_stdlib.pdl From 535d209c5f5116035742eeddc90362cd477447f0 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Fri, 19 Sep 2025 11:17:00 -0400 Subject: [PATCH 12/14] added logprobs for llm-judge Signed-off-by: Mandana Vaziri --- examples/requirements/gsm8k_short.pdl | 21 ++++++++++++ src/pdl/pdl_interpreter.py | 2 +- src/pdl/pdl_stdlib.pdl | 49 +++++++++++++++++++++------ tests/test_examples_run.yaml | 1 + 4 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 examples/requirements/gsm8k_short.pdl diff --git a/examples/requirements/gsm8k_short.pdl b/examples/requirements/gsm8k_short.pdl new file mode 100644 index 000000000..5f1c2efaa --- /dev/null +++ b/examples/requirements/gsm8k_short.pdl @@ -0,0
+1,21 @@ +defs: + problem: |+ + Carla is downloading a 200 GB file. Normally she can download 2 GB/minute, + but 40% of the way through the download, Windows forces a restart to install updates, + which takes 20 minutes. Then Carla has to restart the download from the beginning. + How load does it take to download the file? + + +lastOf: +- ${ problem } +- model: ollama_chat/granite3.3:8b + parameters: + temperature: 0.2 +- Extract the result from the above solution into a JSON object with field "answer" and a float as value. Remove any dollar signs or other symbols. +- model: ollama_chat/granite3.3:8b + def: result + parser: json + spec: { "answer": number } + requirements: + - description: "This solution to the following math problem is correct: ${ problem }" +- ${ result.answer } \ No newline at end of file diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 2a57c11fd..242157b29 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -525,7 +525,7 @@ def process_advance_block_retry( # noqa: C901 scope, getattr(req, "description"), loc ) evaluation = evalfn(requirement=requirement, response=result) - if evaluation.result() < -0.3: + if evaluation < -0.3: requirements_satisfied = False transform_context = getattr(req, "transformContext", None) if transform_context is None: diff --git a/src/pdl/pdl_stdlib.pdl b/src/pdl/pdl_stdlib.pdl index 6fcd4eb8d..95e881d55 100644 --- a/src/pdl/pdl_stdlib.pdl +++ b/src/pdl/pdl_stdlib.pdl @@ -1,5 +1,30 @@ defs: + reward: + function: + response: + return: + defs: + top_logprobs: ${ response.choices[0].logprobs.content[0].top_logprobs} + lastOf: + - for: + tp: ${ top_logprobs } + repeat: + match: ${ tp.token } + with: + - case: "Yes" + then: + data: ${ tp.logprob } + def: lp_y + - case: "No" + then: + data: ${ tp.logprob } + def: lp_n + - lang: python + code: | + import math + result = math.log(math.exp(lp_y) / (math.exp(lp_y) + math.exp(lp_n))) + requirements: object: evaluation: @@ -9,21 
+34,20 @@ defs: llm_as_judge: {optional: string} return: lastOf: - - model: ${ llm_as_judge | default('ollama_chat/gpt-oss:20b') } + - model: ${ llm_as_judge | default('watsonx/meta-llama/llama-3-3-70b-instruct') } def: evaluation input: | Is the following requirement satisfied in the solution below? Requirement: ${ requirement } ${ response } - Respond with a JSON object that has a field result set to true if the requirement is satisfied and false otherwise. - parser: json - - lang: python - code: | - print(evaluation) - result = "" - - if: ${ evaluation.result } - then: 0 - else: -1000000 + Respond with only 'Yes' or 'No'. + modelResponse: out + parameters: + temperature: 0 + logprobs: true + top_logprobs: 5 + - ${ reward(out) } + transformContext: function: @@ -38,3 +62,8 @@ defs: Requirement: ${ requirement } Answer with only the instruction. - ${ pdl_context } + + + + + diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index 405c06cf5..653f229fe 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -35,6 +35,7 @@ skip: - examples/optimizer/eval_levenshtein.pdl - examples/requirements/email.pdl - examples/requirements/gsm8k.pdl + - examples/requirements/gsm8k_short.pdl - examples/skeleton-of-thought/tips.pdl - examples/tutorial/sdk/lib.pdl - src/pdl/pdl_stdlib.pdl From 4e86ffb22ba329f89014cb646cb1be934064bae4 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Fri, 19 Sep 2025 13:15:05 -0400 Subject: [PATCH 13/14] changed description to expect Signed-off-by: Mandana Vaziri --- examples/requirements/email.pdl | 2 +- examples/requirements/gsm8k.pdl | 2 +- examples/requirements/gsm8k_short.pdl | 2 +- src/pdl/pdl-schema.json | 6 +++--- src/pdl/pdl_ast.py | 2 +- src/pdl/pdl_dumper.py | 2 +- src/pdl/pdl_interpreter.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/requirements/email.pdl b/examples/requirements/email.pdl index 24575f986..0c6b26f3e 100644 --- 
a/examples/requirements/email.pdl +++ b/examples/requirements/email.pdl @@ -38,7 +38,7 @@ text: - "Write an email to ${ name } using the notes following: ${ notes }" - model: ollama_chat/granite3.2:2b requirements: - - description: The email should end with Kind regards + - expect: The email should end with Kind regards evaluate: ${ eval } transformContext: ${ fix } retry: 5 diff --git a/examples/requirements/gsm8k.pdl b/examples/requirements/gsm8k.pdl index 5d30673f7..4a1dd5b14 100644 --- a/examples/requirements/gsm8k.pdl +++ b/examples/requirements/gsm8k.pdl @@ -32,7 +32,7 @@ defs: parameters: temperature: 0.1 requirements: - - description: "The solution to this problem should be correct. Problem: ${ problem }" + - expect: "The solution to this problem should be correct. Problem: ${ problem }" retry: 1 answer_obj: call: ${ extract_answer } diff --git a/examples/requirements/gsm8k_short.pdl b/examples/requirements/gsm8k_short.pdl index 5f1c2efaa..c4bebc874 100644 --- a/examples/requirements/gsm8k_short.pdl +++ b/examples/requirements/gsm8k_short.pdl @@ -17,5 +17,5 @@ lastOf: parser: json spec: { "answer": number } requirements: - - description: "This solution to the following math problem is correct: ${ problem }" + - expect: "This solution to the following math problem is correct: ${ problem }" - ${ result.answer } \ No newline at end of file diff --git a/src/pdl/pdl-schema.json b/src/pdl/pdl-schema.json index b011633f7..aa12df222 100644 --- a/src/pdl/pdl-schema.json +++ b/src/pdl/pdl-schema.json @@ -4504,7 +4504,7 @@ "additionalProperties": false, "description": "Single requirement definition.", "properties": { - "description": { + "expect": { "anyOf": [ { "$ref": "#/$defs/LocalizedExpression_TypeVar_" @@ -4514,7 +4514,7 @@ "type": "string" } ], - "title": "Description" + "title": "Expect" }, "evaluate": { "anyOf": [ @@ -4554,7 +4554,7 @@ } }, "required": [ - "description" + "expect" ], "title": "RequirementType", "type": "object" diff --git a/src/pdl/pdl_ast.py 
b/src/pdl/pdl_ast.py index 512a591eb..9f13293eb 100644 --- a/src/pdl/pdl_ast.py +++ b/src/pdl/pdl_ast.py @@ -338,7 +338,7 @@ class RequirementType(BaseModel): model_config = ConfigDict(extra="forbid") - description: ExpressionType + expect: ExpressionType """English description of the requirement""" evaluate: Optional[ExpressionType["FunctionBlock"]] = None diff --git a/src/pdl/pdl_dumper.py b/src/pdl/pdl_dumper.py index 8f7d1e0aa..0eb8b108a 100644 --- a/src/pdl/pdl_dumper.py +++ b/src/pdl/pdl_dumper.py @@ -412,7 +412,7 @@ def usage_to_dict(usage: PdlUsage) -> dict: def requirement_to_dict(req: RequirementType, json_compatible: bool) -> dict: d: dict = {} - d["description"] = req.description + d["expect"] = req.expect if req.evaluate is not None: d["evaluate"] = expr_to_dict(req.evaluate, json_compatible) if req.transformContext is not None: diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 242157b29..01e81ec4a 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -522,7 +522,7 @@ def process_advance_block_retry( # noqa: C901 evalfn: Any evalfn, _ = process_expr(scope, evaluate, loc) requirement, _ = process_expr( - scope, getattr(req, "description"), loc + scope, getattr(req, "expect"), loc ) evaluation = evalfn(requirement=requirement, response=result) if evaluation < -0.3: From b6335645ce772c9b6472190d5dd218bd8d306fea Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Fri, 19 Sep 2025 13:17:36 -0400 Subject: [PATCH 14/14] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 01e81ec4a..4bba1677a 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -521,9 +521,7 @@ def process_advance_block_retry( # noqa: C901 evaluate = stdlib_dict["requirements"]["evaluation"] evalfn: Any evalfn, _ = process_expr(scope, evaluate, loc) - requirement, _ = 
process_expr( - scope, getattr(req, "expect"), loc - ) + requirement, _ = process_expr(scope, getattr(req, "expect"), loc) evaluation = evalfn(requirement=requirement, response=result) if evaluation < -0.3: requirements_satisfied = False