From 46d12b12895007c2d70d81f3e4a194721c674ebe Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Mon, 17 Nov 2025 12:08:30 -0500 Subject: [PATCH 1/2] csv parser Signed-off-by: Mandana Vaziri --- docs/tutorial.md | 2 +- src/pdl/pdl-schema.json | 3 ++- src/pdl/pdl_ast.py | 2 +- src/pdl/pdl_interpreter.py | 14 ++++++++++++++ tests/test_parser.py | 11 +++++++++++ 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index f9bbb0987..7dd38ad4c 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -378,7 +378,7 @@ Bob lives at the following address: ## Parsing the output of a block As we saw in the previous section, it is possible to use the `parser: json` setting to parse the result of a block as a JSON. -Other possible values for `parser` are `yaml`, `jsonl`, or `regex`. +Other possible values for `parser` are `yaml`, `jsonl`, `regex`, or `csv`. The following example extracts using a regular expression parser the code between triple backtick generated by a model: diff --git a/src/pdl/pdl-schema.json b/src/pdl/pdl-schema.json index b616b49b5..43e419b17 100644 --- a/src/pdl/pdl-schema.json +++ b/src/pdl/pdl-schema.json @@ -3998,7 +3998,8 @@ "enum": [ "json", "jsonl", - "yaml" + "yaml", + "csv" ], "type": "string" }, diff --git a/src/pdl/pdl_ast.py b/src/pdl/pdl_ast.py index ff03b3c32..7feae1edd 100644 --- a/src/pdl/pdl_ast.py +++ b/src/pdl/pdl_ast.py @@ -301,7 +301,7 @@ class RegexParser(Parser): ParserType = TypeAliasType( - "ParserType", Union[Literal["json", "jsonl", "yaml"], PdlParser, RegexParser] + "ParserType", Union[Literal["json", "jsonl", "yaml", "csv"], PdlParser, RegexParser] ) """Different parsers.""" OptionalParserType = TypeAliasType("OptionalParserType", Optional[ParserType]) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 6b40843e6..f90f04e77 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -1,4 +1,5 @@ # pylint: disable=import-outside-toplevel +from io import StringIO import json import re import shlex @@ -7,6 +8,7 @@ import time import traceback import types +import csv # TODO: temporarily disabling warnings to mute a pydantic warning from liteLLM import warnings @@ -2647,6 +2649,18 @@ def parse_result(parser: ParserType, text: str) -> JSONReturnType: raise PDLRuntimeParserError( f"Attempted to parse ill-formed YAML: {repr(exc)}" ) from exc + case "csv": + try: + result = [] + reader = csv.reader(StringIO(text)) + for row in reader: + result.append(row) + except KeyboardInterrupt as exc: + raise exc from exc + except Exception as exc: + raise PDLRuntimeParserError( + f"Attempted to parse ill-formed CSV: {repr(exc)}" + ) from exc case PdlParser(): assert False, "TODO" case RegexParser(mode="search" | "match" | "fullmatch"): diff --git a/tests/test_parser.py b/tests/test_parser.py index a4744c075..6539dbe7f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -172,3 +172,14 @@ def test_parser_case2(): """ result = exec_str(prog) assert result == ["1", "2", "3", "4"] + +def test_parser_csv(): + csv_parser = """ + text: | + 1,Apple,Red + 2,Orange,Orange + 3,Banana,Yellow + parser: csv + """ + result = exec_str(csv_parser) + assert result == [["1", "Apple", "Red"], ["2", "Orange", "Orange"], ["3", "Banana", "Yellow"]] \ No newline at end of file From 8e9da8ad314fb925bcd072160d46799ad0ff4bea Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Mon, 17 Nov 2025 12:51:09 -0500 Subject: [PATCH 2/2] cleanup Signed-off-by: Mandana Vaziri --- src/pdl/pdl_interpreter.py | 4 ++-- tests/test_parser.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index f90f04e77..3b8784b9e 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -1,5 +1,5 @@ # pylint: disable=import-outside-toplevel -from io import StringIO +import csv import json import re import shlex @@ -8,13 +8,13 @@ import time import traceback import types -import csv # TODO: temporarily disabling warnings to mute a pydantic warning from liteLLM import warnings from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor from functools import partial, reduce +from io import StringIO from itertools import count from os import getenv from pathlib import Path diff --git a/tests/test_parser.py b/tests/test_parser.py index 6539dbe7f..468bb3497 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -173,6 +173,7 @@ def test_parser_case2(): result = exec_str(prog) assert result == ["1", "2", "3", "4"] + def test_parser_csv(): csv_parser = """ text: | @@ -182,4 +183,8 @@ def test_parser_csv(): parser: csv """ result = exec_str(csv_parser) - assert result == [["1", "Apple", "Red"], ["2", "Orange", "Orange"], ["3", "Banana", "Yellow"]] \ No newline at end of file + assert result == [ + ["1", "Apple", "Red"], + ["2", "Orange", "Orange"], + ["3", "Banana", "Yellow"], + ]