diff --git a/docs/tutorial.md b/docs/tutorial.md
index f9bbb0987..7dd38ad4c 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -378,7 +378,7 @@ Bob lives at the following address:
 ## Parsing the output of a block
 
 As we saw in the previous section, it is possible to use the `parser: json` setting to parse the result of a block as a JSON.
-Other possible values for `parser` are `yaml`, `jsonl`, or `regex`.
+Other possible values for `parser` are `yaml`, `jsonl`, `regex`, or `csv`.
 
 The following example extracts using a regular expression parser the code between triple backtick generated by a model:
 
diff --git a/src/pdl/pdl-schema.json b/src/pdl/pdl-schema.json
index b616b49b5..43e419b17 100644
--- a/src/pdl/pdl-schema.json
+++ b/src/pdl/pdl-schema.json
@@ -3998,7 +3998,8 @@
               "enum": [
                 "json",
                 "jsonl",
-                "yaml"
+                "yaml",
+                "csv"
               ],
               "type": "string"
             },
diff --git a/src/pdl/pdl_ast.py b/src/pdl/pdl_ast.py
index ff03b3c32..7feae1edd 100644
--- a/src/pdl/pdl_ast.py
+++ b/src/pdl/pdl_ast.py
@@ -301,7 +301,7 @@ class RegexParser(Parser):
 
 
 ParserType = TypeAliasType(
-    "ParserType", Union[Literal["json", "jsonl", "yaml"], PdlParser, RegexParser]
+    "ParserType", Union[Literal["json", "jsonl", "yaml", "csv"], PdlParser, RegexParser]
 )
 """Different parsers."""
 OptionalParserType = TypeAliasType("OptionalParserType", Optional[ParserType])
diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py
index 6b40843e6..3b8784b9e 100644
--- a/src/pdl/pdl_interpreter.py
+++ b/src/pdl/pdl_interpreter.py
@@ -1,4 +1,5 @@
 # pylint: disable=import-outside-toplevel
+import csv
 import json
 import re
 import shlex
@@ -13,6 +14,7 @@
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial, reduce
+from io import StringIO
 from itertools import count
 from os import getenv
 from pathlib import Path
@@ -2647,6 +2649,17 @@ def parse_result(parser: ParserType, text: str) -> JSONReturnType:
                 raise PDLRuntimeParserError(
                     f"Attempted to parse ill-formed YAML: {repr(exc)}"
                 ) from exc
+        case "csv":
+            try:
+                # newline="" hands line-ending handling to the csv module,
+                # as the csv documentation recommends for its input streams.
+                result = list(csv.reader(StringIO(text, newline="")))
+            except KeyboardInterrupt as exc:
+                raise exc from exc
+            except Exception as exc:
+                raise PDLRuntimeParserError(
+                    f"Attempted to parse ill-formed CSV: {repr(exc)}"
+                ) from exc
         case PdlParser():
             assert False, "TODO"
         case RegexParser(mode="search" | "match" | "fullmatch"):
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a4744c075..468bb3497 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -172,3 +172,19 @@ def test_parser_case2():
     """
     result = exec_str(prog)
     assert result == ["1", "2", "3", "4"]
+
+
+def test_parser_csv():
+    csv_parser = """
+    text: |
+      1,Apple,Red
+      2,Orange,Orange
+      3,Banana,Yellow
+    parser: csv
+    """
+    result = exec_str(csv_parser)
+    assert result == [
+        ["1", "Apple", "Red"],
+        ["2", "Orange", "Orange"],
+        ["3", "Banana", "Yellow"],
+    ]