From 535ec12a329760d615565299bfb28daa33d71119 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Thu, 12 Feb 2026 00:48:57 +0100 Subject: [PATCH 1/5] Adopt MTHDS standard and clean up obsolete config files Rename .plx to .mthds and update VS Code file associations for the MTHDS standard migration. Remove legacy AI-assistant rule files (.blackboxrules, .cursor/, .windsurfrules.md, copilot-instructions, AGENTS.md, CLAUDE.md, CHANGELOG.md) and add .pipelex/ config directory. Update dependencies to pipelex 0.18.0b3. Co-Authored-By: Claude Opus 4.6 --- .blackboxrules | 1219 ----------------- .cursor/rules/commands.mdc | 8 - .cursor/rules/docs.mdc | 16 - .cursor/rules/llms.mdc | 85 -- .cursor/rules/pytest_standards.mdc | 107 -- .cursor/rules/python_standards.mdc | 176 --- .cursor/rules/run_pipelex.mdc | 231 ---- .cursor/rules/tdd.mdc | 28 - .cursor/rules/write_pipelex.mdc | 997 -------------- .github/copilot-instructions.md | 1219 ----------------- .pipelex/inference/backends.toml | 108 ++ .pipelex/inference/backends/anthropic.toml | 111 ++ .pipelex/inference/backends/azure_openai.toml | 227 +++ .pipelex/inference/backends/bedrock.toml | 138 ++ .pipelex/inference/backends/blackboxai.toml | 165 +++ .pipelex/inference/backends/fal.toml | 108 ++ .pipelex/inference/backends/google.toml | 91 ++ .pipelex/inference/backends/groq.toml | 130 ++ .pipelex/inference/backends/huggingface.toml | 44 + .pipelex/inference/backends/internal.toml | 44 + .pipelex/inference/backends/mistral.toml | 219 +++ .pipelex/inference/backends/ollama.toml | 64 + .pipelex/inference/backends/openai.toml | 224 +++ .../inference/backends/pipelex_gateway.toml | 41 + .../inference/backends/pipelex_inference.toml | 200 +++ .pipelex/inference/backends/portkey.toml | 307 +++++ .pipelex/inference/backends/scaleway.toml | 68 + .pipelex/inference/backends/vertexai.toml | 47 + .pipelex/inference/backends/xai.toml | 57 + .pipelex/inference/deck/1_llm_deck.toml | 87 ++ .pipelex/inference/deck/2_img_gen_deck.toml | 53 
+ .pipelex/inference/deck/3_extract_deck.toml | 42 + .pipelex/inference/routing_profiles.toml | 149 ++ .pipelex/pipelex.toml | 192 +++ .pipelex/pipelex_service.toml | 19 + .pipelex/telemetry.toml | 92 ++ .vscode/settings.json | 2 +- .windsurfrules.md | 1219 ----------------- AGENTS.md | 1219 ----------------- CHANGELOG.md | 104 -- CLAUDE.md | 1219 ----------------- Makefile | 2 +- .../{hello_world.plx => hello_world.mthds} | 0 pyproject.toml | 3 +- uv.lock | 75 +- 45 files changed, 3103 insertions(+), 7853 deletions(-) delete mode 100644 .blackboxrules delete mode 100644 .cursor/rules/commands.mdc delete mode 100644 .cursor/rules/docs.mdc delete mode 100644 .cursor/rules/llms.mdc delete mode 100644 .cursor/rules/pytest_standards.mdc delete mode 100644 .cursor/rules/python_standards.mdc delete mode 100644 .cursor/rules/run_pipelex.mdc delete mode 100644 .cursor/rules/tdd.mdc delete mode 100644 .cursor/rules/write_pipelex.mdc delete mode 100644 .github/copilot-instructions.md create mode 100644 .pipelex/inference/backends.toml create mode 100644 .pipelex/inference/backends/anthropic.toml create mode 100644 .pipelex/inference/backends/azure_openai.toml create mode 100644 .pipelex/inference/backends/bedrock.toml create mode 100644 .pipelex/inference/backends/blackboxai.toml create mode 100644 .pipelex/inference/backends/fal.toml create mode 100644 .pipelex/inference/backends/google.toml create mode 100644 .pipelex/inference/backends/groq.toml create mode 100644 .pipelex/inference/backends/huggingface.toml create mode 100644 .pipelex/inference/backends/internal.toml create mode 100644 .pipelex/inference/backends/mistral.toml create mode 100644 .pipelex/inference/backends/ollama.toml create mode 100644 .pipelex/inference/backends/openai.toml create mode 100644 .pipelex/inference/backends/pipelex_gateway.toml create mode 100644 .pipelex/inference/backends/pipelex_inference.toml create mode 100644 .pipelex/inference/backends/portkey.toml create mode 100644 
.pipelex/inference/backends/scaleway.toml create mode 100644 .pipelex/inference/backends/vertexai.toml create mode 100644 .pipelex/inference/backends/xai.toml create mode 100644 .pipelex/inference/deck/1_llm_deck.toml create mode 100644 .pipelex/inference/deck/2_img_gen_deck.toml create mode 100644 .pipelex/inference/deck/3_extract_deck.toml create mode 100644 .pipelex/inference/routing_profiles.toml create mode 100644 .pipelex/pipelex.toml create mode 100644 .pipelex/pipelex_service.toml create mode 100644 .pipelex/telemetry.toml delete mode 100644 .windsurfrules.md delete mode 100644 AGENTS.md delete mode 100644 CHANGELOG.md delete mode 100644 CLAUDE.md rename my_project/{hello_world.plx => hello_world.mthds} (100%) diff --git a/.blackboxrules b/.blackboxrules deleted file mode 100644 index af4572e..0000000 --- a/.blackboxrules +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. 
Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
-So if you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates an optional text field, since `required` defaults to false): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/commands.mdc b/.cursor/rules/commands.mdc deleted file mode 100644 index 6c017d1..0000000 --- a/.cursor/rules/commands.mdc +++ /dev/null @@ -1,8 +0,0 @@ ---- -alwaysApply: true -description: Guidelines for running commands ---- -# Commands - - - When you want to run commands such as `python`, `pytest` or any of our CLI such as `pipelex` or `cocode`, ALWAYS use the current obvious virtual env. If the installation is standard, the venv is named `.venv` so always check that first. - \ No newline at end of file diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc deleted file mode 100644 index 1a16650..0000000 --- a/.cursor/rules/docs.mdc +++ /dev/null @@ -1,16 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing documentation -globs: -- docs/**/*.md ---- -# Writing Docs - -Write docs and answer questions about writing docs. - -We use Material for MkDocs. All markdown in our docs must be compatible with Material for MkDocs and done using best practices to get the best results with Material for MkDocs. - -## MkDocs Markdown Requirements - -- Always add a blank line before any bullet lists or numbered lists in MkDocs markdown. - diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc deleted file mode 100644 index 2035388..0000000 --- a/.cursor/rules/llms.mdc +++ /dev/null @@ -1,85 +0,0 @@ ---- -alwaysApply: false -description: LLM configuration and usage guidelines -globs: -- '*.plx' -- '*.toml' ---- -# Rules to choose LLM models used in PipeLLMs. - -## LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. 
-LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -## LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. **An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -## Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -## LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. 
- -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-4.5-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc deleted file mode 100644 index b1c2f11..0000000 --- a/.cursor/rules/pytest_standards.mdc +++ /dev/null @@ -1,107 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing unit tests -globs: -- tests/**/*.py ---- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
- -### Test file structure - -- Name test files with `test_` prefix -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put tests inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. 
- -### Test Class Structure - -- Always group the tests of a module into a test class: - -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic, test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` - -- Never more than 1 class per test file. -- When testing one method, if possible, limit the number of test functions, but with different test cases in parameters -- Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. - -### Test Data Organization - -- If it's not already there, create a `test_data.py` file in the proper test directory -- Define test cases using `StuffBlueprint`: - -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) - - CASE_BLUEPRINTS: ClassVar[list[tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] -``` - -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. - -## Best Practices for Testing - -- Whenever possible, use strong asserts to test value, not just type and presence. 
-- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose but not on top of the file -- Log outputs for debugging -- Generate reports for cost tracking diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc deleted file mode 100644 index 2e2e0a9..0000000 --- a/.cursor/rules/python_standards.mdc +++ /dev/null @@ -1,176 +0,0 @@ ---- -alwaysApply: false -description: Python coding standards and best practices -globs: -- '**/*.py' ---- -# Coding Standards & Best Practices for Python Code - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Variables, loops and indexes - - - Variable names should have a minimum length of 3 characters. No exceptions: name your `for` loop indexes like `index_foobar`, your exceptions `exc` or more specific like `validation_error` when there are several layers of exceptions, and use `for key, value in ...` for key/value pairs. - - When looping on the keys of a dict, use `for key in the_dict` rather than `for key in the_dict.keys()` otherwise you won't pass linting. - - Avoid inline for loops, unless it's ultra-simple and fits on one line. - - If you have a variable that will get its value differently through different code paths, declare it first with a type, e.g. `pipe_code: str` but DO NOT give it a default value like `pipe_code: str = ""` unless it's really justified. We want the variable to be unbound until all paths are covered, and the linters will help us avoid bugs this way. - -## Enums and tests - - - When defining enums related to string values, always inherit from `StrEnum` - - Never test equality to an enum value: use match/case, even to single out 1 case out of 10 cases. 
To avoid heavy match/case code in awkward places, add methods to the enum class such as `is_foobar()`. This is to avoid bugs: when new enum values are added we want the linter to complain. Use the `|` operator to group cases - - As our match/case constructs over enums are always exhaustive, NEVER add a default `case _: ...`. Otherwise, you won't pass linting. - -## Imports - -### **Imports at the top of the file** - - - Import all necessary libraries at the top of the file - - Do not import libraries in functions or classes unless in very specific cases, to be discussed with the user, as they would require a `# noqa: ...` comment to pass linting - - Do not bother with ordering the imports, our Ruff linter will handle it for us. Same goes with removing unused imports. - -- **Logging and Pretty Printing**: - - - Both `log()` and `pretty_print()` can be imported from `pipelex` directly: - ```python - from pipelex import log, pretty_print - - log.info("Hello, world!") - ``` - - Both have a title arg which is handy when logging/printing objects: - - ```python - log.verbose("Hello, world!", title="Your first Pipelex log") - pretty_print(output_object, title="Your first Pipelex output") - ``` - - Both handle formatting json using Rich, pretty_print makes it prettier. - -- **StrEnum and Self type**: - - - Both `StrEnum` and `Self` must be imported from `pipelex.types` (handles python retrocompatibility): - ```python - from pipelex.types import Self, StrEnum - ``` - -## Typing - -### **Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use Field(default_factory=...) for mutable defaults - - Use `# pyright: ignore[specificError]` or `# type: ignore` only as a last resort. 
In particular, if you are sure about the type, you often solve issues by using cast() or creating a new typed variable. - -### **BaseModel / Pydantic Standards** - - - Use `BaseModel` and respect Pydantic v2 standards - - Use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - - For list fields with non-string items in BaseModels, use `empty_list_factory_of()` to avoid linter complaints: - ```python - from pydantic import BaseModel, Field - from pipelex.tools.typing.pydantic_utils import empty_list_factory_of - - class MyModel(BaseModel): - names: list[str] = Field(default_factory=list) # OK for strings - numbers: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers") - items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. - - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - NEVER catch the generic Exception, only catch specific exceptions, except at the root of CLI commands - - NEVER raise generic exceptions like ValueError or TypeError, create new error classes and raise them instead - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. 
**Docstring Format** - ```python - def process_image(image_path: str, size: tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. - """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. - -## Pipelines - -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. 
- -## Project Structure - -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc deleted file mode 100644 index 7650051..0000000 --- a/.cursor/rules/run_pipelex.mdc +++ /dev/null @@ -1,231 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for running Pipelex pipelines -globs: -- examples/**/*.py ---- -# Guide to execute a pipeline and write example code - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a document. -# Because DocumentContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. 
-# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# it's actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... 
- - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc deleted file mode 100644 index 4b4f058..0000000 --- a/.cursor/rules/tdd.mdc +++ /dev/null @@ -1,28 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing test-driven development code ---- -# Test-Driven Development Guide - -This document outlines our test-driven development (TDD) process and the tools available for testing. - -## TDD Cycle - -1. **Write a Test First** -[pytest.mdc](pytest.mdc) - -2. **Write the Code** - - Implement the minimum amount of code needed to pass the test - - Follow the project's coding standards - - Keep it simple - don't write more than needed - -3. **Run Linting and Type Checking** -[coding_standards.mdc](coding_standards.mdc) - -4. **Refactor if needed** -If the code needs refactoring, with the best practices [coding_standards.mdc](coding_standards.mdc) - -5. 
**Validate tests** - -Remember: The key to TDD is writing the test first and letting it drive your implementation. Always run the full test suite and quality checks before considering a feature complete. - diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc deleted file mode 100644 index 93422cc..0000000 --- a/.cursor/rules/write_pipelex.mdc +++ /dev/null @@ -1,997 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing Pipelex pipelines -globs: -- '**/*.plx' -- '**/pipelines/**/*.py' ---- -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -## Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -### Pipe Definitions - -## Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the corresponding ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So if you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. 
- -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -# Single item (default) -inputs = { document = "Text" } - -# Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -# Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -## Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates an optional text field, since `required` defaults to false): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -## Pipe Controllers and Pipe Operators - -Look at the pipes available below and adapt them to your needs. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - For choosing the next pipe based on an expression evaluated on stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (includes Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown document. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` holds the text and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole PDF page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -#### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -#### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -#### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -#### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index af4572e..0000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. 
Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
-So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.pipelex/inference/backends.toml b/.pipelex/inference/backends.toml new file mode 100644 index 0000000..8ffc6c6 --- /dev/null +++ b/.pipelex/inference/backends.toml @@ -0,0 +1,108 @@ +#################################################################################################### +# Pipelex Inference Backends Configuration +#################################################################################################### +# +# This file configures the inference backends available to Pipelex. +# Each backend connects to a different AI service provider (OpenAI, Anthropic, Google, etc.). +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +[pipelex_gateway] +display_name = "⭐ Pipelex Gateway" +enabled = true # Enable after accepting terms via `pipelex init config` +api_key = "${PIPELEX_GATEWAY_API_KEY}" + +[anthropic] +enabled = false +api_key = "${ANTHROPIC_API_KEY}" + +[azure_openai] +display_name = "Azure OpenAI" +enabled = false +endpoint = "${AZURE_API_BASE}" +api_key = "${AZURE_API_KEY}" +api_version = "${AZURE_API_VERSION}" + +[bedrock] +display_name = "Amazon Bedrock" +enabled = false +aws_region = "${AWS_REGION}" + +[blackboxai] +display_name = "BlackBox AI" +enabled = false +endpoint = "https://api.blackbox.ai/v1" +api_key = "${BLACKBOX_API_KEY}" + +[fal] +display_name = "FAL" +enabled = false +api_key = "${FAL_API_KEY}" + +[google] +display_name = "Google AI" +enabled = false +api_key = "${GOOGLE_API_KEY}" + +[groq] +display_name = "Groq" +enabled = false +endpoint = "https://api.groq.com/openai/v1" +api_key = "${GROQ_API_KEY}" + 
+[huggingface] +display_name = "Hugging Face" +enabled = false +api_key = "${HF_TOKEN}" + +[mistral] +display_name = "Mistral AI" +enabled = false +api_key = "${MISTRAL_API_KEY}" + +[ollama] +enabled = false +endpoint = "http://localhost:11434/v1" + +[openai] +display_name = "OpenAI" +enabled = false +api_key = "${OPENAI_API_KEY}" + +[portkey] +display_name = "Portkey" +enabled = false +endpoint = "https://api.portkey.ai/v1" +api_key = "${PORTKEY_API_KEY}" + +[scaleway] +display_name = "Scaleway" +enabled = false +endpoint = "${SCALEWAY_ENDPOINT}" +api_key = "${SCALEWAY_API_KEY}" + +[vertexai] +display_name = "Google Vertex AI" +enabled = false # Keep disabled unless needed: setting it up requires internet access just to get credentials, so it fails in CI sandboxes +gcp_project_id = "${GCP_PROJECT_ID}" +gcp_location = "${GCP_LOCATION}" +gcp_credentials_file_path = "${GCP_CREDENTIALS_FILE_PATH}" + +[xai] +display_name = "xAI" +enabled = false +endpoint = "https://api.x.ai/v1" +api_key = "${XAI_API_KEY}" + +[internal] # software-only backend, runs internally, without AI +enabled = true + +# Deprecated +[pipelex_inference] +display_name = "🛑 Legacy Pipelex Inference" +enabled = false +endpoint = "https://inference.pipelex.com/v1" +api_key = "${PIPELEX_INFERENCE_API_KEY}" diff --git a/.pipelex/inference/backends/anthropic.toml b/.pipelex/inference/backends/anthropic.toml new file mode 100644 index 0000000..145ba19 --- /dev/null +++ b/.pipelex/inference/backends/anthropic.toml @@ -0,0 +1,111 @@ +################################################################################ +# Anthropic Backend Configuration +################################################################################ +# +# This file defines the model specifications for Anthropic Claude models. +# It contains model definitions for various Claude language models +# accessible through the Anthropic API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["claude-3.5-sonnet"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "anthropic" +prompting_target = "anthropic" +structure_method = "instructor/anthropic_tools" +thinking_mode = "manual" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Claude 3 Series ---------------------------------------------------------- +[claude-3-haiku] +model_id = "claude-3-haiku-20240307" +max_tokens = 4096 +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 0.25, output = 1.25 } +thinking_mode = "none" + +# --- Claude 3.7 Series -------------------------------------------------------- +["claude-3.7-sonnet"] +model_id = "claude-3-7-sonnet-20250219" +max_tokens = 8192 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } + +# --- Claude 4 Series ---------------------------------------------------------- +[claude-4-sonnet] +model_id = "claude-sonnet-4-20250514" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } + +[claude-4-opus] +model_id = "claude-opus-4-20250514" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", 
"structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } + +# --- Claude 4.1 Series -------------------------------------------------------- +["claude-4.1-opus"] +model_id = "claude-opus-4-1-20250805" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } + +# --- Claude 4.5 Series -------------------------------------------------------- +["claude-4.5-sonnet"] +model_id = "claude-sonnet-4-5-20250929" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } + +["claude-4.5-haiku"] +model_id = "claude-haiku-4-5-20251001" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 1.0, output = 5.0 } + +["claude-4.5-opus"] +model_id = "claude-opus-4-5-20251101" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } + +["claude-4.6-opus"] +model_id = "claude-opus-4-6" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/azure_openai.toml b/.pipelex/inference/backends/azure_openai.toml new file mode 100644 index 0000000..c3020da --- /dev/null +++ b/.pipelex/inference/backends/azure_openai.toml @@ -0,0 +1,227 @@ +################################################################################ +# Azure OpenAI Backend Configuration +################################################################################ +# +# This file defines the model specifications for Azure OpenAI models. +# It contains model definitions for OpenAI models deployed on Azure +# accessible through the Azure OpenAI API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "azure_openai_responses" +prompting_target = "openai" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- GPT-4o Series ------------------------------------------------------------ +[gpt-4o] +model_id = "gpt-4o-2024-11-20" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2.5, output = 10.0 } + +[gpt-4o-mini] +model_id = "gpt-4o-mini-2024-07-18" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.6 } + +# --- GPT-4.1 Series ----------------------------------------------------------- +["gpt-4.1"] +model_id = "gpt-4.1-2025-04-14" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } + +["gpt-4.1-mini"] +model_id = "gpt-4.1-mini-2025-04-14" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 1.6 } + +["gpt-4.1-nano"] +model_id = "gpt-4.1-nano-2025-04-14" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.4 } + +# --- o Series ---------------------------------------------------------------- +[o1-mini] +model_id = 
"o1-mini-2024-09-12" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 3.0, output = 12.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o1] +model_id = "o1-2024-12-17" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 15.0, output = 60.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o3-mini] +model_id = "o3-mini-2025-01-31" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.1, output = 4.4 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o3] +model_id = "o3-2025-04-16" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5 Series ------------------------------------------------------------- +[gpt-5-mini] +model_id = "gpt-5-mini-2025-08-07" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.25, output = 2.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-nano] +model_id = "gpt-5-nano-2025-08-07" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.05, output = 0.4 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-chat] +model_id = "gpt-5-chat-2025-08-07" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5] +model_id = "gpt-5-2025-08-07" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5.1 Series ------------------------------------------------------------- +["gpt-5.1"] +model_id = "gpt-5.1-2025-11-13" +inputs = ["text", "images"] +outputs = ["text", "structured"] 
+costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +["gpt-5.1-chat"] +model_id = "gpt-5.1-chat-2025-11-13" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +["gpt-5.1-codex"] +model_id = "gpt-5.1-codex-2025-11-13" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5.2 Series ------------------------------------------------------------- +["gpt-5.2"] +model_id = "gpt-5.2-2025-12-11" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.75, output = 14.0 } +thinking_mode = "manual" + +["gpt-5.2-chat"] +model_id = "gpt-5.2-chat-2025-12-11" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +################################################################################ +# IMAGE GENERATION MODELS +################################################################################ + +# --- OpenAI Image Generation -------------------------------------------------- +[gpt-image-1] +sdk = "azure_rest_img_gen" +model_type = "img_gen" +model_id = "gpt-image-1-2025-04-15" +inputs = ["text"] +outputs = ["image"] +costs = { input = 10, output = 40 } + +[gpt-image-1.rules] +prompt = "positive_only" +num_images = "gpt" +aspect_ratio = "gpt" +background = "gpt" +inference = "gpt" +safety_checker = "unavailable" +output_format = "gpt" + +[gpt-image-1-mini] +sdk = "azure_rest_img_gen" +model_type = "img_gen" +model_id = "gpt-image-1-mini-2025-10-06" +inputs = ["text"] +outputs = ["image"] +costs = { input = 2.5, output = 8 } + +[gpt-image-1-mini.rules] +prompt = "positive_only" +num_images = "gpt" 
+aspect_ratio = "gpt" +background = "gpt" +inference = "gpt" +safety_checker = "unavailable" +output_format = "gpt" + +["gpt-image-1.5"] +sdk = "azure_rest_img_gen" +model_type = "img_gen" +model_id = "gpt-image-1.5-2025-12-16" +inputs = ["text"] +outputs = ["image"] +costs = { input = 8, output = 32 } + +["gpt-image-1.5".rules] +prompt = "positive_only" +num_images = "gpt" +aspect_ratio = "gpt" +background = "gpt" +inference = "gpt" +safety_checker = "unavailable" +output_format = "gpt" diff --git a/.pipelex/inference/backends/bedrock.toml b/.pipelex/inference/backends/bedrock.toml new file mode 100644 index 0000000..d2ecd23 --- /dev/null +++ b/.pipelex/inference/backends/bedrock.toml @@ -0,0 +1,138 @@ +################################################################################ +# Amazon Bedrock Backend Configuration +################################################################################ +# +# This file defines the model specifications for Amazon Bedrock models. +# It contains model definitions for various language models +# accessible through the Amazon Bedrock service. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["claude-3.5-sonnet"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "bedrock_aioboto3" +prompting_target = "anthropic" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Mistral Models ----------------------------------------------------------- +[bedrock-mistral-large] +model_id = "mistral.mistral-large-2407-v1:0" +max_tokens = 8192 +inputs = ["text"] +outputs = ["text"] +costs = { input = 4.0, output = 12.0 } + +# --- Meta Llama Models -------------------------------------------------------- +[bedrock-meta-llama-3-3-70b-instruct] +model_id = "us.meta.llama3-3-70b-instruct-v1:0" +max_tokens = 8192 +inputs = ["text"] +outputs = ["text"] +# TODO: find out the actual cost per million tokens for llama3 on bedrock +costs = { input = 3.0, output = 15.0 } + +# --- Amazon Nova Models ------------------------------------------------------- +[bedrock-nova-pro] +model_id = "us.amazon.nova-pro-v1:0" +max_tokens = 5120 +inputs = ["text"] +outputs = ["text"] +# TODO: find out the actual cost per million tokens for nova on bedrock +costs = { input = 3.0, output = 15.0 } + +# --- Claude LLMs -------------------------------------------------------------- +["claude-3.7-sonnet"] +sdk = "bedrock_anthropic" +model_id = 
"us.anthropic.claude-3-7-sonnet-20250219-v1:0" +max_tokens = 8192 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" + +[claude-4-sonnet] +sdk = "bedrock_anthropic" +model_id = "us.anthropic.claude-sonnet-4-20250514-v1:0" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" + +[claude-4-opus] +sdk = "bedrock_anthropic" +model_id = "us.anthropic.claude-opus-4-20250514-v1:0" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" + +["claude-4.1-opus"] +sdk = "bedrock_anthropic" +model_id = "us.anthropic.claude-opus-4-1-20250805-v1:0" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" + +["claude-4.5-sonnet"] +sdk = "bedrock_anthropic" +model_id = "us.anthropic.claude-sonnet-4-5-20250929-v1:0" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" + +["claude-4.5-haiku"] +sdk = "bedrock_anthropic" +model_id = "us.anthropic.claude-haiku-4-5-20251001-v1:0" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 1.0, output = 5.0 } +thinking_mode = "manual" + +["claude-4.5-opus"] +sdk = "bedrock_anthropic" +model_id = "global.anthropic.claude-opus-4-5-20251101-v1:0" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "manual" + +["claude-4.6-opus"] +sdk = 
"bedrock_anthropic" +model_id = "global.anthropic.claude-opus-4-6-v1" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/blackboxai.toml b/.pipelex/inference/backends/blackboxai.toml new file mode 100644 index 0000000..dc1642d --- /dev/null +++ b/.pipelex/inference/backends/blackboxai.toml @@ -0,0 +1,165 @@ +################################################################################ +# BlackBoxAI Backend Configuration +################################################################################ +# +# This file defines the model specifications for BlackBoxAI models. +# It contains model definitions for various language models from different providers +# accessible through the BlackBoxAI API. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gpt-5.2"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +structure_method = "instructor/openai_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Anthropic Claude Models -------------------------------------------------- +["claude-4.5-sonnet"] +model_id = "blackboxai/anthropic/claude-sonnet-4.5" +inputs = ["text", "images"] +outputs = ["text", "structured"] 
+costs = { input = 3.00, output = 15.00 } +thinking_mode = "manual" + +["claude-4.5-haiku"] +model_id = "blackboxai/anthropic/claude-haiku-4.5" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.00, output = 5.00 } +thinking_mode = "manual" + +[claude-4-sonnet] +model_id = "blackboxai/anthropic/claude-sonnet-4" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 3.00, output = 15.00 } +thinking_mode = "manual" + +["claude-4.5-opus"] +model_id = "blackboxai/anthropic/claude-opus-4.5" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 5.00, output = 25.00 } +thinking_mode = "manual" + +["claude-4.6-opus"] +model_id = "blackboxai/anthropic/claude-opus-4.6" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 5.00, output = 25.00 } +thinking_mode = "adaptive" + +# --- OpenAI GPT-5 Models ------------------------------------------------------ +[gpt-5-mini] +model_id = "blackboxai/openai/gpt-5-mini" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.25, output = 2.00 } + +[gpt-5] +model_id = "blackboxai/openai/gpt-5" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } + +[gpt-5-pro] +model_id = "blackboxai/openai/gpt-5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 15.00, output = 120.00 } + +[gpt-5-codex] +model_id = "blackboxai/openai/gpt-5-codex" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } + +["gpt-5.1"] +model_id = "blackboxai/openai/gpt-5.1" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } + +["gpt-5.1-codex"] +model_id = "blackboxai/openai/gpt-5.1-codex" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } + +["gpt-5.2"] +model_id = "blackboxai/openai/gpt-5.2" +inputs = 
["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.75, output = 14.00 } + +["gpt-5.2-pro"] +model_id = "blackboxai/openai/gpt-5.2-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2.00, output = 16.00 } + +# --- Google Gemini Models ----------------------------------------------------- +["gemini-2.5-pro"] +model_id = "blackboxai/google/gemini-2.5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } +thinking_mode = "none" + +["gemini-2.5-flash"] +model_id = "blackboxai/google/gemini-2.5-flash" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.30, output = 2.50 } +thinking_mode = "none" + +# --- MiniMax Models ----------------------------------------------------------- +[minimax-m2] +model_id = "blackboxai/minimax/minimax-m2" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.27, output = 1.15 } + +# --- Qwen Models -------------------------------------------------------------- +[qwen3-max] +model_id = "blackboxai/qwen/qwen3-max" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 2.00, output = 10.00 } + +[qwen3-coder] +model_id = "blackboxai/qwen/qwen3-coder" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.50, output = 8.00 } + +# --- Qwen VL Models ------------------------------------------------- +[qwen3-vl-235b-a22b] +model_id = "blackboxai/qwen3-vl-235b-a22b" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 3.00, output = 15.00 } + +[qwen3-vl-32b] +model_id = "blackboxai/qwen3-vl-32b" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.00, output = 5.00 } +structure_method = "instructor/openrouter_structured_outputs" diff --git a/.pipelex/inference/backends/fal.toml b/.pipelex/inference/backends/fal.toml new file mode 100644 index 0000000..d79251e --- /dev/null +++ 
b/.pipelex/inference/backends/fal.toml @@ -0,0 +1,108 @@ +################################################################################ +# FAL Backend Configuration +################################################################################ +# +# This file defines the model specifications for FAL (Fast AI Labs) models. +# It contains model definitions for various image generation models +# accessible through the FAL API. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["flux-pro/v1.1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "img_gen" +sdk = "fal" +prompting_target = "fal" +thinking_mode = "none" + +################################################################################ +# IMAGE GENERATION MODELS +################################################################################ + +# --- Flux Pro Series ---------------------------------------------------------- +[flux-pro] +model_id = "fal-ai/flux-pro" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.05, output = 0.0 } + +[flux-pro.rules] +prompt = "positive_only" +num_images = "fal" +aspect_ratio = "flux" +inference = "flux" +safety_checker = "available" +output_format = "flux_1" +specific = "fal" + +["flux-pro/v1.1"] +model_id = "fal-ai/flux-pro/v1.1" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.05, output = 0.0 } + +["flux-pro/v1.1".rules] +prompt = "positive_only" +num_images = "fal" +aspect_ratio = "flux" +inference = "flux" +safety_checker = "available" 
+output_format = "flux_1" +specific = "fal" + +["flux-pro/v1.1-ultra"] +model_id = "fal-ai/flux-pro/v1.1-ultra" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.06, output = 0.0 } + +["flux-pro/v1.1-ultra".rules] +prompt = "positive_only" +num_images = "fal" +aspect_ratio = "flux_11_ultra" +inference = "flux_11_ultra" +safety_checker = "available" +output_format = "flux_1" +specific = "fal" + +[flux-2] +model_id = "fal-ai/flux-2" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.05, output = 0.0 } + +[flux-2.rules] +prompt = "positive_only" +num_images = "fal" +aspect_ratio = "flux" +inference = "flux" +safety_checker = "available" +output_format = "flux_2" +specific = "fal" + +# --- SDXL models -------------------------------------------------------------- +[fast-lightning-sdxl] +model_id = "fal-ai/fast-lightning-sdxl" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.0003, output = 0.0 } + +[fast-lightning-sdxl.rules] +prompt = "positive_only" +num_images = "fal" +aspect_ratio = "flux" +inference = "sdxl_lightning" +safety_checker = "unavailable" +output_format = "sdxl" +specific = "fal" diff --git a/.pipelex/inference/backends/google.toml b/.pipelex/inference/backends/google.toml new file mode 100644 index 0000000..fdc63d5 --- /dev/null +++ b/.pipelex/inference/backends/google.toml @@ -0,0 +1,91 @@ +################################################################################ +# Google Gemini API Backend Configuration +################################################################################ +# +# This file defines the model specifications for Google Gemini API models. +# It contains model definitions for Gemini language models +# accessible through the Google Gemini API (not VertexAI). 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gemini-3.0-pro"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "google" +prompting_target = "gemini" +structure_method = "instructor/genai_tools" +thinking_mode = "manual" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Gemini 2.5 Series ---------------------------------------- +["gemini-2.5-pro"] +model_id = "gemini-2.5-pro" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } + +["gemini-2.5-flash"] +model_id = "gemini-2.5-flash" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 0.30, output = 2.50 } + +["gemini-2.5-flash-lite"] +model_id = "gemini-2.5-flash-lite" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 0.10, output = 0.40 } + +# --- Gemini 3.0 Series ---------------------------------------- +["gemini-3.0-pro"] +model_id = "gemini-3-pro-preview" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 2, output = 12.0 } +thinking_mode = "adaptive" + +["gemini-3.0-flash-preview"] +model_id = "gemini-3-flash-preview" +inputs = ["text", "images", "pdf"] +outputs = ["text", 
"structured"] +max_prompt_images = 3000 +costs = { input = 0.5, output = 3.0 } +thinking_mode = "adaptive" + +################################################################################ +# IMAGE GENERATION MODELS (Nano Banana) +################################################################################ + +[nano-banana] +model_type = "img_gen" +model_id = "gemini-2.5-flash-image" +inputs = ["text"] +outputs = ["image"] +thinking_mode = "none" +costs = { input = 0.0, output = 0.039 } + +[nano-banana-pro] +model_type = "img_gen" +model_id = "gemini-3-pro-image-preview" +inputs = ["text"] +outputs = ["image"] +thinking_mode = "none" +costs = { input = 0.0, output = 0.039 } diff --git a/.pipelex/inference/backends/groq.toml b/.pipelex/inference/backends/groq.toml new file mode 100644 index 0000000..cb48094 --- /dev/null +++ b/.pipelex/inference/backends/groq.toml @@ -0,0 +1,130 @@ +################################################################################ +# Groq Backend Configuration +################################################################################ +# +# This file defines the model specifications for Groq models. +# It contains model definitions for various LLM models accessible through +# the Groq API, including text-only and vision-capable models. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots or slashes must be quoted (e.g., ["meta-llama/llama-4-scout"]) +# - Model costs are in USD per million tokens (input/output) +# - Vision models support max 5 images per request, 33MP max resolution +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +structure_method = "instructor/json" +thinking_mode = "none" + +################################################################################ +# PRODUCTION TEXT MODELS +################################################################################ + +# --- Meta Llama 3.x Series ---------------------------------------------------- +["llama-3.1-8b-instant"] +model_id = "llama-3.1-8b-instant" +max_tokens = 131072 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.05, output = 0.08 } + +["llama-3.3-70b-versatile"] +model_id = "llama-3.3-70b-versatile" +max_tokens = 32768 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.59, output = 0.79 } + +# --- Meta Llama Guard --------------------------------------------------------- +[llama-guard-4-12b] +model_id = "meta-llama/llama-guard-4-12b" +max_tokens = 1024 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.20, output = 0.20 } + +# --- OpenAI GPT-OSS Models ---------------------------------------------------- +[gpt-oss-20b] +model_id = "openai/gpt-oss-20b" +max_tokens = 65536 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.075, output = 0.30 } + +[gpt-oss-120b] +model_id = "openai/gpt-oss-120b" 
+max_tokens = 65536 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.60 } + +# --- Groq Compound Systems ---------------------------------------------------- +["groq/compound"] +model_id = "groq/compound" +max_tokens = 8192 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.45 } + +["groq/compound-mini"] +model_id = "groq/compound-mini" +max_tokens = 8192 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.10, output = 0.30 } + +################################################################################ +# PREVIEW MODELS +################################################################################ + +# --- Meta Llama 4 Vision Models (Preview) ------------------------------------- +[llama-4-scout-17b-16e-instruct] +model_id = "meta-llama/llama-4-scout-17b-16e-instruct" +max_tokens = 8192 +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 5 +costs = { input = 0.11, output = 0.34 } + +[llama-4-maverick-17b-128e-instruct] +model_id = "meta-llama/llama-4-maverick-17b-128e-instruct" +max_tokens = 8192 +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 5 +costs = { input = 0.20, output = 0.60 } + +# --- Moonshot Kimi K2 --------------------------------------------------------- +[kimi-k2-instruct-0905] +model_id = "moonshotai/kimi-k2-instruct-0905" +max_tokens = 16384 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.00, output = 3.00 } + +# --- OpenAI Safety Model ------------------------------------------------------ +[gpt-oss-safeguard-20b] +model_id = "openai/gpt-oss-safeguard-20b" +max_tokens = 65536 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.075, output = 0.30 } + +# --- Qwen 3 ------------------------------------------------------------------- +[qwen3-32b] +model_id = "qwen/qwen3-32b" +max_tokens = 40960 +inputs = ["text"] +outputs = 
["text", "structured"] +costs = { input = 0.29, output = 0.59 } diff --git a/.pipelex/inference/backends/huggingface.toml b/.pipelex/inference/backends/huggingface.toml new file mode 100644 index 0000000..2fd0faf --- /dev/null +++ b/.pipelex/inference/backends/huggingface.toml @@ -0,0 +1,44 @@ +################################################################################ +# Hugging Face Backend Configuration +################################################################################ +# +# This file defines the model specifications for Hugging Face models. +# It contains model definitions for various image generation models +# accessible through the Hugging Face Inference API with provider="auto". +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots or slashes must be quoted (e.g., ["stabilityai/stable-diffusion-2-1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "img_gen" +sdk = "huggingface_img_gen" +thinking_mode = "none" + +################################################################################ +# IMAGE GENERATION MODELS +################################################################################ + +# --- Qwen Image Models -------------------------------------------------- +[qwen-image] +model_id = "Qwen/Qwen-Image" +inputs = ["text"] +outputs = ["image"] +costs = { input = 0.0, output = 0.0 } +variant = "fal-ai" +# variant = "replicate" + +[qwen-image.rules] +prompt = "with_negative" +aspect_ratio = "qwen_image" +inference = "qwen_image" diff --git 
a/.pipelex/inference/backends/internal.toml b/.pipelex/inference/backends/internal.toml new file mode 100644 index 0000000..8fcc38d --- /dev/null +++ b/.pipelex/inference/backends/internal.toml @@ -0,0 +1,44 @@ +################################################################################ +# Internal Backend Configuration +################################################################################ +# +# This file defines the model specifications for internal software-only models. +# These models run internally without external APIs or AI services. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +thinking_mode = "none" + +################################################################################ +# TEXT EXTRACTION MODELS +################################################################################ + +# --- PyPDFium2 Text Extractor ------------------------------------------------- +[pypdfium2-extract-pdf] +model_type = "text_extractor" +sdk = "pypdfium2" +model_id = "extract-text" +inputs = ["pdf"] +outputs = ["pages"] +costs = {} + +# --- Docling Text Extractor --------------------------------------------------- +[docling-extract-text] +model_type = "text_extractor" +sdk = "docling_sdk" +model_id = "extract-text" +inputs = ["pdf", "image"] +outputs = ["pages"] +costs = {} diff --git a/.pipelex/inference/backends/mistral.toml b/.pipelex/inference/backends/mistral.toml new file mode 100644 index 0000000..7ca2615 --- /dev/null +++ 
b/.pipelex/inference/backends/mistral.toml @@ -0,0 +1,219 @@ +################################################################################ +# Mistral Backend Configuration +################################################################################ +# +# This file defines the model specifications for Mistral AI models. +# It contains model definitions for various Mistral language models and specialized models +# accessible through the Mistral API. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["mistral-small-3.2"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "mistral" +prompting_target = "mistral" +structure_method = "instructor/mistral_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Ministral Series --------------------------------------------------------- +[ministral-3b] +model_id = "ministral-3b-latest" +max_tokens = 131072 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.04, output = 0.04 } + +[ministral-8b] +model_id = "ministral-8b-latest" +max_tokens = 131072 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.1 } + +# --- Mistral 7B Series -------------------------------------------------------- +[mistral-7b-2312] +model_id = "mistral-large-2402" +max_tokens = 32768 +inputs = ["text"] +outputs =
["text", "structured"] +costs = { input = 0.25, output = 0.25 } + +# --- Mistral 8x7B Series ------------------------------------------------------ +[mistral-8x7b-2312] +model_id = "open-mixtral-8x7b" +max_tokens = 32768 +inputs = ["text"] +outputs = ["text"] +costs = { input = 0.7, output = 0.7 } + +# --- Mistral Codestral Series ------------------------------------------------- +[mistral-codestral-2405] +model_id = "codestral-2405" +max_tokens = 262144 +inputs = ["text"] +outputs = ["text"] +costs = { input = 1.0, output = 3.0 } + +# --- Pixtral Series ----------------------------------------------------------- +[pixtral-12b] +model_id = "pixtral-12b-latest" +max_tokens = 131072 +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.15 } + +[pixtral-large] +model_id = "pixtral-large-latest" +max_tokens = 131072 +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2.0, output = 6.0 } + +# --- Mistral Small Series ----------------------------------------------------- +[mistral-small-2506] +model_id = "mistral-small-2506" +max_tokens = 128000 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.3 } + +["mistral-small-3.2"] +model_id = "mistral-small-2506" +max_tokens = 128000 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.3 } + +[mistral-small] +model_id = "mistral-small-latest" +max_tokens = 128000 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.3 } + +# --- Mistral Medium Series ---------------------------------------------------- +[mistral-medium-2508] +model_id = "mistral-medium-2508" +max_tokens = 128000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 2.0 } + +["mistral-medium-3.1"] +model_id = "mistral-medium-2508" +max_tokens = 128000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input 
= 0.4, output = 2.0 } + +[mistral-medium] +model_id = "mistral-medium-latest" +max_tokens = 128000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 2.0 } + +# --- Mistral Large Series ----------------------------------------------------- +[mistral-large-2512] +model_id = "mistral-large-2512" +max_tokens = 256000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +[mistral-large-3] +model_id = "mistral-large-2512" +max_tokens = 256000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +[mistral-large] +model_id = "mistral-large-latest" +max_tokens = 256000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +# --- Magistral Series (reasoning models) ------------------------------------ +[magistral-small-2509] +model_id = "magistral-small-2509" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 0.5, output = 1.5 } +thinking_mode = "manual" + +[magistral-small] +model_id = "magistral-small-latest" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 0.5, output = 1.5 } +thinking_mode = "manual" + +[magistral-medium-2509] +model_id = "magistral-medium-2509" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 2, output = 5 } +thinking_mode = "manual" + +[magistral-medium] +model_id = "magistral-medium-latest" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 2, output = 5 } +thinking_mode = "manual" + +################################################################################ +# EXTRACTION MODELS +################################################################################ + +# TODO: add support to pricing per page + +[mistral-ocr-2503] +model_type = "text_extractor" +model_id = 
"mistral-ocr-2503" +max_tokens = 16384 +inputs = ["pdf", "image"] +outputs = ["pages"] + +[mistral-ocr-2505] +model_type = "text_extractor" +model_id = "mistral-ocr-2505" +max_tokens = 16384 +inputs = ["pdf", "image"] +outputs = ["pages"] + +[mistral-ocr-2512] +model_type = "text_extractor" +model_id = "mistral-ocr-2512" +max_tokens = 16384 +inputs = ["pdf", "image"] +outputs = ["pages"] + +[mistral-ocr] +model_type = "text_extractor" +model_id = "mistral-ocr-latest" +max_tokens = 16384 +inputs = ["pdf", "image"] +outputs = ["pages"] diff --git a/.pipelex/inference/backends/ollama.toml b/.pipelex/inference/backends/ollama.toml new file mode 100644 index 0000000..3e020a8 --- /dev/null +++ b/.pipelex/inference/backends/ollama.toml @@ -0,0 +1,64 @@ +################################################################################ +# Ollama Backend Configuration +################################################################################ +# +# This file defines the model specifications for Ollama models. +# It contains model definitions for local language models +# accessible through the Ollama API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["mistral-small3.1-24b"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +prompting_target = "anthropic" +structure_method = "instructor/openai_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Gemma Models ------------------------------------------------------------- +[gemma3-4b] +model_id = "gemma3:4b" +inputs = ["text"] +outputs = ["text"] +max_prompt_images = 3000 +costs = { input = 0, output = 0 } + +# --- Llama Models ------------------------------------------------------------- +[llama4-scout] +model_id = "llama4:scout" +inputs = ["text"] +outputs = ["text"] +max_prompt_images = 3000 +costs = { input = 0, output = 0 } + +# --- Mistral Models ----------------------------------------------------------- +["mistral-small3.1-24b"] +model_id = "mistral-small3.1:24b" +inputs = ["text"] +outputs = ["text"] +max_prompt_images = 3000 +costs = { input = 0, output = 0 } + +# --- Qwen Models -------------------------------------------------------------- +[qwen3-8b] +model_id = "qwen3:8b" +inputs = ["text"] +outputs = ["text"] +costs = { input = 0, output = 0 } +# TODO: support tokens diff --git a/.pipelex/inference/backends/openai.toml b/.pipelex/inference/backends/openai.toml new file mode 100644 index 0000000..3c2af8c --- 
/dev/null +++ b/.pipelex/inference/backends/openai.toml @@ -0,0 +1,224 @@ +################################################################################ +# OpenAI Backend Configuration +################################################################################ +# +# This file defines the model specifications for OpenAI models. +# It contains model definitions for various LLM and image generation models +# accessible through the OpenAI API. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai_responses" +prompting_target = "openai" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- GPT-3.5 Series ----------------------------------------------------------- +["gpt-3.5-turbo"] +model_id = "gpt-3.5-turbo-1106" +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +# --- GPT-4 Series ------------------------------------------------------------- +[gpt-4] +inputs = ["text"] +outputs = ["text"] +costs = { input = 30.0, output = 60.0 } + +[gpt-4-turbo] +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 10.0, output = 30.0 } + +# --- GPT-4o Series ------------------------------------------------------------ 
+[gpt-4o-2024-11-20] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2.5, output = 10.0 } + +[gpt-4o] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2.5, output = 10.0 } + +[gpt-4o-mini-2024-07-18] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.6 } + +[gpt-4o-mini] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.6 } + +# --- GPT-4.1 Series ----------------------------------------------------------- +["gpt-4.1"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } + +["gpt-4.1-mini"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 1.6 } + +["gpt-4.1-nano"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.4 } + +# --- o Series ---------------------------------------------------------------- +[o1] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 15.0, output = 60.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o3-mini] +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.1, output = 4.4 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o3] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 10.0, output = 40.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[o4-mini] +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.1, output = 4.4 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5 Series ------------------------------------------------------------- +[gpt-5] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 
10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-mini] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.25, output = 2.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-nano] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.05, output = 0.4 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-chat] +model_id = "gpt-5-chat-latest" +inputs = ["text", "images", "pdf"] +outputs = ["text"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +[gpt-5-codex] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5.1 Series ------------------------------------------------------------- +["gpt-5.1"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +thinking_mode = "manual" + +["gpt-5.1-chat"] +model_id = "gpt-5.1-chat-latest" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +["gpt-5.1-codex"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +["gpt-5.1-codex-max"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +# --- GPT-5.2 Series ------------------------------------------------------------- +["gpt-5.2"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.75, output = 
14.0 } +thinking_mode = "manual" + +["gpt-5.2-chat"] +model_id = "gpt-5.2-chat-latest" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.75, output = 14.0 } +valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" + +################################################################################ +# IMAGE GENERATION MODELS +################################################################################ + +# --- OpenAI Image Generation -------------------------------------------------- +[gpt-image-1] +sdk = "openai_img_gen" +model_type = "img_gen" +inputs = ["text"] +outputs = ["image"] +costs = { input = 10, output = 40 } + +[gpt-image-1-mini] +sdk = "openai_img_gen" +model_type = "img_gen" +inputs = ["text"] +outputs = ["image"] +costs = { input = 2.5, output = 8 } + +["gpt-image-1.5"] +sdk = "openai_img_gen" +model_type = "img_gen" +model_id = "gpt-image-1.5" +inputs = ["text"] +outputs = ["image"] +costs = { input = 8, output = 32 } diff --git a/.pipelex/inference/backends/pipelex_gateway.toml b/.pipelex/inference/backends/pipelex_gateway.toml new file mode 100644 index 0000000..bca075b --- /dev/null +++ b/.pipelex/inference/backends/pipelex_gateway.toml @@ -0,0 +1,41 @@ +################################################################################ +# Pipelex Gateway Local Overrides +################################################################################ +# +# TELEMETRY NOTICE: +# +# Using Pipelex Gateway enables identified telemetry tied to your API key +# (hashed for security). This is independent from your telemetry.toml settings. +# +# We collect only technical data (model names, token counts, latency, error rates). +# We do NOT collect prompts, completions, pipe codes, or business data. +# +# This allows us to monitor service quality, enforce fair usage, and support you. 
+# +################################################################################ +# +# WARNING: USE AT YOUR OWN RISK! +# +# The actual model configuration is fetched remotely from Pipelex servers. +# Any override in this file may cause unexpected behavior or failures, +# as the remote configuration may change at any time. +# +# If you must override, you may ONLY use these keys per model: +# - sdk +# - structure_method +# +# All other keys will be ignored. +# +# If you need custom configurations, consider using your own API keys +# with direct provider backends (openai, anthropic, etc.) instead. +# +# Documentation: +# https://docs.pipelex.com/home/7-configuration/config-technical/inference-backend-config/ +# Support: https://go.pipelex.com/discord +# +################################################################################ + +# Per-model overrides example: +# [gpt-4o] +# sdk = "gateway_completions" +# structure_method = "instructor/openai_tools" diff --git a/.pipelex/inference/backends/pipelex_inference.toml b/.pipelex/inference/backends/pipelex_inference.toml new file mode 100644 index 0000000..abb5fcf --- /dev/null +++ b/.pipelex/inference/backends/pipelex_inference.toml @@ -0,0 +1,200 @@ +################################################################################ +# Pipelex Inference Backend Configuration +################################################################################ +# +# This file defines the model specifications for the Pipelex Inference backend. +# It contains model definitions for various LLM and image generation models +# accessible through the Pipelex unified inference API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +prompting_target = "anthropic" +structure_method = "instructor/openai_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- OpenAI LLMs -------------------------------------------------------------- +[gpt-4o] +model_id = "pipelex/gpt-4o" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2.75, output = 11.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +[gpt-4o-mini] +model_id = "pipelex/gpt-4o-mini" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.17, output = 0.66 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +["gpt-4.1"] +model_id = "pipelex/gpt-4.1" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +["gpt-4.1-mini"] +model_id = "pipelex/gpt-4.1-mini" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 1.6 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +["gpt-4.1-nano"] +model_id = "pipelex/gpt-4.1-nano" +inputs 
= ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.4 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +[gpt-5-nano] +model_id = "pipelex/gpt-5-nano" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.05, output = 0.40 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +[gpt-5-mini] +model_id = "pipelex/gpt-5-mini" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.25, output = 2.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +[gpt-5-chat] +model_id = "pipelex/gpt-5-chat" +inputs = ["text", "images"] +outputs = ["text"] +costs = { input = 1.25, output = 10.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +[gpt-5] +model_id = "pipelex/gpt-5" +inputs = ["text", "images"] +outputs = ["text"] +costs = { input = 1.25, output = 10.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +["gpt-5.1"] +model_id = "pipelex/gpt-5.1" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +["gpt-5.1-chat"] +model_id = "pipelex/gpt-5.1-chat" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.00 } +sdk = "openai_responses" +structure_method = "instructor/openai_responses_tools" + +# --- Claude LLMs -------------------------------------------------------------- +["claude-4-sonnet"] +model_id = "pipelex/claude-4-sonnet" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 3, output = 15 } + +["claude-4.1-opus"] +model_id = "pipelex/claude-4.1-opus" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 15, output = 75 } + +["claude-4.5-sonnet"] +model_id = 
"pipelex/claude-4.5-sonnet" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 3, output = 15 } + +["claude-4.5-haiku"] +model_id = "pipelex/claude-4.5-haiku" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1, output = 5 } + +["claude-4.5-opus"] +model_id = "pipelex/claude-4.5-opus" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 5, output = 25 } + +# --- Gemini LLMs -------------------------------------------------------------- +["gemini-2.5-pro"] +model_id = "pipelex/gemini-2.5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } + +["gemini-2.5-flash"] +model_id = "pipelex/gemini-2.5-flash" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.30, output = 2.50 } + +["gemini-2.5-flash-lite"] +model_id = "pipelex/gemini-2.5-flash-lite" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 0.10, output = 0.40 } + +["gemini-3.0-pro"] +model_id = "pipelex/gemini-3.0-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 2, output = 12.0 } + +# --- XAI LLMs -------------------------------------------------------------- + +[grok-3] +model_id = "grok-3" +inputs = ["text"] +outputs = ["text"] +costs = { input = 3, output = 15 } + +[grok-3-mini] +model_id = "grok-3-mini" +inputs = ["text"] +outputs = ["text"] +costs = { input = 0.3, output = 0.5 } + +################################################################################ +# OCR and IMAGE GENERATION MODELS +################################################################################ + +# We are still working on giving you access to OCR and image generation models +# and to the best models from Mistral through the Pipelex Inference backend.
diff --git a/.pipelex/inference/backends/portkey.toml b/.pipelex/inference/backends/portkey.toml new file mode 100644 index 0000000..d91a1da --- /dev/null +++ b/.pipelex/inference/backends/portkey.toml @@ -0,0 +1,307 @@ +################################################################################ +# Portkey Configuration +################################################################################ +# +# This file defines the model specifications for the Portkey backend. +# It contains model definitions for various AI models. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "portkey_completions" +structure_method = "instructor/openai_tools" +prompting_target = "anthropic" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- OpenAI LLMs -------------------------------------------------------------- +[gpt-4o-mini] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.15, output = 0.6 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" +x-portkey-provider = "@openai" + +[gpt-4o] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2.5, output = 10.0 } +sdk = "portkey_responses" +structure_method = 
"instructor/openai_responses_tools" +thinking_mode = "none" +x-portkey-provider = "@openai" + +["gpt-4.1-nano"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.4 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" +x-portkey-provider = "@openai" + +["gpt-4.1-mini"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.4, output = 1.6 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" +x-portkey-provider = "@openai" + +["gpt-4.1"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" +x-portkey-provider = "@openai" + +[o1] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 15.0, output = 60.0 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[o3-mini] +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.1, output = 4.4 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[o3] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 2, output = 8 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[o4-mini] +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 1.1, output = 4.4 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = 
"instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[gpt-5-nano] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.05, output = 0.4 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[gpt-5-mini] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.25, output = 2.0 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +[gpt-5] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +["gpt-5.1"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +["gpt-5.1-codex"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 1.25, output = 10.0 } +valued_constraints = { fixed_temperature = 1 } +sdk = "portkey_responses" +structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" +x-portkey-provider = "@openai" + +# --- Claude LLMs -------------------------------------------------------------- +[claude-3-haiku] +model_id = "claude-3-haiku-20240307" +max_tokens = 4096 +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 0.25, output = 1.25 } +thinking_mode = "none" 
+x-portkey-provider = "@anthropic" + +["claude-3.7-sonnet"] +model_id = "claude-3-7-sonnet-20250219" +max_tokens = 8192 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +[claude-4-sonnet] +model_id = "claude-sonnet-4-20250514" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +[claude-4-opus] +model_id = "claude-opus-4-20250514" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +["claude-4.1-opus"] +model_id = "claude-opus-4-1-20250805" +max_tokens = 32000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +["claude-4.5-sonnet"] +model_id = "claude-sonnet-4-5-20250929" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +["claude-4.5-haiku"] +model_id = "claude-haiku-4-5-20251001" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 1.0, output = 5.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + +["claude-4.5-opus"] +model_id = "claude-opus-4-5-20251101" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "manual" +x-portkey-provider = "@anthropic" + 
+["claude-4.6-opus"] +model_id = "claude-opus-4-6" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" +x-portkey-provider = "@anthropic" + +# --- Gemini LLMs -------------------------------------------------------------- +["gemini-2.5-pro"] +model_id = "gemini-2.5-pro" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } +thinking_mode = "manual" +prompting_target = "gemini" +x-portkey-provider = "@google" + +["gemini-2.5-flash"] +model_id = "gemini-2.5-flash" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.30, output = 2.50 } +thinking_mode = "manual" +prompting_target = "gemini" +x-portkey-provider = "@google" + +["gemini-2.5-flash-lite"] +model_id = "gemini-2.5-flash-lite" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.10, output = 0.40 } +thinking_mode = "manual" +prompting_target = "gemini" +x-portkey-provider = "@google" + +["gemini-3.0-pro"] +model_id = "gemini-3-pro-preview" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 2, output = 12.0 } +thinking_mode = "adaptive" +prompting_target = "gemini" +x-portkey-provider = "@google" + +["gemini-3.0-flash-preview"] +model_id = "gemini-3-flash-preview" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 0.5, output = 3.0 } +thinking_mode = "adaptive" +prompting_target = "gemini" +x-portkey-provider = "@google" diff --git a/.pipelex/inference/backends/scaleway.toml b/.pipelex/inference/backends/scaleway.toml new file mode 100644 index 0000000..75d6a05 --- /dev/null +++ b/.pipelex/inference/backends/scaleway.toml @@ -0,0 +1,68 @@ 
+################################################################################ +# Scaleway Backend Configuration +################################################################################ +# +# This file defines the model specifications for Scaleway models. +# It contains model definitions for various LLM models accessible through +# the Scaleway API, including text-only and vision-capable models. +# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots or slashes must be quoted (e.g., ["llama-3.1-8b-instruct"]) +# - Model costs are in USD per million tokens (input/output) +# - Vision models support max 5 images per request, 33MP max resolution +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +structure_method = "instructor/json" +thinking_mode = "none" + +# --- DeepSeek Models ---------------------------------------------------------- +[deepseek-r1-distill-llama-70b] +max_tokens = 32768 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.90, output = 0.90 } + +# --- Meta Llama 3.x Series ---------------------------------------------------- +["llama-3.1-8b-instruct"] +max_tokens = 131072 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.2, output = 0.2 } + +["llama-3.3-70b-instruct"] +max_tokens = 32768 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.90, output = 0.90 } + +# --- OpenAI GPT-OSS Models ---------------------------------------------------- +[gpt-oss-120b] +max_tokens = 65536 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input =
0.15, output = 0.60 } + +# --- Qwen 3 ------------------------------------------------------------------- +[qwen3-235b-a22b-instruct-2507] +max_tokens = 40960 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.75, output = 2.25 } + +[qwen3-coder-30b-a3b-instruct] +max_tokens = 40960 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.20, output = 0.80 } diff --git a/.pipelex/inference/backends/vertexai.toml b/.pipelex/inference/backends/vertexai.toml new file mode 100644 index 0000000..fe89dc8 --- /dev/null +++ b/.pipelex/inference/backends/vertexai.toml @@ -0,0 +1,47 @@ +################################################################################ +# VertexAI Backend Configuration +################################################################################ +# +# This file defines the model specifications for Google VertexAI models. +# It contains model definitions for Gemini language models +# accessible through the Google VertexAI API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["gemini-2.5-pro"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +prompting_target = "gemini" +structure_method = "instructor/vertexai_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Gemini 2.5 Series -------------------------------------------------------- +["gemini-2.5-pro"] +model_id = "google/gemini-2.5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } + +["gemini-2.5-flash"] +model_id = "google/gemini-2.5-flash" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 0.30, output = 2.50 } diff --git a/.pipelex/inference/backends/xai.toml b/.pipelex/inference/backends/xai.toml new file mode 100644 index 0000000..a6348f2 --- /dev/null +++ b/.pipelex/inference/backends/xai.toml @@ -0,0 +1,57 @@ +################################################################################ +# XAI Backend Configuration +################################################################################ +# +# This file defines the model specifications for XAI (formerly Twitter AI) models. +# It contains model definitions for Grok language models +# accessible through the XAI API. 
+# +# Configuration structure: +# - Each model is defined in its own section with the model name as the header +# - Headers with dots must be quoted (e.g., ["grok-3"]) +# - Model costs are in USD per million tokens (input/output) +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +################################################################################ + +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +model_type = "llm" +sdk = "openai" +prompting_target = "anthropic" +structure_method = "instructor/openai_tools" +thinking_mode = "none" + +################################################################################ +# LANGUAGE MODELS +################################################################################ + +# --- Grok 3 Series ------------------------------------------------------------ +[grok-3] +model_id = "grok-3" +inputs = ["text"] +outputs = ["text"] +costs = { input = 3, output = 15 } + +[grok-3-mini] +model_id = "grok-3-mini" +inputs = ["text"] +outputs = ["text"] +costs = { input = 0.3, output = 0.5 } + +[grok-3-fast] +model_id = "grok-3-fast-latest" +inputs = ["text"] +outputs = ["text"] +costs = { input = 5, output = 25 } + +[grok-3-mini-fast] +model_id = "grok-3-mini-fast-latest" +inputs = ["text"] +outputs = ["text"] +costs = { input = 0.15, output = 4 } diff --git a/.pipelex/inference/deck/1_llm_deck.toml b/.pipelex/inference/deck/1_llm_deck.toml new file mode 100644 index 0000000..649576b --- /dev/null +++ b/.pipelex/inference/deck/1_llm_deck.toml @@ -0,0 +1,87 @@ +#################################################################################################### +# Pipelex Model Deck - LLM Configuration +#################################################################################################### +# +# This file defines model defaults, 
aliases, and presets for LLMs +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: @alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# LLM Default Choices +#################################################################################################### + +[llm.choice_defaults] +default_temperature = 0.5 +for_text = "@default-general" +for_object = "@default-general" + +#################################################################################################### +# LLM Aliases +#################################################################################################### + +[llm.aliases] +best-gpt = "gpt-5.2" +best-claude = "claude-4.5-opus" +best-gemini = "gemini-3.0-pro" +best-mistral = "mistral-large" + +# Default aliases (first choice from waterfalls) +default-general = "claude-4.5-sonnet" +default-premium = "claude-4.6-opus" +default-premium-vision = "claude-4.6-opus" +default-premium-structured = "claude-4.6-opus" +default-large-context-code = "gemini-3.0-pro" +default-large-context-text = "gemini-2.5-flash" +default-small = "gpt-4o-mini" +default-small-structured = "gpt-4o-mini" +default-small-vision = "gemini-2.5-flash-lite" +default-small-creative = "gemini-2.5-flash-lite" + +#################################################################################################### +# LLM Presets +#################################################################################################### + +[llm.presets] + +# Writing +writing-factual = { model = "@default-premium", temperature = 0.1, description = "Factual writing with high accuracy" } +writing-creative = { model = "@default-premium", temperature = 0.9, description = "Creative 
writing with high variability" } + +# Retrieval +retrieval = { model = "@default-large-context-text", temperature = 0.1, description = "Data retrieval from large text corpora" } + +# Engineering +engineering-structured = { model = "@default-premium-structured", temperature = 0.2, description = "Structured engineering output (JSON, schemas)" } +engineering-code = { model = "@default-premium", temperature = 0.1, description = "Code generation and analysis" } +engineering-codebase-analysis = { model = "@best-gemini", temperature = 0.1, description = "Large codebase analysis" } + +# Vision +vision = { model = "@default-premium-vision", temperature = 0.5, description = "Vision language model for understanding images" } +vision-cheap = { model = "@default-small-vision", temperature = 0.5, description = "Budget vision model for simple image tasks" } +vision-diagram = { model = "@default-premium-vision", temperature = 0.3, description = "Diagram and chart interpretation" } +vision-table = { model = "@default-premium-vision", temperature = 0.3, description = "Table extraction from images" } + +# Image generation prompting +img-gen-prompting = { model = "@default-premium", temperature = 0.5, description = "Crafting image generation prompts" } +img-gen-prompting-cheap = { model = "@default-small", temperature = 0.5, description = "Budget image prompt generation" } + +# Reasoning +deep-analysis = { model = "@default-premium", temperature = 0.1, reasoning_effort = "high", description = "Deep reasoning and analysis" } +quick-reasoning = { model = "@default-premium", temperature = 0.3, reasoning_effort = "low", description = "Quick reasoning for simple tasks" } + +# Builder (isolated presets for the pipeline builder) +pipe-builder-engineering = { model = "claude-4.6-opus", temperature = 0.2, description = "Builder: structured engineering output" } +pipe-builder-img-gen-prompting = { model = "claude-4.5-sonnet", temperature = 0.7, description = "Builder: crafting image generation 
prompts" } + +# Testing +testing-text = { model = "@default-small", temperature = 0.5, description = "Testing preset for text generation" } +testing-structured = { model = "@default-small-structured", temperature = 0.1, description = "Testing preset for structured output" } +testing-vision = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for vision tasks" } +testing-vision-structured = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for structured vision output" } diff --git a/.pipelex/inference/deck/2_img_gen_deck.toml b/.pipelex/inference/deck/2_img_gen_deck.toml new file mode 100644 index 0000000..400b492 --- /dev/null +++ b/.pipelex/inference/deck/2_img_gen_deck.toml @@ -0,0 +1,53 @@ +#################################################################################################### +# Pipelex Model Deck - Image Generation Configuration +#################################################################################################### +# +# This file defines model aliases and presets for image generation models +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: @alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# Image Generation Default Choices +#################################################################################################### + +[img_gen] +default_quality = "medium" +choice_default = "$gen-image" + +#################################################################################################### +# Image Generation Aliases +#################################################################################################### + +[img_gen.aliases] 
+best-gpt = "gpt-image-1.5" +best-gemini = "nano-banana-pro" +best-blackforestlabs = "flux-2-pro" + +default-general = "flux-2-pro" +default-premium = "nano-banana-pro" +default-small = "gpt-image-1-mini" + +#################################################################################################### +# Image Generation Presets +#################################################################################################### + +[img_gen.presets] + +# General purpose +gen-image = { model = "@default-general", quality = "medium", description = "Standard image generation" } +gen-image-fast = { model = "@default-small", quality = "low", description = "Fast image generation with lower quality" } +gen-image-high-quality = { model = "@default-premium", quality = "high", description = "High-quality image generation" } + +# Testing +gen-image-testing = { model = "@default-small", quality = "low", description = "Testing preset for image generation" } +gen-image-testing-img2img = { model = "nano-banana-pro", description = "Testing preset for image-to-image" } +synthesize-photo = { model = "@default-small", quality = "low", description = "Synthesize realistic photos for testing" } +synthesize-ui = { model = "nano-banana-pro", description = "Synthesize UI screenshots for testing" } +synthesize-chart = { model = "nano-banana-pro", description = "Synthesize charts and graphs for testing" } diff --git a/.pipelex/inference/deck/3_extract_deck.toml b/.pipelex/inference/deck/3_extract_deck.toml new file mode 100644 index 0000000..2e5e5af --- /dev/null +++ b/.pipelex/inference/deck/3_extract_deck.toml @@ -0,0 +1,42 @@ +#################################################################################################### +# Pipelex Model Deck - Document Extraction Configuration +#################################################################################################### +# +# This file defines model aliases and presets for document extraction models, including +# extraction of text
and images from documents and OCR and text extraction from images. +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: @alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# Document Extraction Default Choices +#################################################################################################### + +[extract] +choice_default = "@default-extract-document" + +#################################################################################################### +# Aliases +#################################################################################################### + +[extract.aliases] +default-premium = "azure-document-intelligence" +default-extract-document = "mistral-document-ai-2505" +default-extract-image = "mistral-document-ai-2505" +default-text-from-pdf = "pypdfium2-extract-pdf" +default-no-inference = "pypdfium2-extract-pdf" + +#################################################################################################### +# Extract Presets +#################################################################################################### + +[extract.presets] + +# Testing +extract-testing = { model = "@default-extract-document", max_nb_images = 5, image_min_size = 50, description = "Testing preset for document extraction" } diff --git a/.pipelex/inference/routing_profiles.toml b/.pipelex/inference/routing_profiles.toml new file mode 100644 index 0000000..eb9aae0 --- /dev/null +++ b/.pipelex/inference/routing_profiles.toml @@ -0,0 +1,149 @@ +# Routing profile library - Routes models to their backends +# ========================================================================================= +# This file controls which 
backend serves which model. +# Simply change the 'active' field to switch profiles, +# or you can add your own custom profiles. +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# ========================================================================================= + +# Note: The internal backend (software-only models) is always available regardless of +# which routing profile you select. See the documentation for details. + +# Which profile to use (change this to switch routing) +active = "all_pipelex_gateway" + +# We recommend using the "all_pipelex_gateway" profile to get a head start with all models. +# To use the Pipelex Gateway backend: +# 1. Get your API key at https://app.pipelex.com (free credits included) +# 2. Add it to your .env file: PIPELEX_GATEWAY_API_KEY=your-key-here +# 3. Run `pipelex init` and accept the Gateway terms of service + +# ========================================================================================= +# Routing Profiles +# ========================================================================================= + +[profiles.all_pipelex_gateway] +description = "Use Pipelex Gateway for all its supported models" +default = "pipelex_gateway" + +[profiles.all_anthropic] +description = "Use Anthropic backend for all its supported models" +default = "anthropic" + +[profiles.all_azure_openai] +description = "Use Azure OpenAI backend for all its supported models" +default = "azure_openai" + +[profiles.all_bedrock] +description = "Use Bedrock backend for all its supported models" +default = "bedrock" + +[profiles.all_blackboxai] +description = "Use BlackBoxAI backend for all its supported models" +default = "blackboxai" + +[profiles.all_fal] +description = "Use FAL backend for all its supported models" +default = "fal" + +[profiles.all_google] +description = "Use Google GenAI backend for all its supported models" +default = "google" + +[profiles.all_groq] +description = "Use groq backend for 
all its supported models" +default = "groq" + +[profiles.all_huggingface] +description = "Use HuggingFace backend for all its supported models" +default = "huggingface" + +[profiles.all_mistral] +description = "Use Mistral backend for all its supported models" +default = "mistral" + +[profiles.all_ollama] +description = "Use Ollama backend for all its supported models" +default = "ollama" + +[profiles.all_openai] +description = "Use OpenAI backend for all its supported models" +default = "openai" + +[profiles.all_portkey] +description = "Use Portkey backend for all its supported models" +default = "portkey" + +[profiles.all_scaleway] +description = "Use Scaleway backend for all its supported models" +default = "scaleway" + +[profiles.all_vertexai] +description = "Use Vertex AI backend for all its supported models" +default = "vertexai" + +[profiles.all_xai] +description = "Use xAI backend for all its supported models" +default = "xai" + +[profiles.all_internal] +description = "Use internal backend for all its supported models" +default = "internal" + +# ========================================================================================= +# Custom Profiles +# ========================================================================================= +# Add your own profiles below following the same pattern: +# +# [profiles.your_profile_name] +# description = "What this profile does" +# default = "backend-name" # Where to route models by default +# [profiles.your_profile_name.routes] +# "model-pattern" = "backend-name" # Specific routing rules +# +# Pattern matching supports: +# - Exact names: "gpt-4o-mini" +# - Wildcards: "claude-*" (matches all models starting with claude-) +# - Partial wildcards: "*-sonnet" (matches all sonnet variants) + +# ========================================================================================= +# Example of a custom routing profile with mostly pattern matching and one specific model +# 
========================================================================================= +[profiles.example_routing_using_patterns] +description = "Example routing profile using patterns" +default = "pipelex_gateway" + +[profiles.example_routing_using_patterns.routes] +# Pattern matching: "model-pattern" = "backend-name" +"gpt-*" = "azure_openai" +"claude-*" = "bedrock" +"gemini-*" = "google" +"grok-*" = "xai" +"*-sdxl" = "fal" +"flux-*" = "fal" +"gpt-image-1" = "openai" + +# ========================================================================================= +# Example of a custom routing profile with specific model matching +# ========================================================================================= + +[profiles.example_routing_using_specific_models] +description = "Example routing profile using specific models" + +[profiles.example_routing_using_specific_models.routes] +"gpt-5-nano" = "pipelex_gateway" +"gpt-4o-mini" = "blackboxai" +"gpt-5-mini" = "openai" +"gpt-5-chat" = "azure_openai" + +"claude-4-sonnet" = "pipelex_gateway" +"claude-3.7-sonnet" = "blackboxai" + +"gemini-2.5-flash-lite" = "pipelex_gateway" +"gemini-2.5-flash" = "blackboxai" +"gemini-2.5-pro" = "vertexai" + +"grok-3" = "pipelex_gateway" +"grok-3-mini" = "xai" diff --git a/.pipelex/pipelex.toml b/.pipelex/pipelex.toml new file mode 100644 index 0000000..ed8859f --- /dev/null +++ b/.pipelex/pipelex.toml @@ -0,0 +1,192 @@ +#################################################################################################### +# Pipelex Configuration File +#################################################################################################### +# +# This configuration file is copied to client projects' .pipelex/ directory when running: +# `pipelex init config` +# +# Purpose: +# - This file allows you to override Pipelex's default settings for specific projects +# - All values below are set to their defaults - modify them as needed +# - The values here will override 
the defaults from the Pipelex package +# +# Finding Available Settings: +# - See the full default configuration in: pipelex/pipelex.toml (in the Pipelex package) +# - See the configuration structure classes in: pipelex/config.py and pipelex/cogt/config_cogt.py +# +# Common customizations include: +# - Logging levels and behavior +# - Excluded directories for scanning +# - LLM prompt dumping for debugging +# - Feature flags +# - Observer and reporting output directories +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# Pipeline Execution Config +#################################################################################################### + +[pipelex.pipeline_execution_config] +# Set to false to disable conversion of incoming data URLs to pipelex-storage:// URIs +is_normalize_data_urls_to_storage = true +# Set to false to disable generation of execution graphs +is_generate_graph = true + +[pipelex.pipeline_execution_config.graph_config.data_inclusion] +# Control what data is included in graph outputs +stuff_json_content = true +stuff_text_content = true +stuff_html_content = true +error_stack_traces = true + +[pipelex.pipeline_execution_config.graph_config.graphs_inclusion] +# Control which graph outputs are generated +graphspec_json = true +mermaidflow_mmd = true +mermaidflow_html = true +reactflow_viewspec = true +reactflow_html = true + +[pipelex.pipeline_execution_config.graph_config.reactflow_config] +# Customize ReactFlow graph rendering +edge_type = "bezier" # Options: "bezier", "smoothstep", "step", "straight" +nodesep = 50 # Horizontal spacing between nodes +ranksep = 30 # Vertical spacing between ranks/levels +initial_zoom = 1.0 # Initial zoom level (1.0 = 100%) +pan_to_top = true # Pan to show top of 
graph on load + +#################################################################################################### +# Storage Config +#################################################################################################### + +[pipelex.storage_config] +# Storage method: "local", "in_memory" (default), "s3", or "gcp" +method = "in_memory" +# Whether to fetch remote HTTP URLs and store them locally +is_fetch_remote_content_enabled = true +# Whether to upload local file paths to storage and replace with pipelex-storage:// URIs +is_upload_local_content_enabled = true + +[pipelex.storage_config.local] +# Local storage settings +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" +local_storage_path = ".pipelex/storage" + +[pipelex.storage_config.in_memory] +# In-memory storage settings +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" + +[pipelex.storage_config.s3] +# AWS S3 storage settings (requires boto3: `pip install pipelex[s3]`) +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" +bucket_name = "" +region = "" +signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs + +[pipelex.storage_config.gcp] +# Google Cloud Storage settings (requires google-cloud-storage: `pip install pipelex[gcp-storage]`) +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" +bucket_name = "" +project_id = "" +signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs + +#################################################################################################### +# Scan Config +#################################################################################################### + +[pipelex.scan_config] +# Directories to exclude when scanning for pipeline files +excluded_dirs = [ + ".venv", + "venv", + "env", + ".env", + "virtualenv", + ".virtualenv", + ".git", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "node_modules", + "results", +] + 
+#################################################################################################### +# Builder Config +#################################################################################################### + +[pipelex.builder_config] +# Settings for generated pipelines +default_output_dir = "." +default_bundle_file_name = "bundle" +default_directory_base_name = "pipeline" + +#################################################################################################### +# Log Config +#################################################################################################### + +[pipelex.log_config] +# Default logging level: "DEBUG", "INFO", "WARNING", "ERROR" +default_log_level = "INFO" +# Log output target: "stdout" or "stderr" +console_log_target = "stdout" +console_print_target = "stdout" + +[pipelex.log_config.package_log_levels] +# Log levels for specific packages (use "-" instead of "." in package names) +pipelex = "INFO" + +#################################################################################################### +# Feature Config +#################################################################################################### + +[pipelex.feature_config] +# WIP/Experimental feature flags +is_reporting_enabled = true + +#################################################################################################### +# Reporting Config +#################################################################################################### + +[pipelex.reporting_config] +# Cost reporting settings +is_log_costs_to_console = false +is_generate_cost_report_file_enabled = false +cost_report_dir_path = "reports" +cost_report_base_name = "cost_report" +cost_report_extension = "csv" +cost_report_unit_scale = 1.0 + +#################################################################################################### +# Cogt (Cognitive Tools) Config 
+#################################################################################################### + +[cogt.model_deck_config] +# Model fallback behavior: if true, uses secondary model options when primary fails +is_model_fallback_enabled = true +# Reaction to missing presets: "raise", "log", or "none" +missing_presets_reaction = "log" + +[cogt.tenacity_config] +# Retry behavior for API calls +max_retries = 50 # Maximum number of retry attempts before giving up +wait_multiplier = 0.2 # Multiplier applied to the wait time between retries (in seconds) +wait_max = 20 # Maximum wait time between retries (in seconds) +wait_exp_base = 1.3 # Base for exponential backoff calculation + +[cogt.llm_config] +# Enable dumping of LLM inputs/outputs for debugging +is_dump_text_prompts_enabled = false +is_dump_response_text_enabled = false + +[cogt.llm_config.instructor_config] +# Enable dumping of structured content generation details for debugging +is_dump_kwargs_enabled = false +is_dump_response_enabled = false +is_dump_error_enabled = false + diff --git a/.pipelex/pipelex_service.toml b/.pipelex/pipelex_service.toml new file mode 100644 index 0000000..afe39a2 --- /dev/null +++ b/.pipelex/pipelex_service.toml @@ -0,0 +1,19 @@ +#################################################################################################### +# Pipelex Service Configuration +#################################################################################################### +# +# This file stores settings related to Pipelex managed services. +# Currently used for Pipelex Gateway terms acceptance. +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +[agreement] +# Set to true after accepting Pipelex terms of service. +terms_accepted = true + +# Note: when using pipelex_gateway, telemetry is enabled to monitor service usage. 
+# We collect technical data (model, pipe type...) and quantitative data (token counts...) +# but NOT your content, pipe codes, or output class names. diff --git a/.pipelex/telemetry.toml b/.pipelex/telemetry.toml new file mode 100644 index 0000000..eb2c537 --- /dev/null +++ b/.pipelex/telemetry.toml @@ -0,0 +1,92 @@ +#################################################################################################### +# Custom Telemetry Configuration +#################################################################################################### +# +# This file controls YOUR custom telemetry settings for observability and analytics. +# Configure your own PostHog, Langfuse, or OTLP-compatible backends here. +# +# NOTE: When using Pipelex Gateway, identified telemetry is automatically enabled +# (tied to your Gateway API key, hashed for security). This allows us to monitor +# service quality, enforce fair usage, and provide you with better support. +# Gateway telemetry operates independently from your settings below - you can have both! +# +# To disable all telemetry, set the DO_NOT_TRACK=1 environment variable. 
+# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +# ────────────────────────────────────────────────────────────────────────────── +# PostHog Configuration (Event tracking + AI span tracing) +# ────────────────────────────────────────────────────────────────────────────── + +[custom_posthog] +mode = "off" # Values: "off" | "anonymous" | "identified" +# user_id = "your_user_id" # Required when mode = "identified" +endpoint = "${POSTHOG_ENDPOINT}" # Default: https://us.i.posthog.com (or https://eu.i.posthog.com for EU) +api_key = "${POSTHOG_API_KEY}" # Get from PostHog Project Settings +geoip = true # Enable GeoIP lookup +debug = false # Enable PostHog debug mode +redact_properties = [ + "prompt", + "system_prompt", + "response", + "file_path", + "url", +] # Event properties to redact + +# AI span tracing to YOUR PostHog (does NOT affect Langfuse/OTLP - they receive full data) +[custom_posthog.tracing] +enabled = false # Send AI spans to your PostHog + +# Privacy controls for data sent to YOUR PostHog only +[custom_posthog.tracing.capture] +content = false # Capture prompt/completion content +# content_max_length = 1000 # Max length for captured content (omit for unlimited) +pipe_codes = false # Include pipe codes in span names/attributes +output_class_names = false # Include output class names in span names/attributes + +# ────────────────────────────────────────────────────────────────────────────── +# Portkey SDK Configuration +# ────────────────────────────────────────────────────────────────────────────── + +[custom_portkey] +force_debug_enabled = false +force_tracing_enabled = false + +# ────────────────────────────────────────────────────────────────────────────── +# Langfuse Integration +# Note: Langfuse receives FULL span data (no redaction) +# 
────────────────────────────────────────────────────────────────────────────── + +[langfuse] +enabled = false +# endpoint = "https://cloud.langfuse.com" # Override for self-hosted Langfuse +# public_key = "${LANGFUSE_PUBLIC_KEY}" # Langfuse public key +# secret_key = "${LANGFUSE_SECRET_KEY}" # Langfuse secret key + +# ────────────────────────────────────────────────────────────────────────────── +# Additional OTLP Exporters (array for multiple) +# Note: OTLP exporters receive FULL span data (no redaction) +# ────────────────────────────────────────────────────────────────────────────── + +# [[otlp]] +# name = "my-collector" # Identifier for logging +# endpoint = "https://..." # OTLP endpoint URL +# headers = { Authorization = "Bearer ${OTLP_AUTH_TOKEN}" } # Headers for OTLP export + +# ────────────────────────────────────────────────────────────────────────────── +# Custom Telemetry Allowed Modes +# Controls which integration modes can use custom telemetry settings above. +# ────────────────────────────────────────────────────────────────────────────── + +[telemetry_allowed_modes] +ci = false # CI environments don't use custom telemetry +cli = true # CLI usage allows custom telemetry +docker = true # Docker deployments allow custom telemetry +fastapi = true # FastAPI integrations allow custom telemetry +mcp = true # MCP integrations allow custom telemetry +n8n = true # n8n integrations allow custom telemetry +pytest = false # Tests don't use custom telemetry +python = false # Direct Python SDK usage doesn't use custom telemetry by default diff --git a/.vscode/settings.json b/.vscode/settings.json index acd31dc..371dd2e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -21,6 +21,6 @@ "python.testing.pytestEnabled": true, "djlint.showInstallError": false, "files.associations": { - "*.plx": "plx" + "*.mthds": "mthds" } } \ No newline at end of file diff --git a/.windsurfrules.md b/.windsurfrules.md deleted file mode 100644 index af4572e..0000000 --- 
a/.windsurfrules.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
- -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text.
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned in different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id.
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index af4572e..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
- -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary where each key is the variable name used in the prompts and each value is the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So if you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output.
The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the available pipes and adapt them to your needs. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - For choosing the next pipe based on an expression evaluated on stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (includes Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps.
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index ca6563b..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,104 +0,0 @@ -# Changelog - -## [v0.6.7] - 2025-12-01 - -- Bump `pipelex` to `v0.17.3`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.6.6] - 2025-12-01 - -- Bump `pipelex` to `v0.17.2`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.6.5] - 2025-11-26 - -- Bump `pipelex` to `v0.17.1`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.6.4] - 2025-11-18 - -- Bump `pipelex` to `v0.15.7` to fix `pipelex doctor` - -## [v0.6.3] - 2025-11-18 - -- Bump `pipelex` to `v0.15.6`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) -- Bump `pytest` to `v9.0.1` (fix typo in `pyproject.toml`) - -## [v0.6.2] - 2025-11-13 - -- Update agent rules - -## [v0.6.1] - 2025-11-13 - -- Bump `pipelex` to `v0.15.4`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) -- Update pytest to `v0.9.1` - -## [v0.6.0] - 2025-11-07 - - - Upgraded `pipelex` dependency from `0.14.0` to `0.15.2` - -## [v0.5.1] - 2025-10-28 - -- Updated README.md instructions - -## [v0.5.0] - 2025-10-27 - -- Bump `pipelex` to `v0.14.0`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.4.0] - 2025-10-21 - -- Bump `pipelex` to `v0.13.0`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.3.0] - 2025-10-15 - -- Bump `pipelex` to `v0.12.0`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) -- **Dependency Management**: Added `requirements.txt` and `requirements-dev.txt` with corresponding `Makefile` commands for dependency export - -## [v0.2.4] - 
2025-09-19 - -- Stop ignoring backend config -- Add `gpt-4o` model to `pipelex_inference.toml` - -## [v0.2.3] - 2025-09-19 - -- Fixed Hello World's call to execute_pipeline -- Added proper e2e test for Hello World - -## [v0.2.2] - 2025-09-18 - -- Bump `pipelex` to `v0.10.2`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) -- Remove ugly code from `conftest.py` for unit test env var placeholders, now uses the proper fixture defined in `pipelex.test_extras.shared_pytest_plugins` - -## [v0.2.1] - 2025-09-17 - -- Cleanup env example - -## [v0.2.0] - 2025-09-17 - -- Bump `pipelex` to `v0.10.1`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) -- Add `cleanlibraries` and `cleanconfig` make commands - -## [v0.1.4] - 2025-09-07 - -- Make it easier to get running with BlackboxAI LLMs - -## [v0.1.3] - 2025-09-06 - -- Bump `pipelex` to `v0.9.4`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.1.2] - 2025-09-06 - -- Better support for Pipelex extension in BlackboxAI IDE - -## [v0.1.1] - 2025-09-04 - -- Updated pipelex Cursor rules - -## [v0.1.0] - 2025-09-03 - -- Bump `pipelex` to `v0.9.0`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/). Renamed `hello_world.toml` to `hello_world.plx` as a consequence. - -## [v0.0.2] - 2025-08-27 - -- Bump `pipelex` to `v0.8.1`: See `Pipelex` changelog [here](https://docs.pipelex.com/changelog/) - -## [v0.0.1] - 2025-06-XX - -- TBD diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index af4572e..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. 
- - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. 
- -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. 
Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed 
syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/Makefile b/Makefile index bb1c61f..bb8426b 100644 --- a/Makefile +++ b/Makefile @@ -157,7 +157,7 @@ erd: export-requirements-dev validate: env $(call PRINT_TITLE,"Running setup sequence") - $(VENV_PIPELEX) validate all + $(VENV_PIPELEX) validate --all ############################################################################################## ############################ Cleaning ############################ diff --git a/my_project/hello_world.plx b/my_project/hello_world.mthds similarity index 100% rename from my_project/hello_world.plx rename to my_project/hello_world.mthds diff --git a/pyproject.toml b/pyproject.toml index fbb9661..4ede691 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ classifiers = [ dependencies = ["pipelex[mistralai,anthropic,google,google-genai,bedrock,fal]"] [tool.uv.sources] -pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b3" } +# pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b3" } +pipelex = { path = "../_epic-mthds-4", editable = true } [tool.setuptools] diff --git a/uv.lock b/uv.lock index 44c8eb5..f96cac2 100644 --- a/uv.lock +++ b/uv.lock @@ -539,7 +539,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 
30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -1485,7 +1485,7 @@ dev = [ requires-dist = [ { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], git = "https://github.com/Pipelex/pipelex.git?branch=pre-release%2Fv0.18.0b3" }, + { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], editable = "../_epic-mthds-4" }, { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.1" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, @@ -1966,7 +1966,7 @@ wheels = [ [[package]] name = "pipelex" version = "0.18.0b3" -source = { git = "https://github.com/Pipelex/pipelex.git?branch=pre-release%2Fv0.18.0b3#b6095405505fba33b9b99294e3025d769f3af7c1" } +source = { editable = "../_epic-mthds-4" } dependencies = [ { name = "aiofiles" }, { name = "backports-strenum", marker = "python_full_version < '3.11'" }, @@ -2022,6 +2022,75 @@ mistralai = [ { name = "mistralai" }, ] +[package.metadata] +requires-dist = [ + { name = "aioboto3", marker = "extra == 'bedrock'", specifier = ">=13.4.0" }, + { name = "aioboto3", marker = "extra == 's3'", specifier = ">=13.4.0" }, + { name = "aiofiles", specifier = ">=23.2.1" }, + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.78.0" }, + { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, + { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, + { name = "boto3", marker = "extra == 's3'", specifier = ">=1.34.131" }, + { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, + { name = "docling", marker = "extra == 'docling'", specifier = ">=2.64.0" }, + { name = "fal-client", 
marker = "extra == 'fal'", specifier = ">=0.4.1" }, + { name = "filetype", specifier = ">=1.2.0" }, + { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.2.1" }, + { name = "google-cloud-storage", marker = "extra == 'gcp-storage'", specifier = ">=2.10.0" }, + { name = "google-genai", marker = "extra == 'google-genai'" }, + { name = "httpx", specifier = ">=0.23.0,<1.0.0" }, + { name = "huggingface-hub", marker = "extra == 'huggingface'", specifier = ">=0.23,<1.0.0" }, + { name = "instructor", specifier = ">=1.8.3,!=1.11.*,!=1.12.*" }, + { name = "instructor", extras = ["google-genai"], marker = "extra == 'google-genai'" }, + { name = "jinja2", specifier = ">=3.1.4" }, + { name = "json2html", specifier = ">=1.3.0" }, + { name = "kajson", specifier = "==0.3.1" }, + { name = "markdown", specifier = ">=3.6" }, + { name = "mike", marker = "extra == 'docs'", specifier = ">=2.1.3" }, + { name = "mistralai", marker = "extra == 'mistralai'", specifier = ">=1.12.0" }, + { name = "mkdocs", marker = "extra == 'docs'", specifier = ">=1.6.1" }, + { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = ">=0.4.0" }, + { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.6.14" }, + { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = ">=1.1.0" }, + { name = "moto", extras = ["s3"], marker = "extra == 'dev'", specifier = ">=5.0.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = "==1.19.1" }, + { name = "networkx", specifier = ">=3.4.2" }, + { name = "openai", specifier = ">=1.108.1" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "pillow", specifier = ">=11.2.1" }, + { name = "polyfactory", specifier = ">=2.21.0" }, + { name = "portkey-ai", specifier = ">=2.1.0" }, + { name = "posthog", specifier = ">=6.7.0" }, + { name = "pydantic", specifier = 
">=2.10.6,<3.0.0" }, + { name = "pylint", marker = "extra == 'dev'", specifier = "==4.0.4" }, + { name = "pypdfium2", specifier = ">=4.30.0,!=4.30.1,<5.0.0" }, + { name = "pyright", marker = "extra == 'dev'", specifier = "==1.1.408" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, + { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0.0" }, + { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.6.1" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "rich", specifier = ">=13.8.1" }, + { name = "ruff", marker = "extra == 'dev'", specifier = "==0.14.13" }, + { name = "shortuuid", specifier = ">=1.0.13" }, + { name = "tomli", specifier = ">=2.3.0" }, + { name = "tomlkit", specifier = ">=0.13.2" }, + { name = "typer", specifier = ">=0.16.0" }, + { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, + { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, + { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, + { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, + { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, + { name = "typing-extensions", specifier = ">=4.13.2" }, +] +provides-extras = ["anthropic", "bedrock", "docling", "fal", "gcp-storage", "google", "google-genai", "huggingface", "mistralai", "s3", "docs", "dev"] + [[package]] name = "platformdirs" version = "4.5.0" From c07354cb9f8e64a4ff572d9f8f5841be921763f7 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sat, 21 Feb 2026 12:01:54 +0100 Subject: 
[PATCH 2/5] Adopt PipelexRunner API, add agent Make targets, and remove shipped config files Migrate from execute_pipeline() to PipelexRunner, add __main__ guard to hello_world.py, introduce agent-check and agent-test Make targets, switch pipelex source to _epic-mthds-1, reformat pyproject.toml to 2-space indent, and delete .pipelex/ config files now shipped by the pipelex package. Co-Authored-By: Claude Opus 4.6 --- .pipelex/inference/backends.toml | 108 ------ .pipelex/inference/backends/anthropic.toml | 111 ------- .pipelex/inference/backends/azure_openai.toml | 227 ------------- .pipelex/inference/backends/bedrock.toml | 138 -------- .pipelex/inference/backends/blackboxai.toml | 165 ---------- .pipelex/inference/backends/fal.toml | 108 ------ .pipelex/inference/backends/google.toml | 91 ------ .pipelex/inference/backends/groq.toml | 130 -------- .pipelex/inference/backends/huggingface.toml | 44 --- .pipelex/inference/backends/internal.toml | 44 --- .pipelex/inference/backends/mistral.toml | 219 ------------- .pipelex/inference/backends/ollama.toml | 64 ---- .pipelex/inference/backends/openai.toml | 224 ------------- .../inference/backends/pipelex_gateway.toml | 41 --- .../inference/backends/pipelex_inference.toml | 200 ------------ .pipelex/inference/backends/portkey.toml | 307 ------------------ .pipelex/inference/backends/scaleway.toml | 68 ---- .pipelex/inference/backends/vertexai.toml | 47 --- .pipelex/inference/backends/xai.toml | 57 ---- .pipelex/inference/deck/1_llm_deck.toml | 87 ----- .pipelex/inference/deck/2_img_gen_deck.toml | 53 --- .pipelex/inference/deck/3_extract_deck.toml | 42 --- .pipelex/inference/routing_profiles.toml | 149 --------- .pipelex/pipelex.toml | 192 ----------- .pipelex/pipelex_service.toml | 19 -- .pipelex/telemetry.toml | 92 ------ Makefile | 17 +- my_project/hello_world.py | 12 +- pyproject.toml | 125 +++---- tests/e2e/test_my_project.py | 4 +- uv.lock | 32 +- 31 files changed, 119 insertions(+), 3098 deletions(-) delete mode 
100644 .pipelex/inference/backends.toml delete mode 100644 .pipelex/inference/backends/anthropic.toml delete mode 100644 .pipelex/inference/backends/azure_openai.toml delete mode 100644 .pipelex/inference/backends/bedrock.toml delete mode 100644 .pipelex/inference/backends/blackboxai.toml delete mode 100644 .pipelex/inference/backends/fal.toml delete mode 100644 .pipelex/inference/backends/google.toml delete mode 100644 .pipelex/inference/backends/groq.toml delete mode 100644 .pipelex/inference/backends/huggingface.toml delete mode 100644 .pipelex/inference/backends/internal.toml delete mode 100644 .pipelex/inference/backends/mistral.toml delete mode 100644 .pipelex/inference/backends/ollama.toml delete mode 100644 .pipelex/inference/backends/openai.toml delete mode 100644 .pipelex/inference/backends/pipelex_gateway.toml delete mode 100644 .pipelex/inference/backends/pipelex_inference.toml delete mode 100644 .pipelex/inference/backends/portkey.toml delete mode 100644 .pipelex/inference/backends/scaleway.toml delete mode 100644 .pipelex/inference/backends/vertexai.toml delete mode 100644 .pipelex/inference/backends/xai.toml delete mode 100644 .pipelex/inference/deck/1_llm_deck.toml delete mode 100644 .pipelex/inference/deck/2_img_gen_deck.toml delete mode 100644 .pipelex/inference/deck/3_extract_deck.toml delete mode 100644 .pipelex/inference/routing_profiles.toml delete mode 100644 .pipelex/pipelex.toml delete mode 100644 .pipelex/pipelex_service.toml delete mode 100644 .pipelex/telemetry.toml diff --git a/.pipelex/inference/backends.toml b/.pipelex/inference/backends.toml deleted file mode 100644 index 8ffc6c6..0000000 --- a/.pipelex/inference/backends.toml +++ /dev/null @@ -1,108 +0,0 @@ -#################################################################################################### -# Pipelex Inference Backends Configuration -#################################################################################################### -# -# This file configures the 
inference backends available to Pipelex. -# Each backend connects to a different AI service provider (OpenAI, Anthropic, Google, etc.). -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -[pipelex_gateway] -display_name = "⭐ Pipelex Gateway" -enabled = true # Enable after accepting terms via `pipelex init config` -api_key = "${PIPELEX_GATEWAY_API_KEY}" - -[anthropic] -enabled = false -api_key = "${ANTHROPIC_API_KEY}" - -[azure_openai] -display_name = "Azure OpenAI" -enabled = false -endpoint = "${AZURE_API_BASE}" -api_key = "${AZURE_API_KEY}" -api_version = "${AZURE_API_VERSION}" - -[bedrock] -display_name = "Amazon Bedrock" -enabled = false -aws_region = "${AWS_REGION}" - -[blackboxai] -display_name = "BlackBox AI" -enabled = false -endpoint = "https://api.blackbox.ai/v1" -api_key = "${BLACKBOX_API_KEY}" - -[fal] -display_name = "FAL" -enabled = false -api_key = "${FAL_API_KEY}" - -[google] -display_name = "Google AI" -enabled = false -api_key = "${GOOGLE_API_KEY}" - -[groq] -display_name = "Groq" -enabled = false -endpoint = "https://api.groq.com/openai/v1" -api_key = "${GROQ_API_KEY}" - -[huggingface] -display_name = "Hugging Face" -enabled = false -api_key = "${HF_TOKEN}" - -[mistral] -display_name = "Mistral AI" -enabled = false -api_key = "${MISTRAL_API_KEY}" - -[ollama] -enabled = false -endpoint = "http://localhost:11434/v1" - -[openai] -display_name = "OpenAI" -enabled = false -api_key = "${OPENAI_API_KEY}" - -[portkey] -display_name = "Portkey" -enabled = false -endpoint = "https://api.portkey.ai/v1" -api_key = "${PORTKEY_API_KEY}" - -[scaleway] -display_name = "Scaleway" -enabled = false -endpoint = "${SCALEWAY_ENDPOINT}" -api_key = "${SCALEWAY_API_KEY}" - -[vertexai] -display_name = "Google Vertex AI" -enabled = false # This is the only one we disable because setting it up requires internet access just to get 
credentials so it fails in CI sandboxes -gcp_project_id = "${GCP_PROJECT_ID}" -gcp_location = "${GCP_LOCATION}" -gcp_credentials_file_path = "${GCP_CREDENTIALS_FILE_PATH}" - -[xai] -display_name = "xAI" -enabled = false -endpoint = "https://api.x.ai/v1" -api_key = "${XAI_API_KEY}" - -[internal] # software-only backend, runs internally, without AI -enabled = true - -# Deprecated -[pipelex_inference] -display_name = "🛑 Legacy Pipelex Inference" -enabled = false -endpoint = "https://inference.pipelex.com/v1" -api_key = "${PIPELEX_INFERENCE_API_KEY}" diff --git a/.pipelex/inference/backends/anthropic.toml b/.pipelex/inference/backends/anthropic.toml deleted file mode 100644 index 145ba19..0000000 --- a/.pipelex/inference/backends/anthropic.toml +++ /dev/null @@ -1,111 +0,0 @@ -################################################################################ -# Anthropic Backend Configuration -################################################################################ -# -# This file defines the model specifications for Anthropic Claude models. -# It contains model definitions for various Claude language models -# accessible through the Anthropic API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["claude-3.5-sonnet"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "anthropic" -prompting_target = "anthropic" -structure_method = "instructor/anthropic_tools" -thinking_mode = "manual" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Claude 3 Series ---------------------------------------------------------- -[claude-3-haiku] -model_id = "claude-3-haiku-20240307" -max_tokens = 4096 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 0.25, output = 1.25 } -thinking_mode = "none" - -# --- Claude 3.7 Series -------------------------------------------------------- -["claude-3.7-sonnet"] -model_id = "claude-3-7-sonnet-20250219" -max_tokens = 8192 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } - -# --- Claude 4 Series ---------------------------------------------------------- -[claude-4-sonnet] -model_id = "claude-sonnet-4-20250514" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } - -[claude-4-opus] -model_id = "claude-opus-4-20250514" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", 
"structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } - -# --- Claude 4.1 Series -------------------------------------------------------- -["claude-4.1-opus"] -model_id = "claude-opus-4-1-20250805" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } - -# --- Claude 4.5 Series -------------------------------------------------------- -["claude-4.5-sonnet"] -model_id = "claude-sonnet-4-5-20250929" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } - -["claude-4.5-haiku"] -model_id = "claude-haiku-4-5-20251001" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 1.0, output = 5.0 } - -["claude-4.5-opus"] -model_id = "claude-opus-4-5-20251101" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } - -["claude-4.6-opus"] -model_id = "claude-opus-4-6" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } -thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/azure_openai.toml b/.pipelex/inference/backends/azure_openai.toml deleted file mode 100644 index c3020da..0000000 --- a/.pipelex/inference/backends/azure_openai.toml +++ /dev/null @@ -1,227 +0,0 @@ -################################################################################ -# Azure OpenAI Backend Configuration -################################################################################ -# -# This file defines the model specifications for Azure OpenAI models. -# It contains model definitions for OpenAI models deployed on Azure -# accessible through the Azure OpenAI API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "azure_openai_responses" -prompting_target = "openai" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- GPT-4o Series ------------------------------------------------------------ -[gpt-4o] -model_id = "gpt-4o-2024-11-20" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2.5, output = 10.0 } - -[gpt-4o-mini] -model_id = "gpt-4o-mini-2024-07-18" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.6 } - -# --- GPT-4.1 Series ----------------------------------------------------------- -["gpt-4.1"] -model_id = "gpt-4.1-2025-04-14" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } - -["gpt-4.1-mini"] -model_id = "gpt-4.1-mini-2025-04-14" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 1.6 } - -["gpt-4.1-nano"] -model_id = "gpt-4.1-nano-2025-04-14" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.4 } - -# --- o Series ---------------------------------------------------------------- -[o1-mini] -model_id = 
"o1-mini-2024-09-12" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 3.0, output = 12.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o1] -model_id = "o1-2024-12-17" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 15.0, output = 60.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o3-mini] -model_id = "o3-mini-2025-01-31" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.1, output = 4.4 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o3] -model_id = "o3-2025-04-16" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5 Series ------------------------------------------------------------- -[gpt-5-mini] -model_id = "gpt-5-mini-2025-08-07" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.25, output = 2.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-nano] -model_id = "gpt-5-nano-2025-08-07" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.4 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-chat] -model_id = "gpt-5-chat-2025-08-07" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5] -model_id = "gpt-5-2025-08-07" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5.1 Series ------------------------------------------------------------- -["gpt-5.1"] -model_id = "gpt-5.1-2025-11-13" -inputs = ["text", "images"] -outputs = ["text", "structured"] 
-costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -["gpt-5.1-chat"] -model_id = "gpt-5.1-chat-2025-11-13" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -["gpt-5.1-codex"] -model_id = "gpt-5.1-codex-2025-11-13" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5.2 Series ------------------------------------------------------------- -["gpt-5.2"] -model_id = "gpt-5.2-2025-12-11" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.75, output = 14.0 } -thinking_mode = "manual" - -["gpt-5.2-chat"] -model_id = "gpt-5.2-chat-2025-12-11" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -################################################################################ -# IMAGE GENERATION MODELS -################################################################################ - -# --- OpenAI Image Generation -------------------------------------------------- -[gpt-image-1] -sdk = "azure_rest_img_gen" -model_type = "img_gen" -model_id = "gpt-image-1-2025-04-15" -inputs = ["text"] -outputs = ["image"] -costs = { input = 10, output = 40 } - -[gpt-image-1.rules] -prompt = "positive_only" -num_images = "gpt" -aspect_ratio = "gpt" -background = "gpt" -inference = "gpt" -safety_checker = "unavailable" -output_format = "gpt" - -[gpt-image-1-mini] -sdk = "azure_rest_img_gen" -model_type = "img_gen" -model_id = "gpt-image-1-mini-2025-10-06" -inputs = ["text"] -outputs = ["image"] -costs = { input = 2.5, output = 8 } - -[gpt-image-1-mini.rules] -prompt = "positive_only" -num_images = "gpt" 
-aspect_ratio = "gpt" -background = "gpt" -inference = "gpt" -safety_checker = "unavailable" -output_format = "gpt" - -["gpt-image-1.5"] -sdk = "azure_rest_img_gen" -model_type = "img_gen" -model_id = "gpt-image-1.5-2025-12-16" -inputs = ["text"] -outputs = ["image"] -costs = { input = 8, output = 32 } - -["gpt-image-1.5".rules] -prompt = "positive_only" -num_images = "gpt" -aspect_ratio = "gpt" -background = "gpt" -inference = "gpt" -safety_checker = "unavailable" -output_format = "gpt" diff --git a/.pipelex/inference/backends/bedrock.toml b/.pipelex/inference/backends/bedrock.toml deleted file mode 100644 index d2ecd23..0000000 --- a/.pipelex/inference/backends/bedrock.toml +++ /dev/null @@ -1,138 +0,0 @@ -################################################################################ -# Amazon Bedrock Backend Configuration -################################################################################ -# -# This file defines the model specifications for Amazon Bedrock models. -# It contains model definitions for various language models -# accessible through the Amazon Bedrock service. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["claude-3.5-sonnet"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "bedrock_aioboto3" -prompting_target = "anthropic" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Mistral Models ----------------------------------------------------------- -[bedrock-mistral-large] -model_id = "mistral.mistral-large-2407-v1:0" -max_tokens = 8192 -inputs = ["text"] -outputs = ["text"] -costs = { input = 4.0, output = 12.0 } - -# --- Meta Llama Models -------------------------------------------------------- -[bedrock-meta-llama-3-3-70b-instruct] -model_id = "us.meta.llama3-3-70b-instruct-v1:0" -max_tokens = 8192 -inputs = ["text"] -outputs = ["text"] -# TODO: find out the actual cost per million tokens for llama3 on bedrock -costs = { input = 3.0, output = 15.0 } - -# --- Amazon Nova Models ------------------------------------------------------- -[bedrock-nova-pro] -model_id = "us.amazon.nova-pro-v1:0" -max_tokens = 5120 -inputs = ["text"] -outputs = ["text"] -# TODO: find out the actual cost per million tokens for nova on bedrock -costs = { input = 3.0, output = 15.0 } - -# --- Claude LLMs -------------------------------------------------------------- -["claude-3.7-sonnet"] -sdk = "bedrock_anthropic" -model_id = 
"us.anthropic.claude-3-7-sonnet-20250219-v1:0" -max_tokens = 8192 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" - -[claude-4-sonnet] -sdk = "bedrock_anthropic" -model_id = "us.anthropic.claude-sonnet-4-20250514-v1:0" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" - -[claude-4-opus] -sdk = "bedrock_anthropic" -model_id = "us.anthropic.claude-opus-4-20250514-v1:0" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" - -["claude-4.1-opus"] -sdk = "bedrock_anthropic" -model_id = "us.anthropic.claude-opus-4-1-20250805-v1:0" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" - -["claude-4.5-sonnet"] -sdk = "bedrock_anthropic" -model_id = "us.anthropic.claude-sonnet-4-5-20250929-v1:0" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" - -["claude-4.5-haiku"] -sdk = "bedrock_anthropic" -model_id = "us.anthropic.claude-haiku-4-5-20251001-v1:0" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 1.0, output = 5.0 } -thinking_mode = "manual" - -["claude-4.5-opus"] -sdk = "bedrock_anthropic" -model_id = "global.anthropic.claude-opus-4-5-20251101-v1:0" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } -thinking_mode = "manual" - -["claude-4.6-opus"] -sdk = 
"bedrock_anthropic" -model_id = "global.anthropic.claude-opus-4-6-v1" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } -thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/blackboxai.toml b/.pipelex/inference/backends/blackboxai.toml deleted file mode 100644 index dc1642d..0000000 --- a/.pipelex/inference/backends/blackboxai.toml +++ /dev/null @@ -1,165 +0,0 @@ -################################################################################ -# BlackBoxAI Backend Configuration -################################################################################ -# -# This file defines the model specifications for BlackBoxAI models. -# It contains model definitions for various language models from different providers -# accessible through the BlackBoxAI API. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-5.2"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -structure_method = "instructor/openai_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Anthropic Claude Models -------------------------------------------------- -["claude-4.5-sonnet"] -model_id = "blackboxai/anthropic/claude-sonnet-4.5" -inputs = ["text", "images"] -outputs = ["text", 
"structured"] -costs = { input = 3.00, output = 15.00 } -thinking_mode = "manual" - -["claude-4.5-haiku"] -model_id = "blackboxai/anthropic/claude-haiku-4.5" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.00, output = 5.00 } -thinking_mode = "manual" - -[claude-4-sonnet] -model_id = "blackboxai/anthropic/claude-sonnet-4" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 3.00, output = 15.00 } -thinking_mode = "manual" - -["claude-4.5-opus"] -model_id = "blackboxai/anthropic/claude-opus-4.5" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 5.00, output = 25.00 } -thinking_mode = "manual" - -["claude-4.6-opus"] -model_id = "blackboxai/anthropic/claude-opus-4.6" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 5.00, output = 25.00 } -thinking_mode = "adaptive" - -# --- OpenAI GPT-5 Models ------------------------------------------------------ -[gpt-5-mini] -model_id = "blackboxai/openai/gpt-5-mini" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.25, output = 2.00 } - -[gpt-5] -model_id = "blackboxai/openai/gpt-5" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } - -[gpt-5-pro] -model_id = "blackboxai/openai/gpt-5-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 15.00, output = 120.00 } - -[gpt-5-codex] -model_id = "blackboxai/openai/gpt-5-codex" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } - -["gpt-5.1"] -model_id = "blackboxai/openai/gpt-5.1" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } - -["gpt-5.1-codex"] -model_id = "blackboxai/openai/gpt-5.1-codex" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } - -["gpt-5.2"] -model_id = "blackboxai/openai/gpt-5.2" 
-inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.75, output = 14.00 } - -["gpt-5.2-pro"] -model_id = "blackboxai/openai/gpt-5.2-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2.00, output = 16.00 } - -# --- Google Gemini Models ----------------------------------------------------- -["gemini-2.5-pro"] -model_id = "blackboxai/google/gemini-2.5-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } -thinking_mode = "none" - -["gemini-2.5-flash"] -model_id = "blackboxai/google/gemini-2.5-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.30, output = 2.50 } -thinking_mode = "none" - -# --- MiniMax Models ----------------------------------------------------------- -[minimax-m2] -model_id = "blackboxai/minimax/minimax-m2" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.27, output = 1.15 } - -# --- Qwen Models -------------------------------------------------------------- -[qwen3-max] -model_id = "blackboxai/qwen/qwen3-max" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 2.00, output = 10.00 } - -[qwen3-coder] -model_id = "blackboxai/qwen/qwen3-coder" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.50, output = 8.00 } - -# --- Qwen VL Models ------------------------------------------------- -[qwen3-vl-235b-a22b] -model_id = "blackboxai/qwen3-vl-235b-a22b" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 3.00, output = 15.00 } - -[qwen3-vl-32b] -model_id = "blackboxai/qwen3-vl-32b" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.00, output = 5.00 } -structure_method = "instructor/openrouter_structured_outputs" diff --git a/.pipelex/inference/backends/fal.toml b/.pipelex/inference/backends/fal.toml deleted file mode 100644 index d79251e..0000000 --- 
a/.pipelex/inference/backends/fal.toml +++ /dev/null @@ -1,108 +0,0 @@ -################################################################################ -# FAL Backend Configuration -################################################################################ -# -# This file defines the model specifications for FAL (Fast AI Labs) models. -# It contains model definitions for various image generation models -# accessible through the FAL API. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["flux-pro/v1.1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "img_gen" -sdk = "fal" -prompting_target = "fal" -thinking_mode = "none" - -################################################################################ -# IMAGE GENERATION MODELS -################################################################################ - -# --- Flux Pro Series ---------------------------------------------------------- -[flux-pro] -model_id = "fal-ai/flux-pro" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.05, output = 0.0 } - -[flux-pro.rules] -prompt = "positive_only" -num_images = "fal" -aspect_ratio = "flux" -inference = "flux" -safety_checker = "available" -output_format = "flux_1" -specific = "fal" - -["flux-pro/v1.1"] -model_id = "fal-ai/flux-pro/v1.1" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.05, output = 0.0 } - -["flux-pro/v1.1".rules] -prompt = "positive_only" -num_images = "fal" -aspect_ratio = "flux" -inference = "flux" -safety_checker = 
"available" -output_format = "flux_1" -specific = "fal" - -["flux-pro/v1.1-ultra"] -model_id = "fal-ai/flux-pro/v1.1-ultra" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.06, output = 0.0 } - -["flux-pro/v1.1-ultra".rules] -prompt = "positive_only" -num_images = "fal" -aspect_ratio = "flux_11_ultra" -inference = "flux_11_ultra" -safety_checker = "available" -output_format = "flux_1" -specific = "fal" - -[flux-2] -model_id = "fal-ai/flux-2" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.05, output = 0.0 } - -[flux-2.rules] -prompt = "positive_only" -num_images = "fal" -aspect_ratio = "flux" -inference = "flux" -safety_checker = "available" -output_format = "flux_2" -specific = "fal" - -# --- SDXL models -------------------------------------------------------------- -[fast-lightning-sdxl] -model_id = "fal-ai/fast-lightning-sdxl" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0003, output = 0.0 } - -[fast-lightning-sdxl.rules] -prompt = "positive_only" -num_images = "fal" -aspect_ratio = "flux" -inference = "sdxl_lightning" -safety_checker = "unavailable" -output_format = "sdxl" -specific = "fal" diff --git a/.pipelex/inference/backends/google.toml b/.pipelex/inference/backends/google.toml deleted file mode 100644 index fdc63d5..0000000 --- a/.pipelex/inference/backends/google.toml +++ /dev/null @@ -1,91 +0,0 @@ -################################################################################ -# Google Gemini API Backend Configuration -################################################################################ -# -# This file defines the model specifications for Google Gemini API models. -# It contains model definitions for Gemini language models -# accessible through the Google Gemini API (not VertexAI). 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gemini-3.0-pro"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "google" -prompting_target = "gemini" -structure_method = "instructor/genai_tools" -thinking_mode = "manual" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Gemini 2.5 Series ---------------------------------------- -["gemini-2.5-pro"] -model_id = "gemini-2.5-pro" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 1.25, output = 10.0 } - -["gemini-2.5-flash"] -model_id = "gemini-2.5-flash" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.30, output = 2.50 } - -["gemini-2.5-flash-lite"] -model_id = "gemini-2.5-flash-lite" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.10, output = 0.40 } - -# --- Gemini 3.0 Series ---------------------------------------- -["gemini-3.0-pro"] -model_id = "gemini-3-pro-preview" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 2, output = 12.0 } -thinking_mode = "adaptive" - -["gemini-3.0-flash-preview"] -model_id = "gemini-3-flash-preview" -inputs = ["text", "images", "pdf"] -outputs = ["text", 
"structured"] -max_prompt_images = 3000 -costs = { input = 0.5, output = 3.0 } -thinking_mode = "adaptive" - -################################################################################ -# IMAGE GENERATION MODELS (Nano Banana) -################################################################################ - -[nano-banana] -model_type = "img_gen" -model_id = "gemini-2.5-flash-image" -inputs = ["text"] -outputs = ["image"] -thinking_mode = "none" -costs = { input = 0.0, output = 0.039 } - -[nano-banana-pro] -model_type = "img_gen" -model_id = "gemini-3-pro-image-preview" -inputs = ["text"] -outputs = ["image"] -thinking_mode = "none" -costs = { input = 0.0, output = 0.039 } diff --git a/.pipelex/inference/backends/groq.toml b/.pipelex/inference/backends/groq.toml deleted file mode 100644 index cb48094..0000000 --- a/.pipelex/inference/backends/groq.toml +++ /dev/null @@ -1,130 +0,0 @@ -################################################################################ -# Groq Backend Configuration -################################################################################ -# -# This file defines the model specifications for Groq models. -# It contains model definitions for various LLM models accessible through -# the Groq API, including text-only and vision-capable models. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots or slashes must be quoted (e.g., ["meta-llama/llama-4-scout"]) -# - Model costs are in USD per million tokens (input/output) -# - Vision models support max 5 images per request, 33MP max resolution -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -structure_method = "instructor/json" -thinking_mode = "none" - -################################################################################ -# PRODUCTION TEXT MODELS -################################################################################ - -# --- Meta Llama 3.x Series ---------------------------------------------------- -["llama-3.1-8b-instant"] -model_id = "llama-3.1-8b-instant" -max_tokens = 131072 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.08 } - -["llama-3.3-70b-versatile"] -model_id = "llama-3.3-70b-versatile" -max_tokens = 32768 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.59, output = 0.79 } - -# --- Meta Llama Guard --------------------------------------------------------- -[llama-guard-4-12b] -model_id = "meta-llama/llama-guard-4-12b" -max_tokens = 1024 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.20, output = 0.20 } - -# --- OpenAI GPT-OSS Models ---------------------------------------------------- -[gpt-oss-20b] -model_id = "openai/gpt-oss-20b" -max_tokens = 65536 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.075, output = 0.30 } - -[gpt-oss-120b] -model_id = "openai/gpt-oss-120b" 
-max_tokens = 65536 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.60 } - -# --- Groq Compound Systems ---------------------------------------------------- -["groq/compound"] -model_id = "groq/compound" -max_tokens = 8192 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.45 } - -["groq/compound-mini"] -model_id = "groq/compound-mini" -max_tokens = 8192 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.10, output = 0.30 } - -################################################################################ -# PREVIEW MODELS -################################################################################ - -# --- Meta Llama 4 Vision Models (Preview) ------------------------------------- -[llama-4-scout-17b-16e-instruct] -model_id = "meta-llama/llama-4-scout-17b-16e-instruct" -max_tokens = 8192 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 5 -costs = { input = 0.11, output = 0.34 } - -[llama-4-maverick-17b-128e-instruct] -model_id = "meta-llama/llama-4-maverick-17b-128e-instruct" -max_tokens = 8192 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 5 -costs = { input = 0.20, output = 0.60 } - -# --- Moonshot Kimi K2 --------------------------------------------------------- -[kimi-k2-instruct-0905] -model_id = "moonshotai/kimi-k2-instruct-0905" -max_tokens = 16384 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.00, output = 3.00 } - -# --- OpenAI Safety Model ------------------------------------------------------ -[gpt-oss-safeguard-20b] -model_id = "openai/gpt-oss-safeguard-20b" -max_tokens = 65536 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.075, output = 0.30 } - -# --- Qwen 3 ------------------------------------------------------------------- -[qwen3-32b] -model_id = "qwen/qwen3-32b" -max_tokens = 40960 -inputs = ["text"] -outputs = 
["text", "structured"] -costs = { input = 0.29, output = 0.59 } diff --git a/.pipelex/inference/backends/huggingface.toml b/.pipelex/inference/backends/huggingface.toml deleted file mode 100644 index 2fd0faf..0000000 --- a/.pipelex/inference/backends/huggingface.toml +++ /dev/null @@ -1,44 +0,0 @@ -################################################################################ -# Hugging Face Backend Configuration -################################################################################ -# -# This file defines the model specifications for Hugging Face models. -# It contains model definitions for various image generation models -# accessible through the Hugging Face Inference API with provider="auto". -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots or slashes must be quoted (e.g., ["stabilityai/stable-diffusion-2-1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "img_gen" -sdk = "huggingface_img_gen" -thinking_mode = "none" - -################################################################################ -# IMAGE GENERATION MODELS -################################################################################ - -# --- Qwen Image Models -------------------------------------------------- -[qwen-image] -model_id = "Qwen/Qwen-Image" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.0 } -variant = "fal-ai" -# variant = "replicate" - -[qwen-image.rules] -prompt = "with_negative" -aspect_ratio = "qwen_image" -inference = "qwen_image" diff --git 
a/.pipelex/inference/backends/internal.toml b/.pipelex/inference/backends/internal.toml deleted file mode 100644 index 8fcc38d..0000000 --- a/.pipelex/inference/backends/internal.toml +++ /dev/null @@ -1,44 +0,0 @@ -################################################################################ -# Internal Backend Configuration -################################################################################ -# -# This file defines the model specifications for internal software-only models. -# These models run internally without external APIs or AI services. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -thinking_mode = "none" - -################################################################################ -# TEXT EXTRACTION MODELS -################################################################################ - -# --- PyPDFium2 Text Extractor ------------------------------------------------- -[pypdfium2-extract-pdf] -model_type = "text_extractor" -sdk = "pypdfium2" -model_id = "extract-text" -inputs = ["pdf"] -outputs = ["pages"] -costs = {} - -# --- Docling Text Extractor --------------------------------------------------- -[docling-extract-text] -model_type = "text_extractor" -sdk = "docling_sdk" -model_id = "extract-text" -inputs = ["pdf", "image"] -outputs = ["pages"] -costs = {} diff --git a/.pipelex/inference/backends/mistral.toml b/.pipelex/inference/backends/mistral.toml deleted file mode 100644 index 7ca2615..0000000 --- 
a/.pipelex/inference/backends/mistral.toml +++ /dev/null @@ -1,219 +0,0 @@ -################################################################################ -# Mistral Backend Configuration -################################################################################ -# -# This file defines the model specifications for Mistral AI models. -# It contains model definitions for various Mistral language models and specialized models -# accessible through the Mistral API. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["ministral-3b"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "mistral" -prompting_target = "mistral" -structure_method = "instructor/mistral_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Ministral Series --------------------------------------------------------- -[ministral-3b] -model_id = "ministral-3b-latest" -max_tokens = 131072 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.04, output = 0.04 } - -[ministral-8b] -model_id = "ministral-8b-latest" -max_tokens = 131072 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.1 } - -# --- Mistral 7B Series -------------------------------------------------------- -[mistral-7b-2312] -model_id = "mistral-large-2402" -max_tokens = 32768 -inputs = ["text"] 
-outputs = ["text", "structured"] -costs = { input = 0.25, output = 0.25 } - -# --- Mistral 8x7B Series ------------------------------------------------------ -[mistral-8x7b-2312] -model_id = "open-mixtral-8x7b" -max_tokens = 32768 -inputs = ["text"] -outputs = ["text"] -costs = { input = 0.7, output = 0.7 } - -# --- Mistral Codestral Series ------------------------------------------------- -[mistral-codestral-2405] -model_id = "codestral-2405" -max_tokens = 262144 -inputs = ["text"] -outputs = ["text"] -costs = { input = 1.0, output = 3.0 } - -# --- Pixtral Series ----------------------------------------------------------- -[pixtral-12b] -model_id = "pixtral-12b-latest" -max_tokens = 131072 -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.15 } - -[pixtral-large] -model_id = "pixtral-large-latest" -max_tokens = 131072 -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2.0, output = 6.0 } - -# --- Mistral Small Series ----------------------------------------------------- -[mistral-small-2506] -model_id = "mistral-small-2506" -max_tokens = 128000 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.3 } - -["mistral-small-3.2"] -model_id = "mistral-small-2506" -max_tokens = 128000 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.3 } - -[mistral-small] -model_id = "mistral-small-latest" -max_tokens = 128000 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.3 } - -# --- Mistral Medium Series ---------------------------------------------------- -[mistral-medium-2508] -model_id = "mistral-medium-2508" -max_tokens = 128000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 2.0 } - -["mistral-medium-3.1"] -model_id = "mistral-medium-2508" -max_tokens = 128000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] 
-costs = { input = 0.4, output = 2.0 } - -[mistral-medium] -model_id = "mistral-medium-latest" -max_tokens = 128000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 2.0 } - -# --- Mistral Large Series ----------------------------------------------------- -[mistral-large-2512] -model_id = "mistral-large-2512" -max_tokens = 256000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.5, output = 1.5 } - -[mistral-large-3] -model_id = "mistral-large-2512" -max_tokens = 256000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.5, output = 1.5 } - -[mistral-large] -model_id = "mistral-large-latest" -max_tokens = 256000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.5, output = 1.5 } - -# --- Magistral Series (reasoning models) ------------------------------------ -[magistral-small-2509] -model_id = "magistral-small-2509" -max_tokens = 128000 -inputs = ["text", "pdf"] -outputs = ["text"] -costs = { input = 0.5, output = 1.5 } -thinking_mode = "manual" - -[magistral-small] -model_id = "magistral-small-latest" -max_tokens = 128000 -inputs = ["text", "pdf"] -outputs = ["text"] -costs = { input = 0.5, output = 1.5 } -thinking_mode = "manual" - -[magistral-medium-2509] -model_id = "magistral-medium-2509" -max_tokens = 128000 -inputs = ["text", "pdf"] -outputs = ["text"] -costs = { input = 2, output = 5 } -thinking_mode = "manual" - -[magistral-medium] -model_id = "magistral-medium-latest" -max_tokens = 128000 -inputs = ["text", "pdf"] -outputs = ["text"] -costs = { input = 2, output = 5 } -thinking_mode = "manual" - -################################################################################ -# EXTRACTION MODELS -################################################################################ - -# TODO: add support to pricing per page - -[mistral-ocr-2503] -model_type = "text_extractor" -model_id = 
"mistral-ocr-2503" -max_tokens = 16384 -inputs = ["pdf", "image"] -outputs = ["pages"] - -[mistral-ocr-2505] -model_type = "text_extractor" -model_id = "mistral-ocr-2505" -max_tokens = 16384 -inputs = ["pdf", "image"] -outputs = ["pages"] - -[mistral-ocr-2512] -model_type = "text_extractor" -model_id = "mistral-ocr-2512" -max_tokens = 16384 -inputs = ["pdf", "image"] -outputs = ["pages"] - -[mistral-ocr] -model_type = "text_extractor" -model_id = "mistral-ocr-latest" -max_tokens = 16384 -inputs = ["pdf", "image"] -outputs = ["pages"] diff --git a/.pipelex/inference/backends/ollama.toml b/.pipelex/inference/backends/ollama.toml deleted file mode 100644 index 3e020a8..0000000 --- a/.pipelex/inference/backends/ollama.toml +++ /dev/null @@ -1,64 +0,0 @@ -################################################################################ -# Ollama Backend Configuration -################################################################################ -# -# This file defines the model specifications for Ollama models. -# It contains model definitions for local language models -# accessible through the Ollama API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["mistral-small3.1-24b"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -prompting_target = "anthropic" -structure_method = "instructor/openai_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Gemma Models ------------------------------------------------------------- -[gemma3-4b] -model_id = "gemma3:4b" -inputs = ["text"] -outputs = ["text"] -max_prompt_images = 3000 -costs = { input = 0, output = 0 } - -# --- Llama Models ------------------------------------------------------------- -[llama4-scout] -model_id = "llama4:scout" -inputs = ["text"] -outputs = ["text"] -max_prompt_images = 3000 -costs = { input = 0, output = 0 } - -# --- Mistral Models ----------------------------------------------------------- -["mistral-small3.1-24b"] -model_id = "mistral-small3.1:24b" -inputs = ["text"] -outputs = ["text"] -max_prompt_images = 3000 -costs = { input = 0, output = 0 } - -# --- Qwen Models -------------------------------------------------------------- -[qwen3-8b] -model_id = "qwen3:8b" -inputs = ["text"] -outputs = ["text"] -costs = { input = 0, output = 0 } -# TODO: support tokens diff --git a/.pipelex/inference/backends/openai.toml b/.pipelex/inference/backends/openai.toml deleted file mode 100644 index 3c2af8c..0000000 --- 
a/.pipelex/inference/backends/openai.toml +++ /dev/null @@ -1,224 +0,0 @@ -################################################################################ -# OpenAI Backend Configuration -################################################################################ -# -# This file defines the model specifications for OpenAI models. -# It contains model definitions for various LLM and image generation models -# accessible through the OpenAI API. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai_responses" -prompting_target = "openai" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- GPT-3.5 Series ----------------------------------------------------------- -["gpt-3.5-turbo"] -model_id = "gpt-3.5-turbo-1106" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.5, output = 1.5 } - -# --- GPT-4 Series ------------------------------------------------------------- -[gpt-4] -inputs = ["text"] -outputs = ["text"] -costs = { input = 30.0, output = 60.0 } - -[gpt-4-turbo] -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 10.0, output = 30.0 } - -# --- GPT-4o Series ------------------------------------------------------------ 
-[gpt-4o-2024-11-20] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2.5, output = 10.0 } - -[gpt-4o] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2.5, output = 10.0 } - -[gpt-4o-mini-2024-07-18] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.6 } - -[gpt-4o-mini] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.6 } - -# --- GPT-4.1 Series ----------------------------------------------------------- -["gpt-4.1"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } - -["gpt-4.1-mini"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 1.6 } - -["gpt-4.1-nano"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.4 } - -# --- o Series ---------------------------------------------------------------- -[o1] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 15.0, output = 60.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o3-mini] -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.1, output = 4.4 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o3] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 10.0, output = 40.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[o4-mini] -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.1, output = 4.4 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5 Series ------------------------------------------------------------- -[gpt-5] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 
10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-mini] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.25, output = 2.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-nano] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.4 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-chat] -model_id = "gpt-5-chat-latest" -inputs = ["text", "images", "pdf"] -outputs = ["text"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -[gpt-5-codex] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5.1 Series ------------------------------------------------------------- -["gpt-5.1"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -thinking_mode = "manual" - -["gpt-5.1-chat"] -model_id = "gpt-5.1-chat-latest" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -["gpt-5.1-codex"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -["gpt-5.1-codex-max"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -# --- GPT-5.2 Series ------------------------------------------------------------- -["gpt-5.2"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.75, output = 
14.0 } -thinking_mode = "manual" - -["gpt-5.2-chat"] -model_id = "gpt-5.2-chat-latest" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.75, output = 14.0 } -valued_constraints = { fixed_temperature = 1 } -thinking_mode = "manual" - -################################################################################ -# IMAGE GENERATION MODELS -################################################################################ - -# --- OpenAI Image Generation -------------------------------------------------- -[gpt-image-1] -sdk = "openai_img_gen" -model_type = "img_gen" -inputs = ["text"] -outputs = ["image"] -costs = { input = 10, output = 40 } - -[gpt-image-1-mini] -sdk = "openai_img_gen" -model_type = "img_gen" -inputs = ["text"] -outputs = ["image"] -costs = { input = 2.5, output = 8 } - -["gpt-image-1.5"] -sdk = "openai_img_gen" -model_type = "img_gen" -model_id = "gpt-image-1.5" -inputs = ["text"] -outputs = ["image"] -costs = { input = 8, output = 32 } diff --git a/.pipelex/inference/backends/pipelex_gateway.toml b/.pipelex/inference/backends/pipelex_gateway.toml deleted file mode 100644 index bca075b..0000000 --- a/.pipelex/inference/backends/pipelex_gateway.toml +++ /dev/null @@ -1,41 +0,0 @@ -################################################################################ -# Pipelex Gateway Local Overrides -################################################################################ -# -# TELEMETRY NOTICE: -# -# Using Pipelex Gateway enables identified telemetry tied to your API key -# (hashed for security). This is independent from your telemetry.toml settings. -# -# We collect only technical data (model names, token counts, latency, error rates). -# We do NOT collect prompts, completions, pipe codes, or business data. -# -# This allows us to monitor service quality, enforce fair usage, and support you. 
-# -################################################################################ -# -# WARNING: USE AT YOUR OWN RISK! -# -# The actual model configuration is fetched remotely from Pipelex servers. -# Any override in this file may cause unexpected behavior or failures, -# as the remote configuration may change at any time. -# -# If you must override, you may ONLY use these keys per model: -# - sdk -# - structure_method -# -# All other keys will be ignored. -# -# If you need custom configurations, consider using your own API keys -# with direct provider backends (openai, anthropic, etc.) instead. -# -# Documentation: -# https://docs.pipelex.com/home/7-configuration/config-technical/inference-backend-config/ -# Support: https://go.pipelex.com/discord -# -################################################################################ - -# Per-model overrides example: -# [gpt-4o] -# sdk = "gateway_completions" -# structure_method = "instructor/openai_tools" diff --git a/.pipelex/inference/backends/pipelex_inference.toml b/.pipelex/inference/backends/pipelex_inference.toml deleted file mode 100644 index abb5fcf..0000000 --- a/.pipelex/inference/backends/pipelex_inference.toml +++ /dev/null @@ -1,200 +0,0 @@ -################################################################################ -# Pipelex Inference Backend Configuration -################################################################################ -# -# This file defines the model specifications for the Pipelex Inference backend. -# It contains model definitions for various LLM and image generation models -# accessible through the Pipelex unified inference API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -prompting_target = "anthropic" -structure_method = "instructor/openai_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- OpenAI LLMs -------------------------------------------------------------- -[gpt-4o] -model_id = "pipelex/gpt-4o" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2.75, output = 11.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -[gpt-4o-mini] -model_id = "pipelex/gpt-4o-mini" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.17, output = 0.66 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -["gpt-4.1"] -model_id = "pipelex/gpt-4.1" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -["gpt-4.1-mini"] -model_id = "pipelex/gpt-4.1-mini" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 1.6 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -["gpt-4.1-nano"] -model_id = "pipelex/gpt-4.1-nano" -inputs 
= ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.4 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -[gpt-5-nano] -model_id = "pipelex/gpt-5-nano" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.40 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -[gpt-5-mini] -model_id = "pipelex/gpt-5-mini" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.25, output = 2.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -[gpt-5-chat] -model_id = "pipelex/gpt-5-chat" -inputs = ["text", "images"] -outputs = ["text"] -costs = { input = 1.25, output = 10.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -[gpt-5] -model_id = "pipelex/gpt-5" -inputs = ["text", "images"] -outputs = ["text"] -costs = { input = 1.25, output = 10.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -["gpt-5.1"] -model_id = "pipelex/gpt-5.1" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -["gpt-5.1-chat"] -model_id = "pipelex/gpt-5.1-chat" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.00 } -sdk = "openai_responses" -structure_method = "instructor/openai_responses_tools" - -# --- Claude LLMs -------------------------------------------------------------- -["claude-4-sonnet"] -model_id = "pipelex/claude-4-sonnet" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 3, output = 15 } - -["claude-4.1-opus"] -model_id = "pipelex/claude-4.1-opus" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 15, output = 75 } - -["claude-4.5-sonnet"] -model_id = 
"pipelex/claude-4.5-sonnet" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 3, output = 15 } - -["claude-4.5-haiku"] -model_id = "pipelex/claude-4.5-haiku" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 1, output = 5 } - -["claude-4.5-opus"] -model_id = "pipelex/claude-4.5-opus" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 5, output = 25 } - -# --- Gemini LLMs -------------------------------------------------------------- -["gemini-2.5-pro"] -model_id = "pipelex/gemini-2.5-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 1.25, output = 10.0 } - -["gemini-2.5-flash"] -model_id = "pipelex/gemini-2.5-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.30, output = 2.50 } - -["gemini-2.5-flash-lite"] -model_id = "pipelex/gemini-2.5-flash-lite" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.10, output = 0.40 } - -["gemini-3.0-pro"] -model_id = "pipelex/gemini-3.0-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 2, output = 12.0 } - -# --- XAI LLMs -------------------------------------------------------------- - -[grok-3] -model_id = "grok-3" -inputs = ["text"] -outputs = ["text"] -costs = { input = 3, output = 15 } - -[grok-3-mini] -model_id = "grok-3-mini" -inputs = ["text"] -outputs = ["text"] -costs = { input = 0.3, output = 0.5 } - -################################################################################ -# OCR and IMAGE GENERATION MODELS -################################################################################ - -# We are still working in giving you acces to OCR and image generation models -# and to the best models from Mistral through the Pipelex Inference backend. 
diff --git a/.pipelex/inference/backends/portkey.toml b/.pipelex/inference/backends/portkey.toml deleted file mode 100644 index d91a1da..0000000 --- a/.pipelex/inference/backends/portkey.toml +++ /dev/null @@ -1,307 +0,0 @@ -################################################################################ -# Portkey Configuration -################################################################################ -# -# This file defines the model specifications for the Portkey backend. -# It contains model definitions for various AI models. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-4.1"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "portkey_completions" -structure_method = "instructor/openai_tools" -prompting_target = "anthropic" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- OpenAI LLMs -------------------------------------------------------------- -[gpt-4o-mini] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.6 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" -x-portkey-provider = "@openai" - -[gpt-4o] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2.5, output = 10.0 } -sdk = "portkey_responses" -structure_method = 
"instructor/openai_responses_tools" -thinking_mode = "none" -x-portkey-provider = "@openai" - -["gpt-4.1-nano"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.1, output = 0.4 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" -x-portkey-provider = "@openai" - -["gpt-4.1-mini"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.4, output = 1.6 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" -x-portkey-provider = "@openai" - -["gpt-4.1"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "none" -x-portkey-provider = "@openai" - -[o1] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 15.0, output = 60.0 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[o3-mini] -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.1, output = 4.4 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[o3] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 2, output = 8 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[o4-mini] -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.1, output = 4.4 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = 
"instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[gpt-5-nano] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.4 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[gpt-5-mini] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.25, output = 2.0 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -[gpt-5] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -["gpt-5.1"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -["gpt-5.1-codex"] -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 1.25, output = 10.0 } -valued_constraints = { fixed_temperature = 1 } -sdk = "portkey_responses" -structure_method = "instructor/openai_responses_tools" -thinking_mode = "manual" -x-portkey-provider = "@openai" - -# --- Claude LLMs -------------------------------------------------------------- -[claude-3-haiku] -model_id = "claude-3-haiku-20240307" -max_tokens = 4096 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 0.25, output = 1.25 } -thinking_mode = "none" 
-x-portkey-provider = "@anthropic" - -["claude-3.7-sonnet"] -model_id = "claude-3-7-sonnet-20250219" -max_tokens = 8192 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -[claude-4-sonnet] -model_id = "claude-sonnet-4-20250514" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -[claude-4-opus] -model_id = "claude-opus-4-20250514" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -["claude-4.1-opus"] -model_id = "claude-opus-4-1-20250805" -max_tokens = 32000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -["claude-4.5-sonnet"] -model_id = "claude-sonnet-4-5-20250929" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 3.0, output = 15.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -["claude-4.5-haiku"] -model_id = "claude-haiku-4-5-20251001" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 1.0, output = 5.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - -["claude-4.5-opus"] -model_id = "claude-opus-4-5-20251101" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } -thinking_mode = "manual" -x-portkey-provider = "@anthropic" - 
-["claude-4.6-opus"] -model_id = "claude-opus-4-6" -max_tokens = 64000 -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 5.0, output = 25.0 } -thinking_mode = "adaptive" -x-portkey-provider = "@anthropic" - -# --- Gemini LLMs -------------------------------------------------------------- -["gemini-2.5-pro"] -model_id = "gemini-2.5-pro" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 1.25, output = 10.0 } -thinking_mode = "manual" -prompting_target = "gemini" -x-portkey-provider = "@google" - -["gemini-2.5-flash"] -model_id = "gemini-2.5-flash" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.30, output = 2.50 } -thinking_mode = "manual" -prompting_target = "gemini" -x-portkey-provider = "@google" - -["gemini-2.5-flash-lite"] -model_id = "gemini-2.5-flash-lite" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -costs = { input = 0.10, output = 0.40 } -thinking_mode = "manual" -prompting_target = "gemini" -x-portkey-provider = "@google" - -["gemini-3.0-pro"] -model_id = "gemini-3-pro-preview" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 2, output = 12.0 } -thinking_mode = "adaptive" -prompting_target = "gemini" -x-portkey-provider = "@google" - -["gemini-3.0-flash-preview"] -model_id = "gemini-3-flash-preview" -inputs = ["text", "images", "pdf"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.5, output = 3.0 } -thinking_mode = "adaptive" -prompting_target = "gemini" -x-portkey-provider = "@google" diff --git a/.pipelex/inference/backends/scaleway.toml b/.pipelex/inference/backends/scaleway.toml deleted file mode 100644 index 75d6a05..0000000 --- a/.pipelex/inference/backends/scaleway.toml +++ /dev/null @@ -1,68 +0,0 @@ 
-################################################################################ -# Groq Backend Configuration -################################################################################ -# -# This file defines the model specifications for Scaleway models. -# It contains model definitions for various LLM models accessible through -# the Groq API, including text-only and vision-capable models. -# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots or slashes must be quoted (e.g., ["meta-llama/llama-4-scout"]) -# - Model costs are in USD per million tokens (input/output) -# - Vision models support max 5 images per request, 33MP max resolution -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -structure_method = "instructor/json" -thinking_mode = "none" - -# --- DeepSeek Models ---------------------------------------------------------- -[deepseek-r1-distill-llama-70b] -max_tokens = 32768 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.90, output = 0.90 } - -# --- Meta Llama 3.x Series ---------------------------------------------------- -["llama-3.1-8b-instruct"] -max_tokens = 131072 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.2, output = 0.2 } - -["llama-3.3-70b-instruct"] -max_tokens = 32768 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.90, output = 0.90 } - -# --- OpenAI GPT-OSS Models ---------------------------------------------------- -[gpt-oss-120b] -max_tokens = 65536 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 
0.15, output = 0.60 } - -# --- Qwen 3 ------------------------------------------------------------------- -[qwen3-235b-a22b-instruct-2507] -max_tokens = 40960 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.75, output = 2.25 } - -[qwen3-coder-30b-a3b-instruct] -max_tokens = 40960 -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.20, output = 0.80 } diff --git a/.pipelex/inference/backends/vertexai.toml b/.pipelex/inference/backends/vertexai.toml deleted file mode 100644 index fe89dc8..0000000 --- a/.pipelex/inference/backends/vertexai.toml +++ /dev/null @@ -1,47 +0,0 @@ -################################################################################ -# VertexAI Backend Configuration -################################################################################ -# -# This file defines the model specifications for Google VertexAI models. -# It contains model definitions for Gemini language models -# accessible through the Google VertexAI API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gemini-2.5-pro"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -prompting_target = "gemini" -structure_method = "instructor/vertexai_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Gemini 2.5 Series -------------------------------------------------------- -["gemini-2.5-pro"] -model_id = "google/gemini-2.5-pro" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 1.25, output = 10.0 } - -["gemini-2.5-flash"] -model_id = "google/gemini-2.5-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.30, output = 2.50 } diff --git a/.pipelex/inference/backends/xai.toml b/.pipelex/inference/backends/xai.toml deleted file mode 100644 index a6348f2..0000000 --- a/.pipelex/inference/backends/xai.toml +++ /dev/null @@ -1,57 +0,0 @@ -################################################################################ -# XAI Backend Configuration -################################################################################ -# -# This file defines the model specifications for XAI (formerly Twitter AI) models. -# It contains model definitions for Grok language models -# accessible through the XAI API. 
-# -# Configuration structure: -# - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["grok-3"]) -# - Model costs are in USD per million tokens (input/output) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -################################################################################ - -################################################################################ -# MODEL DEFAULTS -################################################################################ - -[defaults] -model_type = "llm" -sdk = "openai" -prompting_target = "anthropic" -structure_method = "instructor/openai_tools" -thinking_mode = "none" - -################################################################################ -# LANGUAGE MODELS -################################################################################ - -# --- Grok 3 Series ------------------------------------------------------------ -[grok-3] -model_id = "grok-3" -inputs = ["text"] -outputs = ["text"] -costs = { input = 3, output = 15 } - -[grok-3-mini] -model_id = "grok-3-mini" -inputs = ["text"] -outputs = ["text"] -costs = { input = 0.3, output = 0.5 } - -[grok-3-fast] -model_id = "grok-3-fast-latest" -inputs = ["text"] -outputs = ["text"] -costs = { input = 5, output = 25 } - -[grok-3-mini-fast] -model_id = "grok-3-mini-fast-latest" -inputs = ["text"] -outputs = ["text"] -costs = { input = 0.15, output = 4 } diff --git a/.pipelex/inference/deck/1_llm_deck.toml b/.pipelex/inference/deck/1_llm_deck.toml deleted file mode 100644 index 649576b..0000000 --- a/.pipelex/inference/deck/1_llm_deck.toml +++ /dev/null @@ -1,87 +0,0 @@ -#################################################################################################### -# Pipelex Model Deck - LLM Configuration -#################################################################################################### -# -# This file defines model 
defaults, aliases, and presets for LLMs -# -# Model Reference Syntax: -# - Preset: $preset_name or preset:preset_name -# - Alias: @alias_name or alias:alias_name -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# LLM Default Choices -#################################################################################################### - -[llm.choice_defaults] -default_temperature = 0.5 -for_text = "@default-general" -for_object = "@default-general" - -#################################################################################################### -# LLM Aliases -#################################################################################################### - -[llm.aliases] -best-gpt = "gpt-5.2" -best-claude = "claude-4.5-opus" -best-gemini = "gemini-3.0-pro" -best-mistral = "mistral-large" - -# Default aliases (first choice from waterfalls) -default-general = "claude-4.5-sonnet" -default-premium = "claude-4.6-opus" -default-premium-vision = "claude-4.6-opus" -default-premium-structured = "claude-4.6-opus" -default-large-context-code = "gemini-3.0-pro" -default-large-context-text = "gemini-2.5-flash" -default-small = "gpt-4o-mini" -default-small-structured = "gpt-4o-mini" -default-small-vision = "gemini-2.5-flash-lite" -default-small-creative = "gemini-2.5-flash-lite" - -#################################################################################################### -# LLM Presets -#################################################################################################### - -[llm.presets] - -# Writing -writing-factual = { model = "@default-premium", temperature = 0.1, description = "Factual writing with high accuracy" } -writing-creative = { model = "@default-premium", temperature = 0.9, description = 
"Creative writing with high variability" } - -# Retrieval -retrieval = { model = "@default-large-context-text", temperature = 0.1, description = "Data retrieval from large text corpora" } - -# Engineering -engineering-structured = { model = "@default-premium-structured", temperature = 0.2, description = "Structured engineering output (JSON, schemas)" } -engineering-code = { model = "@default-premium", temperature = 0.1, description = "Code generation and analysis" } -engineering-codebase-analysis = { model = "@best-gemini", temperature = 0.1, description = "Large codebase analysis" } - -# Vision -vision = { model = "@default-premium-vision", temperature = 0.5, description = "Vision language model for understanding images" } -vision-cheap = { model = "@default-small-vision", temperature = 0.5, description = "Budget vision model for simple image tasks" } -vision-diagram = { model = "@default-premium-vision", temperature = 0.3, description = "Diagram and chart interpretation" } -vision-table = { model = "@default-premium-vision", temperature = 0.3, description = "Table extraction from images" } - -# Image generation prompting -img-gen-prompting = { model = "@default-premium", temperature = 0.5, description = "Crafting image generation prompts" } -img-gen-prompting-cheap = { model = "@default-small", temperature = 0.5, description = "Budget image prompt generation" } - -# Reasoning -deep-analysis = { model = "@default-premium", temperature = 0.1, reasoning_effort = "high", description = "Deep reasoning and analysis" } -quick-reasoning = { model = "@default-premium", temperature = 0.3, reasoning_effort = "low", description = "Quick reasoning for simple tasks" } - -# Builder (isolated presets for the pipeline builder) -pipe-builder-engineering = { model = "claude-4.6-opus", temperature = 0.2, description = "Builder: structured engineering output" } -pipe-builder-img-gen-prompting = { model = "claude-4.5-sonnet", temperature = 0.7, description = "Builder: crafting image 
generation prompts" } - -# Testing -testing-text = { model = "@default-small", temperature = 0.5, description = "Testing preset for text generation" } -testing-structured = { model = "@default-small-structured", temperature = 0.1, description = "Testing preset for structured output" } -testing-vision = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for vision tasks" } -testing-vision-structured = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for structured vision output" } diff --git a/.pipelex/inference/deck/2_img_gen_deck.toml b/.pipelex/inference/deck/2_img_gen_deck.toml deleted file mode 100644 index 400b492..0000000 --- a/.pipelex/inference/deck/2_img_gen_deck.toml +++ /dev/null @@ -1,53 +0,0 @@ -#################################################################################################### -# Pipelex Model Deck - Image Generation Configuration -#################################################################################################### -# -# This file defines model aliases and presets for image generation models -# -# Model Reference Syntax: -# - Preset: $preset_name or preset:preset_name -# - Alias: @alias_name or alias:alias_name -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# Image Generation Default Choices -#################################################################################################### - -[img_gen] -default_quality = "medium" -choice_default = "$gen-image" - -#################################################################################################### -# Image Generation Aliases -#################################################################################################### - 
-[img_gen.aliases] -best-gpt = "gpt-image-1.5" -best-gemini = "nano-banana-pro" -best-blackforestlabs = "flux-2-pro" - -default-general = "flux-2-pro" -default-premium = "nano-banana-pro" -default-small = "gpt-image-1-mini" - -#################################################################################################### -# Image Generation Presets -#################################################################################################### - -[img_gen.presets] - -# General purpose -gen-image = { model = "@default-general", quality = "medium", description = "Standard image generation" } -gen-image-fast = { model = "@default-small", quality = "low", description = "Fast image generation with lower quality" } -gen-image-high-quality = { model = "@default-premium", quality = "high", description = "High-quality image generation" } - -# Testing -gen-image-testing = { model = "@default-small", quality = "low", description = "Testing preset for image generation" } -gen-image-testing-img2img = { model = "nano-banana-pro", description = "Testing preset for image-to-image" } -synthesize-photo = { model = "@default-small", quality = "low", description = "Synthesize realistic photos for testing" } -synthesize-ui = { model = "nano-banana-pro", description = "Synthesize UI screenshots for testing" } -synthesize-chart = { model = "nano-banana-pro", description = "Synthesize charts and graphs for testing" } diff --git a/.pipelex/inference/deck/3_extract_deck.toml b/.pipelex/inference/deck/3_extract_deck.toml deleted file mode 100644 index 2e5e5af..0000000 --- a/.pipelex/inference/deck/3_extract_deck.toml +++ /dev/null @@ -1,42 +0,0 @@ -#################################################################################################### -# Pipelex Model Deck - Base Configuration -#################################################################################################### -# -# This file defines model aliases and presets for Document extraction models, including 
-# extraction of text and images from documents and OCR and text extraction from images. -# -# Model Reference Syntax: -# - Preset: $preset_name or preset:preset_name -# - Alias: @alias_name or alias:alias_name -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# Document Extraction Default Choices -#################################################################################################### - -[extract] -choice_default = "@default-extract-document" - -#################################################################################################### -# Aliases -#################################################################################################### - -[extract.aliases] -default-premium = "azure-document-intelligence" -default-extract-document = "mistral-document-ai-2505" -default-extract-image = "mistral-document-ai-2505" -default-text-from-pdf = "pypdfium2-extract-pdf" -default-no-inference = "pypdfium2-extract-pdf" - -#################################################################################################### -# Extract Presets -#################################################################################################### - -[extract.presets] - -# Testing -extract-testing = { model = "@default-extract-document", max_nb_images = 5, image_min_size = 50, description = "Testing preset for document extraction" } diff --git a/.pipelex/inference/routing_profiles.toml b/.pipelex/inference/routing_profiles.toml deleted file mode 100644 index eb9aae0..0000000 --- a/.pipelex/inference/routing_profiles.toml +++ /dev/null @@ -1,149 +0,0 @@ -# Routing profile library - Routes models to their backends -# ========================================================================================= -# This 
file controls which backend serves which model. -# Simply change the 'active' field to switch profiles, -# or you can add your own custom profiles. -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# ========================================================================================= - -# Note: The internal backend (software-only models) is always available regardless of -# which routing profile you select. See the documentation for details. - -# Which profile to use (change this to switch routing) -active = "all_pipelex_gateway" - -# We recommend using the "all_pipelex_gateway" profile to get a head start with all models. -# To use the Pipelex Gateway backend: -# 1. Get your API key at https://app.pipelex.com (free credits included) -# 2. Add it to your .env file: PIPELEX_GATEWAY_API_KEY=your-key-here -# 3. Run `pipelex init` and accept the Gateway terms of service - -# ========================================================================================= -# Routing Profiles -# ========================================================================================= - -[profiles.all_pipelex_gateway] -description = "Use Pipelex Gateway for all its supported models" -default = "pipelex_gateway" - -[profiles.all_anthropic] -description = "Use Anthropic backend for all its supported models" -default = "anthropic" - -[profiles.all_azure_openai] -description = "Use Azure OpenAI backend for all its supported models" -default = "azure_openai" - -[profiles.all_bedrock] -description = "Use Bedrock backend for all its supported models" -default = "bedrock" - -[profiles.all_blackboxai] -description = "Use BlackBoxAI backend for all its supported models" -default = "blackboxai" - -[profiles.all_fal] -description = "Use FAL backend for all its supported models" -default = "fal" - -[profiles.all_google] -description = "Use Google GenAI backend for all its supported models" -default = "google" - -[profiles.all_groq] -description = 
"Use groq backend for all its supported models" -default = "groq" - -[profiles.all_huggingface] -description = "Use HuggingFace backend for all its supported models" -default = "huggingface" - -[profiles.all_mistral] -description = "Use Mistral backend for all its supported models" -default = "mistral" - -[profiles.all_ollama] -description = "Use Ollama backend for all its supported models" -default = "ollama" - -[profiles.all_openai] -description = "Use OpenAI backend for all its supported models" -default = "openai" - -[profiles.all_portkey] -description = "Use Portkey backend for all its supported models" -default = "portkey" - -[profiles.all_scaleway] -description = "Use Scaleway backend for all its supported models" -default = "scaleway" - -[profiles.all_vertexai] -description = "Use Vertex AI backend for all its supported models" -default = "vertexai" - -[profiles.all_xai] -description = "Use xAI backend for all its supported models" -default = "xai" - -[profiles.all_internal] -description = "Use internal backend for all its supported models" -default = "internal" - -# ========================================================================================= -# Custom Profiles -# ========================================================================================= -# Add your own profiles below following the same pattern: -# -# [profiles.your_profile_name] -# description = "What this profile does" -# default = "backend-name" # Where to route models by default -# [profiles.your_profile_name.routes] -# "model-pattern" = "backend-name" # Specific routing rules -# -# Pattern matching supports: -# - Exact names: "gpt-4o-mini" -# - Wildcards: "claude-*" (matches all models starting with claude-) -# - Partial wildcards: "*-sonnet" (matches all sonnet variants) - -# ========================================================================================= -# Example of a custom routing profile with mostly pattern matching and one specific model -# 
========================================================================================= -[profiles.example_routing_using_patterns] -description = "Example routing profile using patterns" -default = "pipelex_gateway" - -[profiles.example_routing_using_patterns.routes] -# Pattern matching: "model-pattern" = "backend-name" -"gpt-*" = "azure_openai" -"claude-*" = "bedrock" -"gemini-*" = "google" -"grok-*" = "xai" -"*-sdxl" = "fal" -"flux-*" = "fal" -"gpt-image-1" = "openai" - -# ========================================================================================= -# Example of a custom routing profile with specific model matching -# ========================================================================================= - -[profiles.example_routing_using_specific_models] -description = "Example routing profile using specific models" - -[profiles.example_routing_using_specific_models.routes] -"gpt-5-nano" = "pipelex_gateway" -"gpt-4o-mini" = "blackboxai" -"gpt-5-mini" = "openai" -"gpt-5-chat" = "azure_openai" - -"claude-4-sonnet" = "pipelex_gateway" -"claude-3.7-sonnet" = "blackboxai" - -"gemini-2.5-flash-lite" = "pipelex_gateway" -"gemini-2.5-flash" = "blackboxai" -"gemini-2.5-pro" = "vertexai" - -"grok-3" = "pipelex_gateway" -"grok-3-mini" = "xai" diff --git a/.pipelex/pipelex.toml b/.pipelex/pipelex.toml deleted file mode 100644 index ed8859f..0000000 --- a/.pipelex/pipelex.toml +++ /dev/null @@ -1,192 +0,0 @@ -#################################################################################################### -# Pipelex Configuration File -#################################################################################################### -# -# This configuration file is copied to client projects' .pipelex/ directory when running: -# `pipelex init config` -# -# Purpose: -# - This file allows you to override Pipelex's default settings for specific projects -# - All values below are set to their defaults - modify them as needed -# - The values here will 
override the defaults from the Pipelex package -# -# Finding Available Settings: -# - See the full default configuration in: pipelex/pipelex.toml (in the Pipelex package) -# - See the configuration structure classes in: pipelex/config.py and pipelex/cogt/config_cogt.py -# -# Common customizations include: -# - Logging levels and behavior -# - Excluded directories for scanning -# - LLM prompt dumping for debugging -# - Feature flags -# - Observer and reporting output directories -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# Pipeline Execution Config -#################################################################################################### - -[pipelex.pipeline_execution_config] -# Set to false to disable conversion of incoming data URLs to pipelex-storage:// URIs -is_normalize_data_urls_to_storage = true -# Set to false to disable generation of execution graphs -is_generate_graph = true - -[pipelex.pipeline_execution_config.graph_config.data_inclusion] -# Control what data is included in graph outputs -stuff_json_content = true -stuff_text_content = true -stuff_html_content = true -error_stack_traces = true - -[pipelex.pipeline_execution_config.graph_config.graphs_inclusion] -# Control which graph outputs are generated -graphspec_json = true -mermaidflow_mmd = true -mermaidflow_html = true -reactflow_viewspec = true -reactflow_html = true - -[pipelex.pipeline_execution_config.graph_config.reactflow_config] -# Customize ReactFlow graph rendering -edge_type = "bezier" # Options: "bezier", "smoothstep", "step", "straight" -nodesep = 50 # Horizontal spacing between nodes -ranksep = 30 # Vertical spacing between ranks/levels -initial_zoom = 1.0 # Initial zoom level (1.0 = 100%) -pan_to_top = true # Pan to show 
top of graph on load - -#################################################################################################### -# Storage Config -#################################################################################################### - -[pipelex.storage_config] -# Storage method: "local", "in_memory" (default), "s3", or "gcp" -method = "in_memory" -# Whether to fetch remote HTTP URLs and store them locally -is_fetch_remote_content_enabled = true -# Whether to upload local file paths to storage and replace with pipelex-storage:// URIs -is_upload_local_content_enabled = true - -[pipelex.storage_config.local] -# Local storage settings -uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" -local_storage_path = ".pipelex/storage" - -[pipelex.storage_config.in_memory] -# In-memory storage settings -uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" - -[pipelex.storage_config.s3] -# AWS S3 storage settings (requires boto3: `pip install pipelex[s3]`) -uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" -bucket_name = "" -region = "" -signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs - -[pipelex.storage_config.gcp] -# Google Cloud Storage settings (requires google-cloud-storage: `pip install pipelex[gcp-storage]`) -uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" -bucket_name = "" -project_id = "" -signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs - -#################################################################################################### -# Scan Config -#################################################################################################### - -[pipelex.scan_config] -# Directories to exclude when scanning for pipeline files -excluded_dirs = [ - ".venv", - "venv", - "env", - ".env", - "virtualenv", - ".virtualenv", - ".git", - "__pycache__", - ".pytest_cache", - ".mypy_cache", - ".ruff_cache", - "node_modules", - "results", -] - 
-#################################################################################################### -# Builder Config -#################################################################################################### - -[pipelex.builder_config] -# Settings for generated pipelines -default_output_dir = "." -default_bundle_file_name = "bundle" -default_directory_base_name = "pipeline" - -#################################################################################################### -# Log Config -#################################################################################################### - -[pipelex.log_config] -# Default logging level: "DEBUG", "INFO", "WARNING", "ERROR" -default_log_level = "INFO" -# Log output target: "stdout" or "stderr" -console_log_target = "stdout" -console_print_target = "stdout" - -[pipelex.log_config.package_log_levels] -# Log levels for specific packages (use "-" instead of "." in package names) -pipelex = "INFO" - -#################################################################################################### -# Feature Config -#################################################################################################### - -[pipelex.feature_config] -# WIP/Experimental feature flags -is_reporting_enabled = true - -#################################################################################################### -# Reporting Config -#################################################################################################### - -[pipelex.reporting_config] -# Cost reporting settings -is_log_costs_to_console = false -is_generate_cost_report_file_enabled = false -cost_report_dir_path = "reports" -cost_report_base_name = "cost_report" -cost_report_extension = "csv" -cost_report_unit_scale = 1.0 - -#################################################################################################### -# Cogt (Cognitive Tools) Config 
-#################################################################################################### - -[cogt.model_deck_config] -# Model fallback behavior: if true, uses secondary model options when primary fails -is_model_fallback_enabled = true -# Reaction to missing presets: "raise", "log", or "none" -missing_presets_reaction = "log" - -[cogt.tenacity_config] -# Retry behavior for API calls -max_retries = 50 # Maximum number of retry attempts before giving up -wait_multiplier = 0.2 # Multiplier applied to the wait time between retries (in seconds) -wait_max = 20 # Maximum wait time between retries (in seconds) -wait_exp_base = 1.3 # Base for exponential backoff calculation - -[cogt.llm_config] -# Enable dumping of LLM inputs/outputs for debugging -is_dump_text_prompts_enabled = false -is_dump_response_text_enabled = false - -[cogt.llm_config.instructor_config] -# Enable dumping of structured content generation details for debugging -is_dump_kwargs_enabled = false -is_dump_response_enabled = false -is_dump_error_enabled = false - diff --git a/.pipelex/pipelex_service.toml b/.pipelex/pipelex_service.toml deleted file mode 100644 index afe39a2..0000000 --- a/.pipelex/pipelex_service.toml +++ /dev/null @@ -1,19 +0,0 @@ -#################################################################################################### -# Pipelex Service Configuration -#################################################################################################### -# -# This file stores settings related to Pipelex managed services. -# Currently used for Pipelex Gateway terms acceptance. -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -[agreement] -# Set to true after accepting Pipelex terms of service. -terms_accepted = true - -# Note: when using pipelex_gateway, telemetry is enabled to monitor service usage. 
-# We collect technical data (model, pipe type...) and quantitative data (token counts...) -# but NOT your content, pipe codes, or output class names. diff --git a/.pipelex/telemetry.toml b/.pipelex/telemetry.toml deleted file mode 100644 index eb2c537..0000000 --- a/.pipelex/telemetry.toml +++ /dev/null @@ -1,92 +0,0 @@ -#################################################################################################### -# Custom Telemetry Configuration -#################################################################################################### -# -# This file controls YOUR custom telemetry settings for observability and analytics. -# Configure your own PostHog, Langfuse, or OTLP-compatible backends here. -# -# NOTE: When using Pipelex Gateway, identified telemetry is automatically enabled -# (tied to your Gateway API key, hashed for security). This allows us to monitor -# service quality, enforce fair usage, and provide you with better support. -# Gateway telemetry operates independently from your settings below - you can have both! -# -# To disable all telemetry, set the DO_NOT_TRACK=1 environment variable. 
-# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -# ────────────────────────────────────────────────────────────────────────────── -# PostHog Configuration (Event tracking + AI span tracing) -# ────────────────────────────────────────────────────────────────────────────── - -[custom_posthog] -mode = "off" # Values: "off" | "anonymous" | "identified" -# user_id = "your_user_id" # Required when mode = "identified" -endpoint = "${POSTHOG_ENDPOINT}" # Default: https://us.i.posthog.com (or https://eu.i.posthog.com for EU) -api_key = "${POSTHOG_API_KEY}" # Get from PostHog Project Settings -geoip = true # Enable GeoIP lookup -debug = false # Enable PostHog debug mode -redact_properties = [ - "prompt", - "system_prompt", - "response", - "file_path", - "url", -] # Event properties to redact - -# AI span tracing to YOUR PostHog (does NOT affect Langfuse/OTLP - they receive full data) -[custom_posthog.tracing] -enabled = false # Send AI spans to your PostHog - -# Privacy controls for data sent to YOUR PostHog only -[custom_posthog.tracing.capture] -content = false # Capture prompt/completion content -# content_max_length = 1000 # Max length for captured content (omit for unlimited) -pipe_codes = false # Include pipe codes in span names/attributes -output_class_names = false # Include output class names in span names/attributes - -# ────────────────────────────────────────────────────────────────────────────── -# Portkey SDK Configuration -# ────────────────────────────────────────────────────────────────────────────── - -[custom_portkey] -force_debug_enabled = false -force_tracing_enabled = false - -# ────────────────────────────────────────────────────────────────────────────── -# Langfuse Integration -# Note: Langfuse receives FULL span data (no redaction) -# 
────────────────────────────────────────────────────────────────────────────── - -[langfuse] -enabled = false -# endpoint = "https://cloud.langfuse.com" # Override for self-hosted Langfuse -# public_key = "${LANGFUSE_PUBLIC_KEY}" # Langfuse public key -# secret_key = "${LANGFUSE_SECRET_KEY}" # Langfuse secret key - -# ────────────────────────────────────────────────────────────────────────────── -# Additional OTLP Exporters (array for multiple) -# Note: OTLP exporters receive FULL span data (no redaction) -# ────────────────────────────────────────────────────────────────────────────── - -# [[otlp]] -# name = "my-collector" # Identifier for logging -# endpoint = "https://..." # OTLP endpoint URL -# headers = { Authorization = "Bearer ${OTLP_AUTH_TOKEN}" } # Headers for OTLP export - -# ────────────────────────────────────────────────────────────────────────────── -# Custom Telemetry Allowed Modes -# Controls which integration modes can use custom telemetry settings above. -# ────────────────────────────────────────────────────────────────────────────── - -[telemetry_allowed_modes] -ci = false # CI environments don't use custom telemetry -cli = true # CLI usage allows custom telemetry -docker = true # Docker deployments allow custom telemetry -fastapi = true # FastAPI integrations allow custom telemetry -mcp = true # MCP integrations allow custom telemetry -n8n = true # n8n integrations allow custom telemetry -pytest = false # Tests don't use custom telemetry -python = false # Direct Python SDK usage doesn't use custom telemetry by default diff --git a/Makefile b/Makefile index bb8426b..6d04b0d 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,8 @@ make ti - Shorthand -> test-inference make check - Shorthand -> format lint mypy make c - Shorthand -> check make cc - Shorthand -> cleanderived check +make agent-check - Shorthand -> fix-unused-imports format lint pyright mypy (for AI agents) +make agent-test - Run unit tests, silent on success, output on failure (for AI 
agents) make li - Shorthand -> lock install make check-unused-imports - Check for unused imports without fixing make fix-unused-imports - Fix unused imports with ruff @@ -93,7 +95,7 @@ export HELP test t test-quiet tq test-with-prints tp test-inference ti \ codex-tests gha-tests \ run-all-tests run-manual-trigger-gha-tests run-gha_disabled-tests \ - validate v check c cc \ + validate v check c cc agent-check agent-test \ merge-check-ruff-lint merge-check-ruff-format merge-check-mypy merge-check-pyright \ li check-unused-imports fix-unused-imports check-uv check-TODOs @@ -284,6 +286,16 @@ test-inference: env ti: test-inference @echo "> done: ti = test-inference" +agent-test: env + @echo "• Running unit tests..." + @tmpfile=$$(mktemp); \ + $(VENV_PYTEST) -m $(USUAL_PYTEST_MARKERS) -o log_level=WARNING --tb=short -q > "$$tmpfile" 2>&1; \ + exit_code=$$?; \ + if [ $$exit_code -ne 0 ]; then grep -vE '\[\s*[0-9]+%\]\s*$$' "$$tmpfile"; fi; \ + rm -f "$$tmpfile"; \ + if [ $$exit_code -eq 0 ]; then echo "• All tests passed."; fi; \ + exit $$exit_code + ############################################################################################ ############################ Linting ############################ ############################################################################################ @@ -343,6 +355,9 @@ cc: cleanderived c check: cleanderived check-unused-imports c @echo "> done: check" +agent-check: fix-unused-imports format lint pyright mypy + @echo "> done: agent-check" + v: validate @echo "> done: v = validate" diff --git a/my_project/hello_world.py b/my_project/hello_world.py index f70cd9e..3737deb 100644 --- a/my_project/hello_world.py +++ b/my_project/hello_world.py @@ -2,7 +2,7 @@ from pipelex import pretty_print from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipeline.runner import PipelexRunner async def hello_world(): @@ -10,9 +10,11 @@ async def hello_world(): This function demonstrates the 
use of a super simple Pipelex pipeline to generate text. """ # Run the pipe - pipe_output = await execute_pipeline( + runner = PipelexRunner() + response = await runner.execute_pipeline( pipe_code="hello_world", ) + pipe_output = response.pipe_output # Print the output pretty_print(pipe_output, title="Your first Pipelex output") @@ -22,6 +24,6 @@ async def hello_world(): pretty_print(generated_text, title="Generated text") -# start Pipelex -with Pipelex.make(library_dirs=["my_project"]): - asyncio.run(hello_world()) +if __name__ == "__main__": + with Pipelex.make(library_dirs=["my_project"]): + asyncio.run(hello_world()) diff --git a/pyproject.toml b/pyproject.toml index 4ede691..aa39b71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,20 +7,21 @@ license = "MIT" readme = "README.md" requires-python = ">=3.10,<3.15" classifiers = [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Operating System :: OS Independent", ] dependencies = ["pipelex[mistralai,anthropic,google,google-genai,bedrock,fal]"] [tool.uv.sources] # pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b3" } -pipelex = { path = "../_epic-mthds-4", editable = true } +pipelex = { path = "../_epic-mthds-1", editable = true } +# pipelex = { git = "https://github.com/Pipelex/pipelex.git", rev = "ba658a74dadb5889dbbec61f20ba7810d96e5d20" } [tool.setuptools] @@ -29,19 +30,19 @@ include-package-data = true [project.optional-dependencies] dev = [ - 
"boto3-stubs>=1.35.24", - "mypy>=1.11.2", - "pyright>=1.1.405", - "pytest>=9.0.1", - "pytest-sugar>=1.0.0", - "pytest_asyncio>=0.24.0", - "ruff>=0.6.8", - "types-aioboto3[bedrock,bedrock-runtime]>=13.4.0", - "types-aiofiles>=24.1.0.20240626", - "types-markdown>=3.6.0.20240316", - "types-networkx>=3.3.0.20241020", - "types-openpyxl>=3.1.5.20250306", - "types-PyYAML>=6.0.12.20250326", + "boto3-stubs>=1.35.24", + "mypy>=1.11.2", + "pyright>=1.1.405", + "pytest>=9.0.1", + "pytest-sugar>=1.0.0", + "pytest_asyncio>=0.24.0", + "ruff>=0.6.8", + "types-aioboto3[bedrock,bedrock-runtime]>=13.4.0", + "types-aiofiles>=24.1.0.20240626", + "types-markdown>=3.6.0.20240316", + "types-networkx>=3.3.0.20241020", + "types-openpyxl>=3.1.5.20250306", + "types-PyYAML>=6.0.12.20250326", ] [project.urls] @@ -164,43 +165,43 @@ typeCheckingMode = "strict" [tool.pytest] minversion = "9.0" addopts = [ - "--import-mode=importlib", - "-ra", - "-m", - "not (inference or llm or img_gen or extract or needs_output or pipelex_api)", + "--import-mode=importlib", + "-ra", + "-m", + "not (inference or llm or img_gen or extract or needs_output or pipelex_api)", ] asyncio_default_fixture_loop_scope = "session" xfail_strict = true filterwarnings = [ - "ignore:Support for class-based `config` is deprecated:DeprecationWarning", - "ignore:websockets.*is deprecated:DeprecationWarning", - "ignore:typing\\.io is deprecated:DeprecationWarning", - "ignore:typing\\.re is deprecated:DeprecationWarning", - "ignore:.*has been moved to cryptography.*", - "ignore:Use.*Types instead", + "ignore:Support for class-based `config` is deprecated:DeprecationWarning", + "ignore:websockets.*is deprecated:DeprecationWarning", + "ignore:typing\\.io is deprecated:DeprecationWarning", + "ignore:typing\\.re is deprecated:DeprecationWarning", + "ignore:.*has been moved to cryptography.*", + "ignore:Use.*Types instead", ] markers = [ - "needs_output: tests that need output to be displayed", - "inference: slow and costly due to 
inference calls", - "llm: slow and costly due to llm inference calls", - "img_gen: slow and costly due to image generation inference calls", - "extract: slow and costly due to doc extraction inference calls", - "gha_disabled: tests that should not run in GitHub Actions", - "codex_disabled: tests that should not run in Codex", - "dry_runnable: tests that can be run in dry-run mode", - "pipelex_api: tests that require access to the Pipelex API", + "needs_output: tests that need output to be displayed", + "inference: slow and costly due to inference calls", + "llm: slow and costly due to llm inference calls", + "img_gen: slow and costly due to image generation inference calls", + "extract: slow and costly due to doc extraction inference calls", + "gha_disabled: tests that should not run in GitHub Actions", + "codex_disabled: tests that should not run in Codex", + "dry_runnable: tests that can be run in dry-run mode", + "pipelex_api: tests that require access to the Pipelex API", ] [tool.ruff] exclude = [ - ".cursor", - ".git", - ".github", - ".mypy_cache", - ".ruff_cache", - ".venv", - ".vscode", - "trigger_pipeline", + ".cursor", + ".git", + ".github", + ".mypy_cache", + ".ruff_cache", + ".venv", + ".vscode", + "trigger_pipeline", ] line-length = 150 target-version = "py311" @@ -211,18 +212,18 @@ target-version = "py311" ignore = ["F401"] external = ["F401"] select = [ - "E4", - "E7", - "E9", - "F", - "A001", - "A002", - "A003", - "RUF008", - "RUF009", - "RUF012", - "RUF013", - "RUF100", - "E501", - "I", + "E4", + "E7", + "E9", + "F", + "A001", + "A002", + "A003", + "RUF008", + "RUF009", + "RUF012", + "RUF013", + "RUF100", + "E501", + "I", ] diff --git a/tests/e2e/test_my_project.py b/tests/e2e/test_my_project.py index 8838acc..d4c69c6 100644 --- a/tests/e2e/test_my_project.py +++ b/tests/e2e/test_my_project.py @@ -1,3 +1,5 @@ +import runpy + import pytest @@ -5,4 +7,4 @@ @pytest.mark.inference class TestMyProject: def test_hello_world(self): - import 
my_project.hello_world # noqa: F401 + runpy.run_path("my_project/hello_world.py", run_name="__main__") diff --git a/uv.lock b/uv.lock index f96cac2..767cb9e 100644 --- a/uv.lock +++ b/uv.lock @@ -1318,6 +1318,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/f2/08ace4142eb281c12701fc3b93a10795e4d4dc7f753911d836675050f886/msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46", size = 70868, upload-time = "2025-10-08T09:15:44.959Z" }, ] +[[package]] +name = "mthds" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backports-strenum", marker = "python_full_version < '3.11'" }, + { name = "httpx" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/1c/207fed4b05c8dbcfe6f6c0802b0c59cdc683466f02f95625475248e1ff20/mthds-0.0.2.tar.gz", hash = "sha256:273aaec6e8332f462f772c1bd58cf5baaea4a69224de2fa024b08ebe7ed005f1", size = 60229, upload-time = "2026-02-19T13:38:28.094Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/52/c831b1c6bb0a2b7ecaf21de099132eb4c1a1277569ad7ae214da86dd5583/mthds-0.0.2-py3-none-any.whl", hash = "sha256:168210f289f67cc1fd831ddc4bb46f2d65de6c48b5932406bc31459be94f312e", size = 11818, upload-time = "2026-02-19T13:38:26.447Z" }, +] + [[package]] name = "multidict" version = "6.7.0" @@ -1485,7 +1499,7 @@ dev = [ requires-dist = [ { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], editable = "../_epic-mthds-4" }, + { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], editable = "../_epic-mthds-1" }, { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, { name = "pytest", marker = "extra == 'dev'", 
specifier = ">=9.0.1" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, @@ -1966,7 +1980,7 @@ wheels = [ [[package]] name = "pipelex" version = "0.18.0b3" -source = { editable = "../_epic-mthds-4" } +source = { editable = "../_epic-mthds-1" } dependencies = [ { name = "aiofiles" }, { name = "backports-strenum", marker = "python_full_version < '3.11'" }, @@ -1977,6 +1991,7 @@ dependencies = [ { name = "json2html" }, { name = "kajson" }, { name = "markdown" }, + { name = "mthds" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "openai" }, @@ -1993,6 +2008,7 @@ dependencies = [ { name = "python-dotenv" }, { name = "pyyaml" }, { name = "rich" }, + { name = "semantic-version" }, { name = "shortuuid" }, { name = "tomli" }, { name = "tomlkit" }, @@ -2053,6 +2069,7 @@ requires-dist = [ { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.6.14" }, { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = ">=1.1.0" }, { name = "moto", extras = ["s3"], marker = "extra == 'dev'", specifier = ">=5.0.0" }, + { name = "mthds", specifier = ">=0.0.2" }, { name = "mypy", marker = "extra == 'dev'", specifier = "==1.19.1" }, { name = "networkx", specifier = ">=3.4.2" }, { name = "openai", specifier = ">=1.108.1" }, @@ -2061,6 +2078,7 @@ requires-dist = [ { name = "opentelemetry-sdk" }, { name = "opentelemetry-semantic-conventions" }, { name = "pillow", specifier = ">=11.2.1" }, + { name = "pipelex-tools", marker = "extra == 'dev'", specifier = ">=0.1.1" }, { name = "polyfactory", specifier = ">=2.21.0" }, { name = "portkey-ai", specifier = ">=2.1.0" }, { name = "posthog", specifier = ">=6.7.0" }, @@ -2078,6 +2096,7 @@ requires-dist = [ { name = "pyyaml", specifier = ">=6.0.2" }, { name = 
"rich", specifier = ">=13.8.1" }, { name = "ruff", marker = "extra == 'dev'", specifier = "==0.14.13" }, + { name = "semantic-version", specifier = ">=2.10.0" }, { name = "shortuuid", specifier = ">=1.0.13" }, { name = "tomli", specifier = ">=2.3.0" }, { name = "tomlkit", specifier = ">=0.13.2" }, @@ -2722,6 +2741,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, ] +[[package]] +name = "semantic-version" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, +] + [[package]] name = "shellingham" version = "1.5.4" From aea7090f3f146174a27d549a82a6a0065d2529ce Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sat, 21 Feb 2026 18:44:53 +0100 Subject: [PATCH 3/5] Add CLAUDE.md, quiet Makefile env targets, plxt tooling, and MTHDS formatting - Add CLAUDE.md with project commands and structure for AI agents - Make check-uv/env targets quiet (only print when action needed), add verbose variants for setup commands (install, lock, update) - Add plxt format/lint targets and merge checks for .mthds/.toml files - Add SHELL/SHELLFLAGS for safer pipefail behavior - Move pipelex-tools from dev-only to main 
dependency - Apply MTHDS standard formatting to hello_world.mthds Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 39 +++++++++++++++++++ Makefile | 73 +++++++++++++++++++++++++++++++----- my_project/hello_world.mthds | 3 +- uv.lock | 18 ++++++++- 4 files changed, 120 insertions(+), 13 deletions(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2f51801 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,39 @@ +# Pipelex Starter Project + +## Commands + +### Linting & Type Checking + +After making code changes, always run: +```bash +make agent-check +``` +This runs: fix-unused-imports, ruff format, ruff lint, plxt format/lint (`.mthds`/`.toml`), pyright, mypy. + +### Running Tests + +```bash +make agent-test +``` +Silent on success, full output on failure. Excludes inference/LLM markers by default. + +Run specific tests (local only): `make tp TEST=test_function_name` + +### Other Useful Targets + +- `make install` - Create venv + install all deps (uses uv) +- `make li` - Lock + install +- `make cleanderived` - Remove caches/compiled files (useful when linters get confused) +- `make validate` / `make v` - Run pipelex validate --all +- `make tb` - Quick boot test +- `make fui` - Fix unused imports only +- `make plxt-format` - Format `.mthds`/`.toml` files with plxt +- `make plxt-lint` - Lint `.mthds`/`.toml` files with plxt + +## Project Structure + +- Package: `my_project/` (Python 3.10+, target 3.11) +- Tests: `tests/` (e2e, integration, test_pipelines) +- Dependency manager: uv (>=0.7.2) +- Pipelex dependency: `pipelex` package from PyPI (see pyproject.toml) +- `.mthds` files: Pipelex method definition files in `my_project/` diff --git a/Makefile b/Makefile index 6d04b0d..d3e5055 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +SHELL := /bin/bash +.SHELLFLAGS := -o pipefail -c + ifeq ($(wildcard .env),.env) include .env export @@ -13,6 +16,7 @@ VENV_RUFF := $(VIRTUAL_ENV)/bin/ruff VENV_PYRIGHT := 
$(VIRTUAL_ENV)/bin/pyright VENV_MYPY := $(VIRTUAL_ENV)/bin/mypy VENV_PIPELEX := $(VIRTUAL_ENV)/bin/pipelex +VENV_PLXT := RUST_LOG=warn "$(VIRTUAL_ENV)/bin/plxt" UV_MIN_VERSION = $(shell grep -m1 'required-version' pyproject.toml | sed -E 's/.*= *"([^<>=, ]+).*/\1/') @@ -48,8 +52,12 @@ make er - Shorthand -> export-requirements make erd - Shorthand -> export-requirements-dev make validate - Run the setup sequence to validate the config and libraries -make format - format with ruff format -make lint - lint with ruff check +make format - Format all (ruff-format + plxt-format) +make lint - Lint all (ruff-lint + plxt-lint) +make ruff-format - Format Python with ruff +make ruff-lint - Lint Python with ruff +make plxt-format - Format .mthds/.toml with plxt +make plxt-lint - Lint .mthds/.toml with plxt make pyright - Check types with pyright make mypy - Check types with mypy @@ -60,6 +68,8 @@ make reinstall - Reinstall dependencies make merge-check-ruff-lint - Run ruff merge check without updating files make merge-check-ruff-format - Run ruff merge check without updating files +make merge-check-plxt-format - Check .mthds/.toml formatting with plxt +make merge-check-plxt-lint - Lint .mthds/.toml with plxt make merge-check-mypy - Run mypy merge check without updating files make merge-check-pyright - Run pyright merge check without updating files @@ -88,15 +98,15 @@ endef export HELP .PHONY: \ - all help env lock install update build \ + all help env env-verbose check-uv check-uv-verbose lock install update build \ export-requirements export-requirements-dev er erd \ - format lint pyright mypy \ + format lint ruff-format ruff-lint plxt-format plxt-lint pyright mypy \ cleanderived cleanenv cleanall \ test t test-quiet tq test-with-prints tp test-inference ti \ codex-tests gha-tests \ run-all-tests run-manual-trigger-gha-tests run-gha_disabled-tests \ validate v check c cc agent-check agent-test \ - merge-check-ruff-lint merge-check-ruff-format merge-check-mypy 
merge-check-pyright \ + merge-check-ruff-lint merge-check-ruff-format merge-check-plxt-format merge-check-plxt-lint merge-check-mypy merge-check-pyright \ li check-unused-imports fix-unused-imports check-uv check-TODOs all help: @@ -107,7 +117,18 @@ all help: ### SETUP ########################################################################################## +# Quiet check-uv: only shows output if uv is missing (needs install) check-uv: + @command -v uv >/dev/null 2>&1 || { \ + echo ""; \ + echo "=== [$(PROJECT_NAME)] ===== (check-uv) ====== Ensuring uv ≥ $(UV_MIN_VERSION) =========="; \ + echo "uv not found – installing latest …"; \ + curl -LsSf https://astral.sh/uv/install.sh | sh; \ + } + @uv self update >/dev/null 2>&1 || true + +# Verbose check-uv: always shows output (for setup commands) +check-uv-verbose: $(call PRINT_TITLE,"Ensuring uv ≥ $(UV_MIN_VERSION)") @command -v uv >/dev/null 2>&1 || { \ echo "uv not found – installing latest …"; \ @@ -115,7 +136,17 @@ check-uv: } @uv self update >/dev/null 2>&1 || true +# Quiet env: only shows output if venv needs to be created env: check-uv + @if [ ! -d $(VIRTUAL_ENV) ]; then \ + echo ""; \ + echo "=== [$(PROJECT_NAME)] ===== (env) ====== Creating virtual environment ================="; \ + echo "Creating Python virtual env in \`${VIRTUAL_ENV}\`"; \ + uv venv $(VIRTUAL_ENV) --python $(PYTHON_VERSION); \ + fi + +# Verbose env: always shows output (for setup commands like install, lock, update) +env-verbose: check-uv-verbose $(call PRINT_TITLE,"Creating virtual environment") @if [ ! -d $(VIRTUAL_ENV) ]; then \ echo "Creating Python virtual env in \`${VIRTUAL_ENV}\`"; \ @@ -124,18 +155,18 @@ env: check-uv echo "Python virtual env already exists in \`${VIRTUAL_ENV}\`"; \ fi -install: env +install: env-verbose $(call PRINT_TITLE,"Installing dependencies") @. 
$(VIRTUAL_ENV)/bin/activate && \ uv sync --all-extras && \ echo "Installed dependencies in ${VIRTUAL_ENV}"; -lock: env +lock: env-verbose $(call PRINT_TITLE,"Resolving dependencies without update") @uv lock && \ echo "uv lock without update"; -update: env +update: env-verbose $(call PRINT_TITLE,"Updating all dependencies") @uv pip compile --upgrade pyproject.toml -o requirements.lock && \ uv pip install -e ".[dev]" && \ @@ -300,14 +331,28 @@ agent-test: env ############################ Linting ############################ ############################################################################################ -format: env +ruff-format: env $(call PRINT_TITLE,"Formatting with ruff") @$(VENV_RUFF) format . -lint: env +ruff-lint: env $(call PRINT_TITLE,"Linting with ruff") @$(VENV_RUFF) check . --fix +plxt-format: env + $(call PRINT_TITLE,"Formatting MTHDS/TOML with plxt") + $(VENV_PLXT) fmt + +plxt-lint: env + $(call PRINT_TITLE,"Linting MTHDS/TOML with plxt") + $(VENV_PLXT) lint + +format: ruff-format plxt-format + @echo "> done: format = ruff-format plxt-format" + +lint: ruff-lint plxt-lint + @echo "> done: lint = ruff-lint plxt-lint" + pyright: env $(call PRINT_TITLE,"Typechecking with pyright") $(VENV_PYRIGHT) --pythonpath $(VENV_PYTHON) --project pyproject.toml @@ -325,6 +370,14 @@ merge-check-ruff-format: env $(call PRINT_TITLE,"Formatting with ruff") $(VENV_RUFF) format --check . +merge-check-plxt-format: env + $(call PRINT_TITLE,"Checking MTHDS/TOML formatting with plxt") + $(VENV_PLXT) fmt --check + +merge-check-plxt-lint: env + $(call PRINT_TITLE,"Linting MTHDS/TOML with plxt") + $(VENV_PLXT) lint + merge-check-ruff-lint: env check-unused-imports $(call PRINT_TITLE,"Linting with ruff without fixing files") $(VENV_RUFF) check . 
diff --git a/my_project/hello_world.mthds b/my_project/hello_world.mthds index 504b80c..26c1d52 100644 --- a/my_project/hello_world.mthds +++ b/my_project/hello_world.mthds @@ -1,6 +1,6 @@ -domain = "quick_start" +domain = "quick_start" description = "Discovering Pipelex" [pipe] @@ -12,4 +12,3 @@ model = { model = "gpt-4o-mini", temperature = 0.9, max_tokens = "auto" } prompt = """ Write a haiku about Hello World. """ - diff --git a/uv.lock b/uv.lock index 767cb9e..4aa2b61 100644 --- a/uv.lock +++ b/uv.lock @@ -2000,6 +2000,7 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "opentelemetry-semantic-conventions" }, { name = "pillow" }, + { name = "pipelex-tools" }, { name = "polyfactory" }, { name = "portkey-ai" }, { name = "posthog" }, @@ -2078,7 +2079,7 @@ requires-dist = [ { name = "opentelemetry-sdk" }, { name = "opentelemetry-semantic-conventions" }, { name = "pillow", specifier = ">=11.2.1" }, - { name = "pipelex-tools", marker = "extra == 'dev'", specifier = ">=0.1.1" }, + { name = "pipelex-tools", specifier = ">=0.1.4" }, { name = "polyfactory", specifier = ">=2.21.0" }, { name = "portkey-ai", specifier = ">=2.1.0" }, { name = "posthog", specifier = ">=6.7.0" }, @@ -2110,6 +2111,21 @@ requires-dist = [ ] provides-extras = ["anthropic", "bedrock", "docling", "fal", "gcp-storage", "google", "google-genai", "huggingface", "mistralai", "s3", "docs", "dev"] +[[package]] +name = "pipelex-tools" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/cd/e01fda44b3228a65f3527ec7b0bd293b7f3e1bd51d9bd5c2f3e02a120610/pipelex_tools-0.1.4.tar.gz", hash = "sha256:13d0cfd609d239fe900dc2088959efe02246ddebf5d129d41ac7ab14237e4858", size = 141659, upload-time = "2026-02-17T09:04:45.918Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/f6/8a44b810a12fdd69fbc4e7208d210807bf925929b99a9928f85b779e7ace/pipelex_tools-0.1.4-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:28daec84cf7c0236e06bc997b1d1be6d49a7364fa48db8d50f6b12e33112972e", size = 5070670, upload-time = "2026-02-17T09:04:30.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/45/2115f126ca7ef4fa0067c6d222e46e6889a211ab17b92cf82315b9517681/pipelex_tools-0.1.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:dc795d831fba9c7e4226a3e99264dd12acae22e302a5dd4990a18cb7bc371f3e", size = 4815945, upload-time = "2026-02-17T09:04:32.728Z" }, + { url = "https://files.pythonhosted.org/packages/29/0c/1fd0fe5e2653b9a85d332e0500d6e6be1aef63de1aae42a2cb7da1f58a53/pipelex_tools-0.1.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46095fe914bf03836dafb1b2a3f8f6da11dfeef6f95e27da467f0c0663cc8aa5", size = 4955694, upload-time = "2026-02-17T09:04:35.454Z" }, + { url = "https://files.pythonhosted.org/packages/23/a4/1fbe3098eda66c510e17e041e7a35e62efaf7c05951d8c048d790adf319d/pipelex_tools-0.1.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbafaa0fdd3f66092cd00db3b7ff27251073f777c8425c510e2fee02bd71ea", size = 5201405, upload-time = "2026-02-17T09:04:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/79/a2/d6ae8bf2ae18ea1cd733f5542b9bbebba5b3a11e291da417387f26100466/pipelex_tools-0.1.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe42601510376364e26b36448175f2a9f73e8f80d07a8d069e5de36da9ee5c61", size = 5214166, upload-time = "2026-02-17T09:04:40.313Z" }, + { url = "https://files.pythonhosted.org/packages/61/59/10a2bafdddab747accfdd7f13848e9c293289205d25faaae43d3e41976e7/pipelex_tools-0.1.4-py3-none-win32.whl", hash = "sha256:7cdd7389afe54e7365cf199bfd70d6d22627bab09583a7dc8553681120b36be2", size = 4589310, upload-time = "2026-02-17T09:04:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/fd/94/e32e686613b5f985be36e78dbed9771cc89de9db573b702277ec641b0cd2/pipelex_tools-0.1.4-py3-none-win_amd64.whl", hash = 
"sha256:0d6e93a83ba60db422a04a94bc18e8d0cd67f7a911f6341a3702ca6760b685bb", size = 5388721, upload-time = "2026-02-17T09:04:44.448Z" }, +] + [[package]] name = "platformdirs" version = "4.5.0" From 7eb5b93af2cf790a2e3023b246d96633c2179a39 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 22 Feb 2026 01:32:36 +0100 Subject: [PATCH 4/5] dep feature/Chicago --- pyproject.toml | 3 +- uv.lock | 94 ++++++-------------------------------------------- 2 files changed, 12 insertions(+), 85 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aa39b71..41aa9f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,8 +20,7 @@ dependencies = ["pipelex[mistralai,anthropic,google,google-genai,bedrock,fal]"] [tool.uv.sources] # pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b3" } -pipelex = { path = "../_epic-mthds-1", editable = true } -# pipelex = { git = "https://github.com/Pipelex/pipelex.git", rev = "ba658a74dadb5889dbbec61f20ba7810d96e5d20" } +pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "feature/Chicago" } [tool.setuptools] diff --git a/uv.lock b/uv.lock index 4aa2b61..08e7db6 100644 --- a/uv.lock +++ b/uv.lock @@ -1499,7 +1499,7 @@ dev = [ requires-dist = [ { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], editable = "../_epic-mthds-1" }, + { name = "pipelex", extras = ["mistralai", "anthropic", "google", "google-genai", "bedrock", "fal"], git = "https://github.com/Pipelex/pipelex.git?branch=feature%2FChicago" }, { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.1" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, @@ -1980,7 +1980,7 @@ wheels = [ [[package]] name = "pipelex" 
version = "0.18.0b3" -source = { editable = "../_epic-mthds-1" } +source = { git = "https://github.com/Pipelex/pipelex.git?branch=feature%2FChicago#258890c9cfc6e86fd71280450fa48718abfee04c" } dependencies = [ { name = "aiofiles" }, { name = "backports-strenum", marker = "python_full_version < '3.11'" }, @@ -2039,91 +2039,19 @@ mistralai = [ { name = "mistralai" }, ] -[package.metadata] -requires-dist = [ - { name = "aioboto3", marker = "extra == 'bedrock'", specifier = ">=13.4.0" }, - { name = "aioboto3", marker = "extra == 's3'", specifier = ">=13.4.0" }, - { name = "aiofiles", specifier = ">=23.2.1" }, - { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.78.0" }, - { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, - { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, - { name = "boto3", marker = "extra == 's3'", specifier = ">=1.34.131" }, - { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, - { name = "docling", marker = "extra == 'docling'", specifier = ">=2.64.0" }, - { name = "fal-client", marker = "extra == 'fal'", specifier = ">=0.4.1" }, - { name = "filetype", specifier = ">=1.2.0" }, - { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.2.1" }, - { name = "google-cloud-storage", marker = "extra == 'gcp-storage'", specifier = ">=2.10.0" }, - { name = "google-genai", marker = "extra == 'google-genai'" }, - { name = "httpx", specifier = ">=0.23.0,<1.0.0" }, - { name = "huggingface-hub", marker = "extra == 'huggingface'", specifier = ">=0.23,<1.0.0" }, - { name = "instructor", specifier = ">=1.8.3,!=1.11.*,!=1.12.*" }, - { name = "instructor", extras = ["google-genai"], marker = "extra == 'google-genai'" }, - { name = "jinja2", specifier = ">=3.1.4" }, - { name = "json2html", specifier = ">=1.3.0" }, - { name = "kajson", specifier = "==0.3.1" }, - { name = "markdown", specifier = ">=3.6" }, - { name = "mike", 
marker = "extra == 'docs'", specifier = ">=2.1.3" }, - { name = "mistralai", marker = "extra == 'mistralai'", specifier = ">=1.12.0" }, - { name = "mkdocs", marker = "extra == 'docs'", specifier = ">=1.6.1" }, - { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = ">=0.4.0" }, - { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.6.14" }, - { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = ">=1.1.0" }, - { name = "moto", extras = ["s3"], marker = "extra == 'dev'", specifier = ">=5.0.0" }, - { name = "mthds", specifier = ">=0.0.2" }, - { name = "mypy", marker = "extra == 'dev'", specifier = "==1.19.1" }, - { name = "networkx", specifier = ">=3.4.2" }, - { name = "openai", specifier = ">=1.108.1" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "pillow", specifier = ">=11.2.1" }, - { name = "pipelex-tools", specifier = ">=0.1.4" }, - { name = "polyfactory", specifier = ">=2.21.0" }, - { name = "portkey-ai", specifier = ">=2.1.0" }, - { name = "posthog", specifier = ">=6.7.0" }, - { name = "pydantic", specifier = ">=2.10.6,<3.0.0" }, - { name = "pylint", marker = "extra == 'dev'", specifier = "==4.0.4" }, - { name = "pypdfium2", specifier = ">=4.30.0,!=4.30.1,<5.0.0" }, - { name = "pyright", marker = "extra == 'dev'", specifier = "==1.1.408" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" }, - { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, - { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, - { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0.0" }, - { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.6.1" }, - { name = "python-dotenv", specifier = ">=1.0.1" }, - { name = 
"pyyaml", specifier = ">=6.0.2" }, - { name = "rich", specifier = ">=13.8.1" }, - { name = "ruff", marker = "extra == 'dev'", specifier = "==0.14.13" }, - { name = "semantic-version", specifier = ">=2.10.0" }, - { name = "shortuuid", specifier = ">=1.0.13" }, - { name = "tomli", specifier = ">=2.3.0" }, - { name = "tomlkit", specifier = ">=0.13.2" }, - { name = "typer", specifier = ">=0.16.0" }, - { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, - { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, - { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, - { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, - { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, - { name = "typing-extensions", specifier = ">=4.13.2" }, -] -provides-extras = ["anthropic", "bedrock", "docling", "fal", "gcp-storage", "google", "google-genai", "huggingface", "mistralai", "s3", "docs", "dev"] - [[package]] name = "pipelex-tools" -version = "0.1.4" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3f/cd/e01fda44b3228a65f3527ec7b0bd293b7f3e1bd51d9bd5c2f3e02a120610/pipelex_tools-0.1.4.tar.gz", hash = "sha256:13d0cfd609d239fe900dc2088959efe02246ddebf5d129d41ac7ab14237e4858", size = 141659, upload-time = "2026-02-17T09:04:45.918Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/06/b0b78881565093fd5c662dbb128b9ed8e090403c1d6110357e1ce8ed9ea2/pipelex_tools-0.2.0.tar.gz", hash = "sha256:8dc6b866aae05dd2d59ac80f5f21ab31c5477b9e09400d86fc2a1b2143868266", size = 143877, upload-time = "2026-02-21T21:04:34.277Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/f6/8a44b810a12fdd69fbc4e7208d210807bf925929b99a9928f85b779e7ace/pipelex_tools-0.1.4-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:28daec84cf7c0236e06bc997b1d1be6d49a7364fa48db8d50f6b12e33112972e", size = 5070670, upload-time = "2026-02-17T09:04:30.155Z" }, - { url = "https://files.pythonhosted.org/packages/19/45/2115f126ca7ef4fa0067c6d222e46e6889a211ab17b92cf82315b9517681/pipelex_tools-0.1.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:dc795d831fba9c7e4226a3e99264dd12acae22e302a5dd4990a18cb7bc371f3e", size = 4815945, upload-time = "2026-02-17T09:04:32.728Z" }, - { url = "https://files.pythonhosted.org/packages/29/0c/1fd0fe5e2653b9a85d332e0500d6e6be1aef63de1aae42a2cb7da1f58a53/pipelex_tools-0.1.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46095fe914bf03836dafb1b2a3f8f6da11dfeef6f95e27da467f0c0663cc8aa5", size = 4955694, upload-time = "2026-02-17T09:04:35.454Z" }, - { url = "https://files.pythonhosted.org/packages/23/a4/1fbe3098eda66c510e17e041e7a35e62efaf7c05951d8c048d790adf319d/pipelex_tools-0.1.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbafaa0fdd3f66092cd00db3b7ff27251073f777c8425c510e2fee02bd71ea", size = 5201405, upload-time = "2026-02-17T09:04:37.868Z" }, - { url = "https://files.pythonhosted.org/packages/79/a2/d6ae8bf2ae18ea1cd733f5542b9bbebba5b3a11e291da417387f26100466/pipelex_tools-0.1.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe42601510376364e26b36448175f2a9f73e8f80d07a8d069e5de36da9ee5c61", size = 5214166, upload-time = "2026-02-17T09:04:40.313Z" }, - { url = "https://files.pythonhosted.org/packages/61/59/10a2bafdddab747accfdd7f13848e9c293289205d25faaae43d3e41976e7/pipelex_tools-0.1.4-py3-none-win32.whl", hash = "sha256:7cdd7389afe54e7365cf199bfd70d6d22627bab09583a7dc8553681120b36be2", size = 4589310, upload-time = "2026-02-17T09:04:42.186Z" }, - { url = "https://files.pythonhosted.org/packages/fd/94/e32e686613b5f985be36e78dbed9771cc89de9db573b702277ec641b0cd2/pipelex_tools-0.1.4-py3-none-win_amd64.whl", hash = 
"sha256:0d6e93a83ba60db422a04a94bc18e8d0cd67f7a911f6341a3702ca6760b685bb", size = 5388721, upload-time = "2026-02-17T09:04:44.448Z" }, + { url = "https://files.pythonhosted.org/packages/56/54/865a714ff7880f148acc762686a8281844e5620af38a307fdf535dc4fb94/pipelex_tools-0.2.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0d3cac6f46be310a66d242b9ca42a237042661c77ed8de1c4116c2b066f9e977", size = 5077220, upload-time = "2026-02-21T21:04:20.644Z" }, + { url = "https://files.pythonhosted.org/packages/d3/47/09abdef9d35cadbda5d3909fe45546282f87d48ad0c876f757f4dc5a847a/pipelex_tools-0.2.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7c908648a72b96bef1a68c2ff2ba407e39757e8843f45e1e799d11fab78c405b", size = 4826425, upload-time = "2026-02-21T21:04:22.403Z" }, + { url = "https://files.pythonhosted.org/packages/cf/35/2dea7a7c615e349807b5aa56604b83e5ed4aa2f7ba6b3249fab7d1d23582/pipelex_tools-0.2.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45f88bffd036d5813cd2684a6cc6fba46852e25614a3723faa8e5ef16fb0dd28", size = 4965360, upload-time = "2026-02-21T21:04:24.959Z" }, + { url = "https://files.pythonhosted.org/packages/4c/59/2dacf08122e0302b7bf07ea4617dcbf82946445b8b5fcf137443fde3bd20/pipelex_tools-0.2.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1cc46c25659c6f42ec088a13fa685f1b49f836b22a7300dc807573f74b3af7f", size = 5216110, upload-time = "2026-02-21T21:04:26.904Z" }, + { url = "https://files.pythonhosted.org/packages/2b/09/89f87484e575205632d3f97c7f22dd3080473ab34c50801d6f12a031028d/pipelex_tools-0.2.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:283210d5f24cbc945a80b3127fc42abc7cf3dbaa18b269d68561bc6a528e1b14", size = 5222398, upload-time = "2026-02-21T21:04:29.134Z" }, + { url = "https://files.pythonhosted.org/packages/9e/31/6777aa89f9ef096e963b890b748150d4085351edb013edc97b57256488e6/pipelex_tools-0.2.0-py3-none-win32.whl", hash = 
"sha256:f39eb059f27f2426c3c3dbc990e9d1c13ab66cee3974a1e9d513b4cff24fd7b2", size = 4598419, upload-time = "2026-02-21T21:04:30.819Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c9/4a3c74f29a5a73f4ca3ea79943212472db5803f13e1de324da5d706f0e01/pipelex_tools-0.2.0-py3-none-win_amd64.whl", hash = "sha256:94488f11c1700bd682cb9da981ee7e7569bc1fa601544ae658254592fc493f56", size = 5393983, upload-time = "2026-02-21T21:04:32.558Z" }, ] [[package]] From a674e259225e9ca1ea1a50e153a76333727af291 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 22 Feb 2026 01:36:01 +0100 Subject: [PATCH 5/5] cleanup --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 41aa9f0..6c5118c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ classifiers = [ dependencies = ["pipelex[mistralai,anthropic,google,google-genai,bedrock,fal]"] [tool.uv.sources] -# pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b3" } pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "feature/Chicago" }