diff --git a/.github/workflows/docs-next.yml b/.github/workflows/docs-next.yml deleted file mode 100644 index 2ae9c33..0000000 --- a/.github/workflows/docs-next.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Build Docs (Next) - -on: - pull_request: - paths: - - "docs-next/**" - - ".github/workflows/docs-next.yml" - push: - branches: [master] - paths: - - "docs-next/**" - - ".github/workflows/docs-next.yml" - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - defaults: - run: - working-directory: docs-next - steps: - - uses: actions/checkout@v6 - - - uses: pnpm/action-setup@v6 - with: - version: 10 - - - uses: actions/setup-node@v6 - with: - node-version: 24 - cache: pnpm - cache-dependency-path: docs-next/pnpm-lock.yaml - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Type check - run: pnpm types:check - - - name: Lint - run: pnpm lint - - - name: Build - run: pnpm build diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f0b52f7..26b4774 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,11 +1,14 @@ name: Deploy Docs on: + pull_request: + paths: + - "docs/**" + - ".github/workflows/docs.yml" push: branches: [master] paths: - "docs/**" - - "zensical.toml" - ".github/workflows/docs.yml" workflow_dispatch: @@ -21,23 +24,43 @@ concurrency: jobs: build: runs-on: ubuntu-latest + defaults: + run: + working-directory: docs steps: - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v8.1.0 + - uses: pnpm/action-setup@v6 + with: + version: 10 + + - uses: actions/setup-node@v6 + with: + node-version: 24 + cache: pnpm + cache-dependency-path: docs/pnpm-lock.yaml - name: Install dependencies - run: uv sync --extra docs + run: pnpm install --frozen-lockfile + + - name: Type check + run: pnpm types:check + + - name: Lint + run: pnpm lint - - name: Build docs - run: uv run zensical build + - name: Build + run: pnpm build - - uses: actions/upload-pages-artifact@v5 + - name: Upload 
Pages artifact + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' + uses: actions/upload-pages-artifact@v5 with: - path: site + path: docs/out deploy: needs: build + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest environment: name: github-pages diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3c78b42..e23466e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -93,14 +93,20 @@ cargo clippy --manifest-path crates/taskito-core/Cargo.toml ## Documentation -Docs use [Zensical](https://zensical.com/). To preview locally: +Docs are a [Fumadocs](https://fumadocs.dev) site (Next.js + MDX) under `docs/`. To preview locally: ```bash -pip install ".[docs]" -zensical serve +pnpm --dir docs install +pnpm --dir docs dev ``` -Then open http://localhost:8000. +Then open http://localhost:3000. To validate before opening a PR: + +```bash +pnpm --dir docs types:check +pnpm --dir docs lint +pnpm --dir docs build +``` ## Questions? diff --git a/README.md b/README.md index 692579e..346408c 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ print(job.result(timeout=10)) # 5 Most Python task queues require a separate broker (Redis, RabbitMQ) even for single-machine workloads. taskito embeds everything — storage, scheduling, and worker management — into a single `pip install` with no external dependencies beyond Python itself. For distributed setups, an optional Postgres backend enables multi-machine workers with the same API. -The heavy lifting runs in Rust: a Tokio async scheduler, OS thread worker pool with crossbeam channels, and Diesel ORM over SQLite in WAL mode. Python's GIL is only held during task execution. For CPU-bound workloads, run with `--pool prefork` to spawn child processes with independent GILs and get true parallel speedup — see the [prefork guide](https://taskito.grigori.in/guide/execution/prefork/). 
+The heavy lifting runs in Rust: a Tokio async scheduler, OS thread worker pool with `tokio::sync::mpsc` channels, and Diesel ORM over SQLite in WAL mode. Python's GIL is only held during task execution. For CPU-bound workloads, run with `--pool prefork` to spawn child processes with independent GILs and get true parallel speedup — see the [prefork guide](https://docs.byteveda.org/taskito/docs/guides/advanced-execution/prefork). ## Features @@ -261,9 +261,9 @@ def test_add(): Full documentation with guides, API reference, architecture diagrams, and examples: -**[Read the docs →](https://taskito.grigori.in)** +**[Read the docs →](https://docs.byteveda.org/taskito)** -Coming from Celery? See the **[Migration Guide](https://taskito.grigori.in/guide/migration/)**. +Coming from Celery? See the **[Migration Guide](https://docs.byteveda.org/taskito/docs/guides/operations/migration)**. ## Comparison diff --git a/docs-next/.gitignore b/docs/.gitignore similarity index 100% rename from docs-next/.gitignore rename to docs/.gitignore diff --git a/docs-next/README.md b/docs/README.md similarity index 99% rename from docs-next/README.md rename to docs/README.md index cbd8f27..d0aff64 100644 --- a/docs-next/README.md +++ b/docs/README.md @@ -1,4 +1,4 @@ -# docs-next +# docs This is a Next.js application generated with [Create Fumadocs](https://github.com/fuma-nama/fumadocs). diff --git a/docs/api/canvas.md b/docs/api/canvas.md deleted file mode 100644 index 6aab3b6..0000000 --- a/docs/api/canvas.md +++ /dev/null @@ -1,338 +0,0 @@ -# Canvas (Workflows) - -::: taskito.canvas - -Canvas primitives for composing task workflows. Import directly from the package: - -```python -from taskito import chain, group, chord, chunks, starmap -``` - ---- - -## Signature - -A frozen task call spec — describes *what* to call and *with what arguments*, without executing it. 
- -### Creating Signatures - -```python -# Mutable signature — receives previous result in chains -sig = add.s(2, 3) - -# Immutable signature — ignores previous result in chains -sig = add.si(2, 3) -``` - -### Fields - -| Field | Type | Description | -|---|---|---| -| `task` | `TaskWrapper` | The task to call | -| `args` | `tuple` | Positional arguments | -| `kwargs` | `dict` | Keyword arguments | -| `options` | `dict` | Enqueue options (priority, queue, etc.) | -| `immutable` | `bool` | If `True`, ignores previous result in chains | - -### `sig.apply()` - -```python -sig.apply(queue: Queue | None = None) -> JobResult -``` - -Enqueue this signature immediately. If `queue` is `None`, uses the task's parent queue. - -```python -sig = add.s(2, 3) -job = sig.apply() -print(job.result(timeout=10)) # 5 -``` - -### `await sig.apply_async()` - -```python -await sig.apply_async(queue: Queue | None = None) -> JobResult -``` - -Async version of `apply()`. Safe to call from async contexts (FastAPI handlers, etc.). - -### Mutable vs Immutable - -In a [`chain`](#chain), the previous task's return value is **prepended** to a mutable signature's args: - -```python -# add.s(10) in a chain where previous step returned 5: -# → add(5, 10) = 15 - -# add.si(2, 3) in a chain: -# → add(2, 3) = 5 (always, regardless of previous result) -``` - ---- - -## chain - -Execute signatures sequentially, piping each result to the next. - -### Constructor - -```python -chain(*signatures: Signature) -``` - -Requires at least one signature. - -### `chain.apply()` - -```python -chain.apply(queue: Queue | None = None) -> JobResult -``` - -Execute the chain by enqueuing and waiting for each step sequentially. Returns the [`JobResult`](result.md) of the **last** step. - -Each step's return value is prepended to the next mutable signature's args. Immutable signatures (`task.si()`) receive their args as-is. 
- -```python -@queue.task() -def double(x): - return x * 2 - -@queue.task() -def add_ten(x): - return x + 10 - -# double(3) → 6, then add_ten(6) → 16 -result = chain(double.s(3), add_ten.s()).apply() -print(result.result(timeout=10)) # 16 -``` - -```mermaid -graph LR - A["double(3)"] -->|"6"| B["add_ten(6)"] - B -->|"16"| C["Result: 16"] -``` - -### `await chain.apply_async()` - -```python -await chain.apply_async(queue: Queue | None = None) -> JobResult -``` - -Async version of `apply()`. Awaits each step's result using `aresult()` instead of blocking with `result()`. Safe to call from async contexts. - -```python -result = await chain(double.s(3), add_ten.s()).apply_async() -value = await result.aresult(timeout=10) # 16 -``` - ---- - -## group - -Execute signatures in parallel and collect all results. - -### Constructor - -```python -group(*signatures: Signature) -``` - -Requires at least one signature. - -### `group.apply()` - -```python -group.apply(queue: Queue | None = None) -> list[JobResult] -``` - -Enqueue all signatures and return a list of [`JobResult`](result.md) handles. Jobs run concurrently across available workers. - -```python -jobs = group( - add.s(1, 2), - add.s(3, 4), - add.s(5, 6), -).apply() - -results = [j.result(timeout=10) for j in jobs] -print(results) # [3, 7, 11] -``` - -```mermaid -graph LR - A["add(1,2)"] --> D["Results: [3, 7, 11]"] - B["add(3,4)"] --> D - C["add(5,6)"] --> D -``` - -### `await group.apply_async()` - -```python -await group.apply_async(queue: Queue | None = None) -> list[JobResult] -``` - -Async version of `apply()`. With `max_concurrency`, uses `asyncio.gather` to await each wave concurrently instead of blocking. - ---- - -## chord - -Run a group in parallel, then pass all results to a callback. 
- -### Constructor - -```python -chord(group_: group, callback: Signature) -``` - -| Parameter | Type | Description | -|---|---|---| -| `group_` | `group` | The group of tasks to run in parallel | -| `callback` | `Signature` | The task to call with all collected results | - -### `chord.apply()` - -```python -chord.apply(queue: Queue | None = None) -> JobResult -``` - -Execute the group, wait for all results, then run the callback with the list of results prepended to its args (unless immutable). Returns the [`JobResult`](result.md) of the callback. - -```python -@queue.task() -def fetch(url): - return requests.get(url).text - -@queue.task() -def merge(results): - return "\n".join(results) - -result = chord( - group(fetch.s("https://a.com"), fetch.s("https://b.com")), - merge.s(), -).apply() - -combined = result.result(timeout=30) -``` - -```mermaid -graph LR - A["fetch(a.com)"] --> C["merge([...])"] - B["fetch(b.com)"] --> C - C --> D["Combined result"] -``` - -### `await chord.apply_async()` - -```python -await chord.apply_async(queue: Queue | None = None) -> JobResult -``` - -Async version of `apply()`. Awaits all group results using `asyncio.gather`, then enqueues the callback. - ---- - -## chunks - -Split an iterable into fixed-size chunks and process each chunk in parallel. - -### Constructor - -```python -chunks(task: TaskWrapper, items: list, chunk_size: int) -> group -``` - -| Parameter | Type | Description | -|---|---|---| -| `task` | `TaskWrapper` | The task to call for each chunk. Receives the chunk as a single positional argument. | -| `items` | `list` | The full list to split. Must be non-empty. | -| `chunk_size` | `int` | Items per chunk. Must be positive. | - -Returns a [`group`](#group) of signatures — one per chunk. Apply it the same way as any other group. 
- -```python -@queue.task() -def process_batch(records): - return [transform(r) for r in records] - -records = load_records() # 10_000 items -jobs = chunks(process_batch, records, 100).apply() # → 100 parallel jobs - -results = [j.result(timeout=60) for j in jobs] -``` - -Raises `ValueError` if `chunk_size <= 0` or `items` is empty. - ---- - -## starmap - -Spread an iterable of argument tuples over parallel task invocations. - -### Constructor - -```python -starmap(task: TaskWrapper, args_list: list[tuple]) -> group -``` - -| Parameter | Type | Description | -|---|---|---| -| `task` | `TaskWrapper` | The task to call. Each tuple is unpacked into positional args. | -| `args_list` | `list[tuple]` | One tuple per invocation. Must be non-empty. | - -Returns a [`group`](#group) of signatures — one per tuple. Equivalent to `group(task.s(*a) for a in args_list)`. - -```python -@queue.task() -def add(x, y): - return x + y - -jobs = starmap(add, [(1, 2), (3, 4), (5, 6)]).apply() -results = [j.result(timeout=10) for j in jobs] # [3, 7, 11] -``` - -Raises `ValueError` if `args_list` is empty. 
- ---- - -## Complete Example - -An ETL pipeline using all three primitives: - -```python -from taskito import Queue, chain, group, chord - -queue = Queue() - -@queue.task() -def extract(source): - return load_data(source) - -@queue.task() -def transform(data): - return clean(data) - -@queue.task() -def aggregate(results): - return merge_datasets(results) - -@queue.task() -def load(data): - save_to_warehouse(data) - -# Extract from 3 sources in parallel, transform each, -# aggregate all results, then load -pipeline = chain( - chord( - group( - chain(extract.s("db"), transform.s()), - chain(extract.s("api"), transform.s()), - chain(extract.s("csv"), transform.s()), - ), - aggregate.s(), - ), - load.s(), -) - -result = pipeline.apply(queue) -``` diff --git a/docs/api/cli.md b/docs/api/cli.md deleted file mode 100644 index fef3fcb..0000000 --- a/docs/api/cli.md +++ /dev/null @@ -1,163 +0,0 @@ -# CLI Reference - -taskito provides a command-line interface for running workers and inspecting queue state. - -## Installation - -The CLI is installed automatically with the package: - -```bash -pip install taskito -``` - -The `taskito` command becomes available in your `PATH`. - -## Commands - -### `taskito worker` - -Start a worker process that consumes and executes tasks. - -```bash -taskito worker --app [--queues ] -``` - -| Flag | Required | Description | -|---|---|---| -| `--app` | Yes | Python path to the `Queue` instance in `module:attribute` format | -| `--queues` | No | Comma-separated list of queues to process. Default: all registered queues | - -**Examples:** - -```bash -# Start a worker using the queue defined in myapp/tasks.py -taskito worker --app myapp.tasks:queue - -# Only process the "emails" and "reports" queues -taskito worker --app myapp.tasks:queue --queues emails,reports - -# Use a nested module path -taskito worker --app myproject.workers.tasks:task_queue -``` - -The worker blocks until interrupted with `Ctrl+C`. 
It performs a graceful shutdown — in-flight tasks are allowed to complete before the process exits. - -### `taskito info` - -Display queue statistics. - -```bash -taskito info --app [--watch] -``` - -| Flag | Required | Description | -|---|---|---| -| `--app` | Yes | Python path to the `Queue` instance | -| `--watch` | No | Continuously refresh stats every 2 seconds | - -**Examples:** - -```bash -# Show stats once -taskito info --app myapp.tasks:queue -``` - -Output: - -``` -taskito queue statistics ------------------------------- - pending 12 - running 4 - completed 1847 - failed 0 - dead 2 - cancelled 0 ------------------------------- - total 1865 -``` - -```bash -# Live monitoring with throughput -taskito info --app myapp.tasks:queue --watch -``` - -Output (refreshes every 2s): - -``` -taskito queue statistics ------------------------------- - pending 3 - running 8 - completed 2104 - failed 0 - dead 2 - cancelled 0 ------------------------------- - total 2117 - - throughput 12.5 jobs/s - -Refreshing every 2s... (Ctrl+C to stop) -``` - -## App Path Format - -The `--app` flag uses `module:attribute` format: - -``` -myapp.tasks:queue -│ │ -│ └── attribute name (the Queue variable) -└── Python module path (dotted, importable) -``` - -The module must be importable from the current working directory. If your module is in a package, make sure the package is installed or the parent directory is in `PYTHONPATH`. - -**Common patterns:** - -| App structure | `--app` value | -|---|---| -| `tasks.py` with `queue = Queue()` | `tasks:queue` | -| `myapp/tasks.py` with `queue = Queue()` | `myapp.tasks:queue` | -| `src/workers/q.py` with `app = Queue()` | `src.workers.q:app` | - -### `taskito scaler` - -Start a lightweight KEDA metrics server. 
- -```bash -taskito scaler --app [--host ] [--port ] [--target-queue-depth ] -``` - -| Flag | Default | Description | -|---|---|---| -| `--app` | — | Python path to the `Queue` instance | -| `--host` | `0.0.0.0` | Bind address | -| `--port` | `9091` | Bind port | -| `--target-queue-depth` | `10` | Scaling target hint returned to KEDA in `/api/scaler` responses | - -The server exposes three routes: - -| Route | Description | -|---|---| -| `GET /api/scaler` | Queue depth and target for KEDA `metrics-api` trigger. Add `?queue=` to filter. | -| `GET /metrics` | Prometheus text format (requires `prometheus-client`). | -| `GET /health` | Liveness probe — always returns `{"status": "ok"}`. | - -**Example:** - -```bash -taskito scaler --app myapp:queue --port 9091 --target-queue-depth 5 -``` - -See the [KEDA Integration guide](../guide/operations/keda.md) for Kubernetes deploy templates. - -## Error Messages - -| Error | Cause | -|---|---| -| `--app must be in 'module:attribute' format` | Missing `:` separator | -| `could not import module '...'` | Module not found or import error | -| `module '...' has no attribute '...'` | Attribute doesn't exist on the module | -| `'...' is not a Queue instance` | The attribute exists but isn't a `Queue` | diff --git a/docs/api/context.md b/docs/api/context.md deleted file mode 100644 index 4506502..0000000 --- a/docs/api/context.md +++ /dev/null @@ -1,145 +0,0 @@ -# Job Context - -::: taskito.context - -Per-job context for the currently executing task. Provides access to job metadata and controls from inside a running task. - -## Usage - -```python -from taskito.context import current_job - -# or directly: -from taskito import current_job -``` - -`current_job` is a module-level singleton. It works in both sync and async tasks: - -- **Sync tasks** — reads from `threading.local`, isolated per worker thread. 
-- **Async tasks** — reads from a `contextvars.ContextVar`, isolated per concurrent coroutine even when multiple async tasks run on the same event loop. - -!!! warning - `current_job` can only be used inside a running task. Accessing it outside a task raises `RuntimeError`. - -## Properties - -### `current_job.id` - -```python -current_job.id -> str -``` - -The unique ID of the currently executing job. - -```python -@queue.task() -def process(data): - print(f"Running as job {current_job.id}") - ... -``` - -### `current_job.task_name` - -```python -current_job.task_name -> str -``` - -The registered name of the currently executing task. - -### `current_job.retry_count` - -```python -current_job.retry_count -> int -``` - -How many times this job has been retried. `0` on the first attempt. - -```python -@queue.task(max_retries=3) -def flaky_task(): - if current_job.retry_count > 0: - print(f"Retry attempt #{current_job.retry_count}") - call_external_api() -``` - -### `current_job.queue_name` - -```python -current_job.queue_name -> str -``` - -The name of the queue this job is running on. - -## Methods - -### `current_job.update_progress()` - -```python -current_job.update_progress(progress: int) -> None -``` - -Update the job's progress percentage (0–100). The value is written directly to the database and can be read via [`job.progress`](result.md#jobprogress) or [`queue.get_job()`](queue.md#queueget_job). - -```python -@queue.task() -def process_files(file_list): - for i, path in enumerate(file_list): - handle(path) - current_job.update_progress(int((i + 1) / len(file_list) * 100)) -``` - -Read progress from the caller: - -```python -job = process_files.delay(files) - -# Poll progress -import time -while job.status == "running": - print(f"Progress: {job.progress}%") - time.sleep(1) -``` - -### `current_job.publish()` - -```python -current_job.publish(data: Any) -> None -``` - -Publish a partial result visible to [`job.stream()`](result.md#jobstream) consumers. 
Use this to stream intermediate data from long-running tasks. - -`data` must be JSON-serializable. It is stored as a task log entry with `level="result"`, distinguishing it from regular logs. - -```python -@queue.task() -def process_batch(items): - for i, item in enumerate(items): - result = process(item) - current_job.publish({"item_id": item.id, "status": "ok"}) - current_job.update_progress(int((i + 1) / len(items) * 100)) - return {"total": len(items)} -``` - -Consumer side: - -```python -job = process_batch.delay(items) -for partial in job.stream(timeout=120): - print(f"Processed: {partial}") -``` - -## How It Works - -**Sync tasks (thread pool):** - -1. Before execution, the Rust worker calls `_set_context()` with the job's metadata -2. `current_job` reads from `threading.local` — each worker thread has independent storage -3. After the task completes (success or failure), `_clear_context()` resets the thread-local - -**Async tasks (native async pool):** - -1. Before execution, `set_async_context()` sets a `contextvars.ContextVar` token -2. `current_job` checks `contextvars` first; if a token is set it returns that context -3. After the coroutine finishes, `clear_async_context()` resets the token - -This means concurrent async tasks on the same event loop each see their own isolated context — there is no cross-task interference. diff --git a/docs/api/index.md b/docs/api/index.md deleted file mode 100644 index c78a4ab..0000000 --- a/docs/api/index.md +++ /dev/null @@ -1,13 +0,0 @@ -# API Reference - -Complete Python API reference for all public classes and methods. 
- -| Class | Description | -|-------|-------------| -| [Queue](queue/index.md) | Central orchestrator — task registration, enqueue, workers, and all queue operations | -| [TaskWrapper](task.md) | Handle returned by `@queue.task()` — `delay()`, `apply_async()`, `map()`, signatures | -| [JobResult](result.md) | Handle for an enqueued job — status polling, result retrieval, dependencies | -| [JobContext](context.md) | Runtime context inside a running task — job ID, retry count, progress updates | -| [Canvas](canvas.md) | Workflow primitives — `Signature`, `chain`, `group`, `chord` | -| [Testing](testing.md) | Test mode, `TestResult`, `MockResource` for unit testing tasks | -| [CLI](cli.md) | `taskito` command-line interface — `worker`, `info`, `scaler` | diff --git a/docs/api/queue/events.md b/docs/api/queue/events.md deleted file mode 100644 index 67bf0b3..0000000 --- a/docs/api/queue/events.md +++ /dev/null @@ -1,70 +0,0 @@ -# Queue — Events & Logs - -Methods for event callbacks, webhook registration, and structured task logging. - -## Events & Webhooks - -### `queue.on_event()` - -```python -queue.on_event(event: str) -> Callable -``` - -Register a callback for a queue event. Supported events: `job.completed`, `job.failed`, `job.retried`, `job.dead`. - -```python -@queue.on_event("job.failed") -def handle_failure(job_id: str, task_name: str, error: str) -> None: - ... -``` - -### `queue.add_webhook()` - -```python -queue.add_webhook( - url: str, - events: list[EventType] | None = None, - headers: dict[str, str] | None = None, - secret: str | None = None, - max_retries: int = 3, - timeout: float = 10.0, - retry_backoff: float = 2.0, -) -> None -``` - -Register a webhook URL for one or more events. 4xx responses are not retried; 5xx responses are retried with exponential backoff. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `url` | `str` | — | URL to POST to. Must be `http://` or `https://`. 
| -| `events` | `list[EventType] | None` | `None` | Event types to subscribe to. `None` means all events. | -| `headers` | `dict[str, str] | None` | `None` | Extra HTTP headers to include. | -| `secret` | `str | None` | `None` | HMAC-SHA256 signing secret for `X-Taskito-Signature`. | -| `max_retries` | `int` | `3` | Maximum delivery attempts. | -| `timeout` | `float` | `10.0` | HTTP request timeout in seconds. | -| `retry_backoff` | `float` | `2.0` | Base for exponential backoff between retries. | - -## Logs - -### `queue.task_logs()` - -```python -queue.task_logs(job_id: str, limit: int = 100) -> list[dict] -``` - -Return structured log entries emitted by `current_job.log()` during the given job's execution. - -### `queue.query_logs()` - -```python -queue.query_logs( - task_name: str | None = None, - level: str | None = None, - message_like: str | None = None, - since: float | None = None, - limit: int = 100, - offset: int = 0, -) -> list[dict] -``` - -Query task logs across all jobs with optional filters. diff --git a/docs/api/queue/index.md b/docs/api/queue/index.md deleted file mode 100644 index f6779a4..0000000 --- a/docs/api/queue/index.md +++ /dev/null @@ -1,199 +0,0 @@ -# Queue - -::: taskito.app.Queue - -The central class for creating and managing a task queue. - -!!! 
tip "Sub-pages" - The Queue API is split across several pages for readability: - - - **[Job Management](jobs.md)** — get, list, cancel, archive, replay jobs - - **[Queue & Stats](queues.md)** — rate limits, concurrency, pause/resume, statistics, dead letters - - **[Workers & Hooks](workers.md)** — run workers, lifecycle hooks, circuit breakers, async methods - - **[Resources & Locking](resources.md)** — resource system, distributed locks - - **[Events & Logs](events.md)** — event callbacks, webhooks, structured logs - -## Constructor - -```python -Queue( - db_path: str = ".taskito/taskito.db", - workers: int = 0, - default_retry: int = 3, - default_timeout: int = 300, - default_priority: int = 0, - result_ttl: int | None = None, - middleware: list[TaskMiddleware] | None = None, - drain_timeout: int = 30, - interception: str = "off", - max_intercept_depth: int = 10, - recipe_signing_key: str | None = None, - max_reconstruction_timeout: int = 10, - file_path_allowlist: list[str] | None = None, - disabled_proxies: list[str] | None = None, - async_concurrency: int = 100, - event_workers: int = 4, - scheduler_poll_interval_ms: int = 50, - scheduler_reap_interval: int = 100, - scheduler_cleanup_interval: int = 1200, - namespace: str | None = None, -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `db_path` | `str` | `".taskito/taskito.db"` | Path to SQLite database file. Parent directories are created automatically. | -| `workers` | `int` | `0` | Number of worker threads (`0` = auto-detect CPU count) | -| `default_retry` | `int` | `3` | Default max retry attempts for tasks | -| `default_timeout` | `int` | `300` | Default task timeout in seconds | -| `default_priority` | `int` | `0` | Default task priority (higher = more urgent) | -| `result_ttl` | `int | None` | `None` | Auto-cleanup completed/dead jobs older than this many seconds. `None` disables. 
| -| `middleware` | `list[TaskMiddleware] | None` | `None` | Queue-level middleware applied to all tasks. | -| `drain_timeout` | `int` | `30` | Seconds to wait for in-flight tasks during graceful shutdown. | -| `interception` | `str` | `"off"` | Argument interception mode: `"strict"`, `"lenient"`, or `"off"`. See [Resource System](../../resources/interception.md). | -| `max_intercept_depth` | `int` | `10` | Max recursion depth for argument walking. | -| `recipe_signing_key` | `str | None` | `None` | HMAC-SHA256 key for proxy recipe integrity. Falls back to `TASKITO_RECIPE_SECRET` env var. | -| `max_reconstruction_timeout` | `int` | `10` | Max seconds allowed for proxy reconstruction. | -| `file_path_allowlist` | `list[str] | None` | `None` | Allowed file path prefixes for the file proxy handler. | -| `disabled_proxies` | `list[str] | None` | `None` | Handler names to skip when registering built-in proxy handlers. | -| `async_concurrency` | `int` | `100` | Maximum number of `async def` tasks running concurrently on the native async executor. | -| `event_workers` | `int` | `4` | Thread pool size for the event bus. Increase for high event volume. | -| `scheduler_poll_interval_ms` | `int` | `50` | Milliseconds between scheduler poll cycles. Lower values improve scheduling precision at the cost of CPU. | -| `scheduler_reap_interval` | `int` | `100` | Reap stale/timed-out jobs every N poll cycles. | -| `scheduler_cleanup_interval` | `int` | `1200` | Clean up old completed jobs every N poll cycles. | -| `namespace` | `str | None` | `None` | Namespace for multi-tenant isolation. Jobs enqueued on this queue carry this namespace; workers only dequeue matching jobs. `None` means no namespace (default). 
| - -## Task Registration - -### `@queue.task()` - -```python -@queue.task( - name: str | None = None, - max_retries: int = 3, - retry_backoff: float = 1.0, - retry_delays: list[float] | None = None, - max_retry_delay: int | None = None, - timeout: int = 300, - soft_timeout: float | None = None, - priority: int = 0, - rate_limit: str | None = None, - queue: str = "default", - circuit_breaker: dict | None = None, - middleware: list[TaskMiddleware] | None = None, - expires: float | None = None, - inject: list[str] | None = None, - serializer: Serializer | None = None, - max_concurrent: int | None = None, -) -> TaskWrapper -``` - -Register a function as a background task. Returns a [`TaskWrapper`](../task.md). - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `name` | `str | None` | Auto-generated | Explicit task name. Defaults to `module.qualname`. | -| `max_retries` | `int` | `3` | Max retry attempts before moving to DLQ. | -| `retry_backoff` | `float` | `1.0` | Base delay in seconds for exponential backoff. | -| `retry_delays` | `list[float] | None` | `None` | Per-attempt delays in seconds, overrides backoff. e.g. `[1, 5, 30]`. | -| `max_retry_delay` | `int | None` | `None` | Cap on backoff delay in seconds. Defaults to 300 s. | -| `timeout` | `int` | `300` | Hard execution time limit in seconds. | -| `soft_timeout` | `float | None` | `None` | Cooperative time limit checked via `current_job.check_timeout()`. | -| `priority` | `int` | `0` | Default priority (higher = more urgent). | -| `rate_limit` | `str | None` | `None` | Rate limit string, e.g. `"100/m"`. | -| `queue` | `str` | `"default"` | Named queue to submit to. | -| `circuit_breaker` | `dict | None` | `None` | Circuit breaker config: `{"threshold": 5, "window": 60, "cooldown": 120}`. | -| `middleware` | `list[TaskMiddleware] | None` | `None` | Per-task middleware, applied in addition to queue-level middleware. 
| -| `expires` | `float | None` | `None` | Seconds until the job expires if not started. | -| `inject` | `list[str] | None` | `None` | Resource names to inject as keyword arguments. See [Resource System](../../resources/index.md). | -| `serializer` | `Serializer | None` | `None` | Per-task serializer override. Falls back to queue-level serializer. | -| `max_concurrent` | `int | None` | `None` | Max concurrent running instances. `None` = no limit. | - -### `@queue.periodic()` - -```python -@queue.periodic( - cron: str, - name: str | None = None, - args: tuple = (), - kwargs: dict | None = None, - queue: str = "default", - timezone: str | None = None, -) -> TaskWrapper -``` - -Register a periodic (cron-scheduled) task. Uses 6-field cron expressions with seconds. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `cron` | `str` | — | 6-field cron expression (seconds precision). | -| `name` | `str | None` | Auto-generated | Explicit task name. | -| `args` | `tuple` | `()` | Positional arguments passed to the task on each run. | -| `kwargs` | `dict | None` | `None` | Keyword arguments passed to the task on each run. | -| `queue` | `str` | `"default"` | Named queue to submit to. | -| `timezone` | `str | None` | `None` | IANA timezone name (e.g. `"America/New_York"`). Defaults to UTC. | - -## Enqueue Methods - -### `queue.enqueue()` - -```python -queue.enqueue( - task_name: str, - args: tuple = (), - kwargs: dict | None = None, - priority: int | None = None, - delay: float | None = None, - queue: str | None = None, - max_retries: int | None = None, - timeout: int | None = None, - unique_key: str | None = None, - metadata: str | None = None, - depends_on: str | list[str] | None = None, -) -> JobResult -``` - -Enqueue a task for execution. Returns a [`JobResult`](../result.md) handle. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `depends_on` | `str | list[str] | None` | `None` | Job ID(s) this job depends on. 
See [Dependencies](../../guide/execution/dependencies.md). | - -### `queue.enqueue_many()` - -```python -queue.enqueue_many( - task_name: str, - args_list: list[tuple], - kwargs_list: list[dict] | None = None, - priority: int | None = None, - queue: str | None = None, - max_retries: int | None = None, - timeout: int | None = None, - delay: float | None = None, - delay_list: list[float | None] | None = None, - unique_keys: list[str | None] | None = None, - metadata: str | None = None, - metadata_list: list[str | None] | None = None, - expires: float | None = None, - expires_list: list[float | None] | None = None, - result_ttl: int | None = None, - result_ttl_list: list[int | None] | None = None, -) -> list[JobResult] -``` - -Enqueue multiple jobs in a single transaction for high throughput. Supports both uniform parameters (applied to all jobs) and per-job lists. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `delay` | `float | None` | `None` | Uniform delay in seconds for all jobs | -| `delay_list` | `list[float | None] | None` | `None` | Per-job delays in seconds | -| `unique_keys` | `list[str | None] | None` | `None` | Per-job deduplication keys | -| `metadata` | `str | None` | `None` | Uniform metadata JSON for all jobs | -| `metadata_list` | `list[str | None] | None` | `None` | Per-job metadata JSON | -| `expires` | `float | None` | `None` | Uniform expiry in seconds for all jobs | -| `expires_list` | `list[float | None] | None` | `None` | Per-job expiry in seconds | -| `result_ttl` | `int | None` | `None` | Uniform result TTL in seconds | -| `result_ttl_list` | `list[int | None] | None` | `None` | Per-job result TTL in seconds | - -Per-job lists (`*_list`) take precedence over uniform values when both are provided. 
diff --git a/docs/api/queue/jobs.md b/docs/api/queue/jobs.md deleted file mode 100644 index 3649642..0000000 --- a/docs/api/queue/jobs.md +++ /dev/null @@ -1,129 +0,0 @@ -# Queue — Job Management - -Methods for retrieving, filtering, cancelling, archiving, and replaying jobs. - -## Job Retrieval - -### `queue.get_job()` - -```python -queue.get_job(job_id: str) -> JobResult | None -``` - -Retrieve a job by ID. Returns `None` if not found. - -### `queue.list_jobs()` - -```python -queue.list_jobs( - status: str | None = None, - queue: str | None = None, - task_name: str | None = None, - limit: int = 50, - offset: int = 0, - namespace: str | None = _UNSET, -) -> list[JobResult] -``` - -List jobs with optional filters. Returns newest first. Defaults to the queue's namespace — pass `namespace=None` to see all namespaces. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `status` | `str | None` | `None` | Filter by status: `pending`, `running`, `completed`, `failed`, `dead`, `cancelled` | -| `queue` | `str | None` | `None` | Filter by queue name | -| `task_name` | `str | None` | `None` | Filter by task name | -| `limit` | `int` | `50` | Maximum results to return | -| `offset` | `int` | `0` | Pagination offset | - -### `queue.list_jobs_filtered()` - -```python -queue.list_jobs_filtered( - status: str | None = None, - queue: str | None = None, - task_name: str | None = None, - metadata_like: str | None = None, - error_like: str | None = None, - created_after: float | None = None, - created_before: float | None = None, - limit: int = 50, - offset: int = 0, - namespace: str | None = _UNSET, -) -> list[JobResult] -``` - -Extended filtering with metadata and error pattern matching and time range constraints. Defaults to the queue's namespace — pass `namespace=None` to see all namespaces. - -## Job Operations - -### `queue.cancel_job()` - -```python -queue.cancel_job(job_id: str) -> bool -``` - -Cancel a pending job. 
Returns `True` if cancelled, `False` if not pending. Cascade-cancels dependents. - -### `queue.update_progress()` - -```python -queue.update_progress(job_id: str, progress: int) -> None -``` - -Update progress for a running job (0–100). - -### `queue.job_errors()` - -```python -queue.job_errors(job_id: str) -> list[dict] -``` - -Get error history for a job. Returns a list of dicts with `id`, `job_id`, `attempt`, `error`, `failed_at`. - -### `queue.job_dag()` - -```python -queue.job_dag(job_id: str) -> dict -``` - -Return a dependency graph for a job, including all ancestors and descendants. Useful for visualizing workflow chains. - -## Archival - -### `queue.archive()` - -```python -queue.archive(job_id: str) -> None -``` - -Move a completed or failed job to the archive for long-term retention. - -### `queue.list_archived()` - -```python -queue.list_archived( - task_name: str | None = None, - limit: int = 50, - offset: int = 0, -) -> list[dict] -``` - -List archived jobs with optional task name filter. - -## Replay - -### `queue.replay()` - -```python -queue.replay(job_id: str) -> JobResult -``` - -Re-enqueue a completed or failed job with its original arguments. Returns the new job handle. - -### `queue.replay_history()` - -```python -queue.replay_history(job_id: str) -> list[dict] -``` - -Return the replay log for a job — every time it has been replayed and the resulting new job IDs. diff --git a/docs/api/queue/queues.md b/docs/api/queue/queues.md deleted file mode 100644 index ece6397..0000000 --- a/docs/api/queue/queues.md +++ /dev/null @@ -1,156 +0,0 @@ -# Queue — Queue & Stats - -Methods for managing queues, collecting statistics, and handling dead letters. - -## Queue Management - -### `queue.set_queue_rate_limit()` - -```python -queue.set_queue_rate_limit(queue_name: str, rate_limit: str) -> None -``` - -Set a rate limit for all jobs in a queue. Checked by the scheduler before per-task rate limits. 
- -| Parameter | Type | Description | -|---|---|---| -| `queue_name` | `str` | Queue name (e.g. `"default"`). | -| `rate_limit` | `str` | Rate limit string: `"N/s"`, `"N/m"`, or `"N/h"`. | - -### `queue.set_queue_concurrency()` - -```python -queue.set_queue_concurrency(queue_name: str, max_concurrent: int) -> None -``` - -Set a maximum number of concurrently running jobs for a queue across all workers. Checked by the scheduler before per-task `max_concurrent` limits. - -| Parameter | Type | Description | -|---|---|---| -| `queue_name` | `str` | Queue name (e.g. `"default"`). | -| `max_concurrent` | `int` | Maximum simultaneous running jobs from this queue. | - -### `queue.pause()` - -```python -queue.pause(queue_name: str) -> None -``` - -Pause a named queue. Workers continue running but skip jobs in this queue until it is resumed. - -### `queue.resume()` - -```python -queue.resume(queue_name: str) -> None -``` - -Resume a previously paused queue. - -### `queue.paused_queues()` - -```python -queue.paused_queues() -> list[str] -``` - -Return the names of all currently paused queues. - -### `queue.purge()` - -```python -queue.purge( - queue: str | None = None, - task_name: str | None = None, - status: str | None = None, -) -> int -``` - -Delete jobs matching the given filters. Returns the count deleted. - -### `queue.revoke_task()` - -```python -queue.revoke_task(task_name: str) -> None -``` - -Prevent all future enqueues of the given task name. Existing pending jobs are not affected. - -## Statistics - -### `queue.stats()` - -```python -queue.stats() -> dict[str, int] -``` - -Returns `{"pending": N, "running": N, "completed": N, "failed": N, "dead": N, "cancelled": N}`. - -### `queue.stats_by_queue()` - -```python -queue.stats_by_queue() -> dict[str, dict[str, int]] -``` - -Returns per-queue status counts: `{queue_name: {"pending": N, ...}}`. 
- -### `queue.stats_all_queues()` - -```python -queue.stats_all_queues() -> dict[str, dict[str, int]] -``` - -Returns stats for all queues including those with zero jobs. - -### `queue.metrics()` - -```python -queue.metrics() -> dict -``` - -Returns current throughput and latency snapshot. - -### `queue.metrics_timeseries()` - -```python -queue.metrics_timeseries( - window: int = 3600, - bucket: int = 60, -) -> list[dict] -``` - -Returns historical metrics bucketed by time. `window` is the lookback period in seconds; `bucket` is the bucket size in seconds. - -## Dead Letter Queue - -### `queue.dead_letters()` - -```python -queue.dead_letters(limit: int = 10, offset: int = 0) -> list[dict] -``` - -List dead letter entries. Each dict contains: `id`, `original_job_id`, `queue`, `task_name`, `error`, `retry_count`, `failed_at`, `metadata`. - -### `queue.retry_dead()` - -```python -queue.retry_dead(dead_id: str) -> str -``` - -Re-enqueue a dead letter job. Returns the new job ID. - -### `queue.purge_dead()` - -```python -queue.purge_dead(older_than: int = 86400) -> int -``` - -Purge dead letter entries older than `older_than` seconds. Returns count deleted. - -## Cleanup - -### `queue.purge_completed()` - -```python -queue.purge_completed(older_than: int = 86400) -> int -``` - -Purge completed jobs older than `older_than` seconds. Returns count deleted. diff --git a/docs/api/queue/resources.md b/docs/api/queue/resources.md deleted file mode 100644 index f308603..0000000 --- a/docs/api/queue/resources.md +++ /dev/null @@ -1,165 +0,0 @@ -# Queue — Resources & Locking - -Methods for the worker resource system and distributed locking. 
- -## Resource System - -### `@queue.worker_resource()` - -```python -@queue.worker_resource( - name: str, - depends_on: list[str] | None = None, - teardown: Callable | None = None, - health_check: Callable | None = None, - health_check_interval: float = 0.0, - max_recreation_attempts: int = 3, - scope: str = "worker", - pool_size: int | None = None, - pool_min: int = 0, - acquire_timeout: float = 10.0, - max_lifetime: float = 3600.0, - idle_timeout: float = 300.0, - reloadable: bool = False, - frozen: bool = False, -) -> Callable -``` - -Decorator to register a resource factory initialized at worker startup. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `name` | `str` | — | Resource name used in `inject=["name"]` or `Inject["name"]`. | -| `depends_on` | `list[str] | None` | `None` | Names of resources this one depends on. | -| `teardown` | `Callable | None` | `None` | Called with the resource instance on shutdown. | -| `health_check` | `Callable | None` | `None` | Called periodically; returns truthy if healthy. | -| `health_check_interval` | `float` | `0.0` | Seconds between health checks (0 = disabled). | -| `max_recreation_attempts` | `int` | `3` | Max times to recreate on health failure. | -| `scope` | `str` | `"worker"` | Lifetime scope: `"worker"`, `"task"`, `"thread"`, or `"request"`. | -| `pool_size` | `int | None` | `None` | Pool capacity for task-scoped resources (default = worker thread count). | -| `pool_min` | `int` | `0` | Pre-warmed instances for task-scoped resources. | -| `acquire_timeout` | `float` | `10.0` | Max seconds to wait for a pool instance. | -| `max_lifetime` | `float` | `3600.0` | Max seconds a pooled instance can live. | -| `idle_timeout` | `float` | `300.0` | Max idle seconds before pool eviction. | -| `reloadable` | `bool` | `False` | Allow hot-reload via SIGHUP. | -| `frozen` | `bool` | `False` | Wrap in a read-only proxy that blocks attribute writes. 
| - -### `queue.register_resource()` - -```python -queue.register_resource(definition: ResourceDefinition) -> None -``` - -Programmatically register a `ResourceDefinition`. Equivalent to `@queue.worker_resource()` but accepts a pre-built definition object. - -### `queue.load_resources()` - -```python -queue.load_resources(toml_path: str) -> None -``` - -Load resource definitions from a TOML file. Must be called before `run_worker()`. See [TOML Configuration](../../resources/configuration.md). - -### `queue.health_check()` - -```python -queue.health_check(name: str) -> bool -``` - -Run a resource's health check immediately. Returns `True` if healthy, `False` otherwise or if the runtime is not initialized. - -### `queue.resource_status()` - -```python -queue.resource_status() -> list[dict] -``` - -Return per-resource status. Each entry contains: `name`, `scope`, `health`, `init_duration_ms`, `recreations`, `depends_on`. Task-scoped entries also include `pool` stats. - -### `queue.register_type()` - -```python -queue.register_type( - python_type: type, - strategy: str, - *, - resource: str | None = None, - message: str | None = None, - converter: Callable | None = None, - type_key: str | None = None, - proxy_handler: str | None = None, -) -> None -``` - -Register a custom type with the interception system. Requires `interception="strict"` or `"lenient"`. - -| Parameter | Type | Description | -|---|---|---| -| `python_type` | `type` | The type to register. | -| `strategy` | `str` | `"pass"`, `"convert"`, `"redirect"`, `"reject"`, or `"proxy"`. | -| `resource` | `str | None` | Resource name for `"redirect"` strategy. | -| `message` | `str | None` | Rejection reason for `"reject"` strategy. | -| `converter` | `Callable | None` | Converter callable for `"convert"` strategy. | -| `type_key` | `str | None` | Dispatch key for the converter reconstructor. | -| `proxy_handler` | `str | None` | Handler name for `"proxy"` strategy. 
| - -### `queue.interception_stats()` - -```python -queue.interception_stats() -> dict -``` - -Return interception metrics: total call count, per-strategy counts, average duration in ms, max depth reached. Returns an empty dict if interception is disabled. - -### `queue.proxy_stats()` - -```python -queue.proxy_stats() -> list[dict] -``` - -Return per-handler proxy metrics: handler name, deconstruction count, reconstruction count, error count, average reconstruction time in ms. - -## Distributed Locking - -### `queue.lock()` - -```python -queue.lock( - name: str, - ttl: int = 30, - auto_extend: bool = True, - owner_id: str | None = None, - timeout: float | None = None, - retry_interval: float = 0.1, -) -> contextlib.AbstractContextManager -``` - -Acquire a distributed lock. Use as a context manager: - -```python -with queue.lock("my-resource", ttl=60): - # exclusive section - ... -``` - -Raises `LockNotAcquired` if acquisition fails (when `timeout` is `None` or expires). - -### `queue.alock()` - -```python -queue.alock( - name: str, - ttl: float = 30.0, - auto_extend: bool = True, - owner_id: str | None = None, - timeout: float | None = None, - retry_interval: float = 0.1, -) -> AsyncDistributedLock -``` - -Async context manager version of `lock()`. Returns an `AsyncDistributedLock` directly — use `async with`, not `await`: - -```python -async with queue.alock("my-resource"): - ... -``` diff --git a/docs/api/queue/workers.md b/docs/api/queue/workers.md deleted file mode 100644 index e9b60eb..0000000 --- a/docs/api/queue/workers.md +++ /dev/null @@ -1,127 +0,0 @@ -# Queue — Workers & Hooks - -Methods for running workers, lifecycle hooks, circuit breakers, and the sync/async method mapping. - -## Workers - -### `queue.run_worker()` - -```python -queue.run_worker( - queues: Sequence[str] | None = None, - tags: list[str] | None = None, - pool: str = "thread", - app: str | None = None, -) -> None -``` - -Start the worker loop. **Blocks** until interrupted. 
- -| Parameter | Type | Default | Description | -|---|---|---|---| -| `queues` | `Sequence[str] | None` | `None` | Queue names to consume from. `None` = all. | -| `tags` | `list[str] | None` | `None` | Tags for worker specialization / routing. | -| `pool` | `str` | `"thread"` | Worker pool type: `"thread"` or `"prefork"`. | -| `app` | `str | None` | `None` | Import path to Queue (e.g. `"myapp:queue"`). Required when `pool="prefork"`. | - -### `queue.workers()` - -```python -queue.workers() -> list[dict] -``` - -Return live state of all registered workers. Each dict contains: - -| Key | Type | Description | -|-----|------|-------------| -| `worker_id` | `str` | Unique worker ID | -| `hostname` | `str` | OS hostname | -| `pid` | `int` | Process ID | -| `status` | `str` | `"active"` or `"draining"` | -| `pool_type` | `str` | `"thread"`, `"prefork"`, or `"native-async"` | -| `started_at` | `int` | Registration timestamp (ms since epoch) | -| `last_heartbeat` | `int` | Last heartbeat timestamp (ms) | -| `queues` | `str` | Comma-separated queue names | -| `threads` | `int` | Worker thread/process count | -| `tags` | `str | None` | Worker specialization tags | -| `resources` | `str | None` | Registered resource names (JSON) | -| `resource_health` | `str | None` | Per-resource health status (JSON) | - -### `await queue.aworkers()` - -```python -await queue.aworkers() -> list[dict] -``` - -Async version of `workers()`. - -### `queue.arun_worker()` - -```python -await queue.arun_worker( - queues: Sequence[str] | None = None, - tags: list[str] | None = None, - pool: str = "thread", - app: str | None = None, -) -> None -``` - -Async version of `run_worker()`. Runs the blocking worker loop in a thread executor so it does not block the asyncio event loop. Accepts the same `pool` and `app` kwargs as the sync variant (`app` is required when `pool="prefork"`). 
- -## Circuit Breakers - -### `queue.circuit_breakers()` - -```python -queue.circuit_breakers() -> list[dict] -``` - -Return current state of all circuit breakers: task name, state (`closed`/`open`/`half-open`), failure count, last failure time. - -## Hooks - -### `@queue.before_task` - -```python -@queue.before_task -def hook(task_name: str, args: tuple, kwargs: dict) -> None: ... -``` - -### `@queue.after_task` - -```python -@queue.after_task -def hook(task_name: str, args: tuple, kwargs: dict, result: Any, error: Exception | None) -> None: ... -``` - -### `@queue.on_success` - -```python -@queue.on_success -def hook(task_name: str, args: tuple, kwargs: dict, result: Any) -> None: ... -``` - -### `@queue.on_failure` - -```python -@queue.on_failure -def hook(task_name: str, args: tuple, kwargs: dict, error: Exception) -> None: ... -``` - -## Async Methods - -| Sync | Async | -|---|---| -| `queue.stats()` | `await queue.astats()` | -| `queue.stats_by_queue()` | `await queue.astats_by_queue()` | -| `queue.stats_all_queues()` | `await queue.astats_all_queues()` | -| `queue.dead_letters()` | `await queue.adead_letters()` | -| `queue.retry_dead()` | `await queue.aretry_dead()` | -| `queue.cancel_job()` | `await queue.acancel_job()` | -| `queue.run_worker()` | `await queue.arun_worker()` | -| `queue.metrics()` | `await queue.ametrics()` | -| `queue.workers()` | `await queue.aworkers()` | -| `queue.circuit_breakers()` | `await queue.acircuit_breakers()` | -| `queue.replay()` | `await queue.areplay()` | -| `queue.lock()` | `queue.alock()` (async context manager, not a coroutine) | -| `queue.resource_status()` | `await queue.aresource_status()` | diff --git a/docs/api/result.md b/docs/api/result.md deleted file mode 100644 index 617c307..0000000 --- a/docs/api/result.md +++ /dev/null @@ -1,211 +0,0 @@ -# JobResult - -::: taskito.result.JobResult - -Handle to an enqueued job. Provides methods to check status and retrieve results, both synchronously and asynchronously. 
- -Returned by [`task.delay()`](task.md#taskdelay), [`task.apply_async()`](task.md#taskapply_async), [`queue.enqueue()`](queue.md#queueenqueue), and canvas operations. - -## Properties - -### `job.id` - -```python -job.id -> str -``` - -The unique job ID (UUIDv7, time-ordered). - -### `job.status` - -```python -job.status -> str -``` - -Current job status. **Fetches fresh from the database** on every access. - -Returns one of: `"pending"`, `"running"`, `"complete"`, `"failed"`, `"dead"`, `"cancelled"`. - -```python -job = add.delay(2, 3) -print(job.status) # "pending" -# ... after worker processes it ... -print(job.status) # "complete" -``` - -### `job.progress` - -```python -job.progress -> int | None -``` - -Current progress (0–100) if reported by the task via [`current_job.update_progress()`](context.md). Returns `None` if no progress has been reported. Refreshes from the database. - -### `job.error` - -```python -job.error -> str | None -``` - -Error message if the job failed. `None` if the job hasn't failed. Refreshes from the database. - -### `job.errors` - -```python -job.errors -> list[dict] -``` - -Full error history for this job — one entry per failed attempt. Each dict contains: - -| Key | Type | Description | -|---|---|---| -| `id` | `str` | Error record ID | -| `job_id` | `str` | Parent job ID | -| `attempt` | `int` | Retry attempt number | -| `error` | `str` | Error message/traceback | -| `failed_at` | `str` | ISO timestamp of the failure | - -```python -job = flaky_task.delay() -# ... after retries ... -for err in job.errors: - print(f"Attempt {err['attempt']}: {err['error']}") -``` - -### `job.dependencies` - -```python -job.dependencies -> list[str] -``` - -List of job IDs this job depends on. Returns an empty list if the job has no dependencies. See [Dependencies](../guide/execution/dependencies.md). - -### `job.dependents` - -```python -job.dependents -> list[str] -``` - -List of job IDs that depend on this job. 
Returns an empty list if no other jobs depend on this one. - -## Methods - -### `job.to_dict()` - -```python -job.to_dict() -> dict -``` - -Return all job fields as a plain dictionary. Useful for JSON serialization (e.g. in the [dashboard](../guide/observability/dashboard.md) or [FastAPI integration](../integrations/fastapi.md)). - -### `job.result()` - -```python -job.result( - timeout: float = 30.0, - poll_interval: float = 0.05, - max_poll_interval: float = 0.5, -) -> Any -``` - -**Block** until the job completes and return the deserialized result. Uses exponential backoff for polling — starts at `poll_interval` and gradually increases to `max_poll_interval`. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `timeout` | `float` | `30.0` | Maximum seconds to wait | -| `poll_interval` | `float` | `0.05` | Initial seconds between status checks | -| `max_poll_interval` | `float` | `0.5` | Maximum seconds between status checks | - -**Raises:** - -- `TimeoutError` — if the job doesn't complete within `timeout` -- `TaskFailedError` — if the job status is `"failed"` -- `MaxRetriesExceededError` — if the job status is `"dead"` (all retries exhausted) -- `TaskCancelledError` — if the job status is `"cancelled"` -- `SerializationError` — if result deserialization fails - -```python -from taskito import TaskFailedError, MaxRetriesExceededError, TaskCancelledError - -job = add.delay(2, 3) -result = job.result(timeout=10) # blocks, returns 5 - -# Handle specific failure modes -try: - result = job.result(timeout=10) -except TaskCancelledError: - print("Job was cancelled") -except MaxRetriesExceededError: - print("Job exhausted all retries") -except TaskFailedError: - print("Job failed") -``` - -### `await job.aresult()` - -```python -await job.aresult( - timeout: float = 30.0, - poll_interval: float = 0.05, - max_poll_interval: float = 0.5, -) -> Any -``` - -Async version of `result()`. 
Uses `asyncio.sleep()` instead of `time.sleep()`, so it won't block the event loop. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `timeout` | `float` | `30.0` | Maximum seconds to wait | -| `poll_interval` | `float` | `0.05` | Initial seconds between status checks | -| `max_poll_interval` | `float` | `0.5` | Maximum seconds between status checks | - -**Raises:** - -- `TimeoutError` — if the job doesn't complete within `timeout` -- `TaskFailedError` — if the job status is `"failed"` -- `MaxRetriesExceededError` — if the job status is `"dead"` -- `TaskCancelledError` — if the job status is `"cancelled"` -- `SerializationError` — if result deserialization fails - -```python -job = add.delay(2, 3) -result = await job.aresult(timeout=10) -``` - -### `job.stream()` - -```python -job.stream( - timeout: float = 60.0, - poll_interval: float = 0.5, -) -> Iterator[Any] -``` - -Iterate over partial results published by the task via [`current_job.publish()`](context.md#current_jobpublish). Yields each result as it arrives, stops when the job reaches a terminal state. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `timeout` | `float` | `60.0` | Maximum seconds to wait | -| `poll_interval` | `float` | `0.5` | Seconds between polls | - -```python -job = batch_process.delay(items) -for partial in job.stream(timeout=120): - print(f"Got: {partial}") -``` - -### `await job.astream()` - -```python -async for partial in job.astream( - timeout: float = 60.0, - poll_interval: float = 0.5, -) -> AsyncIterator[Any] -``` - -Async version of `stream()`. Uses `asyncio.sleep` so it won't block the event loop. 
- -```python -async for partial in job.astream(timeout=120): - print(f"Got: {partial}") -``` diff --git a/docs/api/task.md b/docs/api/task.md deleted file mode 100644 index 791d746..0000000 --- a/docs/api/task.md +++ /dev/null @@ -1,130 +0,0 @@ -# TaskWrapper - -::: taskito.task.TaskWrapper - -Created by `@queue.task()` — not instantiated directly. Wraps a decorated function to provide task submission methods. - -## Properties - -### `task.name` - -```python -task.name -> str -``` - -The registered task name. Either the explicit `name` passed to `@queue.task()` or the function's qualified name. - -## Methods - -### `task.delay()` - -```python -task.delay(*args, **kwargs) -> JobResult -``` - -Enqueue the task for background execution using the decorator's default options. Returns a [`JobResult`](result.md) handle. - -```python -@queue.task(priority=5) -def add(a, b): - return a + b - -job = add.delay(2, 3) -print(job.result(timeout=10)) # 5 -``` - -### `task.apply_async()` - -```python -task.apply_async( - args: tuple = (), - kwargs: dict | None = None, - priority: int | None = None, - delay: float | None = None, - queue: str | None = None, - max_retries: int | None = None, - timeout: int | None = None, - unique_key: str | None = None, - metadata: str | None = None, - depends_on: str | list[str] | None = None, -) -> JobResult -``` - -Enqueue with full control over submission options. Any parameter not provided falls back to the decorator's default. 
- -| Parameter | Type | Default | Description | -|---|---|---|---| -| `args` | `tuple` | `()` | Positional arguments for the task | -| `kwargs` | `dict | None` | `None` | Keyword arguments for the task | -| `priority` | `int | None` | `None` | Override priority (higher = more urgent) | -| `delay` | `float | None` | `None` | Delay in seconds before the task is eligible | -| `queue` | `str | None` | `None` | Override queue name | -| `max_retries` | `int | None` | `None` | Override max retry count | -| `timeout` | `int | None` | `None` | Override timeout in seconds | -| `unique_key` | `str | None` | `None` | Deduplicate active jobs with same key | -| `metadata` | `str | None` | `None` | Arbitrary JSON metadata to attach | -| `depends_on` | `str | list[str] | None` | `None` | Job ID(s) this job depends on. See [Dependencies](../guide/execution/dependencies.md). | - -```python -job = send_email.apply_async( - args=("user@example.com", "Hello"), - priority=10, - delay=3600, - queue="emails", - unique_key="welcome-user@example.com", - metadata='{"campaign": "onboarding"}', -) -``` - -### `task.map()` - -```python -task.map(iterable: list[tuple]) -> list[JobResult] -``` - -Enqueue one job per item in a single batch SQLite transaction. Uses the decorator's default options. - -```python -jobs = add.map([(1, 2), (3, 4), (5, 6)]) -results = [j.result(timeout=10) for j in jobs] -print(results) # [3, 7, 11] -``` - -### `task.s()` - -```python -task.s(*args, **kwargs) -> Signature -``` - -Create a **mutable** [`Signature`](canvas.md). In a [`chain`](canvas.md#chain), the previous task's return value is prepended to `args`. - -```python -sig = add.s(10) -# In a chain, if the previous step returned 5: -# add(5, 10) → 15 -``` - -### `task.si()` - -```python -task.si(*args, **kwargs) -> Signature -``` - -Create an **immutable** [`Signature`](canvas.md#signature). Ignores the previous task's result — arguments are used as-is. 
- -```python -sig = add.si(2, 3) -# Always calls add(2, 3) regardless of previous result -``` - -### `task()` - -```python -task(*args, **kwargs) -> Any -``` - -Call the underlying function directly (synchronous, not queued). Useful for testing or when you don't need background execution. - -```python -result = add(2, 3) # Direct call, returns 5 -``` diff --git a/docs/api/testing.md b/docs/api/testing.md deleted file mode 100644 index 2d32f15..0000000 --- a/docs/api/testing.md +++ /dev/null @@ -1,165 +0,0 @@ -# Testing API Reference - -::: taskito.testing - -## `TestMode` - -```python -from taskito.testing import TestMode -# or use the shortcut: -# queue.test_mode() -``` - -Context manager that intercepts `enqueue()` to run tasks synchronously. No worker, no Rust, no SQLite. - -### Constructor - -```python -TestMode(queue: Queue, propagate_errors: bool = False, resources: dict[str, Any] | None = None) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `queue` | `Queue` | *required* | The Queue instance to put into test mode | -| `propagate_errors` | `bool` | `False` | Re-raise task exceptions immediately instead of capturing them | -| `resources` | `dict[str, Any] | None` | `None` | Resource name → mock instance map injected during test mode. `MockResource` values are unwrapped automatically. | - -### Usage - -```python -with TestMode(queue) as results: - my_task.delay(42) - assert results[0].return_value == expected - -# Shortcut via Queue: -with queue.test_mode() as results: - my_task.delay(42) - -# With mock resources: -with queue.test_mode(resources={"db": mock_session}) as results: - create_user.delay("Alice") -``` - ---- - -## `TestResult` - -```python -from taskito import TestResult -``` - -Dataclass capturing the result of a single task execution in test mode. - -### Attributes - -| Attribute | Type | Description | -|---|---|---| -| `job_id` | `str` | Synthetic test ID (e.g. 
`"test-000001"`) | -| `task_name` | `str` | Fully qualified name of the task | -| `args` | `tuple` | Positional arguments the task was called with | -| `kwargs` | `dict` | Keyword arguments the task was called with | -| `return_value` | `Any` | Return value of the task (or `None` if it failed) | -| `error` | `Exception | None` | Exception instance if the task raised | -| `traceback` | `str | None` | Formatted traceback string if the task raised | - -### Properties - -| Property | Type | Description | -|---|---|---| -| `succeeded` | `bool` | `True` if `error is None` | -| `failed` | `bool` | `True` if `error is not None` | - ---- - -## `TestResults` - -```python -from taskito import TestResults -``` - -A `list[TestResult]` subclass with convenience filtering methods. - -### Properties - -| Property | Returns | Description | -|---|---|---| -| `succeeded` | `TestResults` | All results where `succeeded is True` | -| `failed` | `TestResults` | All results where `failed is True` | - -### Methods - -#### `.filter()` - -```python -results.filter(task_name: str | None = None, succeeded: bool | None = None) -> TestResults -``` - -Filter results by task name and/or outcome. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `task_name` | `str | None` | `None` | Exact match on task name | -| `succeeded` | `bool | None` | `None` | `True` = successes only, `False` = failures only | - -Returns a new `TestResults` containing only matching items. - -**Examples:** - -```python -results.filter(task_name="myapp.send_email") -results.filter(succeeded=False) -results.filter(task_name="myapp.process", succeeded=True) -``` - ---- - -## `MockResource` - -```python -from taskito import MockResource -``` - -Test double for a worker resource with optional call tracking. Pass instances to `queue.test_mode(resources=...)`. 
- -### Constructor - -```python -MockResource( - name: str, - return_value: Any = None, - wraps: Any = None, - track_calls: bool = False, -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `name` | `str` | *required* | Resource name (informational). | -| `return_value` | `Any` | `None` | Value returned when the resource is accessed via `.get()`. | -| `wraps` | `Any` | `None` | A real object to wrap — returned as-is from `.get()`. | -| `track_calls` | `bool` | `False` | Increment `call_count` each time `.get()` is called. | - -### Attributes - -| Attribute | Type | Description | -|---|---|---| -| `call_count` | `int` | Number of times the resource was accessed. | -| `calls` | `list` | Reserved for future per-call argument tracking. | - -### Usage - -```python -from taskito import MockResource - -# Simple mock value -mock_db = MockResource("db", return_value=FakeSessionFactory()) - -# Wrap a real object with call tracking -spy = MockResource("db", wraps=real_session_factory, track_calls=True) - -with queue.test_mode(resources={"db": spy}) as results: - process_order.delay(42) - -assert spy.call_count == 1 -assert results[0].succeeded -``` diff --git a/docs/api/workflows.md b/docs/api/workflows.md deleted file mode 100644 index 115840d..0000000 --- a/docs/api/workflows.md +++ /dev/null @@ -1,305 +0,0 @@ -# Workflows API - -::: taskito.workflows - -DAG workflow builder, execution handles, and analysis tools. - -## `Workflow` - -::: taskito.workflows.Workflow - -Builder for a workflow DAG. 
- -### Constructor - -```python -Workflow( - name: str = "workflow", - version: int = 1, - on_failure: str = "fail_fast", - cache_ttl: float | None = None, -) -``` - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `name` | `str` | `"workflow"` | Workflow name (used for definition storage) | -| `version` | `int` | `1` | Version number | -| `on_failure` | `str` | `"fail_fast"` | Error strategy: `"fail_fast"` or `"continue"` | -| `cache_ttl` | `float \| None` | `None` | Cache TTL in seconds for incremental runs | - -### `step()` - -```python -wf.step( - name: str, - task: TaskWrapper, - *, - after: str | list[str] | None = None, - args: tuple = (), - kwargs: dict | None = None, - queue: str | None = None, - max_retries: int | None = None, - timeout_ms: int | None = None, - priority: int | None = None, - fan_out: str | None = None, - fan_in: str | None = None, - condition: str | Callable | None = None, -) -> Workflow -``` - -Add a task step. Returns `self` for chaining. - -### `gate()` - -```python -wf.gate( - name: str, - *, - after: str | list[str] | None = None, - condition: str | Callable | None = None, - timeout: float | None = None, - on_timeout: str = "reject", - message: str | Callable | None = None, -) -> Workflow -``` - -Add an approval gate step. - -### `visualize()` - -```python -wf.visualize(fmt: str = "mermaid") -> str -``` - -Render the DAG as a Mermaid or DOT diagram string. - -### `ancestors()` / `descendants()` - -```python -wf.ancestors(node: str) -> list[str] -wf.descendants(node: str) -> list[str] -``` - -### `topological_levels()` - -```python -wf.topological_levels() -> list[list[str]] -``` - -### `stats()` - -```python -wf.stats() -> dict[str, int | float] -``` - -Returns `{nodes, edges, depth, width, density}`. - -### `critical_path()` - -```python -wf.critical_path(costs: dict[str, float]) -> tuple[list[str], float] -``` - -Returns `(path, total_cost)` — the longest-weighted path. 
- -### `execution_plan()` - -```python -wf.execution_plan(max_workers: int = 1) -> list[list[str]] -``` - -### `bottleneck_analysis()` - -```python -wf.bottleneck_analysis(costs: dict[str, float]) -> dict[str, Any] -``` - -Returns `{node, cost, percentage, critical_path, total_cost, suggestion}`. - ---- - -## `WorkflowRun` - -::: taskito.workflows.WorkflowRun - -Handle for a submitted workflow run. - -### `status()` - -```python -run.status() -> WorkflowStatus -``` - -### `wait()` - -```python -run.wait(timeout: float | None = None, poll_interval: float = 0.1) -> WorkflowStatus -``` - -Block until the workflow reaches a terminal state. Raises `WorkflowTimeoutError` on timeout. - -### `cancel()` - -```python -run.cancel() -> None -``` - -### `node_status()` - -```python -run.node_status(node_name: str) -> NodeStatus -``` - -### `visualize()` - -```python -run.visualize(fmt: str = "mermaid") -> str -``` - -Render the DAG with live node status colors. - ---- - -## `WorkflowProxy` - -::: taskito.workflows.WorkflowProxy - -Returned by `@queue.workflow()`. Wraps a factory function. - -### `submit()` - -```python -proxy.submit(*args, **kwargs) -> WorkflowRun -``` - -Build and submit the workflow. - -### `build()` - -```python -proxy.build(*args, **kwargs) -> Workflow -``` - -Materialize without submitting. - -### `as_step()` - -```python -proxy.as_step(**params) -> SubWorkflowRef -``` - -Return a reference for use as a sub-workflow step. 
- ---- - -## Queue Methods - -Added to `Queue` via `QueueWorkflowMixin`: - -### `submit_workflow()` - -```python -queue.submit_workflow( - workflow: Workflow, - *, - incremental: bool = False, - base_run: str | None = None, -) -> WorkflowRun -``` - -### `approve_gate()` - -```python -queue.approve_gate(run_id: str, node_name: str) -> None -``` - -### `reject_gate()` - -```python -queue.reject_gate(run_id: str, node_name: str, error: str = "rejected") -> None -``` - -### `@queue.workflow()` - -```python -@queue.workflow(name: str | None = None, *, version: int = 1) -def factory() -> Workflow: ... -``` - ---- - -## Types - -### `WorkflowState` - -```python -class WorkflowState(str, Enum): - PENDING = "pending" - RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" - CANCELLED = "cancelled" - PAUSED = "paused" -``` - -### `NodeStatus` - -```python -class NodeStatus(str, Enum): - PENDING = "pending" - READY = "ready" - RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" - SKIPPED = "skipped" - WAITING_APPROVAL = "waiting_approval" - CACHE_HIT = "cache_hit" -``` - -### `WorkflowStatus` - -```python -@dataclass -class WorkflowStatus: - run_id: str - state: WorkflowState - started_at: int | None - completed_at: int | None - error: str | None - nodes: dict[str, NodeSnapshot] -``` - -### `NodeSnapshot` - -```python -@dataclass -class NodeSnapshot: - name: str - status: NodeStatus - job_id: str | None - error: str | None -``` - -### `WorkflowContext` - -```python -@dataclass(frozen=True) -class WorkflowContext: - run_id: str - results: dict[str, Any] - statuses: dict[str, str] - params: dict[str, Any] | None - failure_count: int - success_count: int -``` - -### `GateConfig` - -```python -@dataclass -class GateConfig: - timeout: float | None = None - on_timeout: str = "reject" - message: str | Callable | None = None -``` diff --git a/docs/architecture/failure-model.md b/docs/architecture/failure-model.md deleted file mode 100644 index 
58c3bf6..0000000 --- a/docs/architecture/failure-model.md +++ /dev/null @@ -1,54 +0,0 @@ -# Failure Model - -Taskito provides **at-least-once delivery**. Here's what happens when things go wrong. - -## Worker crash mid-task - -The job stays in `running` status. The scheduler's stale reaper detects it after `timeout_ms` elapses, marks it failed, and retries (if retries remain) or moves to the dead letter queue. No manual intervention needed. - -## Parent process crash - -All worker threads stop. Jobs in `running` stay in that state until the next worker starts, when the stale reaper picks them up. Jobs in `pending` are unaffected — they'll be dispatched normally on restart. - -## Database unavailable - -Scheduler polls fail silently (logged via `log::error!`). No new jobs are dispatched. In-flight jobs complete normally — results are cached in memory until the database becomes available. - -## Network partition (Postgres/Redis) - -Same behavior as database unavailable. The scheduler retries on the next poll cycle (default: every 50ms). Connection pools handle reconnection automatically. - -## Duplicate execution - -`claim_execution` prevents two workers from picking up the same job simultaneously. But if a worker crashes *after* starting execution, the job will be retried — potentially executing the same task twice. Design tasks to be [idempotent](../guide/reliability/guarantees.md) to handle this safely. - -## Recovery timeline - -```mermaid -sequenceDiagram - participant C as Client - participant DB as Database - participant S as Scheduler - participant W as Worker - - C->>DB: enqueue(job) - S->>DB: dequeue + claim_execution - S->>W: dispatch job - W->>W: execute task... - Note over W: Worker crashes at T=5s - Note over S: Scheduler continues polling... - Note over S: T=300s: reap_stale_jobs() detects
job.started_at + timeout_ms < now - S->>DB: mark failed, schedule retry - S->>DB: dequeue (same job, retry_count=1) - S->>W: dispatch to different worker - W->>DB: complete + clear claim -``` - -## Partial writes - -If a task completes successfully but the result write to the database fails (e.g., database full, connection lost), the job stays in `running` status. The stale reaper eventually marks it failed and retries it. The task will execute again — make sure it's [idempotent](../guide/reliability/guarantees.md). - -## Jobs without timeouts - -!!! warning - If a job has no `timeout_ms` set and the worker crashes, the job stays in `running` **forever**. The stale reaper only detects jobs that have exceeded their timeout. Always set a timeout on production tasks. diff --git a/docs/architecture/index.md b/docs/architecture/index.md deleted file mode 100644 index aa86f8d..0000000 --- a/docs/architecture/index.md +++ /dev/null @@ -1,41 +0,0 @@ -# Architecture - -taskito is a hybrid Python/Rust system. Python provides the user-facing API. Rust handles all the heavy lifting: storage, scheduling, dispatch, rate limiting, and worker management. 
- -```mermaid -flowchart TD - subgraph py ["Python Layer"] - direction LR - Q["Queue"] --> IC["ArgumentInterceptor"] - TW["@queue.task()"] ~~~ RR["ResourceRuntime"] - end - - subgraph rust ["Rust Core — PyO3"] - direction LR - PQ["PyQueue"] --> SCH["Scheduler"] - SCH --> WP["Worker Pool"] - SCH --> RL["Rate Limiter"] - end - - subgraph storage ["Storage"] - direction LR - SQ[("SQLite")] ~~~ PG[("PostgreSQL")] - end - - IC --> PQ - WP -->|"acquire GIL"| TW - SCH -->|"poll / update"| SQ - PQ -->|"INSERT"| SQ -``` - -## Section overview - -| Page | What it covers | -|---|---| -| [Job Lifecycle](job-lifecycle.md) | State machine, status codes, transitions | -| [Worker Pool](worker-pool.md) | Thread architecture, async dispatch, GIL management | -| [Storage Layer](storage.md) | SQLite pragmas, schema, indexes, Postgres differences | -| [Scheduler](scheduler.md) | Poll loop, dispatch flow, periodic tasks | -| [Resource System](resources.md) | Argument interception, DI, proxy reconstruction | -| [Failure Model](failure-model.md) | Crash recovery, duplicate execution, partial writes | -| [Serialization](serialization.md) | Pluggable serializers, format details | diff --git a/docs/architecture/job-lifecycle.md b/docs/architecture/job-lifecycle.md deleted file mode 100644 index aa6c386..0000000 --- a/docs/architecture/job-lifecycle.md +++ /dev/null @@ -1,29 +0,0 @@ -# Job Lifecycle - -Every job moves through a state machine from creation to completion (or death). 
- -```mermaid -stateDiagram-v2 - [*] --> Pending: enqueue() / delay() - Pending --> Running: dequeued by scheduler - Pending --> Cancelled: cancel_job() - Running --> Complete: task returns successfully - Running --> Failed: task raises exception - Failed --> Pending: retry (count < max_retries)\nwith exponential backoff - Failed --> Dead: retries exhausted\nmoved to DLQ - Dead --> Pending: retry_dead() - Complete --> [*] - Cancelled --> [*] - Dead --> [*]: purge_dead() -``` - -## Status codes - -| Status | Integer | Description | -|---|---|---| -| Pending | 0 | Waiting to be picked up | -| Running | 1 | Currently executing | -| Complete | 2 | Finished successfully | -| Failed | 3 | Last attempt failed (may retry) | -| Dead | 4 | All retries exhausted, in DLQ | -| Cancelled | 5 | Cancelled before execution | diff --git a/docs/architecture/resources.md b/docs/architecture/resources.md deleted file mode 100644 index 56ca699..0000000 --- a/docs/architecture/resources.md +++ /dev/null @@ -1,30 +0,0 @@ -# Resource System - -The resource system is a three-layer Python pipeline that runs entirely outside Rust: - -```mermaid -flowchart TD - subgraph enqueue ["enqueue()"] - ARGS["Task arguments"] --> IC["ArgumentInterceptor"] - IC -->|PASS / CONVERT / REDIRECT| SER["Serializer"] - IC -->|PROXY| PX["ProxyHandler.deconstruct()"] - PX --> SER - end - - SER -->|"serialized payload"| QUEUE[("Queue")] - - subgraph worker ["Worker dispatch"] - DE["Deserialize"] --> RC["reconstruct_args()"] - RC --> FN["Task function"] - RT["ResourceRuntime"] -->|"inject"| FN - PX2["ProxyHandler.reconstruct()"] --> FN - end - - QUEUE --> DE -``` - -**Layer 1 — Argument Interception**: The `ArgumentInterceptor` walks every argument before serialization, applying the strategy registered for its type. CONVERT types are transformed to JSON-safe markers. REDIRECT types are replaced with a DI placeholder. PROXY types are deconstructed by their handler. REJECT types raise an error in strict mode. 
- -**Layer 2 — Worker Resource Runtime**: `ResourceRuntime` initializes all registered resources at worker startup in topological dependency order. At task dispatch time it injects the requested resources (via `inject=` or `Inject["name"]` annotation) as keyword arguments. Task-scoped resources are acquired from a semaphore pool and returned after the task finishes. - -**Layer 3 — Resource Proxies**: `ProxyHandler` implementations know how to deconstruct live objects (file handles, HTTP sessions, cloud clients) into a JSON-serializable recipe, and how to reconstruct them on the worker before the task function is called. Recipes are optionally HMAC-signed for tamper detection. diff --git a/docs/architecture/scheduler.md b/docs/architecture/scheduler.md deleted file mode 100644 index 60d05a5..0000000 --- a/docs/architecture/scheduler.md +++ /dev/null @@ -1,29 +0,0 @@ -# Scheduler - -The scheduler runs in a dedicated Tokio single-threaded async runtime: - -``` -loop { - sleep(50ms) or shutdown signal - - // Try to dequeue and dispatch a job - try_dispatch() - - // Every ~100 iterations (~5s): reap timed-out jobs - reap_stale() - - // Every ~60 iterations (~3s): check periodic tasks - check_periodic() - - // Every ~1200 iterations (~60s): auto-cleanup old jobs - auto_cleanup() -} -``` - -## Dispatch flow - -1. `dequeue_from()` — atomically SELECT + UPDATE (pending → running) within a transaction -2. Check rate limit — if over limit, reschedule 1s in the future -3. Send job to worker pool via crossbeam channel -4. Worker executes task, sends result back -5. `handle_result()` — mark complete, schedule retry, or move to DLQ diff --git a/docs/architecture/serialization.md b/docs/architecture/serialization.md deleted file mode 100644 index 1902707..0000000 --- a/docs/architecture/serialization.md +++ /dev/null @@ -1,27 +0,0 @@ -# Serialization - -taskito uses a pluggable serializer for task arguments and results. 
The default is `CloudpickleSerializer`, which supports lambdas, closures, and complex Python objects. - -```python -from taskito import Queue, JsonSerializer - -# Use JSON for simpler, cross-language payloads -queue = Queue(serializer=JsonSerializer()) -``` - -## Built-in serializers - -| Serializer | Format | Best for | -|---|---|---| -| `CloudpickleSerializer` (default) | Binary (pickle) | Complex Python objects, lambdas, closures | -| `JsonSerializer` | JSON | Simple types, cross-language interop, debugging | - -## Custom serializers - -Implement the `Serializer` protocol (`dumps(obj) -> bytes`, `loads(data) -> Any`). - -## What gets serialized - -- **Arguments**: `serializer.dumps((args, kwargs))` — stored as BLOB in `payload` -- **Results**: `serializer.dumps(return_value)` — stored as BLOB in `result` -- **Periodic task args**: Serialized at registration time, stored as BLOBs in `periodic_tasks.args` diff --git a/docs/architecture/storage.md b/docs/architecture/storage.md deleted file mode 100644 index 6ae0491..0000000 --- a/docs/architecture/storage.md +++ /dev/null @@ -1,113 +0,0 @@ -# Storage Layer - -## SQLite configuration - -| Pragma | Value | Why | -|---|---|---| -| `journal_mode` | WAL | Concurrent reads while writing | -| `busy_timeout` | 5000ms | Wait on lock contention instead of failing | -| `synchronous` | NORMAL | Fast writes, safe with WAL | -| `journal_size_limit` | 64MB | Prevent unbounded WAL file growth | - -## Database schema - -```mermaid -erDiagram - jobs { - TEXT id PK - TEXT queue - TEXT task_name - BLOB payload - INTEGER status - INTEGER priority - INTEGER created_at - INTEGER scheduled_at - INTEGER started_at - INTEGER completed_at - INTEGER retry_count - INTEGER max_retries - BLOB result - TEXT error - INTEGER timeout_ms - TEXT unique_key - INTEGER progress - TEXT metadata - BOOLEAN cancel_requested - INTEGER expires_at - INTEGER result_ttl_ms - } - - dead_letter { - TEXT id PK - TEXT original_job_id - TEXT queue - TEXT task_name - 
BLOB payload - TEXT error - INTEGER retry_count - INTEGER failed_at - TEXT metadata - INTEGER priority - INTEGER max_retries - INTEGER timeout_ms - INTEGER result_ttl_ms - } - - job_errors { - TEXT id PK - TEXT job_id FK - INTEGER attempt - TEXT error - INTEGER failed_at - } - - rate_limits { - TEXT key PK - REAL tokens - REAL max_tokens - REAL refill_rate - INTEGER last_refill - } - - periodic_tasks { - TEXT name PK - TEXT task_name - TEXT cron_expr - BLOB args - BLOB kwargs - TEXT queue - BOOLEAN enabled - INTEGER last_run - INTEGER next_run - } - - workers { - TEXT worker_id PK - INTEGER last_heartbeat - TEXT queues - TEXT status - } - - jobs ||--o{ job_errors : "error history" - jobs ||--o| dead_letter : "DLQ on exhaustion" -``` - -## Key indexes - -- `idx_jobs_dequeue`: `(queue, status, priority DESC, scheduled_at)` — fast dequeue -- `idx_jobs_status`: `(status)` — fast stats queries -- `idx_jobs_unique_key`: partial unique index on `unique_key` where status is pending/running -- `idx_job_errors_job_id`: `(job_id)` — fast error history lookup - -## Connection pooling - -Diesel's `r2d2` connection pool with up to 8 connections (SQLite) or 10 connections (Postgres). In-memory databases use a single connection (SQLite `:memory:` is per-connection). - -## Postgres differences - -taskito also supports PostgreSQL as an alternative storage backend. See the [Postgres Backend guide](../guide/operations/postgres.md) for full details. - -- **Connection pooling**: `r2d2` pool with a default of 10 connections (vs. 8 for SQLite) -- **Schema isolation**: All tables are created inside a configurable PostgreSQL schema (default: `taskito`), with `search_path` set on each connection -- **Additional tables**: The Postgres backend creates 11 tables (vs. 
6 for SQLite), adding `job_dependencies`, `task_metrics`, `replay_history`, `task_logs`, and `circuit_breakers` -- **Concurrent writes**: No single-writer constraint — multiple workers can write simultaneously diff --git a/docs/architecture/worker-pool.md b/docs/architecture/worker-pool.md deleted file mode 100644 index 4e62d9c..0000000 --- a/docs/architecture/worker-pool.md +++ /dev/null @@ -1,28 +0,0 @@ -# Worker Pool - -The worker pool dispatches jobs from the scheduler to Python task functions. - -```mermaid -flowchart TD - SCH["Scheduler\nTokio async · 50ms poll"] - - SCH -->|"sync job"| JCH["Job Channel\nbounded: workers×2"] - SCH -->|"async job"| AP["Native Async Pool"] - - JCH --> WP["Worker Threads\nGIL per task · N threads"] - AP --> EL["Async Executor\ndedicated event loop"] - - WP -->|"Result"| RCH["Result Channel"] - EL -->|"PyResultSender"| RCH - - RCH --> ML["Main Loop\npy.allow_threads"] - ML -->|"complete / retry / DLQ"| DB[("SQLite")] -``` - -## Design decisions - -- **OS threads, not Python threads**: Sync workers are Rust `std::thread` threads. The GIL is only acquired when calling Python task code. -- **Bounded channels**: Both job and result channels are bounded to `workers × 2` to provide backpressure. -- **GIL isolation**: Each sync worker acquires the GIL independently using `Python::with_gil()`. The scheduler and result handler release the GIL via `py.allow_threads()`. -- **Native async dispatch**: `async def` tasks bypass the thread pool entirely. A `NativeAsyncPool` sends them to a dedicated `AsyncTaskExecutor` running on a Python daemon thread. `PyResultSender` (a `#[pyclass]`) bridges results back into the Rust scheduler. -- **Context isolation**: Sync tasks use `threading.local` for `current_job`; async tasks use `contextvars.ContextVar`, which is properly scoped across `await` boundaries and isolated between concurrent coroutines. 
diff --git a/docs/assets/css/custom.css b/docs/assets/css/custom.css deleted file mode 100644 index b027348..0000000 --- a/docs/assets/css/custom.css +++ /dev/null @@ -1,50 +0,0 @@ -.copy-markdown-btn { - display: inline-flex; - align-items: center; - gap: 0.4rem; - padding: 0.35rem 0.75rem; - margin-bottom: 1rem; - border: 1px solid var(--md-default-fg-color--lightest); - border-radius: 0.25rem; - background: var(--md-code-bg-color); - color: var(--md-default-fg-color--light); - font-size: 0.7rem; - font-family: var(--md-text-font-family); - cursor: pointer; - transition: background 0.2s, color 0.2s, border-color 0.2s; -} - -.copy-markdown-btn:hover { - background: var(--md-accent-fg-color--transparent); - border-color: var(--md-accent-fg-color); - color: var(--md-accent-fg-color); -} - -.copy-markdown-btn.copied { - background: var(--md-typeset-ins-color); - border-color: transparent; - color: #fff; -} - -.copy-markdown-btn.error { - background: var(--md-typeset-del-color); - border-color: transparent; - color: #fff; -} - -.copy-markdown-btn.loading { - opacity: 0.7; - cursor: wait; -} - -.copy-markdown-btn:disabled { - cursor: not-allowed; -} - -.copy-markdown-btn svg { - flex-shrink: 0; -} - -.md-generator { - display: none; -} diff --git a/docs/assets/js/copy-markdown.js b/docs/assets/js/copy-markdown.js deleted file mode 100644 index f1e7905..0000000 --- a/docs/assets/js/copy-markdown.js +++ /dev/null @@ -1,307 +0,0 @@ -document.addEventListener("DOMContentLoaded", function () { - const GITHUB_RAW_BASE = - "https://raw.githubusercontent.com/ByteVeda/taskito/master/docs/"; - - function getSourcePath() { - // Try the edit button first (most reliable) - const editLink = document.querySelector("a[title='Edit this page']"); - if (editLink) { - const href = editLink.getAttribute("href"); - const match = href.match(/\/docs\/(.+\.md)$/); - if (match) return match[1]; - } - - // Fallback: derive from URL path - let path = window.location.pathname; - - // Strip site 
prefix (handles both root and subdirectory deploys) - const base = document.querySelector("base"); - if (base) { - const baseHref = new URL(base.href).pathname; - if (path.startsWith(baseHref)) { - path = path.slice(baseHref.length); - } - } - - // Remove leading slash - path = path.replace(/^\//, ""); - - // Handle trailing slash or empty → index.md - if (path === "") { - path = "index.md"; - } else if (path.endsWith("/")) { - path = path.slice(0, -1) + ".md"; - } else if (!path.endsWith(".md")) { - // e.g. /guide/tasks → guide/tasks.md - path += ".md"; - } - - return path; - } - - function htmlToMarkdown(container) { - var result = []; - - function processNode(node, listPrefix) { - if (node.nodeType === Node.TEXT_NODE) { - var text = node.textContent; - if (text.trim()) result.push(text); - return; - } - if (node.nodeType !== Node.ELEMENT_NODE) return; - - var tag = node.tagName.toLowerCase(); - - // Skip the copy button itself and edit links - if (node.classList.contains("copy-markdown-btn")) return; - if (node.classList.contains("md-content__button")) return; - - // Headings - if (/^h[1-6]$/.test(tag)) { - var level = parseInt(tag[1]); - result.push("\n" + "#".repeat(level) + " " + node.textContent.trim() + "\n"); - return; - } - - // Horizontal rule - if (tag === "hr") { - result.push("\n---\n"); - return; - } - - // Code blocks - if (tag === "pre") { - var code = node.querySelector("code"); - if (code) { - var lang = ""; - var classes = code.className.split(/\s+/); - for (var i = 0; i < classes.length; i++) { - var m = classes[i].match(/^language-(.+)$/); - if (m) { lang = m[1]; break; } - } - result.push("\n```" + lang + "\n" + code.textContent + "```\n"); - } else { - result.push("\n```\n" + node.textContent + "```\n"); - } - return; - } - - // Inline code (but not inside pre) - if (tag === "code") { - result.push("`" + node.textContent + "`"); - return; - } - - // Bold - if (tag === "strong" || tag === "b") { - result.push("**" + node.textContent + "**"); 
- return; - } - - // Italic - if (tag === "em" || tag === "i") { - result.push("*" + node.textContent + "*"); - return; - } - - // Links - if (tag === "a") { - var href = node.getAttribute("href") || ""; - // Skip anchor links like headerlinks - if (node.classList.contains("headerlink")) return; - result.push("[" + node.textContent.trim() + "](" + href + ")"); - return; - } - - // Images - if (tag === "img") { - var alt = node.getAttribute("alt") || ""; - var src = node.getAttribute("src") || ""; - result.push("![" + alt + "](" + src + ")"); - return; - } - - // SVGs (e.g. pymdownx.emoji to_svg renders inline SVGs) - if (tag === "svg") { - var title = node.querySelector("title"); - if (title) result.push(title.textContent); - return; - } - - // Paragraphs - if (tag === "p") { - result.push("\n"); - for (var c = node.firstChild; c; c = c.nextSibling) { - processNode(c, listPrefix); - } - result.push("\n"); - return; - } - - // Blockquote - if (tag === "blockquote") { - var bqContent = htmlToMarkdownInner(node); - var lines = bqContent.trim().split("\n"); - result.push("\n"); - for (var i = 0; i < lines.length; i++) { - result.push("> " + lines[i] + "\n"); - } - return; - } - - // Lists - if (tag === "ul" || tag === "ol") { - result.push("\n"); - var items = node.children; - for (var i = 0; i < items.length; i++) { - if (items[i].tagName.toLowerCase() === "li") { - var prefix = tag === "ol" ? (i + 1) + ". 
" : "- "; - result.push((listPrefix || "") + prefix); - for (var c = items[i].firstChild; c; c = c.nextSibling) { - if (c.nodeType === Node.ELEMENT_NODE && (c.tagName.toLowerCase() === "ul" || c.tagName.toLowerCase() === "ol")) { - processNode(c, (listPrefix || "") + " "); - } else { - processNode(c, listPrefix); - } - } - if (result.length === 0 || !result[result.length - 1].endsWith("\n")) { - result.push("\n"); - } - } - } - return; - } - - // Tables - if (tag === "table") { - var rows = node.querySelectorAll("tr"); - for (var r = 0; r < rows.length; r++) { - var cells = rows[r].querySelectorAll("th, td"); - var rowText = "|"; - for (var c = 0; c < cells.length; c++) { - rowText += " " + htmlToMarkdownInner(cells[c]).trim() + " |"; - } - result.push(rowText + "\n"); - // Add separator after header row - if (r === 0 && rows[r].querySelector("th")) { - var sep = "|"; - for (var c = 0; c < cells.length; c++) { - sep += " --- |"; - } - result.push(sep + "\n"); - } - } - return; - } - - // Default: recurse into children - for (var c = node.firstChild; c; c = c.nextSibling) { - processNode(c, listPrefix); - } - } - - function htmlToMarkdownInner(el) { - var saved = result; - result = []; - for (var c = el.firstChild; c; c = c.nextSibling) { - processNode(c, ""); - } - var out = result.join(""); - result = saved; - return out; - } - - for (var c = container.firstChild; c; c = c.nextSibling) { - processNode(c, ""); - } - - // Clean up excessive blank lines - return result.join("").replace(/\n{3,}/g, "\n\n").trim() + "\n"; - } - - function injectButton() { - const container = document.querySelector(".md-content__inner"); - if (!container) return; - - const btn = document.createElement("button"); - btn.className = "copy-markdown-btn"; - btn.setAttribute("aria-label", "Copy page as Markdown"); - btn.title = "Copy as Markdown"; - btn.innerHTML = - '' + - '' + - "" + - "Copy as Markdown"; - - btn.addEventListener("click", async function () { - const sourcePath = 
getSourcePath(); - - btn.disabled = true; - btn.classList.add("loading"); - btn.querySelector("span").textContent = "Fetching..."; - - try { - let markdown; - // Try local _sources first (available in built site), fall back to GitHub - let fetched = false; - for (const url of ["/_sources/" + sourcePath, GITHUB_RAW_BASE + sourcePath]) { - try { - const res = await fetch(url); - if (res.ok) { - const text = await res.text(); - // Guard against SPA servers returning HTML for unknown routes - if (!text.trimStart().startsWith(">` to `Option>>` so callers can omit individual entries (matches the pattern already used by `delay_seconds_list` / `metadata_list` / etc.). Type stub follows; pure Python callers unaffected. - -### Fixed - -- **Scheduler concurrency cap atomicity** -- a TOCTOU race between the cap check and `claim_execution` allowed two schedulers to both pass the cap and over-dispatch. Also fixed an off-by-one (`>=` against a count that already includes the just-dequeued running job). `try_dispatch` was restructured into named helpers (`active_queues`, `check_pre_claim_gates`, `claim_for_dispatch`, `check_post_claim_concurrency`, `rollback_claim_and_retry`); cap check now runs after `claim_execution` with strict `>`. New regression tests cover exact-cap, max-1, and per-queue caps. -- **Scheduler reschedule on full or closed worker channel** -- a naive `try_send` swallowed `TrySendError::Full` / `Closed`, leaving the job in `Running` until the stale-reaper timed it out -- surfacing as a *timeout* in metrics and middleware (wrong outcome for a job that never ran). Replaced with a full match: warn, roll back the claim, and reschedule with a 100 ms backoff. -- **`enqueue_many` middleware contract** -- `on_enqueue` now receives each job's own args and kwargs (was always `args_list[0]`). Mutations to the per-job options dict propagate to the enqueued jobs (was discarded -- middleware ran *after* `enqueue_batch` against a fresh empty dict). 
Middleware exceptions surface via `logger.exception("middleware on_enqueue() error")` instead of a silent `except: pass`. -- **`result()` / `aresult()` deadline race** -- could raise `TimeoutError` even when the job had already failed/died/cancelled, when the terminal state landed during the final poll-then-deadline-check window. A defensive re-poll inside the deadline branch lets the caller see the real exception class (`TaskFailedError`, `MaxRetriesExceededError`, `TaskCancelledError`). -- **`result_handler.rs` triple-fetch** -- the Failure branch fetched `get_job` up to three times per call (queue context + `!should_retry` DLQ + retry-exhausted DLQ). Now fetches once and reuses the same `Option<&Job>` via a small DLQ closure. -- **`ResourcePool._active_count` underflow** -- the increment moved to *after* the factory call returns successfully. The failure path no longer needs (or has) a decrement, so a wedged factory can't underflow `active` in `stats()`. Failed attempts also stop counting toward `total_acquisitions`. -- **Prefork timeout** -- children that exceed their per-job timeout are killed; previously could hang indefinitely. -- **CI PyO3 finalization SIGABRT** -- eliminated; pip cache warning silenced. -- **Dashboard timestamps** -- every timestamp field (`created_at`, `last_heartbeat`, `logged_at`, etc.) renders as milliseconds consistent with the backend; previously an extra `× 1000` pushed dates into year 58282. The contract is documented at the top of `dashboard/src/lib/api-types.ts` with per-field JSDoc. -- **Dashboard DAG cycle bound** -- BFS layer assignment now uses a `visited` set + max-iterations break, so an accidental cycle in a workflow definition can't loop forever. -- **Dashboard settings storage opacity** -- settings PUT bodies are JSON-encoded server-side so callers see structured values, not stringified JSON. 
-- **Dashboard dead-letter row keyboard accessibility** -- clickable group rows use `role="button"`, `tabIndex={0}`, and an `onKeyDown` handler that triggers expansion on Enter / Space; `aria-expanded` reflects the open/closed state. Biome `noStaticElementInteractions` and `useKeyWithClickEvents` rules promoted from `off` to `error`. - -### Internal - -- **`app.py` split into `mixins/` package** -- `QueueInspectionMixin`, `QueueOperationsMixin`, `QueueLockMixin`, `QueueWorkflowMixin`, and the decorator/event/resource modules now live under `py_src/taskito/mixins/`. The `Queue` class is now a thin assembly over the mixins. -- **`workflows/tracker.py` split into package** -- `WorkflowTracker` decomposed into `_GateManager`, `_FanOutOrchestrator`, `_SubWorkflowCoordinator`. -- **`redis_backend/jobs.rs` split into submodule** -- separate files for enqueue, query, helpers, maintenance. -- **`py_queue/workflow_ops.rs` split into submodule**. -- **`dashboard.py` split into package** -- handler/router separation. -- **Dashboard health-audit follow-ups** -- extracted `formatAxisTime` shared between metric charts; extracted `job-dag-layout` pure module; centralized log-level color map in `status.ts`; debounced filters via refs (no `eslint-disable`); promoted Biome `useExhaustiveDependencies` from `warn` to `error`; pure helpers (`parseRefreshOption`, `refreshIntervalMs`) extracted from `refresh-interval-provider` for testability; new vitest coverage on `api-client`, `errors`, `settings`, and `refresh-interval-provider` (81 tests at release). -- **Dependency bumps** -- `redis 0.27 → 1.2`, `libsqlite3-sys 0.30 → 0.37`, `thiserror 1 → 2`, `rand 0.8 → 0.10`, `pq-sys`, `cron`, `tailwind-merge`, `@vitejs/plugin-react`, `react`, and Python dep floors to latest stable. 
-- **CI** -- `dorny/paths-filter v3 → v4`; drop `area/` label prefix and skip jobs by path; floating major tags for action references; per-PR Postgres/Redis service containers run the storage contract suite on every change (PR #73, landed in 0.11.1; now exercised across every release). - -### Test counts at release - -- Rust: 89 tests (up from 78) -- Python: 496 passed, 9 skipped across 49 files (up from 469 / 46) -- Dashboard (vitest): 81 tests - ---- - -## 0.11.1 - -### Fixed - -- **Workflow fan-in race** -- concurrent child completions on different worker threads could both expand fan-in. The `check_fan_out_completion` Rust call now delegates to a new `WorkflowStorage::finalize_fan_out_parent` compare-and-swap, so the parent transitions at most once regardless of how many children complete simultaneously. -- **Sub-workflow compile failure** -- if a child workflow's factory or compile step raised, the parent node was left permanently `SKIPPED`, hanging the outer run. The parent is now promoted to `RUNNING` only after the child's compile + submit succeed, and is marked `FAILED` on error so the run finalizes. -- **Redis `purge_execution_claims`** -- previously a silent no-op. Execution claims are now mirrored into a time-indexed sorted set (`taskito:exec_claims:by_time`) so the scheduler's maintenance loop can reap stale claims in O(log n). Legacy keys still expire via the 24 h `PX` TTL. -- **SQLite `move_to_dlq` cascade** -- cascade-cancel errors on the dependent sweep are now propagated (parity with Postgres and Redis) instead of being swallowed as a warning. Callers see the failure and can decide whether to retry or alert. - -### Performance - -- **Workflow ops release the GIL during SQLite I/O** -- every method in `workflow_ops.rs` now wraps DB round-trips in `py.allow_threads(...)`. Event-bus callbacks that fire from worker threads no longer serialize the rest of the Python runtime on each fan-in / mark-result / cancel call. 
-- **`WorkflowSqliteStorage` cached per queue** -- migrations run once on first workflow API call via `OnceLock`, instead of re-running `CREATE TABLE IF NOT EXISTS` on every single call. - -### Safety - -- **`cancel_workflow_run` iterative** -- replaced recursive sub-workflow cascade with an iterative BFS plus `visited` set. No recursion deadlock, no connection-pool exhaustion on deep sub-workflow trees, and any accidental cycle in `parent_run_id` terminates safely. -- **Tracker state lock** -- `WorkflowTracker._state_lock` (RLock) now guards every access to `_run_configs`, `_job_to_run`, `_child_to_parent`, and `_gate_timers`, which are touched from worker threads, gate-timeout timers, and user threads. -- **Gate timer cleanup** -- `_cleanup_run` cancels any pending gate timers for the finishing run and drops stale child→parent mappings. Timers no longer fire on already-terminal runs. -- **Workflow metadata JSON escaping** -- `build_metadata_json` uses `serde_json::json!`; node names containing backslashes, control characters, or Unicode are now escaped correctly. Previously they produced malformed JSON that silently dropped the workflow event. -- **Narrower exception handling in the tracker** -- broad `except Exception:` clauses narrowed to `(RuntimeError, ValueError)` on Rust FFI call sites; the remaining broad catches are restricted to user callables and event emission with an explanatory `# noqa`. Silent `let _ = storage.cancel_job(...)` replaced with `log::warn!` via a shared helper. - -### Added - -- **`PrometheusMiddleware(task_filter=...)`** -- parity with `OTelMiddleware` and `SentryMiddleware`. A predicate `(task_name: str) -> bool` toggles metric export per task. - -### Changed - -- **`dagron-core` git dependency pinned** -- `Cargo.toml` now pins `dagron-core` to a specific commit SHA. Upstream pushes no longer cause silent build breakage. 
-- **`Storage` trait doc comment** -- now lists all three backends (SQLite, Postgres, Redis) instead of just the two Diesel ones. -- **`AsyncQueueMixin.metrics_timeseries` stub** -- parameter name corrected from `interval` to `bucket` to match the real sync signature. Call sites typed via the stub were silently wrong at runtime. - ---- - -## 0.11.0 - -### Features - -- **DAG workflows** -- first-class support for directed acyclic graph workflows built on the new [dagron-core](https://github.com/ByteVeda/dagron) engine; `Workflow` builder with `step()`, `gate()`, and `after=` dependencies; `queue.submit_workflow(wf)` launches a run, `WorkflowRun.wait()` blocks until terminal, `run.status()` returns per-node snapshots, `run.cancel()` halts in-flight execution; workflows are persisted across restarts with full node history -- **Fan-out / fan-in** -- `step(fan_out="each")` expands a list result into N parallel child jobs; `step(fan_in="all")` aggregates all child results into a single downstream step; supports empty lists, single-item lists, and preserves result ordering -- **Conditional execution** -- per-step `condition="on_success" | "on_failure" | "always"` or a callable `(WorkflowContext) -> bool`; combine with `Workflow(on_failure="continue")` so independent branches keep running after a sibling fails; skip propagation respects `always` -- **Approval gates** -- `wf.gate("review", after="evaluate", timeout=3600, on_timeout="reject")` pauses the workflow until `queue.approve_gate(run_id, name)` or `queue.reject_gate(run_id, name)`; timeout enforced with a background timer; emits `WORKFLOW_GATE_REACHED` event -- **Sub-workflows** -- compose workflows by referencing another workflow as a step via `region_etl.as_step(region="eu")`; child workflows have a `parent_run_id` link and propagate cancellation and failure upward; child terminal status feeds into parent DAG evaluation -- **Cron-scheduled workflows** -- `@queue.periodic(cron=...)` now accepts a `WorkflowProxy`; 
launcher task is auto-registered and submits a fresh workflow run on every tick -- **Incremental re-runs** -- `Workflow(cache_ttl=86400)` hashes step results with SHA-256; `queue.submit_workflow(wf, incremental=True, base_run=prev_run.id)` skips completed steps whose inputs are unchanged; failed steps always re-run; dirty propagation cascades to downstream nodes; new `CACHE_HIT` terminal status distinguishes cached steps from freshly executed ones -- **Graph algorithms** -- `wf.topological_levels()`, `wf.stats()`, `wf.critical_path(durations)`, `wf.bottleneck_analysis(durations)`, and `wf.execution_plan()` for pre-execution analysis; all algorithms operate on the compiled DAG without requiring a live run -- **Visualization** -- `wf.visualize("mermaid")` and `wf.visualize("dot")` render the DAG; `run.visualize("mermaid")` color-codes live node status (running/completed/failed/cache-hit/waiting-approval) -- **Workflow events** -- new event types `WORKFLOW_SUBMITTED`, `WORKFLOW_COMPLETED`, `WORKFLOW_FAILED`, `WORKFLOW_CANCELLED`, `WORKFLOW_GATE_REACHED` for observability hooks -- **Type-safe builder** -- `step()` accepts any object satisfying the `HasTaskName` protocol (runtime-checkable), keeping the builder API strict without coupling to a concrete `TaskWrapper` class - -### Internal - -- New Rust crate `crates/taskito-workflows/` -- workflow engine with `WorkflowDefinition`, `WorkflowRun`, `WorkflowNode`, node status state machine (including `CacheHit` variant), and storage trait with SQLite/Postgres/Redis backends; feature-gated behind `workflows` cargo feature -- `dagron-core` added as git dependency (`https://github.com/ByteVeda/dagron.git`) for DAG construction and traversal -- New PyO3 bindings in `crates/taskito-python/src/py_workflow/` -- `PyWorkflowBuilder`, `PyWorkflowHandle`, `PyWorkflowRunStatus`; `py_queue/workflow_ops.rs` exposes `submit_workflow`, `mark_workflow_node_result`, `expand_fan_out`, `check_fan_out_completion`, `skip_workflow_node`, 
`set_workflow_node_waiting_approval`, `resolve_workflow_gate`, `finalize_run_if_terminal`, and base-run lookup helpers -- New Python package `py_src/taskito/workflows/` with 11 modules -- `builder.py` (Workflow, GateConfig, WorkflowProxy), `tracker.py` (cascade evaluator), `run.py` (WorkflowRun), `mixins.py` (QueueWorkflowMixin), `fan_out.py`, `context.py` (WorkflowContext), `incremental.py` (dirty-set computation), `analysis.py` (graph algorithms), `visualization.py`, `types.py`, `__init__.py` -- `maturin` CI feature list fixed -- `ci.yml` and `publish.yml` now include `workflows` alongside `extension-module,postgres,redis,native-async` (previously missing, which would have shipped broken wheels) -- CI action versions bumped -- `Swatinem/rust-cache@v2.9.1`, `actions/setup-node@v6` to silence Node.js 20 deprecation warnings -- 74 new Python tests across 10 files covering linear, fan-out, conditions, gates, sub-workflows, cron, analysis, caching, and visualization - ---- - -## 0.10.1 - -### Changed - -- Repository transferred to [ByteVeda](https://github.com/ByteVeda/taskito) org -- Documentation URL updated to [docs.byteveda.org/taskito](https://docs.byteveda.org/taskito) -- All internal links updated from `pratyush618/taskito` to `ByteVeda/taskito` - ---- - -## 0.10.0 - -### Features - -- **Dashboard rebuild** -- full rewrite of the web dashboard using Preact, Vite, and Tailwind CSS; production-grade dark/light UI with lucide icons, toast notifications, loading states, timeseries charts, and 3 new pages (Resources, Queue Management, System Internals); 128KB single-file HTML (32KB gzipped) served from the Python package with zero runtime dependencies -- **Smart scheduling** -- adaptive backpressure polling (50ms base → 200ms max backoff when idle, instant reset on dispatch); per-task duration cache tracks average execution time in-memory; weighted least-loaded dispatch for prefork pool factors in task duration (`score = in_flight × avg_duration`) - -### Internal - 
-- Dashboard frontend source in `dashboard/` (Preact + Vite + Tailwind CSS + TypeScript); build via `cd dashboard && npm run build`; output inlined into `py_src/taskito/templates/dashboard.html` -- `dashboard.py` simplified to read single pre-built HTML instead of composing from 8 separate template files -- `Scheduler::run()` uses adaptive polling with exponential backoff (50ms → 200ms max); `tick()` returns `bool` for feedback -- `TaskDurationCache` in-memory HashMap tracks per-task avg wall_time_ns, updated on every `handle_result()` -- `weighted_least_loaded()` dispatch strategy in `prefork/dispatch.rs`; `aging_factor` field added to `SchedulerConfig` - ---- - -## 0.9.0 - -### Features - -- **Prefork worker pool** -- `queue.run_worker(pool="prefork", app="myapp:queue")` spawns child Python processes with independent GILs for true CPU parallelism; each child imports the app module, builds its own task registry, and executes tasks in a read-execute-write loop over JSON Lines IPC; the parent Rust scheduler dequeues jobs and dispatches to the least-loaded child via stdin pipes; reader threads parse child stdout and feed results back to the scheduler; graceful shutdown sends shutdown messages to children and waits with timeout before killing -- **Worker discovery** -- `queue.workers()` now returns `hostname`, `pid`, `pool_type`, and `started_at` for each worker, giving operators visibility into multi-machine deployments -- **Worker lifecycle events** -- three new event types: `WORKER_ONLINE` (registered in storage), `WORKER_OFFLINE` (dead worker reaped), `WORKER_UNHEALTHY` (resource health degraded); subscribe via `queue.on_event(EventType.WORKER_OFFLINE, callback)` -- **Worker status transitions** -- workers report `active → draining → stopped` status; shutdown signal sets status to `"draining"` before drain timeout, visible in `queue.workers()` and the dashboard -- **Orphan rescue prep** -- `list_claims_by_worker` storage method enables future orphaned job rescue 
when dead workers are detected -- **Task result streaming** -- `current_job.publish(data)` streams partial results from inside tasks; `job.stream()` / `await job.astream()` iterates partial results as they arrive; built on existing `task_logs` infrastructure with `level="result"` (no new tables or Rust changes); FastAPI SSE endpoint supports `?include_results=true` to stream partial results alongside progress - -### Internal - -- New Rust module `crates/taskito-python/src/prefork/` with 4 files: `mod.rs` (PreforkPool + WorkerDispatcher impl), `child.rs` (ChildWriter/ChildReader/ChildProcess split handles), `protocol.rs` (ParentMessage/ChildMessage JSON serialization), `dispatch.rs` (least-loaded dispatcher) -- New Python package `py_src/taskito/prefork/` with `child.py` (child process main loop), `__init__.py` (PreforkConfig), `__main__.py` (entry point) -- `base64` and `gethostname` crates added to `taskito-python` dependencies -- `run_worker()` gains `pool` and `app_path` parameters in both Rust (`py_queue/worker.rs`) and Python (`app.py`) -- `workers` table gains 4 columns: `started_at`, `hostname`, `pid`, `pool_type` (all backends + migrations) -- `reap_dead_workers` now returns a `Vec` of reaped worker IDs instead of a bare `u64` count; enables `WORKER_OFFLINE` event emission -- New storage methods: `update_worker_status`, `list_claims_by_worker` across all 3 backends - ---- - -## 0.8.0 - -### Features - -- **Namespace-based routing** -- `Queue(namespace="team-a")` isolates workloads across teams/services sharing a single database; enqueued jobs carry the namespace, workers only dequeue matching jobs, `list_jobs()` and `list_jobs_filtered()` default to the queue's namespace (pass `namespace=None` for global view); DLQ and archival preserve namespace through the full job lifecycle; periodic tasks inherit namespace from their scheduler; backward compatible (`None` namespace matches only `NULL`-namespace jobs) - -### Internal - -- `namespace` column added to `dead_letter` and 
`archived_jobs` tables; `DeadLetterRow`, `NewDeadLetterRow`, `ArchivedJobRow` models updated; Redis `DeadJobEntry` uses `#[serde(default)]` for backward compatibility -- `Storage` trait: `dequeue`, `dequeue_from`, `list_jobs`, `list_jobs_filtered` signatures gain `namespace: Option<&str>` parameter; all 3 backends + delegate macro updated -- `Scheduler` struct carries `namespace: Option` field, passes to `dequeue_from` in poller -- `PyQueue` struct carries `namespace: Option` field; `PyJob` exposes `namespace` to Python -- `_UNSET` sentinel in `mixins.py` distinguishes "namespace not passed" from explicit `None` - ---- - -## 0.7.0 - -### Features - -- **Async canvas primitives** -- `Signature.apply_async()`, `chain.apply_async()`, `group.apply_async()`, and `chord.apply_async()` for non-blocking workflow execution from async contexts; `chain` uses `aresult()` for truly async step-by-step execution; `group` uses `asyncio.gather` for concurrent wave awaiting; `chord` awaits all group results then enqueues the callback -- **Sample-based circuit breaker recovery** -- half-open state now allows N probe requests (default 5) instead of a single probe; closes only when the success rate meets a configurable threshold (default 80%); immediately re-opens when the threshold becomes mathematically impossible; timeout safety valve re-opens if probes don't complete within the cooldown period; configure via `circuit_breaker={"half_open_probes": 5, "half_open_success_rate": 0.8}` on `@queue.task()` -- **`enqueue_many()` parity with `enqueue()`** -- batch enqueue now supports per-job `delay`/`delay_list`, `unique_keys`, `metadata`/`metadata_list`, `expires`/`expires_list`, and `result_ttl`/`result_ttl_list` parameters; also emits `JOB_ENQUEUED` events and dispatches `on_enqueue` middleware hooks, matching single-enqueue behavior -- **`TaskFailedError` exception** -- new exception type in the hierarchy for tasks that failed (as opposed to cancelled or dead-lettered); `job.result()` 
now raises `TaskFailedError`, `TaskCancelledError`, `MaxRetriesExceededError`, or `SerializationError` instead of generic `RuntimeError` -- **`PyResultSender` conditional export** -- `from taskito import PyResultSender` works when built with `native-async` feature; silently unavailable otherwise (no confusing `AttributeError`) - -### Fixes - -- **Middleware context `queue_name` was `"unknown"`** -- `on_retry`, `on_dead_letter`, `on_cancel`, and `on_timeout` middleware hooks now receive the actual queue name from the job instead of a hardcoded `"unknown"` string -- **Redis `KEYS *` in lock reaping** -- `reap_expired_locks` replaced `KEYS` (O(N), blocks Redis server) with cursor-based `SCAN` using `COUNT 100` -- **Redis execution claims never expire** -- `claim_execution` now uses `SET NX PX 86400000` (24-hour TTL); orphaned claims from dead workers auto-expire instead of blocking re-execution forever -- **`_taskito_is_async` fragility** -- `_taskito_is_async` and `_taskito_async_fn` are now declared fields on `TaskWrapper.__init__` instead of dynamically monkey-patched attributes; prevents silent fallback to sync execution path if attributes are missing - -### Internal - -- All production Rust `eprintln!` calls replaced with `log` crate macros (`log::info!`, `log::warn!`, `log::error!`); `log` dependency added to `taskito-python` and `taskito-async` crates -- `ResultOutcome::Retry`, `::DeadLettered`, `::Cancelled` now carry `queue: String` for middleware context -- Ruff `target-version` updated from `py39` to `py310` to match `requires-python = ">=3.10"` -- Fixed UP035 (`Callable` import from `collections.abc`) and B905 (`zip()` without `strict=`) lint warnings -- Circuit breakers schema: 5 new columns on `circuit_breakers` table (`half_open_max_probes`, `half_open_success_rate`, `half_open_probe_count`, `half_open_success_count`, `half_open_failure_count`) with backward-compatible defaults - ---- - -## 0.6.0 - -### Features - -- **Middleware lifecycle hooks wired** 
-- `on_retry(ctx, error, retry_count)`, `on_dead_letter(ctx, error)`, and `on_cancel(ctx)` are now dispatched from the Rust result handler; they fire for every matching outcome across all registered middleware -- **Expanded middleware hooks** -- `TaskMiddleware` gains four new hooks: `on_enqueue`, `on_dead_letter`, `on_timeout`, `on_cancel`; `on_enqueue` receives a mutable `options` dict that can modify priority, delay, queue, and other enqueue parameters before the job is written -- **`JOB_RETRYING`, `JOB_DEAD`, `JOB_CANCELLED` events now emitted** -- these three event types were previously defined but never fired; they are now emitted from the Rust result handler with payloads `{job_id, task_name, error, retry_count}`, `{job_id, task_name, error}`, and `{job_id, task_name}` respectively -- **Queue-level rate limits** -- `queue.set_queue_rate_limit("name", "100/m")` applies a token-bucket rate limit to an entire queue, checked in the scheduler before per-task limits -- **Queue-level concurrency caps** -- `queue.set_queue_concurrency("name", 10)` limits how many jobs from a queue run simultaneously across all workers, checked before per-task `max_concurrent` -- **Worker lifecycle events** -- `EventType.WORKER_STARTED` and `EventType.WORKER_STOPPED` fired when a worker thread comes online or exits; subscribe via `queue.on_event(EventType.WORKER_STARTED, cb)` -- **Queue pause/resume events** -- `EventType.QUEUE_PAUSED` and `EventType.QUEUE_RESUMED` fired by `queue.pause()` and `queue.resume()` -- **`event_workers` parameter** -- `Queue(event_workers=N)` configures the event bus thread pool size (default 4); raise for high event volume -- **Per-webhook delivery options** -- `queue.add_webhook()` now accepts `max_retries`, `timeout`, and `retry_backoff` per endpoint, replacing the previous hardcoded values -- **OTel customization** -- `OpenTelemetryMiddleware` adds `span_name_fn`, `attribute_prefix`, `extra_attributes_fn`, and `task_filter` parameters -- **Sentry 
customization** -- `SentryMiddleware` adds `tag_prefix`, `transaction_name_fn`, `task_filter`, and `extra_tags_fn` parameters -- **Prometheus customization** -- `PrometheusMiddleware` and `PrometheusStatsCollector` add `namespace`, `extra_labels_fn`, and `disabled_metrics` parameters; metrics grouped by category (`"jobs"`, `"queue"`, `"resource"`, `"proxy"`, `"intercept"`) -- **FastAPI route selection** -- `TaskitoRouter` adds `include_routes`/`exclude_routes`, `dependencies`, `sse_poll_interval`, `result_timeout`, `default_page_size`, `max_page_size`, and `result_serializer` parameters; new endpoints: `/health`, `/readiness`, `/resources`, `/stats/queues` -- **Flask CLI group** -- `Taskito(app, cli_group="tasks")` renames the CLI command group; `flask taskito info --format json` outputs machine-readable stats -- **Django settings** -- `TASKITO_AUTODISCOVER_MODULE`, `TASKITO_ADMIN_PER_PAGE`, `TASKITO_ADMIN_TITLE`, `TASKITO_ADMIN_HEADER`, `TASKITO_DASHBOARD_HOST`, `TASKITO_DASHBOARD_PORT` control autodiscovery, admin pagination, branding, and dashboard bind address -- **`max_retry_delay` on `@queue.task()`** -- caps exponential backoff at a configurable ceiling in seconds (defaults to 300 s) -- **`max_concurrent` on `@queue.task()`** -- limits how many instances of a task run simultaneously across all workers -- **`serializer` on `@queue.task()`** -- per-task serializer override; falls back to queue-level serializer -- **Per-task serializer full round-trip** -- deserialization now also uses the per-task serializer; previously only enqueue (serialization) did; both the sync and native-async worker paths call `_deserialize_payload(task_name, payload)` instead of cloudpickle directly -- **`on_timeout` middleware hook wired** -- `on_timeout(ctx)` now fires when the Rust maintenance reaper detects a stale job that exceeded its hard timeout; fires before `on_retry` (if retrying) or `on_dead_letter` (if retries exhausted); previously the hook existed in `TaskMiddleware` 
but was never called -- **`QUEUE_PAUSED` / `QUEUE_RESUMED` events emitted** -- `queue.pause()` and `queue.resume()` now emit these events with payload `{"queue": "..."}` after updating storage; previously the event types were defined but never fired -- **Scheduler tuning** -- `Queue(scheduler_poll_interval_ms=N, scheduler_reap_interval=N, scheduler_cleanup_interval=N)` exposes the three Rust scheduler timing knobs to Python - ---- - -For older releases (0.5.0 and below), see the [changelog archive](changelog/archive.md). diff --git a/docs/changelog/archive.md b/docs/changelog/archive.md deleted file mode 100644 index caec846..0000000 --- a/docs/changelog/archive.md +++ /dev/null @@ -1,282 +0,0 @@ -# Changelog Archive - -Older releases. For the latest changes, see the [main changelog](../changelog.md). - ---- - -## 0.5.0 - -### New Features - -- **Native async tasks** -- `async def` task functions run natively on a dedicated event loop; no wrapping in `asyncio.run()` or thread bridging; dual-dispatch worker pool routes async jobs to `NativeAsyncPool` and sync jobs to the existing thread pool -- **`async_concurrency` parameter** -- `Queue(async_concurrency=100)` caps concurrent async tasks on the event loop; independent of the `workers` (sync thread) count -- **`current_job` in async tasks** -- `current_job.id`, `.log()`, `.update_progress()`, `.check_cancelled()` work inside `async def` tasks via `contextvars`; each concurrent task gets an isolated context -- **KEDA integration** -- `taskito scaler --app myapp:queue --port 9091` starts a lightweight metrics server; `/api/scaler` returns queue depth for KEDA `metrics-api` trigger; `/metrics` exposes Prometheus text format; `/health` for liveness probes -- **KEDA deploy templates** -- `deploy/keda/` contains ready-to-use `ScaledObject`, `ScaledObject` (Prometheus), and `ScaledJob` YAML manifests -- **Argument interception** -- `interception="strict"|"lenient"` on `Queue()` classifies every task argument before 
serialization; five strategies: PASS, CONVERT, REDIRECT, PROXY, REJECT; built-in rules cover UUID, datetime, Decimal, Pydantic models, dataclasses, SQLAlchemy sessions, Redis clients, file handles, and more -- **Worker resource runtime** -- `@queue.worker_resource("name")` decorator registers a factory initialized once at worker startup; four scopes: `"worker"` (default), `"task"` (pool), `"thread"` (thread-local), `"request"` (per-task fresh) -- **Resource injection** -- `@queue.task(inject=["name"])` or `db: Inject["name"]` annotation syntax injects live resources into tasks without serializing them; `from taskito import Inject` -- **Resource dependencies** -- `depends_on=["other"]` on `@queue.worker_resource()`; topological initialization order, reverse teardown; cycles detected eagerly at registration time (`CircularDependencyError`) -- **Health checking** -- `health_check=` and `health_check_interval=` on `@queue.worker_resource()`; unhealthy resources are recreated up to `max_recreation_attempts` times; `queue.health_check("name")` for manual checks -- **Resource pools** -- task-scoped resources get a semaphore-based pool with `pool_size`, `pool_min`, `acquire_timeout`, `max_lifetime`, `idle_timeout`; `pool_min > 0` pre-warms instances at startup -- **Thread-local resources** -- `scope="thread"` creates one instance per worker thread via `ThreadLocalStore`, torn down on shutdown -- **Frozen resources** -- `frozen=True` wraps the resource in a `FrozenResource` proxy that raises `AttributeError` on attribute writes -- **Hot reload** -- `reloadable=True` marks a resource for reload on `SIGHUP`; `taskito reload --app myapp:queue` CLI subcommand; `queue._resource_runtime.reload()` programmatic reload -- **TOML resource config** -- `queue.load_resources("resources.toml")` loads resource definitions from a TOML file; factory, teardown, and health_check are dotted import paths; Python 3.11+ built-in `tomllib`, older versions need `tomli` -- **Resource proxies** -- 
transparent deconstruct/reconstruct of non-serializable objects; built-in handlers: `file`, `logger`, `requests_session`, `httpx_client`, `boto3_client`, `gcs_client` -- **Proxy security** -- HMAC-SHA256 recipe signing via `recipe_signing_key=` on `Queue()` or `TASKITO_RECIPE_SECRET` env var; reconstruction timeout via `max_reconstruction_timeout=`; file path allowlist via `file_path_allowlist=`; per-handler opt-out via `disabled_proxies=` -- **`NoProxy` wrapper** -- `from taskito import NoProxy`; opt out of proxy handling for a specific argument, letting the serializer handle it directly -- **Custom type rules** -- `queue.register_type(MyType, "redirect", resource="my_resource")` registers custom types with any strategy (requires interception enabled) -- **Interception metrics** -- `queue.interception_stats()` returns total calls, per-strategy counts, average duration, and max depth reached -- **Proxy metrics** -- `queue.proxy_stats()` returns per-handler deconstruction/reconstruction counts, error counts, and average duration -- **Resource status** -- `queue.resource_status()` returns per-resource health, scope, init duration, and recreation count -- **Test mode resources** -- `queue.test_mode(resources={"db": mock_db})` injects mocks during test mode without worker startup; `MockResource(name, return_value=..., wraps=..., track_calls=True)` adds call tracking -- **Optional cloud dependencies** -- `pip install taskito[aws]` adds boto3>=1.20; `pip install taskito[gcs]` adds google-cloud-storage>=2.0 - -### Breaking Changes - -- **Dropped Python 3.9 support** -- minimum required version is now Python 3.10; Python 3.9 reached EOL in October 2025 - -### Internal - -- `crates/taskito-async/` new Rust crate: `NativeAsyncPool` implementing `WorkerDispatcher`, `PyResultSender` (#[pyclass]) bridging Python executor to Rust scheduler; feature-gated via `native-async` cargo feature -- `py_src/taskito/async_support/` package: `AsyncTaskExecutor` (dedicated event loop, 
bounded semaphore, full lifecycle support), `context.py` (contextvar-based job context), `__init__.py` public API -- `py_src/taskito/scaler.py`: `serve_scaler()` with `ThreadingHTTPServer`, routes `/api/scaler`, `/metrics`, `/health` -- Dashboard CSS and JS split into separate files (`assets/css/`, `assets/js/` modules) -- `_taskito_is_async` and `_taskito_async_fn` attributes set on task wrappers at registration time -- `py_src/taskito/interception/` package: `strategy.py`, `registry.py`, `walker.py`, `interceptor.py`, `reconstruct.py`, `converters.py`, `built_in.py`, `errors.py`, `metrics.py` -- `py_src/taskito/resources/` package: `definition.py`, `runtime.py`, `pool.py`, `thread_local.py`, `frozen.py`, `health.py`, `graph.py`, `toml_config.py` -- `py_src/taskito/proxies/` package: `handler.py`, `registry.py`, `reconstruct.py`, `signing.py`, `schema.py`, `no_proxy.py`, `metrics.py`, `built_in.py`, and `handlers/` subpackage -- `py_src/taskito/inject.py`: `Inject` metaclass for annotation-based resource injection -- Worker startup initializes `ResourceRuntime` before first dispatch; teardown on graceful shutdown -- `TestMode` extended with `resources=` parameter and `_test_mode_active` flag that disables proxy reconstruction during tests -- Worker heartbeat extended to include per-resource health JSON - ---- - -## 0.4.0 - -### New Features - -- **Distributed locking** — `queue.lock()` / `await queue.alock()` context managers with auto-extend background thread, acquisition timeout, and cross-process support; `LockNotAcquired` exception for failed acquisitions -- **Exactly-once semantics** — `claim_execution` / `complete_execution` storage layer prevents duplicate task execution across worker restarts -- **Async worker pool** — `AsyncWorkerPool` with `spawn_blocking` and GIL management; `WorkerDispatcher` trait in `taskito-core` future-proofs for other language bindings -- **Queue pause/resume** — `queue.pause()`, `queue.resume()`, `queue.paused_queues()` to 
suspend and restore processing per named queue -- **Job archival** — `queue.archive()` moves jobs to a persistent archive; `queue.list_archived()` retrieves them -- **Job revocation** — `queue.purge()` removes jobs by filter; `queue.revoke_task()` prevents all future enqueues of a given task name -- **Job replay** — `queue.replay()` re-enqueues a completed or failed job; `queue.replay_history()` returns the replay log -- **Circuit breakers** — `circuit_breaker={"threshold": 5, "window": 60, "cooldown": 120}` on `@queue.task()`; `queue.circuit_breakers()` returns current state of all circuit breakers -- **Structured task logging** — `current_job.log(message)` from inside tasks; `queue.task_logs(job_id)` and `queue.query_logs()` for retrieval -- **Cron timezone support** — `timezone="America/New_York"` on `@queue.periodic()`; uses `chrono-tz` under the hood, defaults to UTC -- **Custom retry delays** — `retry_delays=[1, 5, 30]` on `@queue.task()` for per-attempt delay overrides instead of exponential backoff -- **Soft timeouts** — `soft_timeout=` on `@queue.task()`; checked cooperatively via `current_job.check_timeout()` -- **Worker tags/specialization** — `tags=["gpu", "heavy"]` on `queue.run_worker()`; jobs can be routed to workers with matching tags -- **Worker inspection** — `queue.workers()` / `await queue.aworkers()` return live worker state -- **Job DAG visualization** — `queue.job_dag(job_id)` returns a dependency graph for a job and its ancestors/descendants -- **Metrics timeseries** — `queue.metrics_timeseries()` returns historical throughput/latency data; `queue.metrics()` for current snapshot -- **Extended job filtering** — `queue.list_jobs_filtered()` with `metadata_like`, `error_like`, `created_after`, `created_before` parameters -- **`MsgPackSerializer`** — built-in, requires `pip install msgpack`; faster than cloudpickle, smaller payloads, cross-language compatible -- **`EncryptedSerializer`** — AES-256-GCM encryption, requires `pip install 
cryptography`; wraps another serializer, payloads in DB are opaque ciphertext -- **`drain_timeout`** — configurable graceful shutdown wait time on `Queue()` constructor (default: 30 seconds) -- **Per-job `result_ttl`** — `result_ttl` override on `.apply_async()` to set cleanup policy per job -- **Dashboard enhancements** — workers tab, circuit breakers panel, job archival UI - -### Internal - -- `diesel_common/` shared macro module eliminates SQLite/Postgres duplication across backends -- `scheduler` split into 4 focused modules (`mod.rs`, `poller.rs`, `result_handler.rs`, `maintenance.rs`) -- `py_queue` split into 3 focused modules (`mod.rs`, `inspection.rs`, `worker.rs`) with PyO3 `multiple-pymethods` feature -- Python mixins consolidated from 7 to 3 groups: `QueueInspectionMixin`, `QueueOperationsMixin`, `QueueLockMixin` - ---- - -## 0.3.0 - -### Features - -- **Redis storage backend** — optional Redis backend for distributed workloads (`pip install taskito[redis]`); Lua scripts for atomic operations, sorted sets for indexing -- **Events & webhooks** — event system with webhook delivery support -- **Flask integration** — contrib integration for Flask applications -- **Prometheus integration** — contrib stats collector with `PrometheusStatsCollector` -- **Sentry integration** — contrib middleware for Sentry error tracking - -### Build & CI - -- Add `openssl-sys` dependency, refactor GitHub Actions for wheel building/publishing -- Enable postgres feature for macOS and Windows wheel builds -- Add Rust linting/caching, optimize test matrix, reduce redundant CI jobs -- Add redis feature to wheel builds - -### Fixes - -- Guard arithmetic overflow across timeout detection, worker reaping, scheduler cleanup, circuit breaker timing, and Redis TTL purging -- Treat cancelled jobs as terminal in `_poll_once` so `result()` raises immediately -- Cap float-to-i64 casts to prevent silent overflow in delay_seconds, expires, retry_delays, retry_backoff -- Reject negative 
pagination in list_jobs, dead_letters, list_archived, query_task_logs -- Fix async/sync misuse in FastAPI handlers -- Replace deprecated `asyncio.get_event_loop()` with `get_running_loop()` -- Replace Redis `KEYS` with `SCAN` in purge operations -- Fix Redis `enqueue_unique()` race condition with atomic Lua scripts -- Only call middleware `after()` for those whose `before()` succeeded -- Recover from poisoned mutex in scheduler instead of panicking -- Validate `EncryptedSerializer` key type and size before use -- Thread-safe double-checked locking for Prometheus metrics init and dashboard SPA cache -- Skip webhook retries on 4xx client errors -- Clamp percentile index in task_metrics to prevent IndexError -- Fix dashboard formatting - -### Docs - -- Add circuit breakers, events/webhooks, and logging guides -- Add integration docs for Django, FastAPI, Flask, OTel, Prometheus, Sentry -- Remove Linux-only warnings from postgres and installation docs - -## 0.2.3 - -### Features - -- **Postgres storage backend** — optional PostgreSQL backend for multi-machine workers and higher write throughput (`pip install taskito[postgres]`); full feature parity with SQLite including jobs, DLQ, rate limiting, periodic tasks, circuit breakers, workers, metrics, and logs -- **Django integration** — `TASKITO_BACKEND`, `TASKITO_DB_URL`, `TASKITO_SCHEMA` settings for configuring the backend from Django projects - -### Build & Tooling - -- **Pre-commit hooks** — Added `.pre-commit-config.yaml` with local hooks for `cargo fmt`, `cargo clippy`, `ruff check`, `ruff format`, and `mypy` - -### Critical Fixes - -- **Dashboard dead routes** — Moved `/logs` and `/replay-history` handlers above the generic catch-all in `dashboard.py`, fixing 404s on these endpoints -- **Stale `__version__`** — Replaced hardcoded version with `importlib.metadata.version()` with fallback -- **`retry_dead` non-atomic** — Wrapped enqueue + delete in a single transaction (SQLite & Postgres), preventing ghost dead 
letters on partial failure -- **`retry_dead` hardcoded defaults** — Added `priority`, `max_retries`, `timeout_ms`, `result_ttl_ms` columns to `dead_letter` table; replayed jobs now preserve their original configuration -- **`enqueue_unique` race condition** — Wrapped check + insert in a transaction; catches unique constraint violations to return the existing job instead of erroring -- **`now_millis()` panic** — Replaced `.expect()` with `.unwrap_or(Duration::ZERO)` to prevent scheduler panic on clock issues -- **`reap_stale` double error records** — Removed redundant `storage.fail()` call; `handle_result` already records the failure -- **README cron format** — Updated example to correct 6-field format: `"0 0 */6 * * *"` - -### Important Fixes - -- **`result.py` hardcoded cloudpickle** — `job.result()` now uses the queue's configured serializer for deserialization -- **Context leak on deserialization failure** — Wrapped deserialization + call in closure; `_clear_context` always runs via `finally` -- **OTel spans not thread-safe** — Added `threading.Lock` around all `_spans` dict access in `OpenTelemetryMiddleware` -- **`build_periodic_payload` misleading `_kwargs` param** — Removed unused parameter, added explanatory comment -- **Tokio runtime panic** — Replaced `.expect()` with graceful error handling on runtime creation -- **`dequeue` LIMIT 10** — Increased to 100 for better throughput under load (both SQLite & Postgres) -- **`check_periodic` not atomic** — Uses `enqueue_unique` with deterministic key to prevent duplicate periodic jobs -- **SQLite `purge_completed_with_ttl` no transaction** — Wrapped in transaction for consistency -- **Django admin status validation** — Added try/except around `queue.list_jobs()` to handle connection errors gracefully -- **Silent job loss on `get_job` None** — Added `warn!` logging when a dequeued job ID returns None -- **Cascade cleanup on job purge** — `purge_completed()` and `purge_completed_with_ttl()` now automatically delete 
orphaned child records (`job_errors`, `task_logs`, `task_metrics`, `job_dependencies`, `replay_history`) when removing completed jobs - -### Minor Fixes - -- **`cascade_cancel` O(n²)** — Replaced `Vec::contains` with `HashSet` for dependency lookups (both backends) -- **`chain.apply()` hardcoded 300s timeout** — Now derives timeout from `sig.options.get("timeout", 300)` -- **`_FakeJobResult` missing `refresh()`** — Added no-op method for test mode compatibility -- **Storage trait doc outdated** — Updated to mention both SQLite and Postgres backends -- **`wall_time_ns` truncation** — Uses `.try_into().unwrap_or(i64::MAX)` to prevent silent overflow - ---- - -## 0.2.2 - -- Added `readme` field to `pyproject.toml` so PyPI displays the project description. - ---- - -## 0.2.1 - -Re-release of 0.2.0 — PyPI does not allow re-uploads of deleted versions. - ---- - -## 0.2.0 - -### Core Reliability - -- **Exception hierarchy** (F8) — `TaskitoError` base class with `TaskTimeoutError`, `SoftTimeoutError`, `TaskCancelledError`, `MaxRetriesExceededError`, `SerializationError`, `CircuitBreakerOpenError`, `RateLimitExceededError`, `JobNotFoundError`, `QueueError` -- **Pluggable serializers** (F2) — `CloudpickleSerializer` (default), `JsonSerializer`, or custom `Serializer` protocol -- **Exception filtering** (F1) — `retry_on` and `dont_retry_on` parameters for selective retries -- **Cancel running tasks** (F3) — cooperative cancellation with `queue.cancel_running_job()` and `current_job.check_cancelled()` -- **Soft timeouts** (F4) — `soft_timeout` parameter with `current_job.check_timeout()` for cooperative time limits - -### Developer Experience - -- **Per-task middleware** (F5) — `TaskMiddleware` base class with `before()`, `after()`, `on_retry()` hooks; queue-level and per-task registration -- **Worker heartbeat** (F6) — `queue.workers()` / `await queue.aworkers()` to monitor worker health; `GET /api/workers` dashboard endpoint; `workers` table in schema -- **Job expiration** 
(F7) — `expires` parameter on `apply_async()` to skip time-sensitive jobs that weren't started in time -- **Result TTL per job** (F11) — `result_ttl` parameter on `apply_async()` to override global cleanup policy per job - -### Power Features - -- **chunks / starmap** (F9) — `chunks(task, items, chunk_size)` and `starmap(task, args_list)` canvas primitives -- **Group concurrency** (F10) — `max_concurrency` parameter on `group()` to limit parallel execution -- **OpenTelemetry** (F12) — `OpenTelemetryMiddleware` for distributed tracing; install with `pip install taskito[otel]` - -### Build & Tooling - -- Zensical site configuration (`zensical.toml`) -- Makefile for `docs` / `docs-serve` commands -- Lock file (`uv.lock`) for reproducible builds - -### Bug Fixes - -- Fixed "Copy as Markdown" table cells rendering empty for SVG/img emoji icons - -### Internal - -- Hardened core scheduler and rate limiter -- Reorganized resilience modules and storage layer - ---- - -## 0.1.1 - -### Features - -- **Web dashboard** -- `taskito dashboard --app myapp:queue` serves a built-in monitoring UI with dark mode, auto-refresh, job detail views, and dead letter management -- **FastAPI integration** -- `TaskitoRouter` provides a pre-built `APIRouter` with endpoints for stats, job status, progress streaming (SSE), and dead letter management -- **Testing utilities** -- `queue.test_mode()` context manager for running tasks synchronously without a worker; includes `TestResult`, `TestResults` with filtering -- **CLI dashboard command** -- `taskito dashboard` command with `--host` and `--port` options -- **Celery-style worker banner** -- Worker startup now displays registered tasks, queues, and configuration -- **Async result awaiting** -- `await job.aresult()` for non-blocking result fetching - -### Changes - -- Renamed `python/` to `py_src/` and `rust/` to `crates/` for clearer project structure -- Default `db_path` now uses `.taskito/` directory, with automatic directory creation - ---- - 
-## 0.1.0 - -*Initial release* - -### Features - -- **Task queue** — `@queue.task()` decorator with `.delay()` and `.apply_async()` -- **Priority queues** — integer priority levels, higher values processed first -- **Retry with exponential backoff** — configurable max retries, backoff multiplier, and jitter -- **Dead letter queue** — failed jobs preserved for inspection and replay -- **Rate limiting** — token bucket algorithm with `"N/s"`, `"N/m"`, `"N/h"` syntax -- **Task workflows** — `chain`, `group`, and `chord` primitives -- **Periodic tasks** — cron-scheduled tasks with 6-field expressions (seconds granularity) -- **Progress tracking** — `current_job.update_progress()` from inside tasks -- **Job cancellation** — cancel pending jobs before execution -- **Unique tasks** — deduplicate active jobs by key -- **Batch enqueue** — `task.map()` and `queue.enqueue_many()` with single-transaction inserts -- **Named queues** — route tasks to isolated queues, subscribe workers selectively -- **Hooks** — `before_task`, `after_task`, `on_success`, `on_failure` -- **Async support** — `aresult()`, `astats()`, `arun_worker()`, and more -- **Job context** — `current_job.id`, `.task_name`, `.retry_count`, `.queue_name` -- **Error history** — per-attempt error tracking via `job.errors` -- **Result TTL** — automatic cleanup of completed/dead jobs -- **CLI** — `taskito worker` and `taskito info --watch` -- **Metadata** — attach arbitrary JSON to jobs - -### Architecture - -- Rust core with PyO3 bindings -- SQLite storage with WAL mode and Diesel ORM -- Tokio async scheduler with 50ms poll interval -- OS thread worker pool with crossbeam channels -- cloudpickle serialization for arguments and results diff --git a/docs/comparison.md b/docs/comparison.md deleted file mode 100644 index a553f55..0000000 --- a/docs/comparison.md +++ /dev/null @@ -1,134 +0,0 @@ -# Comparison - -**TL;DR**: Taskito is Celery without the broker. 
Rust scheduler, no Redis/RabbitMQ, lower latency, better concurrency. Start with SQLite, scale to Postgres when needed. - -## Feature Matrix - -| Feature | taskito | Celery | RQ | Dramatiq | Huey | TaskIQ | -|---|---|---|---|---|---|---| -| Broker required | **No** | Redis / RabbitMQ | Redis | Redis / RabbitMQ | Redis | Redis / RabbitMQ / Nats | -| Core language | **Rust + Python** | Python | Python | Python | Python | Python | -| Priority queues | **Yes** | Yes | No | No | Yes | Yes | -| Rate limiting | **Yes** | Yes | No | Yes | No | No | -| Dead letter queue | **Yes** | No | Yes | No | No | No | -| Task chaining | **Yes** (chain/group/chord) | Yes (canvas) | No | Yes (pipelines) | No | Yes (pipelines) | -| Job cancellation | **Yes** | Yes (revoke) | No | No | Yes | No | -| Progress tracking | **Yes** | Yes (custom) | No | No | No | No | -| Unique tasks | **Yes** | No (manual) | No | No | Yes | No | -| Batch enqueue | **Yes** | No | No | No | No | No | -| Retry with backoff | **Yes** (exponential + jitter) | Yes | Yes | Yes | Yes | Yes | -| Periodic/cron tasks | **Yes** (6-field with seconds) | Yes (celery-beat) | Yes (rq-scheduler) | Yes (APScheduler) | Yes | Yes (taskiq-cron) | -| Async support | **Yes** | Yes | No | No | No | Yes (native) | -| Cancel running tasks | **Yes** (cooperative) | Yes (revoke) | No | No | No | No | -| Soft timeouts | **Yes** | No | No | No | No | No | -| Custom serializers | **Yes** | Yes | No | No | No | Yes | -| Per-task middleware | **Yes** | No | No | Yes | No | Yes | -| Multi-process (prefork) | **Yes** | Yes | No | No | No | No | -| Namespace isolation | **Yes** | No | No | No | No | No | -| Result streaming | **Yes** (publish/stream) | No | No | No | No | No | -| Worker discovery | **Yes** (hostname/pid/status) | Yes (flower) | No | No | No | No | -| Lifecycle events | **Yes** (13 types) | Yes (signals) | No | Yes (actors) | No | No | -| Async canvas | **Yes** | No | No | No | No | No | -| OpenTelemetry | **Yes** (optional) | 
Yes (contrib) | No | No | No | Yes (built-in) | -| CLI | **Yes** | Yes | Yes | Yes | Yes | Yes | -| Result backend | **Built-in** (SQLite) | Redis / DB / custom | Redis | Redis / custom | Redis / SQLite | Redis / custom | -| Setup complexity | **`pip install`** | Broker + backend | Redis server | Broker | Redis server | Broker + backend | - -## When to Use taskito - -taskito is ideal when: - -- **Single-machine deployments** — no need for distributed workers across multiple servers -- **Zero infrastructure** — you don't want to install, configure, or manage Redis or RabbitMQ -- **Embedded applications** — CLI tools, desktop apps, or services where simplicity matters -- **Prototyping** — get a task queue running in 5 lines, iterate fast -- **Low-to-medium throughput** — hundreds to thousands of jobs per second is plenty - -## When NOT to Use taskito - -Consider alternatives when: - -- **Multi-server workers** — you need workers on separate machines (taskito supports this with Postgres/Redis backends, but Celery has more mature distributed tooling) -- **Very high throughput** — millions of jobs/sec across a cluster (use Celery + RabbitMQ) -- **Existing Redis infrastructure** — if Redis is already in your stack, RQ or Huey are simple choices -- **Complex routing** — you need topic exchanges, message filtering, or pub/sub patterns (use Celery + RabbitMQ) - -## Detailed Comparison - -### vs Celery - -Celery is the most popular Python task queue — battle-tested, feature-rich, and widely adopted. - -| | taskito | Celery | -|---|---|---| -| **Setup** | `pip install taskito` | Install broker (Redis/RabbitMQ), result backend, Celery itself | -| **Dependencies** | 1 (cloudpickle) | 10+ (kombu, billiard, vine, etc.) 
| -| **Configuration** | Constructor params | Settings module or app config | -| **Worker model** | Rust OS threads | prefork/eventlet/gevent pools | -| **Distributed** | No (single process) | Yes (multi-server) | -| **Canvas** | chain, group, chord, starmap, chunks | chain, group, chord, starmap, chunks, and more | - -**Choose taskito** if you want zero-infrastructure simplicity on a single machine. -**Choose Celery** if you need distributed workers, complex routing, or enterprise features. - -Looking to switch? See the [Migrating from Celery](guide/operations/migration.md) guide for a step-by-step walkthrough with side-by-side code examples. - -### vs RQ (Redis Queue) - -RQ focuses on simplicity — a minimal task queue built on Redis. - -| | taskito | RQ | -|---|---|---| -| **Broker** | None (SQLite) | Redis required | -| **Priority** | Yes (integer levels) | Separate queues for priority | -| **Rate limiting** | Built-in | No | -| **Chaining** | Yes | No | -| **Monitoring** | CLI + progress | rq-dashboard (web) | - -**Choose taskito** if you want similar simplicity without requiring Redis. -**Choose RQ** if you already run Redis and want a web dashboard. - -### vs Dramatiq - -Dramatiq is a reliable, performance-focused alternative to Celery. - -| | taskito | Dramatiq | -|---|---|---| -| **Broker** | None (SQLite) | Redis or RabbitMQ | -| **Priority** | Yes | No (FIFO only) | -| **Rate limiting** | Built-in | Middleware | -| **DLQ** | Built-in | No | -| **Middleware** | Hooks + per-task `TaskMiddleware` | Full middleware stack | - -**Choose taskito** if you want built-in DLQ and priority without a broker. -**Choose Dramatiq** if you need a middleware ecosystem and distributed workers. - -### vs Huey - -Huey is a lightweight task queue with Redis or SQLite backends. 
- -| | taskito | Huey | -|---|---|---| -| **Backend** | SQLite (Rust-native) | Redis or SQLite (Python) | -| **Performance** | Rust scheduler + OS threads | Python threads | -| **Chaining** | chain, group, chord | Pipeline (limited) | -| **Rate limiting** | Built-in token bucket | No | -| **DLQ** | Built-in | No | -| **Progress** | Built-in | No | - -**Choose taskito** if you want higher performance and more features with SQLite. -**Choose Huey** if you need a mature, well-documented SQLite-backed queue. - -### vs TaskIQ - -TaskIQ is a modern, async-native task queue. It's a good fit if you're fully async and already have a broker. - -| | taskito | TaskIQ | -|---|---|---| -| **Broker** | None (DB-backed) | Redis / RabbitMQ / Nats | -| **Async** | Native + sync | Async-first | -| **Scheduler** | Rust (Tokio) | Python | -| **GIL** | Rust scheduler bypasses GIL | Python scheduler competes for GIL | -| **Setup** | `pip install taskito` | Install broker + taskiq + broker plugin | - -Choose taskito if you want zero infrastructure. Choose TaskIQ if you're fully async and already have Redis/Nats. 
diff --git a/docs-next/content/docs/api-reference/canvas.mdx b/docs/content/docs/api-reference/canvas.mdx similarity index 100% rename from docs-next/content/docs/api-reference/canvas.mdx rename to docs/content/docs/api-reference/canvas.mdx diff --git a/docs-next/content/docs/api-reference/cli.mdx b/docs/content/docs/api-reference/cli.mdx similarity index 100% rename from docs-next/content/docs/api-reference/cli.mdx rename to docs/content/docs/api-reference/cli.mdx diff --git a/docs-next/content/docs/api-reference/context.mdx b/docs/content/docs/api-reference/context.mdx similarity index 100% rename from docs-next/content/docs/api-reference/context.mdx rename to docs/content/docs/api-reference/context.mdx diff --git a/docs-next/content/docs/api-reference/meta.json b/docs/content/docs/api-reference/meta.json similarity index 100% rename from docs-next/content/docs/api-reference/meta.json rename to docs/content/docs/api-reference/meta.json diff --git a/docs-next/content/docs/api-reference/overview.mdx b/docs/content/docs/api-reference/overview.mdx similarity index 100% rename from docs-next/content/docs/api-reference/overview.mdx rename to docs/content/docs/api-reference/overview.mdx diff --git a/docs-next/content/docs/api-reference/queue/events.mdx b/docs/content/docs/api-reference/queue/events.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/events.mdx rename to docs/content/docs/api-reference/queue/events.mdx diff --git a/docs-next/content/docs/api-reference/queue/index.mdx b/docs/content/docs/api-reference/queue/index.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/index.mdx rename to docs/content/docs/api-reference/queue/index.mdx diff --git a/docs-next/content/docs/api-reference/queue/jobs.mdx b/docs/content/docs/api-reference/queue/jobs.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/jobs.mdx rename to docs/content/docs/api-reference/queue/jobs.mdx diff --git 
a/docs-next/content/docs/api-reference/queue/meta.json b/docs/content/docs/api-reference/queue/meta.json similarity index 100% rename from docs-next/content/docs/api-reference/queue/meta.json rename to docs/content/docs/api-reference/queue/meta.json diff --git a/docs-next/content/docs/api-reference/queue/queues.mdx b/docs/content/docs/api-reference/queue/queues.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/queues.mdx rename to docs/content/docs/api-reference/queue/queues.mdx diff --git a/docs-next/content/docs/api-reference/queue/resources.mdx b/docs/content/docs/api-reference/queue/resources.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/resources.mdx rename to docs/content/docs/api-reference/queue/resources.mdx diff --git a/docs-next/content/docs/api-reference/queue/workers.mdx b/docs/content/docs/api-reference/queue/workers.mdx similarity index 100% rename from docs-next/content/docs/api-reference/queue/workers.mdx rename to docs/content/docs/api-reference/queue/workers.mdx diff --git a/docs-next/content/docs/api-reference/result.mdx b/docs/content/docs/api-reference/result.mdx similarity index 100% rename from docs-next/content/docs/api-reference/result.mdx rename to docs/content/docs/api-reference/result.mdx diff --git a/docs-next/content/docs/api-reference/task.mdx b/docs/content/docs/api-reference/task.mdx similarity index 100% rename from docs-next/content/docs/api-reference/task.mdx rename to docs/content/docs/api-reference/task.mdx diff --git a/docs-next/content/docs/api-reference/testing.mdx b/docs/content/docs/api-reference/testing.mdx similarity index 100% rename from docs-next/content/docs/api-reference/testing.mdx rename to docs/content/docs/api-reference/testing.mdx diff --git a/docs-next/content/docs/api-reference/workflows.mdx b/docs/content/docs/api-reference/workflows.mdx similarity index 100% rename from docs-next/content/docs/api-reference/workflows.mdx rename to 
docs/content/docs/api-reference/workflows.mdx diff --git a/docs-next/content/docs/architecture/failure-model.mdx b/docs/content/docs/architecture/failure-model.mdx similarity index 100% rename from docs-next/content/docs/architecture/failure-model.mdx rename to docs/content/docs/architecture/failure-model.mdx diff --git a/docs-next/content/docs/architecture/job-lifecycle.mdx b/docs/content/docs/architecture/job-lifecycle.mdx similarity index 100% rename from docs-next/content/docs/architecture/job-lifecycle.mdx rename to docs/content/docs/architecture/job-lifecycle.mdx diff --git a/docs-next/content/docs/architecture/meta.json b/docs/content/docs/architecture/meta.json similarity index 100% rename from docs-next/content/docs/architecture/meta.json rename to docs/content/docs/architecture/meta.json diff --git a/docs-next/content/docs/architecture/overview.mdx b/docs/content/docs/architecture/overview.mdx similarity index 100% rename from docs-next/content/docs/architecture/overview.mdx rename to docs/content/docs/architecture/overview.mdx diff --git a/docs-next/content/docs/architecture/resources.mdx b/docs/content/docs/architecture/resources.mdx similarity index 100% rename from docs-next/content/docs/architecture/resources.mdx rename to docs/content/docs/architecture/resources.mdx diff --git a/docs-next/content/docs/architecture/scheduler.mdx b/docs/content/docs/architecture/scheduler.mdx similarity index 100% rename from docs-next/content/docs/architecture/scheduler.mdx rename to docs/content/docs/architecture/scheduler.mdx diff --git a/docs-next/content/docs/architecture/serialization.mdx b/docs/content/docs/architecture/serialization.mdx similarity index 100% rename from docs-next/content/docs/architecture/serialization.mdx rename to docs/content/docs/architecture/serialization.mdx diff --git a/docs-next/content/docs/architecture/storage.mdx b/docs/content/docs/architecture/storage.mdx similarity index 100% rename from 
docs-next/content/docs/architecture/storage.mdx rename to docs/content/docs/architecture/storage.mdx diff --git a/docs-next/content/docs/architecture/worker-pool.mdx b/docs/content/docs/architecture/worker-pool.mdx similarity index 100% rename from docs-next/content/docs/architecture/worker-pool.mdx rename to docs/content/docs/architecture/worker-pool.mdx diff --git a/docs-next/content/docs/getting-started/concepts.mdx b/docs/content/docs/getting-started/concepts.mdx similarity index 100% rename from docs-next/content/docs/getting-started/concepts.mdx rename to docs/content/docs/getting-started/concepts.mdx diff --git a/docs-next/content/docs/getting-started/installation.mdx b/docs/content/docs/getting-started/installation.mdx similarity index 100% rename from docs-next/content/docs/getting-started/installation.mdx rename to docs/content/docs/getting-started/installation.mdx diff --git a/docs-next/content/docs/getting-started/meta.json b/docs/content/docs/getting-started/meta.json similarity index 100% rename from docs-next/content/docs/getting-started/meta.json rename to docs/content/docs/getting-started/meta.json diff --git a/docs-next/content/docs/getting-started/quickstart.mdx b/docs/content/docs/getting-started/quickstart.mdx similarity index 100% rename from docs-next/content/docs/getting-started/quickstart.mdx rename to docs/content/docs/getting-started/quickstart.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/async-tasks.mdx b/docs/content/docs/guides/advanced-execution/async-tasks.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/async-tasks.mdx rename to docs/content/docs/guides/advanced-execution/async-tasks.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/batch-enqueue.mdx b/docs/content/docs/guides/advanced-execution/batch-enqueue.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/batch-enqueue.mdx rename to 
docs/content/docs/guides/advanced-execution/batch-enqueue.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/dependencies.mdx b/docs/content/docs/guides/advanced-execution/dependencies.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/dependencies.mdx rename to docs/content/docs/guides/advanced-execution/dependencies.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/index.mdx b/docs/content/docs/guides/advanced-execution/index.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/index.mdx rename to docs/content/docs/guides/advanced-execution/index.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/meta.json b/docs/content/docs/guides/advanced-execution/meta.json similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/meta.json rename to docs/content/docs/guides/advanced-execution/meta.json diff --git a/docs-next/content/docs/guides/advanced-execution/prefork.mdx b/docs/content/docs/guides/advanced-execution/prefork.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/prefork.mdx rename to docs/content/docs/guides/advanced-execution/prefork.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/streaming.mdx b/docs/content/docs/guides/advanced-execution/streaming.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/streaming.mdx rename to docs/content/docs/guides/advanced-execution/streaming.mdx diff --git a/docs-next/content/docs/guides/advanced-execution/unique-tasks.mdx b/docs/content/docs/guides/advanced-execution/unique-tasks.mdx similarity index 100% rename from docs-next/content/docs/guides/advanced-execution/unique-tasks.mdx rename to docs/content/docs/guides/advanced-execution/unique-tasks.mdx diff --git a/docs-next/content/docs/guides/core/execution-model.mdx b/docs/content/docs/guides/core/execution-model.mdx similarity index 100% 
rename from docs-next/content/docs/guides/core/execution-model.mdx rename to docs/content/docs/guides/core/execution-model.mdx diff --git a/docs-next/content/docs/guides/core/index.mdx b/docs/content/docs/guides/core/index.mdx similarity index 100% rename from docs-next/content/docs/guides/core/index.mdx rename to docs/content/docs/guides/core/index.mdx diff --git a/docs-next/content/docs/guides/core/meta.json b/docs/content/docs/guides/core/meta.json similarity index 100% rename from docs-next/content/docs/guides/core/meta.json rename to docs/content/docs/guides/core/meta.json diff --git a/docs-next/content/docs/guides/core/queues.mdx b/docs/content/docs/guides/core/queues.mdx similarity index 100% rename from docs-next/content/docs/guides/core/queues.mdx rename to docs/content/docs/guides/core/queues.mdx diff --git a/docs-next/content/docs/guides/core/scheduling.mdx b/docs/content/docs/guides/core/scheduling.mdx similarity index 100% rename from docs-next/content/docs/guides/core/scheduling.mdx rename to docs/content/docs/guides/core/scheduling.mdx diff --git a/docs-next/content/docs/guides/core/tasks.mdx b/docs/content/docs/guides/core/tasks.mdx similarity index 100% rename from docs-next/content/docs/guides/core/tasks.mdx rename to docs/content/docs/guides/core/tasks.mdx diff --git a/docs-next/content/docs/guides/core/workers.mdx b/docs/content/docs/guides/core/workers.mdx similarity index 100% rename from docs-next/content/docs/guides/core/workers.mdx rename to docs/content/docs/guides/core/workers.mdx diff --git a/docs-next/content/docs/guides/core/workflows.mdx b/docs/content/docs/guides/core/workflows.mdx similarity index 100% rename from docs-next/content/docs/guides/core/workflows.mdx rename to docs/content/docs/guides/core/workflows.mdx diff --git a/docs-next/content/docs/guides/extensibility/events-webhooks.mdx b/docs/content/docs/guides/extensibility/events-webhooks.mdx similarity index 100% rename from 
docs-next/content/docs/guides/extensibility/events-webhooks.mdx rename to docs/content/docs/guides/extensibility/events-webhooks.mdx diff --git a/docs-next/content/docs/guides/extensibility/index.mdx b/docs/content/docs/guides/extensibility/index.mdx similarity index 100% rename from docs-next/content/docs/guides/extensibility/index.mdx rename to docs/content/docs/guides/extensibility/index.mdx diff --git a/docs-next/content/docs/guides/extensibility/meta.json b/docs/content/docs/guides/extensibility/meta.json similarity index 100% rename from docs-next/content/docs/guides/extensibility/meta.json rename to docs/content/docs/guides/extensibility/meta.json diff --git a/docs-next/content/docs/guides/extensibility/middleware.mdx b/docs/content/docs/guides/extensibility/middleware.mdx similarity index 100% rename from docs-next/content/docs/guides/extensibility/middleware.mdx rename to docs/content/docs/guides/extensibility/middleware.mdx diff --git a/docs-next/content/docs/guides/extensibility/serializers.mdx b/docs/content/docs/guides/extensibility/serializers.mdx similarity index 100% rename from docs-next/content/docs/guides/extensibility/serializers.mdx rename to docs/content/docs/guides/extensibility/serializers.mdx diff --git a/docs-next/content/docs/guides/index.mdx b/docs/content/docs/guides/index.mdx similarity index 100% rename from docs-next/content/docs/guides/index.mdx rename to docs/content/docs/guides/index.mdx diff --git a/docs-next/content/docs/guides/integrations/django.mdx b/docs/content/docs/guides/integrations/django.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/django.mdx rename to docs/content/docs/guides/integrations/django.mdx diff --git a/docs-next/content/docs/guides/integrations/fastapi.mdx b/docs/content/docs/guides/integrations/fastapi.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/fastapi.mdx rename to docs/content/docs/guides/integrations/fastapi.mdx diff --git 
a/docs-next/content/docs/guides/integrations/flask.mdx b/docs/content/docs/guides/integrations/flask.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/flask.mdx rename to docs/content/docs/guides/integrations/flask.mdx diff --git a/docs-next/content/docs/guides/integrations/index.mdx b/docs/content/docs/guides/integrations/index.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/index.mdx rename to docs/content/docs/guides/integrations/index.mdx diff --git a/docs-next/content/docs/guides/integrations/meta.json b/docs/content/docs/guides/integrations/meta.json similarity index 100% rename from docs-next/content/docs/guides/integrations/meta.json rename to docs/content/docs/guides/integrations/meta.json diff --git a/docs-next/content/docs/guides/integrations/otel.mdx b/docs/content/docs/guides/integrations/otel.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/otel.mdx rename to docs/content/docs/guides/integrations/otel.mdx diff --git a/docs-next/content/docs/guides/integrations/prometheus.mdx b/docs/content/docs/guides/integrations/prometheus.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/prometheus.mdx rename to docs/content/docs/guides/integrations/prometheus.mdx diff --git a/docs-next/content/docs/guides/integrations/sentry.mdx b/docs/content/docs/guides/integrations/sentry.mdx similarity index 100% rename from docs-next/content/docs/guides/integrations/sentry.mdx rename to docs/content/docs/guides/integrations/sentry.mdx diff --git a/docs-next/content/docs/guides/meta.json b/docs/content/docs/guides/meta.json similarity index 100% rename from docs-next/content/docs/guides/meta.json rename to docs/content/docs/guides/meta.json diff --git a/docs-next/content/docs/guides/observability/dashboard-api.mdx b/docs/content/docs/guides/observability/dashboard-api.mdx similarity index 100% rename from 
docs-next/content/docs/guides/observability/dashboard-api.mdx rename to docs/content/docs/guides/observability/dashboard-api.mdx diff --git a/docs-next/content/docs/guides/observability/dashboard.mdx b/docs/content/docs/guides/observability/dashboard.mdx similarity index 100% rename from docs-next/content/docs/guides/observability/dashboard.mdx rename to docs/content/docs/guides/observability/dashboard.mdx diff --git a/docs-next/content/docs/guides/observability/index.mdx b/docs/content/docs/guides/observability/index.mdx similarity index 100% rename from docs-next/content/docs/guides/observability/index.mdx rename to docs/content/docs/guides/observability/index.mdx diff --git a/docs-next/content/docs/guides/observability/logging.mdx b/docs/content/docs/guides/observability/logging.mdx similarity index 100% rename from docs-next/content/docs/guides/observability/logging.mdx rename to docs/content/docs/guides/observability/logging.mdx diff --git a/docs-next/content/docs/guides/observability/meta.json b/docs/content/docs/guides/observability/meta.json similarity index 100% rename from docs-next/content/docs/guides/observability/meta.json rename to docs/content/docs/guides/observability/meta.json diff --git a/docs-next/content/docs/guides/observability/monitoring.mdx b/docs/content/docs/guides/observability/monitoring.mdx similarity index 100% rename from docs-next/content/docs/guides/observability/monitoring.mdx rename to docs/content/docs/guides/observability/monitoring.mdx diff --git a/docs-next/content/docs/guides/operations/deployment.mdx b/docs/content/docs/guides/operations/deployment.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/deployment.mdx rename to docs/content/docs/guides/operations/deployment.mdx diff --git a/docs-next/content/docs/guides/operations/index.mdx b/docs/content/docs/guides/operations/index.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/index.mdx rename to 
docs/content/docs/guides/operations/index.mdx diff --git a/docs-next/content/docs/guides/operations/job-management.mdx b/docs/content/docs/guides/operations/job-management.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/job-management.mdx rename to docs/content/docs/guides/operations/job-management.mdx diff --git a/docs-next/content/docs/guides/operations/keda.mdx b/docs/content/docs/guides/operations/keda.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/keda.mdx rename to docs/content/docs/guides/operations/keda.mdx diff --git a/docs-next/content/docs/guides/operations/meta.json b/docs/content/docs/guides/operations/meta.json similarity index 100% rename from docs-next/content/docs/guides/operations/meta.json rename to docs/content/docs/guides/operations/meta.json diff --git a/docs-next/content/docs/guides/operations/migration.mdx b/docs/content/docs/guides/operations/migration.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/migration.mdx rename to docs/content/docs/guides/operations/migration.mdx diff --git a/docs-next/content/docs/guides/operations/postgres.mdx b/docs/content/docs/guides/operations/postgres.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/postgres.mdx rename to docs/content/docs/guides/operations/postgres.mdx diff --git a/docs-next/content/docs/guides/operations/testing.mdx b/docs/content/docs/guides/operations/testing.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/testing.mdx rename to docs/content/docs/guides/operations/testing.mdx diff --git a/docs-next/content/docs/guides/operations/troubleshooting.mdx b/docs/content/docs/guides/operations/troubleshooting.mdx similarity index 100% rename from docs-next/content/docs/guides/operations/troubleshooting.mdx rename to docs/content/docs/guides/operations/troubleshooting.mdx diff --git 
a/docs-next/content/docs/guides/reliability/circuit-breakers.mdx b/docs/content/docs/guides/reliability/circuit-breakers.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/circuit-breakers.mdx rename to docs/content/docs/guides/reliability/circuit-breakers.mdx diff --git a/docs-next/content/docs/guides/reliability/error-handling.mdx b/docs/content/docs/guides/reliability/error-handling.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/error-handling.mdx rename to docs/content/docs/guides/reliability/error-handling.mdx diff --git a/docs-next/content/docs/guides/reliability/guarantees.mdx b/docs/content/docs/guides/reliability/guarantees.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/guarantees.mdx rename to docs/content/docs/guides/reliability/guarantees.mdx diff --git a/docs-next/content/docs/guides/reliability/index.mdx b/docs/content/docs/guides/reliability/index.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/index.mdx rename to docs/content/docs/guides/reliability/index.mdx diff --git a/docs-next/content/docs/guides/reliability/locking.mdx b/docs/content/docs/guides/reliability/locking.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/locking.mdx rename to docs/content/docs/guides/reliability/locking.mdx diff --git a/docs-next/content/docs/guides/reliability/meta.json b/docs/content/docs/guides/reliability/meta.json similarity index 100% rename from docs-next/content/docs/guides/reliability/meta.json rename to docs/content/docs/guides/reliability/meta.json diff --git a/docs-next/content/docs/guides/reliability/rate-limiting.mdx b/docs/content/docs/guides/reliability/rate-limiting.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/rate-limiting.mdx rename to docs/content/docs/guides/reliability/rate-limiting.mdx diff --git a/docs-next/content/docs/guides/reliability/retries.mdx 
b/docs/content/docs/guides/reliability/retries.mdx similarity index 100% rename from docs-next/content/docs/guides/reliability/retries.mdx rename to docs/content/docs/guides/reliability/retries.mdx diff --git a/docs-next/content/docs/guides/resources/configuration.mdx b/docs/content/docs/guides/resources/configuration.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/configuration.mdx rename to docs/content/docs/guides/resources/configuration.mdx diff --git a/docs-next/content/docs/guides/resources/dependency-injection.mdx b/docs/content/docs/guides/resources/dependency-injection.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/dependency-injection.mdx rename to docs/content/docs/guides/resources/dependency-injection.mdx diff --git a/docs-next/content/docs/guides/resources/index.mdx b/docs/content/docs/guides/resources/index.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/index.mdx rename to docs/content/docs/guides/resources/index.mdx diff --git a/docs-next/content/docs/guides/resources/interception.mdx b/docs/content/docs/guides/resources/interception.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/interception.mdx rename to docs/content/docs/guides/resources/interception.mdx diff --git a/docs-next/content/docs/guides/resources/meta.json b/docs/content/docs/guides/resources/meta.json similarity index 100% rename from docs-next/content/docs/guides/resources/meta.json rename to docs/content/docs/guides/resources/meta.json diff --git a/docs-next/content/docs/guides/resources/observability.mdx b/docs/content/docs/guides/resources/observability.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/observability.mdx rename to docs/content/docs/guides/resources/observability.mdx diff --git a/docs-next/content/docs/guides/resources/proxies.mdx b/docs/content/docs/guides/resources/proxies.mdx similarity index 100% rename from 
docs-next/content/docs/guides/resources/proxies.mdx rename to docs/content/docs/guides/resources/proxies.mdx diff --git a/docs-next/content/docs/guides/resources/testing.mdx b/docs/content/docs/guides/resources/testing.mdx similarity index 100% rename from docs-next/content/docs/guides/resources/testing.mdx rename to docs/content/docs/guides/resources/testing.mdx diff --git a/docs-next/content/docs/guides/workflows/analysis.mdx b/docs/content/docs/guides/workflows/analysis.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/analysis.mdx rename to docs/content/docs/guides/workflows/analysis.mdx diff --git a/docs-next/content/docs/guides/workflows/building.mdx b/docs/content/docs/guides/workflows/building.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/building.mdx rename to docs/content/docs/guides/workflows/building.mdx diff --git a/docs-next/content/docs/guides/workflows/caching.mdx b/docs/content/docs/guides/workflows/caching.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/caching.mdx rename to docs/content/docs/guides/workflows/caching.mdx diff --git a/docs-next/content/docs/guides/workflows/canvas.mdx b/docs/content/docs/guides/workflows/canvas.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/canvas.mdx rename to docs/content/docs/guides/workflows/canvas.mdx diff --git a/docs-next/content/docs/guides/workflows/composition.mdx b/docs/content/docs/guides/workflows/composition.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/composition.mdx rename to docs/content/docs/guides/workflows/composition.mdx diff --git a/docs-next/content/docs/guides/workflows/conditions.mdx b/docs/content/docs/guides/workflows/conditions.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/conditions.mdx rename to docs/content/docs/guides/workflows/conditions.mdx diff --git 
a/docs-next/content/docs/guides/workflows/fan-out.mdx b/docs/content/docs/guides/workflows/fan-out.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/fan-out.mdx rename to docs/content/docs/guides/workflows/fan-out.mdx diff --git a/docs-next/content/docs/guides/workflows/gates.mdx b/docs/content/docs/guides/workflows/gates.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/gates.mdx rename to docs/content/docs/guides/workflows/gates.mdx diff --git a/docs-next/content/docs/guides/workflows/index.mdx b/docs/content/docs/guides/workflows/index.mdx similarity index 100% rename from docs-next/content/docs/guides/workflows/index.mdx rename to docs/content/docs/guides/workflows/index.mdx diff --git a/docs-next/content/docs/guides/workflows/meta.json b/docs/content/docs/guides/workflows/meta.json similarity index 100% rename from docs-next/content/docs/guides/workflows/meta.json rename to docs/content/docs/guides/workflows/meta.json diff --git a/docs-next/content/docs/index.mdx b/docs/content/docs/index.mdx similarity index 100% rename from docs-next/content/docs/index.mdx rename to docs/content/docs/index.mdx diff --git a/docs-next/content/docs/meta.json b/docs/content/docs/meta.json similarity index 100% rename from docs-next/content/docs/meta.json rename to docs/content/docs/meta.json diff --git a/docs-next/content/docs/more/changelog.mdx b/docs/content/docs/more/changelog.mdx similarity index 100% rename from docs-next/content/docs/more/changelog.mdx rename to docs/content/docs/more/changelog.mdx diff --git a/docs-next/content/docs/more/comparison.mdx b/docs/content/docs/more/comparison.mdx similarity index 100% rename from docs-next/content/docs/more/comparison.mdx rename to docs/content/docs/more/comparison.mdx diff --git a/docs-next/content/docs/more/examples/benchmark.mdx b/docs/content/docs/more/examples/benchmark.mdx similarity index 100% rename from docs-next/content/docs/more/examples/benchmark.mdx rename to 
docs/content/docs/more/examples/benchmark.mdx diff --git a/docs-next/content/docs/more/examples/data-pipeline.mdx b/docs/content/docs/more/examples/data-pipeline.mdx similarity index 100% rename from docs-next/content/docs/more/examples/data-pipeline.mdx rename to docs/content/docs/more/examples/data-pipeline.mdx diff --git a/docs-next/content/docs/more/examples/fastapi-service.mdx b/docs/content/docs/more/examples/fastapi-service.mdx similarity index 100% rename from docs-next/content/docs/more/examples/fastapi-service.mdx rename to docs/content/docs/more/examples/fastapi-service.mdx diff --git a/docs-next/content/docs/more/examples/index.mdx b/docs/content/docs/more/examples/index.mdx similarity index 100% rename from docs-next/content/docs/more/examples/index.mdx rename to docs/content/docs/more/examples/index.mdx diff --git a/docs-next/content/docs/more/examples/meta.json b/docs/content/docs/more/examples/meta.json similarity index 100% rename from docs-next/content/docs/more/examples/meta.json rename to docs/content/docs/more/examples/meta.json diff --git a/docs-next/content/docs/more/examples/notifications.mdx b/docs/content/docs/more/examples/notifications.mdx similarity index 100% rename from docs-next/content/docs/more/examples/notifications.mdx rename to docs/content/docs/more/examples/notifications.mdx diff --git a/docs-next/content/docs/more/examples/web-scraper.mdx b/docs/content/docs/more/examples/web-scraper.mdx similarity index 100% rename from docs-next/content/docs/more/examples/web-scraper.mdx rename to docs/content/docs/more/examples/web-scraper.mdx diff --git a/docs-next/content/docs/more/examples/workflows.mdx b/docs/content/docs/more/examples/workflows.mdx similarity index 100% rename from docs-next/content/docs/more/examples/workflows.mdx rename to docs/content/docs/more/examples/workflows.mdx diff --git a/docs-next/content/docs/more/faq.mdx b/docs/content/docs/more/faq.mdx similarity index 100% rename from 
docs-next/content/docs/more/faq.mdx rename to docs/content/docs/more/faq.mdx diff --git a/docs-next/content/docs/more/meta.json b/docs/content/docs/more/meta.json similarity index 100% rename from docs-next/content/docs/more/meta.json rename to docs/content/docs/more/meta.json diff --git a/docs/examples/benchmark.md b/docs/examples/benchmark.md deleted file mode 100644 index e3ef1bd..0000000 --- a/docs/examples/benchmark.md +++ /dev/null @@ -1,222 +0,0 @@ -# Example: Benchmark - -Measure taskito's throughput by enqueuing and processing a large batch of tasks. - -## benchmark.py - -```python -"""taskito throughput benchmark. - -Measures: -1. Enqueue throughput (jobs/sec) using batch insert -2. Processing throughput (jobs/sec) with N workers -3. End-to-end latency -""" - -import os -import threading -import time - -from taskito import Queue - -# ── Configuration ──────────────────────────────────────── - -NUM_JOBS = 10_000 -NUM_WORKERS = os.cpu_count() or 4 -DB_PATH = ":memory:" # In-memory for pure speed test - -queue = Queue(db_path=DB_PATH, workers=NUM_WORKERS) - -@queue.task() -def noop(x): - """Minimal task — measures framework overhead.""" - return x - -@queue.task() -def cpu_light(x): - """Light CPU work — string formatting.""" - return f"processed-{x}-{'x' * 100}" - -# ── Benchmark Functions ────────────────────────────────── - -def bench_enqueue(task, n): - """Measure batch enqueue throughput.""" - args_list = [(i,) for i in range(n)] - - start = time.perf_counter() - jobs = task.map(args_list) - elapsed = time.perf_counter() - start - - rate = n / elapsed - print(f" Enqueued {n:,} jobs in {elapsed:.2f}s ({rate:,.0f} jobs/s)") - return jobs - -def bench_process(jobs, timeout=120): - """Measure processing throughput by waiting for all jobs.""" - n = len(jobs) - start = time.perf_counter() - - # Wait for the last job (highest ID, enqueued last) - # With FIFO ordering, this means all jobs are done - last = jobs[-1] - try: - last.result(timeout=timeout, 
poll_interval=0.01, max_poll_interval=0.1) - except TimeoutError: - stats = queue.stats() - print(f" Timed out! Stats: {stats}") - return - - elapsed = time.perf_counter() - start - rate = n / elapsed - print(f" Processed {n:,} jobs in {elapsed:.2f}s ({rate:,.0f} jobs/s)") - -def bench_latency(task, samples=100): - """Measure single-job round-trip latency.""" - latencies = [] - for i in range(samples): - start = time.perf_counter() - job = task.delay(i) - job.result(timeout=10) - latencies.append(time.perf_counter() - start) - - avg = sum(latencies) / len(latencies) - p50 = sorted(latencies)[len(latencies) // 2] - p99 = sorted(latencies)[int(len(latencies) * 0.99)] - print(f" Latency (n={samples}): avg={avg*1000:.1f}ms p50={p50*1000:.1f}ms p99={p99*1000:.1f}ms") - -# ── Main ───────────────────────────────────────────────── - -def main(): - print(f"taskito benchmark") - print(f" Workers: {NUM_WORKERS}") - print(f" Jobs: {NUM_JOBS:,}") - print(f" DB: {DB_PATH}") - print() - - # Start worker in background - worker_thread = threading.Thread(target=queue.run_worker, daemon=True) - worker_thread.start() - time.sleep(0.5) # Let worker initialize - - # 1. Noop throughput - print("── noop task (framework overhead) ──") - jobs = bench_enqueue(noop, NUM_JOBS) - bench_process(jobs) - print() - - # 2. Light CPU task throughput - print("── cpu_light task ──") - jobs = bench_enqueue(cpu_light, NUM_JOBS) - bench_process(jobs) - print() - - # 3. 
Single-job latency - print("── single-job latency ──") - bench_latency(noop) - print() - - # Final stats - stats = queue.stats() - print(f"Final stats: {stats}") - -if __name__ == "__main__": - main() -``` - -## Running - -```bash -python benchmark.py -``` - -## Sample Output - -``` -taskito benchmark - Workers: 8 - Jobs: 10,000 - DB: :memory: - -── noop task (framework overhead) ── - Enqueued 10,000 jobs in 0.18s (55,556 jobs/s) - Processed 10,000 jobs in 2.41s (4,149 jobs/s) - -── cpu_light task ── - Enqueued 10,000 jobs in 0.19s (52,632 jobs/s) - Processed 10,000 jobs in 2.53s (3,953 jobs/s) - -── single-job latency ── - Latency (n=100): avg=1.2ms p50=1.1ms p99=3.4ms - -Final stats: {'pending': 0, 'running': 0, 'completed': 20100, 'failed': 0, 'dead': 0, 'cancelled': 0} -``` - -!!! note - Actual numbers depend on your hardware, Python version, and SQLite configuration. The numbers above are from an 8-core machine with Python 3.12. - -## What Makes taskito Fast - -| Component | How it helps | -|---|---| -| **Batch inserts** | `task.map()` inserts all jobs in a single SQLite transaction | -| **WAL mode** | Concurrent reads while writing — workers don't block enqueue | -| **Rust scheduler** | 50ms poll loop runs in native code, not Python | -| **OS threads** | Workers are Rust `std::thread`, not Python threads | -| **GIL per task** | GIL acquired only during Python task execution, released between tasks | -| **crossbeam channels** | Lock-free job dispatch to workers | -| **r2d2 pool** | Up to 8 concurrent SQLite connections | -| **Diesel ORM** | Compiled SQL queries, no runtime query building | - -## How It Compares - -Rough directional comparison on the same hardware (8-core, single machine). These are not scientific benchmarks — run the script above on your own hardware for accurate numbers. 
- -| Metric | taskito (SQLite) | taskito (Postgres) | Celery + Redis | Dramatiq + Redis | -|--------|-----------------|-------------------|---------------|-----------------| -| Enqueue throughput | ~55,000/s | ~20,000/s | ~5,000/s | ~3,000/s | -| Processing (noop, 8 workers) | ~4,000/s | ~3,500/s | ~2,000/s | ~1,500/s | -| p50 latency | 1.1ms | 2.5ms | 5–10ms | 8–15ms | -| p99 latency | 3.4ms | 8ms | 20–50ms | 30–80ms | -| Memory (idle worker) | ~30 MB | ~35 MB | ~80 MB | ~60 MB | -| Setup | `pip install taskito` | + Postgres | + Redis + Celery | + Redis + Dramatiq | -| External services | 0 | 1 (Postgres) | 2 (Redis + result backend) | 1 (Redis) | - -!!! note - Celery numbers are from public benchmarks and community reports. Your mileage will vary depending on workload, serializer, and broker configuration. Run your own benchmarks before making decisions. - -**Why is taskito faster?** - -- Rust scheduler avoids GIL contention — scheduling and dispatch never block Python -- SQLite WAL mode with batch inserts — disk I/O is minimized -- Direct DB polling — no broker hop (enqueue → DB → dequeue is one less network round-trip vs enqueue → Redis → dequeue) -- OS thread pool with per-task GIL acquisition — no multiprocessing overhead for I/O-bound tasks - -## Tune for Your Workload - -| Symptom | Config to change | Why | -|---------|-----------------|-----| -| Low throughput (I/O tasks) | Increase `workers` | More threads = more concurrent I/O | -| Low throughput (CPU tasks) | Use `pool="prefork"` | Each process gets its own GIL | -| High latency | Decrease `scheduler_poll_interval_ms` | Scheduler checks for ready jobs more often | -| Database too busy | Increase `scheduler_poll_interval_ms` | Less frequent polling reduces DB load | -| Memory growing | Set `result_ttl` | Auto-cleanup old results and metrics | -| Jobs timing out | Increase `default_timeout` | Give tasks more time to complete | -| Jobs piling up | Add more workers or use Postgres | SQLite single-writer 
limit may bottleneck | - -## Tuning - -Adjust these for your workload: - -```python -# More workers for I/O-bound tasks -queue = Queue(workers=16) - -# Fewer workers for CPU-bound tasks (limited by GIL) -queue = Queue(workers=4) - -# In-memory DB for maximum throughput (no persistence) -queue = Queue(db_path=":memory:") - -# File DB for durability (slightly slower) -queue = Queue(db_path="tasks.db") -``` diff --git a/docs/examples/data-pipeline.md b/docs/examples/data-pipeline.md deleted file mode 100644 index a5f4508..0000000 --- a/docs/examples/data-pipeline.md +++ /dev/null @@ -1,264 +0,0 @@ -# Example: ETL Data Pipeline - -A multi-stage extract → transform → load pipeline demonstrating task dependencies, DAG workflows, progress tracking, error history inspection, metadata, and named queues. - -## Project Structure - -``` -data-pipeline/ - pipeline.py # Task definitions + DAG construction - worker.py # Worker entry point - monitor.py # Status monitoring script -``` - -## pipeline.py - -```python -"""ETL pipeline with task dependencies and named queues.""" - -import csv -import json - -import httpx - -from taskito import Queue, current_job - -queue = Queue( - db_path=".taskito/pipeline.db", - workers=6, - default_retry=3, - default_timeout=120, -) - -# ── Extract Tasks ──────────────────────────────────────── - -@queue.task(queue="extract", max_retries=5, retry_backoff=2.0) -def extract_api(endpoint: str) -> list[dict]: - """Pull records from an API endpoint with retries.""" - response = httpx.get(endpoint, timeout=30) - response.raise_for_status() - return response.json() - -@queue.task(queue="extract") -def extract_csv(file_path: str) -> list[dict]: - """Read records from a CSV file.""" - with open(file_path, newline="") as f: - return list(csv.DictReader(f)) - -# ── Transform Tasks ────────────────────────────────────── - -@queue.task(queue="transform") -def normalize(records: list[dict], schema: str) -> list[dict]: - """Normalize records against a schema with 
progress tracking.""" - results = [] - for i, record in enumerate(records): - results.append({**record, "schema": schema, "normalized": True}) - if (i + 1) % 50 == 0: - current_job.update_progress(int((i + 1) / len(records) * 100)) - current_job.update_progress(100) - return results - -@queue.task(queue="transform") -def deduplicate(records: list[dict]) -> list[dict]: - """Remove duplicate records by ID.""" - seen = set() - unique = [] - for r in records: - if r["id"] not in seen: - seen.add(r["id"]) - unique.append(r) - return unique - -# ── Load Tasks ─────────────────────────────────────────── - -@queue.task(queue="load") -def load_to_warehouse(records: list[dict], table: str) -> dict: - """Load records into the data warehouse (writes JSON to disk as stand-in).""" - dest = f"/tmp/{table.replace('.', '_')}.json" - with open(dest, "w") as f: - json.dump(records, f, indent=2) - return {"table": table, "rows_inserted": len(records), "dest": dest} - -# ── DAG Construction ───────────────────────────────────── - -def build_pipeline(api_endpoint: str, csv_path: str, target_table: str): - """Build a diamond-shaped ETL DAG. 
- - extract_api ──→ normalize_a ──┐ - ├──→ load - extract_csv ──→ normalize_b ──┘ - """ - - # Stage 1: Extract (parallel) - job_api = extract_api.apply_async( - args=[api_endpoint], - metadata=json.dumps({"source": "api", "endpoint": api_endpoint}), - ) - job_csv = extract_csv.apply_async( - args=[csv_path], - metadata=json.dumps({"source": "csv", "file": csv_path}), - ) - - # Stage 2: Transform (each depends on its extract) - job_norm_a = normalize.apply_async( - args=[[], "schema_v2"], # actual data passed via result - depends_on=job_api.id, - metadata=json.dumps({"stage": "transform", "schema": "v2"}), - ) - job_norm_b = normalize.apply_async( - args=[[], "schema_v2"], - depends_on=job_csv.id, - metadata=json.dumps({"stage": "transform", "schema": "v2"}), - ) - - # Stage 3: Load (depends on both transforms) - job_load = load_to_warehouse.apply_async( - args=[[], target_table], - depends_on=[job_norm_a.id, job_norm_b.id], - priority=10, # high priority once unblocked - metadata=json.dumps({"stage": "load", "table": target_table}), - ) - - return { - "extract": [job_api, job_csv], - "transform": [job_norm_a, job_norm_b], - "load": job_load, - } - - -if __name__ == "__main__": - print("Building ETL pipeline...") - jobs = build_pipeline( - api_endpoint="https://api.example.com/records", - csv_path="/data/export.csv", - target_table="analytics.events", - ) - - print(f"\nDAG created:") - print(f" Extract: {[j.id for j in jobs['extract']]}") - print(f" Transform: {[j.id for j in jobs['transform']]}") - print(f" Load: {jobs['load'].id}") - - # Inspect dependency graph - load_job = queue.get_job(jobs["load"].id) - print(f"\nLoad depends on: {load_job.dependencies}") - - for ext_job in jobs["extract"]: - fetched = queue.get_job(ext_job.id) - print(f" {ext_job.id} dependents: {fetched.dependents}") -``` - -## worker.py - -```python -"""Start the pipeline worker.""" -from pipeline import queue - -if __name__ == "__main__": - print("Starting pipeline worker (queues: extract, 
transform, load)...") - queue.run_worker(queues=["extract", "transform", "load"]) -``` - -## monitor.py - -```python -"""Monitor pipeline status and inspect errors.""" - -import time -from pipeline import queue - -def monitor(load_job_id: str): - """Poll the pipeline until the load job completes.""" - while True: - stats = queue.stats() - print(f"Queue stats: {stats}") - - job = queue.get_job(load_job_id) - if job is None: - print("Load job not found!") - return - - print(f"Load job status: {job.status}, progress: {job.progress}%") - - if job.status == "complete": - print(f"\nPipeline complete! Result: {job.result(timeout=1)}") - return - - if job.status in ("failed", "dead", "cancelled"): - print(f"\nPipeline failed with status: {job.status}") - - # Inspect error history for all jobs - for dep_id in job.dependencies: - dep = queue.get_job(dep_id) - if dep and dep.status in ("dead", "failed"): - errors = dep.errors - print(f"\n Job {dep_id} errors:") - for err in errors: - print(f" Attempt {err['attempt']}: {err['error']}") - print(f" At: {err['failed_at']}") - - # Check dead letter queue - dead = queue.dead_letters(limit=10) - if dead: - print(f"\nDead letters ({len(dead)}):") - for d in dead: - print(f" {d['id']}: {d['task_name']} — {d['error']}") - return - - time.sleep(2) - -if __name__ == "__main__": - import sys - if len(sys.argv) < 2: - print("Usage: python monitor.py ") - sys.exit(1) - monitor(sys.argv[1]) -``` - -## Running It - -=== "Terminal 1 — Worker" - - ```bash - python worker.py - ``` - -=== "Terminal 2 — Build Pipeline" - - ```bash - python pipeline.py - # Copy the load job ID from the output - ``` - -=== "Terminal 3 — Monitor" - - ```bash - python monitor.py - ``` - -## Cascade Cancellation - -If an extract job fails permanently (exhausts all retries), the entire downstream chain is automatically cancelled: - -```python -# If extract_api fails after 5 retries: -# → normalize_a is cascade cancelled -# → load is cascade cancelled (because one 
dependency failed) -# normalize_b may still complete, but load won't run -``` - -This prevents wasting resources on a pipeline that can't succeed. - -## Key Patterns Demonstrated - -| Pattern | Where | -|---|---| -| Task dependencies | `depends_on` in transform and load stages | -| Diamond DAG | Two branches converge at the load stage | -| Cascade cancel | Extract failure cancels downstream transforms and load | -| Progress tracking | `normalize` reports progress every 50 records | -| Error history | `monitor.py` inspects `job.errors` for failed jobs | -| Metadata | Each job tagged with source/stage info via `metadata` | -| Named queues | `extract`, `transform`, `load` for queue isolation | -| Priority | Load job gets `priority=10` to run first once unblocked | -| Dead letter inspection | `monitor.py` checks `queue.dead_letters()` | diff --git a/docs/examples/fastapi-service.md b/docs/examples/fastapi-service.md deleted file mode 100644 index 3969331..0000000 --- a/docs/examples/fastapi-service.md +++ /dev/null @@ -1,177 +0,0 @@ -# Example: FastAPI Image Processing Service - -A REST API for image processing that demonstrates FastAPI integration, progress tracking, async result fetching, job cancellation, and SSE progress streaming. 
- -## Project Structure - -``` -image-service/ - app.py # FastAPI app + task definitions - client.py # Example client script -``` - -## app.py - -```python -"""FastAPI image processing service with taskito.""" - -import time - -from fastapi import FastAPI, HTTPException -from taskito import Queue, current_job -from taskito.contrib.fastapi import TaskitoRouter - -queue = Queue(db_path=".taskito/images.db", workers=4, result_ttl=3600) - -# ── Tasks ──────────────────────────────────────────────── - -@queue.task(timeout=300) -def resize_image(image_url: str, sizes: list[int]) -> dict: - """Resize an image to multiple sizes with progress updates.""" - results = {} - for i, size in enumerate(sizes): - # Simulate resize work - time.sleep(1) - results[f"{size}x{size}"] = f"{image_url}?w={size}&h={size}" - current_job.update_progress(int((i + 1) / len(sizes) * 100)) - return results - -@queue.task(max_retries=3, retry_backoff=2.0) -def generate_thumbnail(image_url: str) -> str: - """Generate a thumbnail — retries on failure.""" - time.sleep(0.5) - return f"{image_url}?thumb=true" - -@queue.task(timeout=600) -def apply_filters(image_url: str, filters: list[str]) -> dict: - """Apply a sequence of filters with progress.""" - results = {} - for i, f in enumerate(filters): - time.sleep(2) - results[f] = f"{image_url}?filter={f}" - current_job.update_progress(int((i + 1) / len(filters) * 100)) - return results - -# ── FastAPI App ────────────────────────────────────────── - -app = FastAPI(title="Image Processing Service") - -# Mount the taskito router — adds /tasks/stats, /tasks/jobs/{id}, etc. 
-app.include_router(TaskitoRouter(queue), prefix="/tasks") - -@app.post("/process") -async def submit_job(image_url: str, sizes: list[int] | None = None): - """Submit an image processing job and return the job ID.""" - if sizes is None: - sizes = [128, 256, 512, 1024] - job = resize_image.delay(image_url, sizes) - return {"job_id": job.id, "status_url": f"/tasks/jobs/{job.id}"} - -@app.post("/process/{job_id}/cancel") -async def cancel_job(job_id: str): - """Cancel a pending image processing job.""" - cancelled = await queue.acancel_job(job_id) - if not cancelled: - raise HTTPException(400, "Job is not in a cancellable state") - return {"cancelled": True, "job_id": job_id} - -@app.get("/process/{job_id}/result") -async def get_result(job_id: str, timeout: float = 0): - """Get the result, optionally blocking until complete.""" - job = queue.get_job(job_id) - if job is None: - raise HTTPException(404, "Job not found") - if timeout > 0: - try: - result = await job.aresult(timeout=timeout) - return {"status": "complete", "result": result} - except TimeoutError: - return {"status": job.status, "result": None} - return {"status": job.status, "progress": job.progress} -``` - -## client.py - -```python -"""Example client for the image processing service.""" - -import httpx -import json -import time - -BASE = "http://localhost:8000" - -# 1. Submit a job -resp = httpx.post(f"{BASE}/process", params={ - "image_url": "https://example.com/photo.jpg", - "sizes": [128, 256, 512, 1024], -}) -data = resp.json() -job_id = data["job_id"] -print(f"Submitted job: {job_id}") - -# 2. Stream progress via SSE -print("\nStreaming progress:") -with httpx.stream("GET", f"{BASE}/tasks/jobs/{job_id}/progress") as r: - for line in r.iter_lines(): - if line.startswith("data:"): - payload = json.loads(line[5:].strip()) - print(f" Progress: {payload['progress']}% — Status: {payload['status']}") - if payload["status"] in ("complete", "failed", "dead", "cancelled"): - break - -# 3. 
Fetch the final result -result = httpx.get(f"{BASE}/tasks/jobs/{job_id}/result", params={"timeout": 5}) -print(f"\nResult: {result.json()}") -``` - -## Running It - -=== "Terminal 1 — Worker" - - ```bash - taskito worker --app app:queue - ``` - -=== "Terminal 2 — API Server" - - ```bash - uvicorn app:app --reload - ``` - -=== "Terminal 3 — Client" - - ```bash - python client.py - ``` - -## SSE from the Browser - -```javascript -const jobId = "01H5K6X..."; -const source = new EventSource(`/tasks/jobs/${jobId}/progress`); - -const progressBar = document.getElementById("progress"); - -source.onmessage = (event) => { - const data = JSON.parse(event.data); - progressBar.style.width = `${data.progress}%`; - progressBar.textContent = `${data.progress}%`; - - if (["complete", "failed", "dead", "cancelled"].includes(data.status)) { - source.close(); - } -}; -``` - -## Key Patterns Demonstrated - -| Pattern | Where | -|---|---| -| FastAPI integration | `TaskitoRouter(queue)` mounted at `/tasks` | -| Progress tracking | `current_job.update_progress()` in `resize_image` and `apply_filters` | -| Async result fetch | `await job.aresult(timeout=...)` in `get_result` endpoint | -| Async cancellation | `await queue.acancel_job()` in `cancel_job` endpoint | -| SSE streaming | `/tasks/jobs/{id}/progress` endpoint from `TaskitoRouter` | -| Retry with backoff | `generate_thumbnail` — 3 retries, 2x backoff | -| Result TTL | `result_ttl=3600` — auto-cleanup after 1 hour | diff --git a/docs/examples/index.md b/docs/examples/index.md deleted file mode 100644 index 1c863f6..0000000 --- a/docs/examples/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Examples - -End-to-end examples demonstrating common taskito patterns. 
- -| Example | Description | -|---------|-------------| -| [FastAPI Service](fastapi-service.md) | REST API that enqueues tasks and streams progress via SSE | -| [Notification Service](notifications.md) | Multi-channel notifications with retries and rate limiting | -| [Web Scraper Pipeline](web-scraper.md) | Distributed scraping with chains and error handling | -| [Data Pipeline](data-pipeline.md) | ETL pipeline with dependencies, groups, and chords | -| [DAG Workflows](workflows.md) | Fan-out, conditions, gates, sub-workflows, incremental runs | -| [Benchmark](benchmark.md) | Performance benchmarks comparing taskito to alternatives | diff --git a/docs/examples/notifications.md b/docs/examples/notifications.md deleted file mode 100644 index e5414c8..0000000 --- a/docs/examples/notifications.md +++ /dev/null @@ -1,214 +0,0 @@ -# Example: Notification Service - -A notification service demonstrating delayed scheduling, unique tasks, priority queues, and job cancellation. - -## Project Structure - -``` -notifications/ - tasks.py # Notification task definitions - service.py # Enqueue notifications with scheduling -``` - -## tasks.py - -```python -"""Notification tasks with priority and deduplication.""" - -import os - -import httpx - -from taskito import Queue - -queue = Queue( - db_path=".taskito/notifications.db", - workers=4, - default_retry=3, - default_timeout=30, - result_ttl=7200, # auto-cleanup after 2 hours -) - -# ── Notification Tasks ─────────────────────────────────── - -@queue.task(priority=10, max_retries=3, retry_backoff=2.0) -def send_urgent_email(to: str, subject: str, body: str) -> dict: - """High-priority email — runs before bulk notifications.""" - response = httpx.post( - "https://api.mailgun.net/v3/YOUR_DOMAIN/messages", - auth=("api", os.environ["MAILGUN_API_KEY"]), - data={"from": "noreply@example.com", "to": to, "subject": subject, "text": body}, - ) - response.raise_for_status() - return {"to": to, "subject": subject, "sent": True} - 
-@queue.task(priority=0, max_retries=3, retry_backoff=2.0)
-def send_bulk_email(to: str, subject: str, body: str) -> dict:
-    """Low-priority bulk email."""
-    response = httpx.post(
-        "https://api.mailgun.net/v3/YOUR_DOMAIN/messages",
-        auth=("api", os.environ["MAILGUN_API_KEY"]),
-        data={"from": "noreply@example.com", "to": to, "subject": subject, "text": body},
-    )
-    response.raise_for_status()
-    return {"to": to, "subject": subject, "sent": True}
-
-@queue.task(priority=5, max_retries=5, retry_backoff=2.0)
-def send_push(user_id: str, title: str, message: str) -> dict:
-    """Push notification with retries."""
-    response = httpx.post(
-        "https://fcm.googleapis.com/fcm/send",
-        headers={"Authorization": f"key={os.environ['FCM_SERVER_KEY']}"},
-        json={"to": f"/topics/user-{user_id}", "notification": {"title": title, "body": message}},
-    )
-    response.raise_for_status()
-    return {"user_id": user_id, "title": title, "sent": True}
-
-@queue.task(max_retries=3, retry_backoff=2.0)
-def send_sms(phone: str, message: str) -> dict:
-    """SMS notification via Twilio."""
-    response = httpx.post(
-        f"https://api.twilio.com/2010-04-01/Accounts/{os.environ['TWILIO_ACCOUNT_SID']}/Messages.json",
-        auth=(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"]),
-        data={"From": os.environ["TWILIO_FROM_NUMBER"], "To": phone, "Body": message},
-    )
-    response.raise_for_status()
-    return {"phone": phone, "sent": True}
-
-# ── Periodic Digest ──────────────────────────────────────
-
-@queue.periodic(cron="0 0 9 * * *")
-def daily_digest():
-    """Send daily digest email every day at 9 AM."""
-    print("[DIGEST] Sending daily digest to all subscribers...")
-```
-
-## service.py
-
-```python
-"""Notification service — enqueue with scheduling, deduplication, and cancellation."""
-
-import time
-from tasks import (
-    queue,
-    send_urgent_email,
-    send_bulk_email,
-    send_push,
-    send_sms,
-)
-
-# ── 1. 
Delayed Scheduling ─────────────────────────────── -# Schedule a reminder 30 minutes from now - -print("1. Delayed scheduling") -reminder = send_push.apply_async( - args=["user_123", "Reminder", "Your meeting starts in 5 minutes"], - delay=1800, # 30 minutes from now -) -print(f" Scheduled reminder: {reminder.id} (runs in 30 min)") - -# Schedule a follow-up email for 1 hour later -followup = send_bulk_email.apply_async( - args=["user@example.com", "How was your meeting?", "We'd love your feedback."], - delay=3600, # 1 hour from now -) -print(f" Scheduled follow-up: {followup.id} (runs in 1 hour)") - -# ── 2. Unique Tasks (Deduplication) ───────────────────── -# Prevent duplicate notifications for the same event - -print("\n2. Unique tasks") -job1 = send_push.apply_async( - args=["user_456", "New message", "You have a new message"], - unique_key="notify:user_456:new_message", -) -job2 = send_push.apply_async( - args=["user_456", "New message", "You have a new message"], - unique_key="notify:user_456:new_message", -) -print(f" First enqueue: {job1.id}") -print(f" Second enqueue: {job2.id}") -print(f" Same job? {job1.id == job2.id}") # True — deduplicated - -# ── 3. Priority Queues ────────────────────────────────── -# Urgent notifications run before bulk - -print("\n3. Priority queues") -# Enqueue bulk emails first -bulk_jobs = send_bulk_email.map([ - ("alice@example.com", "Newsletter", "This week's updates..."), - ("bob@example.com", "Newsletter", "This week's updates..."), - ("carol@example.com", "Newsletter", "This week's updates..."), -]) -print(f" Enqueued {len(bulk_jobs)} bulk emails (priority=0)") - -# Enqueue urgent email after — but it runs first due to priority=10 -urgent = send_urgent_email.delay( - "admin@example.com", - "Server Alert", - "CPU usage exceeded 90%", -) -print(f" Enqueued urgent email (priority=10) — runs before bulk") - -# ── 4. 
Job Cancellation ───────────────────────────────── -# Cancel a scheduled notification before it sends - -print("\n4. Job cancellation") -scheduled = send_sms.apply_async( - args=["+1234567890", "Your order ships tomorrow"], - delay=7200, # 2 hours from now -) -print(f" Scheduled SMS: {scheduled.id}") - -# User updated their preference — cancel the SMS -cancelled = queue.cancel_job(scheduled.id) -print(f" Cancelled: {cancelled}") # True - -job = queue.get_job(scheduled.id) -print(f" Status: {job.status}") # "cancelled" - -# ── 5. Inspect Pending Notifications ───────────────────── - -print("\n5. Pending notifications") -pending = queue.list_jobs(status="pending", limit=10) -for j in pending: - d = j.to_dict() - print(f" {d['id'][:12]}... | {d['task_name']} | priority={d['priority']}") - -stats = queue.stats() -print(f"\nQueue stats: {stats}") -``` - -## Running It - -=== "Terminal 1 — Worker" - - ```bash - taskito worker --app tasks:queue - ``` - -=== "Terminal 2 — Enqueue" - - ```bash - python service.py - ``` - -=== "Terminal 3 — Monitor" - - ```bash - taskito info --app tasks:queue --watch - ``` - -## Key Patterns Demonstrated - -| Pattern | Where | -|---|---| -| Delayed scheduling | `delay=1800` — reminder runs 30 min later | -| Unique tasks | `unique_key="notify:user_456:..."` — deduplicates | -| Priority queues | `priority=10` urgent runs before `priority=0` bulk | -| Job cancellation | `queue.cancel_job()` revokes a scheduled SMS | -| Batch enqueue | `send_bulk_email.map()` for newsletter | -| Periodic tasks | `daily_digest` runs every day at 9 AM | -| Result TTL | `result_ttl=7200` — auto-cleanup after 2 hours | -| Retry with backoff | `send_push` — 5 retries, 2x backoff | -| Job inspection | `queue.list_jobs()` to view pending notifications | diff --git a/docs/examples/web-scraper.md b/docs/examples/web-scraper.md deleted file mode 100644 index aca7da7..0000000 --- a/docs/examples/web-scraper.md +++ /dev/null @@ -1,200 +0,0 @@ -# Example: Web Scraper 
Pipeline - -A complete multi-stage web scraper demonstrating rate limiting, retries, workflows, progress tracking, hooks, periodic cleanup, and named queues. - -## Project Structure - -``` -scraper/ - tasks.py # Task definitions - worker.py # Worker entry point - run.py # Enqueue scraping jobs -``` - -## tasks.py - -```python -import json -import time -from taskito import Queue, current_job, chain, group, chord - -queue = Queue( - db_path=".taskito/scraper.db", - workers=4, - default_retry=3, - default_timeout=60, - result_ttl=3600, # Auto-cleanup results after 1 hour -) - -# ── Hooks ──────────────────────────────────────────────── - -@queue.before_task -def log_start(task_name, args, kwargs): - print(f"[START] {task_name}") - -@queue.on_success -def log_success(task_name, args, kwargs, result): - print(f"[DONE] {task_name}") - -@queue.on_failure -def log_failure(task_name, args, kwargs, error): - print(f"[FAIL] {task_name}: {error}") - -# ── Tasks ──────────────────────────────────────────────── - -@queue.task( - rate_limit="30/m", # Max 30 requests per minute - max_retries=5, - retry_backoff=2.0, - queue="scraping", -) -def fetch_page(url): - """Fetch a single URL. 
Rate-limited and retried on failure."""
-    import urllib.request
-    with urllib.request.urlopen(url, timeout=10) as resp:
-        return resp.read().decode("utf-8")
-
-@queue.task(queue="processing")
-def extract_links(html):
-    """Extract all links from an HTML page."""
-    import re
-    return re.findall(r'href="(https?://[^"]+)"', html)
-
-@queue.task(queue="processing")
-def extract_title(html):
-    """Extract the page title."""
-    import re
-    match = re.search(r"<title>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
-    return match.group(1).strip() if match else "No title"
-
-@queue.task(queue="storage")
-def store_results(results, url=""):
-    """Store scraped data to a JSON file."""
-    data = {"url": url, "results": results, "scraped_at": time.time()}
-    filename = f"output_{int(time.time())}.json"
-    with open(filename, "w") as f:
-        json.dump(data, f, indent=2)
-    return filename
-
-@queue.task(queue="processing")
-def summarize(pages):
-    """Aggregate results from multiple pages."""
-    total_links = sum(len(p.get("links", [])) for p in pages)
-    titles = [p.get("title", "?") for p in pages]
-    return {
-        "pages_scraped": len(pages),
-        "total_links": total_links,
-        "titles": titles,
-    }
-
-@queue.task()
-def scrape_page(url):
-    """Full pipeline for a single page: fetch → extract links + title."""
-    html = fetch_page(url)  # Direct call (not queued)
-    links = extract_links(html)
-    title = extract_title(html)
-    return {"url": url, "title": title, "links": links}
-
-# ── Periodic cleanup ────────────────────────────────────
-
-@queue.periodic(cron="0 0 * * * *")
-def hourly_cleanup():
-    """Purge completed jobs and dead letters every hour."""
-    completed = queue.purge_completed(older_than=3600)
-    dead = queue.purge_dead(older_than=86400)
-    print(f"Cleanup: purged {completed} completed, {dead} dead")
-```
-
-## run.py
-
-```python
-"""Enqueue scraping jobs."""
-from tasks import queue, scrape_page, summarize, store_results
-from taskito import group, chord
-
-urls = [
-    "https://example.com",
-    
"https://httpbin.org/html", - "https://jsonplaceholder.typicode.com", -] - -# ── Option 1: Simple parallel scraping ────────────────── - -print("Enqueuing scrape jobs...") -jobs = [scrape_page.delay(url) for url in urls] - -# Wait for all results -for job in jobs: - result = job.result(timeout=30) - print(f" {result['title']} — {len(result['links'])} links") - -# ── Option 2: Chord — scrape in parallel, then summarize ─ - -print("\nRunning chord pipeline...") -result = chord( - group(*[scrape_page.s(url) for url in urls]), - summarize.s(), -).apply(queue) - -summary = result.result(timeout=60) -print(f" Scraped {summary['pages_scraped']} pages") -print(f" Found {summary['total_links']} total links") -print(f" Titles: {summary['titles']}") - -# ── Option 3: Batch enqueue with .map() ───────────────── - -print("\nBatch enqueue with .map()...") -jobs = scrape_page.map([(url,) for url in urls]) -results = [j.result(timeout=30) for j in jobs] -print(f" Scraped {len(results)} pages in batch") - -# ── Check stats ───────────────────────────────────────── - -stats = queue.stats() -print(f"\nQueue stats: {stats}") -``` - -## worker.py - -```python -"""Start the worker.""" -from tasks import queue - -if __name__ == "__main__": - print("Starting scraper worker...") - queue.run_worker(queues=["scraping", "processing", "storage"]) -``` - -## Running It - -=== "Terminal 1 — Worker" - - ```bash - python worker.py - ``` - -=== "Terminal 2 — Enqueue" - - ```bash - python run.py - ``` - -=== "Terminal 3 — Monitor" - - ```bash - taskito info --app tasks:queue --watch - ``` - -## Key Patterns Demonstrated - -| Pattern | Where | -|---|---| -| Rate limiting | `fetch_page` — 30 requests/min | -| Retry with backoff | `fetch_page` — 5 retries, 2.0x backoff | -| Named queues | `scraping`, `processing`, `storage` | -| Hooks | `log_start`, `log_success`, `log_failure` | -| Workflows (chord) | Parallel scrape → summarize | -| Batch enqueue | `.map()` for bulk job creation | -| Periodic tasks | 
`hourly_cleanup` runs every hour | -| Result TTL | Auto-cleanup completed jobs after 1 hour | -| Direct call | `scrape_page` calls `fetch_page()` directly | diff --git a/docs/examples/workflows.md b/docs/examples/workflows.md deleted file mode 100644 index b175a7d..0000000 --- a/docs/examples/workflows.md +++ /dev/null @@ -1,261 +0,0 @@ -# Example: DAG Workflows - -Real-world workflow patterns demonstrating fan-out, conditions, approval gates, sub-workflows, and incremental runs. - -## ML Training Pipeline - -A training pipeline that evaluates a model, gates deployment on accuracy, and has a rollback path. - -```python -from taskito import Queue -from taskito.workflows import Workflow, WorkflowContext - -queue = Queue(db_path="ml.db", workers=4) - - -@queue.task() -def fetch_dataset() -> dict: - return {"rows": 50_000, "path": "/data/train.parquet"} - - -@queue.task() -def train_model(dataset: dict) -> dict: - # ... training logic ... - return {"model_id": "v3.2", "accuracy": 0.97, "loss": 0.08} - - -@queue.task() -def evaluate(model: dict) -> dict: - return {"accuracy": model["accuracy"], "passed": model["accuracy"] > 0.90} - - -@queue.task() -def deploy(model_id: str) -> str: - return f"deployed {model_id}" - - -@queue.task() -def notify_failure() -> str: - return "sent alert: model below threshold" - - -def accuracy_gate(ctx: WorkflowContext) -> bool: - return ctx.results.get("evaluate", {}).get("passed", False) - - -wf = Workflow(name="ml_pipeline") -wf.step("fetch", fetch_dataset) -wf.step("train", train_model, after="fetch") -wf.step("evaluate", evaluate, after="train") -wf.gate("review", after="evaluate", timeout=3600, on_timeout="reject") -wf.step("deploy", deploy, after="review") -wf.step("alert", notify_failure, after="review", condition="on_failure") -``` - -```mermaid -flowchart TD - fetch --> train --> evaluate - evaluate --> review["review\n(approval gate)"] - review -->|approved| deploy - review -->|rejected| alert -``` - -Usage: - -```python -run = 
queue.submit_workflow(wf) - -# After human review: -queue.approve_gate(run.id, "review") - -result = run.wait(timeout=120) -print(run.visualize("mermaid")) -``` - ---- - -## Map-Reduce with Fan-Out - -Process a batch of items in parallel, then aggregate results. - -```python -@queue.task() -def fetch_urls() -> list[str]: - return [ - "https://api.example.com/page/1", - "https://api.example.com/page/2", - "https://api.example.com/page/3", - ] - - -@queue.task() -def scrape(url: str) -> dict: - import httpx - resp = httpx.get(url) - return {"url": url, "status": resp.status_code, "size": len(resp.content)} - - -@queue.task() -def summarize(results: list[dict]) -> dict: - total = sum(r["size"] for r in results) - return {"pages": len(results), "total_bytes": total} - - -wf = Workflow(name="scrape_pipeline") -wf.step("fetch", fetch_urls) -wf.step("scrape", scrape, after="fetch", fan_out="each") -wf.step("summarize", summarize, after="scrape", fan_in="all") - -run = queue.submit_workflow(wf) -result = run.wait(timeout=60) -# summarize receives [{"url": ..., "size": ...}, ...] -``` - ---- - -## Resilient Pipeline with Continue Mode - -Independent branches keep running even when one fails. 
- -```python -@queue.task(max_retries=0) -def ingest_orders() -> str: - return "orders ingested" - - -@queue.task(max_retries=0) -def ingest_inventory() -> str: - raise RuntimeError("inventory source down") - - -@queue.task() -def build_report() -> str: - return "report built" - - -@queue.task() -def send_alert() -> str: - return "alert sent to #data-eng" - - -wf = Workflow(name="daily_ingest", on_failure="continue") -wf.step("orders", ingest_orders) -wf.step("inventory", ingest_inventory) -wf.step("report", build_report, after="orders") -wf.step("alert", send_alert, after="inventory", condition="on_failure") -``` - -```mermaid -flowchart TD - orders --> report["report ✓"] - inventory["inventory ✗"] --> alert["alert ✓\n(on_failure)"] -``` - -`inventory` fails, but `orders → report` runs to completion. `alert` fires because its predecessor failed. - ---- - -## Multi-Region ETL with Sub-Workflows - -Compose reusable pipelines as sub-workflow steps. - -```python -@queue.task() -def extract(region: str) -> list: - return [{"region": region, "id": i} for i in range(100)] - - -@queue.task() -def load(data: list) -> int: - return len(data) - - -@queue.task() -def reconcile() -> str: - return "all regions reconciled" - - -@queue.workflow("region_etl") -def region_etl(region: str) -> Workflow: - wf = Workflow() - wf.step("extract", extract, args=(region,)) - wf.step("load", load, after="extract") - return wf - - -@queue.workflow("global_etl") -def global_etl() -> Workflow: - wf = Workflow() - wf.step("eu", region_etl.as_step(region="eu")) - wf.step("us", region_etl.as_step(region="us")) - wf.step("ap", region_etl.as_step(region="ap")) - wf.step("reconcile", reconcile, after=["eu", "us", "ap"]) - return wf - - -run = global_etl.submit() -run.wait(timeout=120) -``` - -EU, US, and AP ETL pipelines run concurrently as child workflows. `reconcile` runs after all three complete. - ---- - -## Incremental Re-Runs - -Skip unchanged steps on the second run. 
- -```python -wf = Workflow(name="nightly", cache_ttl=86400) # 24h TTL -wf.step("extract", extract) -wf.step("transform", transform, after="extract") -wf.step("load", load, after="transform") - -# First run: everything executes -run1 = queue.submit_workflow(wf) -run1.wait() - -# Next day: skip completed steps -run2 = queue.submit_workflow(wf, incremental=True, base_run=run1.id) -run2.wait() - -for name, node in run2.status().nodes.items(): - print(f"{name}: {node.status}") -# extract: cache_hit -# transform: cache_hit -# load: cache_hit -``` - ---- - -## Pre-Execution Analysis - -Analyze a workflow before submitting it. - -```python -wf = Workflow(name="complex") -wf.step("a", task_a) -wf.step("b", task_b, after="a") -wf.step("c", task_c, after="a") -wf.step("d", task_d, after=["b", "c"]) - -# Structure -print(wf.topological_levels()) -# [["a"], ["b", "c"], ["d"]] - -print(wf.stats()) -# {"nodes": 4, "edges": 4, "depth": 3, "width": 2, "density": 0.67} - -# Critical path with estimated durations -path, cost = wf.critical_path({"a": 2, "b": 10, "c": 3, "d": 1}) -print(f"Critical path: {path}, cost: {cost}s") -# Critical path: ["a", "b", "d"], cost: 13s - -# Bottleneck -analysis = wf.bottleneck_analysis({"a": 2, "b": 10, "c": 3, "d": 1}) -print(analysis["suggestion"]) -# "b is the bottleneck (76.9% of total time). Consider optimizing." - -# Visualization -print(wf.visualize("mermaid")) -``` diff --git a/docs/faq.md b/docs/faq.md deleted file mode 100644 index bf27e84..0000000 --- a/docs/faq.md +++ /dev/null @@ -1,178 +0,0 @@ -# FAQ - -## Can I use taskito with Django? - -Yes. 
Create a `Queue` instance in one of your Django apps and import it where needed: - -```python -# myproject/tasks.py -from taskito import Queue - -queue = Queue(db_path="taskito.db") - -@queue.task() -def send_welcome_email(user_id: int): - from myapp.models import User - user = User.objects.get(id=user_id) - user.email_user("Welcome!", "Thanks for signing up.") -``` - -Import tasks lazily inside the function body to avoid Django app registry issues. Start the worker separately: - -```bash -DJANGO_SETTINGS_MODULE=myproject.settings taskito worker --app myproject.tasks:queue -``` - -## Can I use taskito with Flask? - -Yes. Same pattern — define a queue, decorate tasks, run the worker: - -```python -# tasks.py -from taskito import Queue - -queue = Queue(db_path="taskito.db") - -@queue.task() -def generate_report(report_id: int): - from myapp import create_app - app = create_app() - with app.app_context(): - ... -``` - -## Can multiple processes share the same SQLite file? - -Yes, with caveats. SQLite in WAL mode allows concurrent readers and one writer at a time. taskito sets `busy_timeout=5000ms` to handle contention. - -However, taskito is designed as a **single-process** task queue. Multiple worker processes against one database works but will see diminishing returns due to write lock contention. For most workloads, one worker process with multiple threads is sufficient. - -## What happens if my worker crashes mid-task? - -The job stays in `running` status in SQLite. On the next worker start, the **stale job reaper** detects jobs that have been running longer than their `timeout` and marks them as failed (triggering retries or DLQ). - -If no timeout is set, stale jobs remain in `running` status indefinitely. **Always set a timeout on your tasks.** - -```python -@queue.task(timeout=300) # 5 minute timeout -def process(data): - ... -``` - -## How big can the SQLite database get? - -SQLite can handle databases up to 281 TB (theoretical limit). 
In practice, taskito databases stay small if you set `result_ttl` to auto-purge old jobs: - -```python -queue = Queue(db_path="myapp.db", result_ttl=86400) # Purge after 24h -``` - -Without cleanup, expect ~1 KB per job. A million completed jobs ≈ 1 GB. - -## Can I use a remote or networked SQLite? - -No. SQLite requires local filesystem access for file locking. Network filesystems (NFS, SMB, CIFS) do not reliably support the locking primitives SQLite depends on. Always place the database on local storage. - -## When should I use Postgres instead of SQLite? - -Use the **Postgres backend** (`pip install taskito[postgres]`) when you need: - -- **Multi-machine workers** — run workers on separate servers sharing the same queue -- **Higher write throughput** — Postgres handles concurrent writers better than SQLite -- **Existing Postgres infrastructure** — leverage your existing database and backups - -For single-machine workloads, SQLite is simpler and requires zero setup. See the [Postgres Backend guide](guide/operations/postgres.md). - -## Is taskito production-ready? - -taskito is suitable for production workloads — background job processing, periodic tasks, data pipelines, and similar use cases. - -For single-machine deployments, use the default SQLite backend. For multi-server setups, use the [Postgres backend](guide/operations/postgres.md). - -## What observability options does taskito support? 
- -taskito offers three observability integrations, each suited to different needs: - -| Integration | Best for | Install | -|-------------|----------|---------| -| **[OpenTelemetry](integrations/otel.md)** | Distributed tracing, correlating tasks with HTTP requests | `pip install taskito[otel]` | -| **[Prometheus](integrations/prometheus.md)** | Metrics dashboards, alerting on queue depth/error rates | `pip install taskito[prometheus]` | -| **[Sentry](integrations/sentry.md)** | Error tracking with rich context and breadcrumbs | `pip install taskito[sentry]` | - -All three are implemented as `TaskMiddleware` and can be combined together. - -## How does taskito compare to running Celery with SQLite? - -Celery can use SQLite as a result backend, but still requires a broker (Redis or RabbitMQ). taskito replaces **both** broker and backend with a single SQLite database. Additionally: - -- taskito's scheduler runs in Rust (faster polling, lower overhead) -- Worker threads are OS threads managed by Rust, not Python processes -- No external dependencies beyond `cloudpickle` - -## Can I use async tasks? - -Yes. Define the task function with `async def` and the worker dispatches it natively — no `asyncio.run()` wrapping, no thread-pool bridging: - -```python -@queue.task() -async def fetch_urls(urls: list[str]) -> list[str]: - import httpx - async with httpx.AsyncClient() as client: - return [r.text for r in await asyncio.gather( - *[client.get(url) for url in urls] - )] -``` - -Enqueue and await results from async application code: - -```python -job = fetch_urls.delay(urls) -result = await job.aresult(timeout=30) -stats = await queue.astats() -``` - -Sync and async tasks can coexist in the same queue. The worker automatically routes each job to the correct pool based on the task type. See the [Native Async Tasks](guide/execution/async-tasks.md) guide for details including `async_concurrency` tuning and `current_job` context in async tasks. 
- -## What serialization format does taskito use? - -By default, `CloudpickleSerializer` — which supports most Python objects including lambdas and closures. You can switch to `JsonSerializer` for simpler, cross-language payloads, or provide a custom serializer: - -```python -from taskito import Queue, JsonSerializer - -queue = Queue(serializer=JsonSerializer()) -``` - -Custom serializers implement the `Serializer` protocol with `dumps(obj) -> bytes` and `loads(data) -> Any` methods. - -Regardless of serializer, avoid passing unpicklable/unserializable objects like open file handles, database connections, or thread locks. - -## Can I run the dashboard and worker in the same process? - -They're designed to run as separate processes sharing the same database: - -```bash -# Terminal 1 -taskito worker --app myapp:queue - -# Terminal 2 -taskito dashboard --app myapp:queue -``` - -For embedding in a FastAPI app, use `TaskitoRouter` instead — it provides the same stats and job management as REST endpoints. - -## How do I reset / clear all jobs? - -```python -# Purge all completed jobs -queue.purge_completed(older_than=0) - -# Purge all dead letters -queue.purge_dead(older_than=0) -``` - -Or delete the database file and restart: - -```bash -rm myapp.db myapp.db-wal myapp.db-shm -``` diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md deleted file mode 100644 index 28f0a66..0000000 --- a/docs/getting-started/index.md +++ /dev/null @@ -1,23 +0,0 @@ -# Getting Started - -Get taskito installed and running in under 5 minutes. - -
- -- **Installation** - - --- - - Install taskito from PyPI with optional extras for your framework. - - [:octicons-arrow-right-24: Install](installation.md) - -- **Quickstart** - - --- - - Define a task, enqueue it, and start a worker — step by step. - - [:octicons-arrow-right-24: Quickstart](quickstart.md) - -
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md deleted file mode 100644 index 92ae484..0000000 --- a/docs/getting-started/installation.md +++ /dev/null @@ -1,60 +0,0 @@ -# Installation - -## From PyPI - -```bash -pip install taskito -``` - -taskito has a single runtime dependency: [`cloudpickle`](https://github.com/cloudpipe/cloudpickle) for serializing task arguments and results. It is installed automatically. - -!!! note "SQLite is bundled" - taskito ships with SQLite compiled in via Rust's `libsqlite3-sys` crate. You do **not** need a system SQLite installation. - -## Postgres Backend - -To use PostgreSQL as the storage backend instead of SQLite: - -```bash -pip install taskito[postgres] -``` - -See the [Postgres Backend guide](../guide/operations/postgres.md) for configuration details. - -## From Source - -Building from source requires a Rust toolchain (1.70+). - -```bash -# Install Rust (if needed) -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh - -# Clone and build -git clone https://github.com/ByteVeda/taskito.git -cd taskito -python -m venv .venv -source .venv/bin/activate -pip install maturin -maturin develop --release -``` - -## Development Setup - -```bash -pip install -e ".[dev]" # Tests, linting, type checking -pip install -e ".[docs]" # Documentation (Zensical) -``` - -## Verify Installation - -```python -import taskito -print(taskito.__version__) # 0.3.0 -``` - -## Requirements - -- Python 3.9+ -- Any OS with SQLite support (Linux, macOS, Windows) -- PostgreSQL 12+ (optional, for `taskito[postgres]`) -- Rust toolchain only needed for building from source diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md deleted file mode 100644 index ae1b7a8..0000000 --- a/docs/getting-started/quickstart.md +++ /dev/null @@ -1,121 +0,0 @@ -# Quickstart - -Build your first task queue in 5 minutes. - -## 1. 
Define Tasks - -Create a file called `tasks.py`: - -```python -from taskito import Queue - -# Create a queue backed by SQLite -queue = Queue(db_path="tasks.db") - -@queue.task() -def add(a: int, b: int) -> int: - return a + b - -@queue.task(max_retries=3, retry_backoff=2.0) -def send_email(to: str, subject: str, body: str) -> str: - # Your email sending logic here - print(f"Sending email to {to}: {subject}") - return f"sent to {to}" -``` - -## 2. Enqueue Jobs - -```python -from tasks import add, send_email - -# Enqueue returns a JobResult handle -job = add.delay(2, 3) -print(f"Job ID: {job.id}") # Job ID: 01936... -print(f"Status: {job.status}") # Status: pending -``` - -## 3. Start a Worker - -=== "CLI (Recommended)" - - ```bash - taskito worker --app tasks:queue - ``` - -=== "Threading" - - ```python - import threading - from tasks import queue - - t = threading.Thread(target=queue.run_worker, daemon=True) - t.start() - ``` - -=== "Async" - - ```python - import asyncio - from tasks import queue - - async def main(): - await queue.arun_worker() - - asyncio.run(main()) - ``` - -## 4. Get Results - -```python -from tasks import add - -job = add.delay(2, 3) - -# Block until complete (with exponential backoff polling) -result = job.result(timeout=30) -print(result) # 5 - -# Or use async -result = await job.aresult(timeout=30) -``` - -## 5. Monitor - -```python -from tasks import queue - -stats = queue.stats() -print(stats) -# {'pending': 0, 'running': 0, 'completed': 5, 'failed': 0, 'dead': 0, 'cancelled': 0} -``` - -Or use the CLI: - -```bash -# One-shot stats -taskito info --app tasks:queue - -# Live dashboard (refreshes every 2s) -taskito info --app tasks:queue --watch -``` - -### Web Dashboard - -For a full visual interface with job browsing, metrics charts, dead letter management, and queue controls: - -```bash -taskito dashboard --app tasks:queue -``` - -Open `http://localhost:8080` in your browser. 
The dashboard includes 11 pages covering every aspect of your task queue — no extra dependencies needed. - -[:octicons-arrow-right-24: Dashboard guide](../guide/observability/dashboard.md) - -## Next Steps - -- [Tasks](../guide/core/tasks.md) — decorator options, `.delay()` vs `.apply_async()` -- [Workers](../guide/core/workers.md) — CLI flags, graceful shutdown, worker count -- [Retries](../guide/reliability/retries.md) — exponential backoff, dead letter queue -- [Workflows](../guide/core/workflows.md) — chain, group, chord -- [Testing](../guide/operations/testing.md) — run tasks synchronously in tests with `queue.test_mode()` -- [Migrating from Celery](../guide/operations/migration.md) — concept mapping and side-by-side examples diff --git a/docs/guide/core/execution-model.md b/docs/guide/core/execution-model.md deleted file mode 100644 index 99f6a30..0000000 --- a/docs/guide/core/execution-model.md +++ /dev/null @@ -1,112 +0,0 @@ -# Execution Models - -Choose how tasks execute: OS threads (default), child processes (prefork), or native async. - -## Decision Tree - -```mermaid -graph TD - A[What kind of task?] -->|CPU-bound| B[Prefork Pool] - A -->|I/O-bound sync| C[Thread Pool] - A -->|I/O-bound async| D[Native Async] - A -->|Mixed| B -``` - -## Comparison - -| Mode | Concurrency | GIL | Memory per worker | Startup cost | Best for | -|------|------------|-----|-------------------|--------------|----------| -| **Thread Pool** | `workers` OS threads | Shared | ~1 MB | None | I/O-bound sync tasks | -| **Prefork** | `workers` child processes | Independent | ~30 MB | One app import per child | CPU-bound tasks, mixed workloads | -| **Native Async** | `async_concurrency` coroutines | Shared (event loop) | Negligible per coroutine | None | I/O-bound async tasks | - -## Thread Pool (default) - -The default. Runs sync task functions on Rust `std::thread` threads. 
Each worker acquires the Python GIL only during task execution — the scheduler and dispatch logic never touch it. - -```python -# Default — thread pool with auto-detected worker count -queue.run_worker() - -# Explicit worker count -queue.run_worker(workers=8) -``` - -```bash -taskito worker --app myapp:queue --workers 8 -``` - -Because threads share a single GIL, CPU-bound tasks block each other. For Python code that spends most of its time in C extensions (numpy, pandas) that release the GIL, threads still work well. - -## Prefork Pool - -Spawns separate child processes. Each process has its own Python interpreter and GIL, so CPU-bound tasks run in true parallel. - -```python -queue.run_worker(pool="prefork", app="myapp:queue") -``` - -```bash -taskito worker --app myapp:queue --pool prefork -``` - -The `app` parameter tells each child process where to import your `Queue` instance. It must be a module-level name (`"module:attribute"` format) — tasks defined inside functions or closures cannot be imported by child processes. - -For more details, see the [Prefork Pool guide](../execution/prefork.md). - -## Native Async - -`async def` task functions run on a dedicated Python event loop thread. No `asyncio.run()` wrapping, no thread-per-task overhead. - -```python -@queue.task() -async def fetch_prices(symbol: str) -> dict: - async with httpx.AsyncClient() as client: - r = await client.get(f"https://api.example.com/prices/{symbol}") - return r.json() -``` - -Control how many coroutines run at once: - -```python -queue = Queue( - db_path="myapp.db", - async_concurrency=200, # default: 100 -) -``` - -For more details, see the [Native Async Tasks guide](../execution/async-tasks.md). - -## Mixing Sync and Async - -A single queue handles both sync and async tasks. No configuration needed — the worker inspects each task at registration time and routes it to the correct pool. 
- -```python -@queue.task() -def resize_image(path: str) -> str: - # Sync — runs on thread pool - ... - -@queue.task() -async def send_notification(user_id: str) -> None: - # Async — runs on event loop - ... -``` - -Both are enqueued, retried, rate-limited, and monitored identically. - -## workers vs async_concurrency - -These two parameters are independent: - -```python -queue = Queue( - workers=4, # OS threads (or child processes) for sync tasks - async_concurrency=200, # concurrent coroutines for async tasks -) -``` - -`workers=4` means 4 sync tasks can execute at the same time. `async_concurrency=200` means 200 async tasks can be in-flight concurrently on the event loop. A queue with both set runs up to `4 + 200` tasks simultaneously. - -!!! tip - For mostly-async workloads, keep `workers` small (2–4) and raise `async_concurrency`. For mostly-sync I/O workloads, raise `workers`. For CPU-bound workloads, switch to prefork. diff --git a/docs/guide/core/index.md b/docs/guide/core/index.md deleted file mode 100644 index 1c7b4f0..0000000 --- a/docs/guide/core/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Core Concepts - -The building blocks of every taskito application. 
- -| Guide | Description | -|-------|-------------| -| [Tasks](tasks.md) | Define tasks with `@queue.task()`, configure retries, timeouts, and options | -| [Workers](workers.md) | Start workers, control concurrency, graceful shutdown | -| [Execution Models](execution-model.md) | How tasks move from enqueue to completion | -| [Queues & Priority](queues.md) | Named queues, priority levels, and routing | -| [Scheduling](scheduling.md) | Periodic tasks with cron expressions | -| [Workflows](workflows.md) | Chains, groups, and chords for multi-step pipelines | diff --git a/docs/guide/core/queues.md b/docs/guide/core/queues.md deleted file mode 100644 index bf8699e..0000000 --- a/docs/guide/core/queues.md +++ /dev/null @@ -1,127 +0,0 @@ -# Queues & Priority - -## Named Queues - -Route tasks to different queues for isolation and dedicated processing: - -```python -@queue.task(queue="emails") -def send_email(to, subject, body): - ... - -@queue.task(queue="reports") -def generate_report(report_id): - ... - -@queue.task() # Goes to "default" queue -def process_data(data): - ... -``` - -### Worker Queue Subscription - -Workers can listen to specific queues: - -```bash -# Process only email tasks -taskito worker --app myapp:queue --queues emails - -# Process multiple queues -taskito worker --app myapp:queue --queues emails,reports - -# Process all registered queues (default) -taskito worker --app myapp:queue -``` - -Or programmatically: - -```python -queue.run_worker(queues=["emails", "reports"]) -``` - -!!! tip "Use queues to isolate workloads" - Separate I/O-bound tasks (API calls, emails) from CPU-bound tasks (data processing, report generation) into different queues. Run them on different worker processes for optimal resource usage. - -## Priority - -Higher priority jobs are dequeued first within the same queue. Priority is an integer — higher values mean more urgent. 
- -### Default Priority - -Set at task registration: - -```python -@queue.task(priority=10) -def urgent_task(data): - ... - -@queue.task(priority=0) # Default -def normal_task(data): - ... -``` - -### Override at Enqueue Time - -```python -# This specific job is extra urgent -urgent_task.apply_async(args=(data,), priority=100) -``` - -### How It Works - -Jobs are dequeued using a compound index: `(queue, status, priority DESC, scheduled_at ASC)`. This means: - -1. Higher priority jobs go first -2. Among equal priority, older jobs (earlier `scheduled_at`) go first -3. Each queue is processed independently - -```python -# These three jobs are in the same queue -low = task.apply_async(args=(1,), priority=1) -mid = task.apply_async(args=(2,), priority=5) -high = task.apply_async(args=(3,), priority=10) - -# Processing order: high (10), mid (5), low (1) -``` - -## Queue-Level Limits - -Apply a rate limit or concurrency cap to an entire queue, independently of per-task settings. These limits are checked in the scheduler before any per-task limits. - -### Rate limiting a queue - -```python -queue.set_queue_rate_limit("default", "100/m") # Max 100 jobs per minute -queue.set_queue_rate_limit("emails", "20/s") # Max 20 emails per second -``` - -The format is the same as `rate_limit` on `@queue.task()`: `"N/s"`, `"N/m"`, or `"N/h"`. - -### Capping concurrency per queue - -```python -queue.set_queue_concurrency("default", 10) # Max 10 jobs running at once -queue.set_queue_concurrency("reports", 2) # Heavy tasks: max 2 at a time -``` - -`set_queue_concurrency` limits how many jobs from that queue run simultaneously across all workers. - -!!! tip "Queue limits vs task limits" - Queue-level limits apply to all tasks in the queue regardless of their individual settings. Per-task `rate_limit` and `max_concurrent` are checked afterwards and may impose stricter caps. 
Set queue limits to protect shared downstream resources (APIs, databases) and per-task limits to manage individual task capacity. - -Both methods can be called at any point before or after `run_worker()` starts. - -## Default Queue Settings - -Configure defaults at the Queue level: - -```python -queue = Queue( - db_path="myapp.db", - default_priority=0, # Default priority for all tasks - default_retry=3, # Default max retries - default_timeout=300, # Default timeout in seconds -) -``` - -Individual `@queue.task()` decorators override these defaults. diff --git a/docs/guide/core/scheduling.md b/docs/guide/core/scheduling.md deleted file mode 100644 index b6cce12..0000000 --- a/docs/guide/core/scheduling.md +++ /dev/null @@ -1,135 +0,0 @@ -# Scheduling - -taskito supports both **delayed tasks** (run once in the future) and **periodic tasks** (run on a cron schedule). - -## Delayed Tasks - -Schedule a task to run after a delay: - -```python -# Run 1 hour from now -send_email.apply_async( - args=("user@example.com", "Reminder", "Don't forget!"), - delay=3600, # seconds -) - -# Run 30 seconds from now -cleanup.apply_async(args=(), delay=30) -``` - -The job is created immediately with `status=pending` but won't be picked up by a worker until the `scheduled_at` timestamp is reached. 
- -## Periodic Tasks - -Register recurring tasks with cron expressions: - -```python -@queue.periodic(cron="0 */5 * * * *") -def health_check(): - """Run every 5 minutes.""" - ping_services() - -@queue.periodic(cron="0 0 0 * * *") -def daily_cleanup(): - """Run at midnight every day.""" - queue.purge_completed(older_than=86400) - -@queue.periodic(cron="0 0 9 * * 1", args=("weekly",)) -def weekly_report(report_type): - """Run every Monday at 9:00 AM.""" - generate_report(report_type) -``` - -### Cron Expression Format - -taskito uses **6-field cron expressions** (with seconds): - -``` -┌─────────── second (0-59) -│ ┌───────── minute (0-59) -│ │ ┌─────── hour (0-23) -│ │ │ ┌───── day of month (1-31) -│ │ │ │ ┌─── month (1-12) -│ │ │ │ │ ┌─ day of week (0-6, Sun=0) -│ │ │ │ │ │ -* * * * * * -``` - -| Expression | Schedule | -|---|---| -| `*/30 * * * * *` | Every 30 seconds | -| `0 */5 * * * *` | Every 5 minutes | -| `0 0 * * * *` | Every hour | -| `0 30 * * * *` | Every hour at :30 | -| `0 0 */2 * * *` | Every 2 hours | -| `0 0 0 * * *` | Every day at midnight | -| `0 0 9 * * *` | Every day at 9:00 AM | -| `0 0 9 * * 1-5` | Weekdays at 9:00 AM | -| `0 30 9 * * 1-5` | Weekdays at 9:30 AM | -| `0 0 0 1 * *` | First day of every month at midnight | -| `0 0 0 * * 0` | Every Sunday at midnight | -| `0 0 0 1 1 *` | January 1st at midnight (yearly) | - -### Decorator Options - -```python -@queue.periodic( - cron="0 0 * * * *", # Required: cron expression - name="hourly-cleanup", # Optional: explicit name - args=(3600,), # Optional: positional args - kwargs={"force": True}, # Optional: keyword args - queue="maintenance", # Optional: target queue - timezone="America/New_York", # Optional: IANA timezone (default: UTC) -) -def cleanup(older_than, force=False): - ... -``` - -### Timezone Support - -By default, cron expressions are evaluated in UTC. 
Pass any IANA timezone name to schedule in a specific timezone: - -```python -@queue.periodic(cron="0 0 9 * * 1-5", timezone="America/New_York") -def morning_report(): - """Run weekdays at 9:00 AM Eastern.""" - generate_report() - -@queue.periodic(cron="0 0 18 * * *", timezone="Europe/London") -def end_of_day_summary(): - """Run at 6:00 PM London time.""" - send_summary() -``` - -Timezone handling uses `chrono-tz` under the hood. Daylight saving time transitions are handled automatically. The `timezone` parameter defaults to UTC when omitted. - -### How Periodic Tasks Work - -1. Periodic tasks are registered with the Rust scheduler when the worker starts -2. The scheduler checks for due tasks every ~3 seconds -3. When a task is due, a new job is enqueued automatically -4. The task's `next_run` is computed using the cron expression -5. Periodic task state is persisted in the `periodic_tasks` SQLite table - -!!! note - Periodic tasks are only active while a worker is running. If no worker is running, tasks accumulate and the **next due** job is enqueued when a worker starts. - -## Edge Cases - -### Task takes longer than the interval - -If a periodic task's execution time exceeds its cron interval, the next run is **skipped**, not stacked. Periodic tasks use `unique_key` deduplication internally — if the previous run is still pending or running, the new enqueue is silently dropped. - -### Multiple workers running periodic tasks - -Safe by design. Each worker's scheduler checks for due periodic tasks independently, but they all use the same `unique_key` for deduplication. Only one instance of each periodic task runs at a time, regardless of how many workers are active. - -### Timezone handling - -```python -@queue.periodic(cron="0 9 * * *", timezone="America/New_York") -def morning_report(): - ... -``` - -Without `timezone`, cron expressions are evaluated in **UTC**. 
Specify a timezone string (any valid [IANA timezone](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)) to schedule in local time. Daylight saving transitions are handled automatically via `chrono-tz`. diff --git a/docs/guide/core/tasks.md b/docs/guide/core/tasks.md deleted file mode 100644 index 87634c6..0000000 --- a/docs/guide/core/tasks.md +++ /dev/null @@ -1,238 +0,0 @@ -# Tasks - -Tasks are Python functions registered with a queue via the `@queue.task()` decorator. - -## Defining a Task - -```python -from taskito import Queue - -queue = Queue(db_path="myapp.db") - -@queue.task() -def process_data(data: dict) -> str: - # Your logic here - return "done" -``` - -## Decorator Options - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `name` | `str | None` | Auto-generated | Explicit task name. Defaults to `module.qualname`. | -| `max_retries` | `int` | `3` | Max retry attempts before moving to DLQ. | -| `retry_backoff` | `float` | `1.0` | Base delay in seconds for exponential backoff. | -| `retry_delays` | `list[float] | None` | `None` | Per-attempt delays in seconds, overrides backoff. e.g. `[1, 5, 30]`. | -| `max_retry_delay` | `int | None` | `None` | Cap on backoff delay in seconds (default 300 s). | -| `timeout` | `int` | `300` | Max execution time in seconds (hard timeout). | -| `soft_timeout` | `float | None` | `None` | Cooperative time limit in seconds; checked via `current_job.check_timeout()`. | -| `priority` | `int` | `0` | Default priority (higher = more urgent). | -| `rate_limit` | `str | None` | `None` | Rate limit string, e.g. `"100/m"`. | -| `queue` | `str` | `"default"` | Named queue to submit to. | -| `circuit_breaker` | `dict | None` | `None` | Circuit breaker config: `{"threshold": 5, "window": 60, "cooldown": 120}`. | -| `middleware` | `list[TaskMiddleware] | None` | `None` | Per-task middleware, applied in addition to queue-level middleware. 
| -| `expires` | `float | None` | `None` | Seconds until the job expires if not started. | -| `inject` | `list[str] | None` | `None` | Worker resource names to inject as keyword arguments. See [Resource System](../../resources/index.md). | -| `serializer` | `Serializer | None` | `None` | Per-task serializer override. Falls back to the queue-level serializer. | -| `max_concurrent` | `int | None` | `None` | Max concurrent running instances of this task. `None` means no limit. | - -```python -@queue.task( - name="emails.send", - max_retries=5, - retry_backoff=2.0, - max_retry_delay=60, # cap backoff at 60 s - timeout=60, - priority=10, - rate_limit="100/m", - queue="emails", - max_concurrent=10, -) -def send_email(to: str, subject: str, body: str): - ... -``` - -### Custom Retry Delays - -Use `retry_delays` to specify exact wait times between each retry attempt instead of exponential backoff: - -```python -@queue.task(retry_delays=[1, 5, 30]) # 1s after 1st fail, 5s after 2nd, 30s after 3rd -def flaky_api_call(): - ... -``` - -### Soft Timeouts - -A soft timeout raises `SoftTimeoutError` only when the task cooperatively checks: - -```python -from taskito import current_job - -@queue.task(timeout=300, soft_timeout=60) -def long_running(items): - for item in items: - current_job.check_timeout() # raises SoftTimeoutError if soft_timeout exceeded - process(item) -``` - -### Circuit Breakers - -Automatically open a circuit after repeated failures and refuse new executions during the cooldown period: - -```python -@queue.task(circuit_breaker={"threshold": 5, "window": 60, "cooldown": 120}) -def call_external_api(): - ... 
-``` - -- `threshold`: number of failures to trip the breaker -- `window`: rolling time window in seconds -- `cooldown`: seconds the breaker stays open before allowing a retry - -### Per-Task Middleware - -Apply middleware to a specific task only: - -```python -from taskito.contrib.sentry import SentryMiddleware - -@queue.task(middleware=[SentryMiddleware()]) -def important_task(): - ... -``` - -### Job Expiration - -Skip jobs that weren't started within the deadline: - -```python -@queue.task(expires=300) # skip if not started within 5 minutes -def time_sensitive(): - ... -``` - -### Max Retry Delay - -Cap the exponential backoff so waits don't grow unbounded: - -```python -@queue.task(retry_backoff=2.0, max_retries=10, max_retry_delay=120) -def flaky_service(): - ... -# Delays: 2, 4, 8, 16, 32, 64, 120, 120, 120 s (capped at 2 min) -``` - -### Per-Task Concurrency Limit - -Prevent a single task type from consuming all workers: - -```python -@queue.task(max_concurrent=3) -def expensive_render(): - ... -# At most 3 instances of expensive_render run simultaneously across all workers. -``` - -### Per-Task Serializer - -Override the queue-level serializer for a specific task: - -```python -from taskito.serializers import JSONSerializer - -@queue.task(serializer=JSONSerializer()) -def api_event(payload: dict) -> dict: - ... -``` - -The per-task serializer is used for the full round-trip: arguments are serialized with it at enqueue time and deserialized with it on the worker before the task function is called. Both the sync worker and the native async worker honour the per-task serializer, falling back to the queue-level serializer for tasks that have none registered. - -Useful when a task needs a different format (e.g., human-readable JSON for audit tasks) or when the payload is not picklable. - -## Task Naming - -By default, tasks are named using `module.qualname`: - -```python -# In myapp/tasks.py -@queue.task() -def process(): # Named: myapp.tasks.process - ... 
-``` - -You can override with an explicit name: - -```python -@queue.task(name="my-custom-name") -def process(): # Named: my-custom-name - ... -``` - -## Enqueuing Jobs - -### `.delay()` — Quick Submit - -Submit with default options: - -```python -job = send_email.delay("user@example.com", "Hello", "World") -``` - -### `.apply_async()` — Full Control - -Override any option at enqueue time: - -```python -job = send_email.apply_async( - args=("user@example.com", "Hello", "World"), - priority=100, # Override priority - delay=3600, # Run 1 hour from now - queue="urgent-emails", # Override queue - max_retries=10, # Override retries - timeout=120, # Override timeout - unique_key="welcome-user@example.com", # Deduplicate - metadata='{"source": "signup"}', # Attach JSON metadata -) -``` - -### Direct Call - -Calling a task directly runs it synchronously, bypassing the queue: - -```python -result = send_email("user@example.com", "Hello", "World") # Runs immediately -``` - -## Batch Enqueue - -Enqueue many jobs in a single SQLite transaction: - -```python -# Via task.map() -jobs = send_email.map([ - ("alice@example.com", "Hi", "Body"), - ("bob@example.com", "Hi", "Body"), - ("carol@example.com", "Hi", "Body"), -]) - -# Via queue.enqueue_many() -jobs = queue.enqueue_many( - task_name=send_email.name, - args_list=[("alice@example.com",), ("bob@example.com",)], - kwargs_list=[{"subject": "Hi", "body": "Body"}] * 2, -) -``` - -## Metadata - -Attach arbitrary JSON metadata to jobs: - -```python -job = process.apply_async( - args=(data,), - metadata='{"user_id": 42, "source": "api"}', -) -``` - -Metadata is stored with the job and visible in dead letter queue entries. diff --git a/docs/guide/core/workers.md b/docs/guide/core/workers.md deleted file mode 100644 index 3706207..0000000 --- a/docs/guide/core/workers.md +++ /dev/null @@ -1,276 +0,0 @@ -# Workers - -Workers process queued jobs. taskito runs workers as OS threads within a single process, managed by a Rust scheduler. 
- -## Starting a Worker - -=== "CLI (Recommended)" - - ```bash - taskito worker --app myapp.tasks:queue - ``` - - | Flag | Description | - |---|---| - | `--app` | Python path to your Queue instance (`module:attribute`) | - | `--queues` | Comma-separated queue names (default: all registered) | - -=== "Programmatic" - - ```python - # Blocks the current thread - queue.run_worker() - - # With specific queues - queue.run_worker(queues=["emails", "reports"]) - ``` - -=== "Background Thread" - - ```python - import threading - - t = threading.Thread(target=queue.run_worker, daemon=True) - t.start() - - # Your application continues... - ``` - -=== "Async" - - ```python - import asyncio - - async def main(): - # Runs worker in a thread pool, non-blocking - await queue.arun_worker() - - asyncio.run(main()) - ``` - -## Worker Count - -By default, taskito auto-detects the number of CPU cores: - -```python -queue = Queue(db_path="myapp.db", workers=0) # Auto-detect (default) -queue = Queue(db_path="myapp.db", workers=8) # Explicit count -``` - -## Prefork Pool - -The default worker pool uses OS threads, which share a single Python GIL. For CPU-bound tasks, use the prefork pool — it spawns separate child processes, each with its own GIL: - -```python -queue.run_worker(pool="prefork", app="myapp:queue", workers=4) -``` - -```bash -taskito worker --app myapp:queue --pool prefork -``` - -Each child is a full Python interpreter that imports your app, builds the task registry, and executes tasks independently. - -### When to use prefork - -| Workload | Pool | Why | -|----------|------|-----| -| I/O-bound (HTTP, DB) | `thread` (default) | Threads release the GIL during I/O | -| CPU-bound (data processing) | `prefork` | Each process has its own GIL | -| Mixed | `prefork` | CPU tasks benefit; I/O tasks work fine too | - -### How it works - -```mermaid -graph LR - S["Scheduler"] -->|"Job JSON"| P["PreforkPool"] - P -->|stdin| C1["Child 1
(own GIL)"] - P -->|stdin| C2["Child 2
(own GIL)"] - P -->|stdin| CN["Child N
(own GIL)"] - - C1 -->|stdout| R1["Reader 1"] - C2 -->|stdout| R2["Reader 2"] - CN -->|stdout| RN["Reader N"] - - R1 -->|JobResult| RCH["Result Channel"] - R2 -->|JobResult| RCH - RN -->|JobResult| RCH - - RCH --> ML["Result Handler"] -``` - -Jobs are serialized as JSON Lines over stdin pipes. Each child reads a job, executes the task wrapper (with middleware, resources, proxies), and writes the result as JSON to stdout. The parent's reader threads parse results and feed them to the scheduler. - -### Configuration - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `pool` | `str` | `"thread"` | Worker pool type: `"thread"` or `"prefork"` | -| `app` | `str` | — | Import path to Queue (required for prefork) | -| `workers` | `int` | CPU count | Number of child processes | - -!!! note - The `app` parameter must be an importable path like `"myapp.tasks:queue"`. Each child process imports this path to build its task registry. Tasks defined inside functions or closures cannot be imported by children. - -## Worker Specialization - -Tag workers to route jobs to specific machines or capabilities: - -```python -# Start a worker that only processes jobs tagged for GPU or heavy workloads -queue.run_worker(tags=["gpu", "heavy"]) -``` - -Jobs submitted to a queue with `tags` are only picked up by workers that have all the required tags. Workers without tags process untagged jobs. - -```bash -# CLI equivalent -taskito worker --app myapp:queue --tags gpu,heavy -``` - -!!! note - Workers are **OS threads**, not processes. Each worker acquires the Python GIL only during task execution, so the scheduler and dispatch logic run without GIL contention. - -## Graceful Shutdown - -taskito supports graceful shutdown via `Ctrl+C`: - -1. **First `Ctrl+C`**: Stops accepting new jobs, waits for in-flight tasks to complete (up to `drain_timeout` seconds) -2. 
**Second `Ctrl+C`**: Force-kills immediately - -Configure the drain timeout when constructing the queue: - -```python -queue = Queue(db_path="myapp.db", drain_timeout=60) # wait up to 60 seconds -``` - -The default `drain_timeout` is 30 seconds. - -``` -$ taskito worker --app myapp:queue -[taskito] Starting worker... -[taskito] Registered tasks: 3 -[taskito] Queues: default, emails -^C -[taskito] Shutting down gracefully (waiting for in-flight jobs)... -[taskito] Worker stopped. -``` - -### Programmatic Shutdown - -```python -# From another thread or signal handler -queue._inner.request_shutdown() -``` - -## Worker Discovery - -Inspect live workers across all machines: - -```python -for w in queue.workers(): - print(f"{w['worker_id']} on {w['hostname']} (pid {w['pid']}, {w['status']})") -``` - -Each worker entry includes: - -| Field | Type | Description | -|-------|------|-------------| -| `worker_id` | `str` | Unique ID (UUIDv7) | -| `hostname` | `str` | OS hostname | -| `pid` | `int` | Process ID | -| `status` | `str` | `"active"`, `"draining"`, or deleted on exit | -| `pool_type` | `str` | `"thread"`, `"prefork"`, or `"native-async"` | -| `started_at` | `int` | Registration timestamp (ms) | -| `queues` | `str` | Comma-separated queue names | -| `threads` | `int` | Worker thread/process count | -| `last_heartbeat` | `int` | Last heartbeat timestamp (ms) | - -### Status Lifecycle - -```mermaid -stateDiagram-v2 - [*] --> active: register - active --> draining: shutdown signal - draining --> [*]: clean exit - active --> [*]: crash (reaped after 30s) -``` - -### Lifecycle Events - -Subscribe to worker lifecycle changes: - -```python -from taskito import EventType - -@queue.on_event(EventType.WORKER_ONLINE) -def on_online(event_type, payload): - print(f"Worker {payload['worker_id']} joined") - -@queue.on_event(EventType.WORKER_OFFLINE) -def on_offline(event_type, payload): - print(f"Worker {payload['worker_id']} went away") - 
-@queue.on_event(EventType.WORKER_UNHEALTHY) -def on_unhealthy(event_type, payload): - print(f"Worker {payload['worker_id']} unhealthy: {payload['resources']}") -``` - -| Event | Fires when | Payload | -|-------|-----------|---------| -| `WORKER_ONLINE` | Worker registered in storage | `worker_id`, `queues`, `pool` | -| `WORKER_OFFLINE` | Dead worker reaped (no heartbeat for 30s) | `worker_id` | -| `WORKER_UNHEALTHY` | Resource health transitions to unhealthy | `worker_id`, `resources` | - -## Async Tasks - -`async def` task functions are dispatched natively — they run on a dedicated event loop thread, not wrapped in `asyncio.run()` on a worker thread. - -```python -@queue.task() -async def fetch_data(url: str) -> dict: - import httpx - async with httpx.AsyncClient() as client: - r = await client.get(url) - return r.json() -``` - -Control the max number of async tasks running concurrently: - -```python -queue = Queue( - db_path="myapp.db", - workers=4, # OS threads for sync tasks - async_concurrency=200, # concurrent async tasks (default: 100) -) -``` - -See [Native Async Tasks](../execution/async-tasks.md) for the full guide. - -## How Workers Work - -```mermaid -graph LR - S["Scheduler
(Tokio async)"] -->|"sync job"| CH["Bounded Channel"] - S -->|"async job"| AP["Native Async Pool"] - - CH --> W1["Worker 1"] - CH --> W2["Worker 2"] - CH --> WN["Worker N"] - - AP --> EL["Event Loop Thread"] - - W1 -->|Result| RCH["Result Channel"] - W2 -->|Result| RCH - WN -->|Result| RCH - EL -->|Result| RCH - - RCH --> ML["Result Handler"] - ML -->|"complete / retry / DLQ"| DB[("SQLite")] -``` - -1. The **scheduler** runs in a dedicated Tokio async thread, polling SQLite for ready jobs every 50ms -2. Sync jobs are sent to the **worker thread pool** via a bounded crossbeam channel; each worker acquires the GIL and runs the Python function -3. Async jobs are dispatched to the **native async pool** and scheduled on a dedicated Python event loop -4. Results from both pools flow back through a **result channel** to the main loop -5. The main loop updates job status in SQLite (complete, retry, or DLQ) diff --git a/docs/guide/core/workflows.md b/docs/guide/core/workflows.md deleted file mode 100644 index 5cf3033..0000000 --- a/docs/guide/core/workflows.md +++ /dev/null @@ -1,6 +0,0 @@ -# Workflows - -taskito provides two workflow models. See the dedicated **[Workflows section](../../workflows/index.md)** for full documentation. - -- **[DAG Workflows](../../workflows/building.md)** — Multi-step pipelines as directed acyclic graphs with fan-out, conditions, gates, sub-workflows, incremental caching, and visualization. -- **[Canvas Primitives](../../workflows/canvas.md)** — Lightweight chain, group, and chord composition for simple pipelines. diff --git a/docs/guide/execution/async-tasks.md b/docs/guide/execution/async-tasks.md deleted file mode 100644 index 096f8f0..0000000 --- a/docs/guide/execution/async-tasks.md +++ /dev/null @@ -1,180 +0,0 @@ -# Native Async Tasks - -taskito runs async task functions natively — no wrapping in `asyncio.run()`, no thread-pool bridging. Define a coroutine and the worker dispatches it directly onto a dedicated event loop. 
- -```python -from taskito import Queue - -queue = Queue(db_path="myapp.db") - -@queue.task() -async def fetch_data(url: str) -> dict: - import httpx - async with httpx.AsyncClient() as client: - response = await client.get(url) - return response.json() -``` - -Enqueue it the same way as a sync task: - -```python -job = fetch_data.delay("https://api.example.com/data") -result = job.result(timeout=30) -``` - -## How It Works - -When a task decorated with `@queue.task()` is an `async def`, taskito marks it for native dispatch. At worker startup, a `NativeAsyncPool` is initialized alongside the standard thread pool. When the scheduler dequeues an async job it routes it to the native pool instead of a sync worker thread. - -```mermaid -graph LR - S["Scheduler
(Tokio)"] -->|"sync job"| TP["Thread Pool
spawn_blocking"] - S -->|"async job"| AP["Native Async Pool
AsyncTaskExecutor"] - AP --> EL["Dedicated Event Loop
(daemon thread)"] - EL -->|"result"| RS["PyResultSender"] - RS -->|"report_success / report_failure"| S -``` - -The dedicated event loop lives in its own Python daemon thread. All async tasks for a single worker share that loop; a semaphore caps simultaneous execution. - -## Concurrency Limit - -Control how many async tasks run concurrently on the event loop: - -```python -queue = Queue( - db_path="myapp.db", - async_concurrency=50, # default: 100 -) -``` - -This is independent of the `workers` parameter (sync thread count). A typical mixed setup might be: - -```python -queue = Queue( - db_path="myapp.db", - workers=4, # 4 OS threads for sync tasks - async_concurrency=200, # up to 200 concurrent async tasks -) -``` - -## Job Context - -`current_job` works inside async tasks — it reads from `contextvars` rather than `threading.local`, so it's safe across `await` boundaries: - -```python -from taskito.context import current_job - -@queue.task() -async def process(item_id: str) -> str: - current_job.log(f"Starting {item_id}") - await asyncio.sleep(1) - current_job.update_progress(50) - await asyncio.sleep(1) - current_job.update_progress(100) - return f"done:{item_id}" -``` - -Each async task gets an isolated `ContextVar` token. Concurrent tasks on the same loop do not see each other's contexts. - -## Resource Injection - -Async tasks support `inject=` and `Inject["name"]` annotations the same way as sync tasks: - -```python -@queue.worker_resource("http_client") -def make_http_client(): - import httpx - return httpx.AsyncClient() - -@queue.task(inject=["http_client"]) -async def fetch(url: str, http_client=None) -> dict: - response = await http_client.get(url) - return response.json() -``` - -!!! note - Resource initialization still runs on worker startup in the main thread. The resource instance is then passed into the async task at dispatch time. - -## Middleware - -Middleware `before()` and `after()` hooks run for async tasks the same as for sync tasks. 
They are called from within the async execution context, so `current_job` is available: - -```python -class LoggingMiddleware(TaskMiddleware): - def before(self, ctx): - current_job.log("task started") - - def after(self, ctx, result, error): - current_job.log("task finished") -``` - -## Retry Filtering - -`retry_on` and `dont_retry_on` on `@queue.task()` apply to async tasks: - -```python -@queue.task( - max_retries=5, - retry_on=[httpx.TimeoutException], - dont_retry_on=[httpx.HTTPStatusError], -) -async def fetch_with_retry(url: str) -> dict: - ... -``` - -## Mixing Sync and Async Tasks - -A single queue handles both sync and async tasks transparently. The worker dispatches each job to the correct pool based on the `_taskito_is_async` attribute set at registration time: - -```python -@queue.task() -def sync_task(x: int) -> int: - return x * 2 - -@queue.task() -async def async_task(x: int) -> int: - await asyncio.sleep(0.1) - return x * 2 -``` - -Both are enqueued, retried, rate-limited, and monitored identically. - -## Feature Flag - -The native async dispatch path is compiled into the `taskito-async` Rust crate and enabled via the `native-async` feature flag. Pre-built wheels on PyPI include it by default. If you build from source: - -```bash -maturin develop --features native-async -``` - -Without the feature, async tasks are still enqueued and processed — they fall back to running via `asyncio.run()` on a worker thread. 
- -## Async Queue Methods - -All inspection methods have async variants that run in a thread pool: - -```python -# Sync -stats = queue.stats() -dead = queue.dead_letters() -new_id = queue.retry_dead(dead_id) -cancelled = queue.cancel_job(job_id) -result = job.result(timeout=30) - -# Async equivalents -stats = await queue.astats() -dead = await queue.adead_letters() -new_id = await queue.aretry_dead(dead_id) -cancelled = await queue.acancel_job(job_id) -result = await job.aresult(timeout=30) -``` - -### Async Worker - -```python -async def main(): - await queue.arun_worker(queues=["default"]) - -asyncio.run(main()) -``` diff --git a/docs/guide/execution/batch-enqueue.md b/docs/guide/execution/batch-enqueue.md deleted file mode 100644 index 8d1da5b..0000000 --- a/docs/guide/execution/batch-enqueue.md +++ /dev/null @@ -1,39 +0,0 @@ -# Batch Enqueue - -Insert many jobs in a single SQLite transaction for high throughput. - -## `task.map()` - -```python -@queue.task() -def process(item_id): - return fetch_and_process(item_id) - -# Enqueue 1000 jobs in one transaction -jobs = process.map([(i,) for i in range(1000)]) -``` - -## `queue.enqueue_many()` - -```python -# Basic batch — same options for all jobs -jobs = queue.enqueue_many( - task_name="myapp.process", - args_list=[(i,) for i in range(1000)], - priority=5, - queue="processing", -) - -# Full parity with enqueue() — per-job overrides -jobs = queue.enqueue_many( - task_name="myapp.process", - args_list=[(i,) for i in range(100)], - delay=5.0, # uniform 5s delay for all - unique_keys=[f"item-{i}" for i in range(100)], # per-job dedup - metadata='{"source": "batch"}', # uniform metadata - expires=3600.0, # expire after 1 hour - result_ttl=600, # keep results for 10 minutes -) -``` - -Per-job lists (`delay_list`, `metadata_list`, `expires_list`, `result_ttl_list`) override uniform values when both are provided. See the [API reference](../../api/queue/index.md#queueenqueue_many) for the full parameter list. 
diff --git a/docs/guide/execution/dependencies.md b/docs/guide/execution/dependencies.md deleted file mode 100644 index 69fb8bb..0000000 --- a/docs/guide/execution/dependencies.md +++ /dev/null @@ -1,245 +0,0 @@ -# Task Dependencies - -taskito supports declaring dependencies between jobs, allowing you to build DAG-style workflows where a job only runs after its upstream dependencies have completed successfully. - -## Basic Usage - -Pass `depends_on` when enqueuing a job to declare that it should wait for one or more other jobs to finish: - -=== "Single dependency" - - ```python - job_a = extract.delay(url) - - # job_b won't start until job_a completes successfully - job_b = transform.apply_async( - args=(job_a.id,), - depends_on=job_a.id, - ) - ``` - -=== "Multiple dependencies" - - ```python - job_a = fetch.delay("https://api1.example.com") - job_b = fetch.delay("https://api2.example.com") - - # job_c waits for both job_a and job_b - job_c = merge.apply_async( - args=(), - depends_on=[job_a.id, job_b.id], - ) - ``` - -The `depends_on` parameter accepts: - -| Value | Description | -|---|---| -| `str` | A single job ID | -| `list[str]` | Multiple job IDs (all must complete) | -| `None` (default) | No dependencies | - -!!! tip - You can also use `depends_on` with `queue.enqueue()` directly: - - ```python - job_id = queue.enqueue( - task_name="myapp.tasks.merge", - args=(), - depends_on=[job_a.id, job_b.id], - ) - ``` - -## How It Works - -1. When a job with `depends_on` is enqueued, it enters a **waiting** state -2. The scheduler periodically checks waiting jobs to see if all dependencies have completed -3. Once every dependency has `status=completed`, the job transitions to `pending` and becomes eligible for dispatch -4. 
If any dependency fails, dies, or is cancelled, the dependent job is **cascade cancelled** - -```mermaid -flowchart TD - E["Enqueue with depends_on"] --> W["Status: Waiting"] - W --> CHECK{"All deps completed?"} - CHECK -->|Yes| P["Status: Pending"] - CHECK -->|"Any dep failed/dead/cancelled"| CC["Cascade Cancel"] - P --> R["Dispatched to worker"] - CC --> DONE["Status: Cancelled
reason: dependency_failed"] -``` - -## Cascade Cancel - -When a dependency fails (exhausts retries and moves to DLQ), dies, or is cancelled, all downstream dependents are automatically cancelled. This propagates transitively through the entire dependency graph: - -```python -job_a = step_one.delay() -job_b = step_two.apply_async(args=(), depends_on=job_a.id) -job_c = step_three.apply_async(args=(), depends_on=job_b.id) - -# If job_a fails permanently: -# - job_b is cascade cancelled -# - job_c is cascade cancelled (transitive) -``` - -!!! warning "Cascade is immediate" - As soon as a dependency enters a terminal failure state (`dead` or `cancelled`), all downstream dependents are cancelled in the same scheduler tick. There is no grace period. - -## Inspecting Dependencies - -### `job.dependencies` - -Returns the list of job IDs this job depends on: - -```python -job_c = merge.apply_async( - args=(), - depends_on=[job_a.id, job_b.id], -) - -fetched = queue.get_job(job_c.id) -print(fetched.dependencies) # ['01H5K6X...', '01H5K7Y...'] -``` - -### `job.dependents` - -Returns the list of job IDs that depend on this job: - -```python -fetched_a = queue.get_job(job_a.id) -print(fetched_a.dependents) # ['01H5K8Z...'] (job_c's ID) -``` - -## Error Handling - -### Missing Dependencies - -If you reference a job ID that does not exist, enqueue raises a `ValueError`: - -```python -try: - job = transform.apply_async( - args=(), - depends_on="nonexistent-job-id", - ) -except ValueError as e: - print(e) # "Dependency job 'nonexistent-job-id' not found" -``` - -### Already-Dead Dependencies - -If a dependency is already in a terminal failure state (`dead` or `cancelled`) at enqueue time, the dependent job is immediately cancelled: - -```python -dead_job = queue.get_job(some_dead_id) -assert dead_job.status == "dead" - -# This job is immediately cancelled — it will never run -job = transform.apply_async( - args=(), - depends_on=dead_job.id, -) - -fetched = queue.get_job(job.id) 
-print(fetched.status) # "cancelled" -``` - -## DAG Workflow Examples - -### Diamond Pattern - -A classic diamond dependency graph where two branches converge: - -```mermaid -graph TD - A["extract.delay()"] --> B["transform_a.apply_async(depends_on=A)"] - A --> C["transform_b.apply_async(depends_on=A)"] - B --> D["load.apply_async(depends_on=[B, C])"] - C --> D -``` - -```python -# Extract -job_a = extract.delay(source_url) - -# Two parallel transforms, each depending on extract -job_b = transform_a.apply_async( - args=("schema_a",), - depends_on=job_a.id, -) -job_c = transform_b.apply_async( - args=("schema_b",), - depends_on=job_a.id, -) - -# Load waits for both transforms -job_d = load.apply_async( - args=(), - depends_on=[job_b.id, job_c.id], -) -``` - -### Multi-Stage Pipeline - -A sequential pipeline with fan-out at one stage: - -```python -# Stage 1: Download -download_jobs = [ - download.delay(url) for url in urls -] - -# Stage 2: Process each download (each depends on its own download) -process_jobs = [ - process.apply_async( - args=(url,), - depends_on=dl.id, - ) - for dl, url in zip(download_jobs, urls) -] - -# Stage 3: Aggregate all results -aggregate_job = aggregate.apply_async( - args=(), - depends_on=[j.id for j in process_jobs], -) -``` - -### Conditional Branching - -Combine dependencies with metadata to build conditional workflows: - -```python -job_a = validate.delay(data) - -# Both branches depend on validation -job_success = on_valid.apply_async( - args=(data,), - depends_on=job_a.id, - metadata='{"branch": "success"}', -) - -# Use a separate task to handle the "validation failed" path -# by inspecting job_a's result in the task body -``` - -!!! note "Dependencies vs. Workflows" - `depends_on` is a lower-level primitive than [chains, groups, and chords](../core/workflows.md). Use `depends_on` when you need fine-grained control over a custom DAG. Use the workflow primitives when your pipeline fits a standard pattern. 
- -## Combining with Other Features - -Dependencies compose naturally with other taskito features: - -```python -job = transform.apply_async( - args=(data,), - depends_on=job_a.id, - priority=10, # High priority once unblocked - queue="processing", # Target queue - max_retries=5, # Retry policy - delay=60, # Additional delay after deps resolve - unique_key="transform-daily", # Deduplication -) -``` - -!!! info "Delay + depends_on" - When both `delay` and `depends_on` are set, the job first waits for all dependencies to complete, then waits for the additional delay before becoming eligible for dispatch. diff --git a/docs/guide/execution/index.md b/docs/guide/execution/index.md deleted file mode 100644 index 05de40e..0000000 --- a/docs/guide/execution/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Advanced Execution - -Patterns for scaling and optimizing task execution. - -| Guide | Description | -|-------|-------------| -| [Prefork Pool](prefork.md) | Process-based isolation for CPU-bound or memory-leaking tasks | -| [Native Async Tasks](async-tasks.md) | `async def` tasks with native event loop integration | -| [Result Streaming](streaming.md) | Stream partial results and progress updates in real time | -| [Dependencies](dependencies.md) | DAG-based job dependencies — run tasks in order | -| [Batch Enqueue](batch-enqueue.md) | Enqueue many jobs efficiently with `task.map()` and `enqueue_many()` | -| [Unique Tasks](unique-tasks.md) | Deduplicate active jobs with unique keys | diff --git a/docs/guide/execution/prefork.md b/docs/guide/execution/prefork.md deleted file mode 100644 index a4c1ebc..0000000 --- a/docs/guide/execution/prefork.md +++ /dev/null @@ -1,102 +0,0 @@ -# Prefork Worker Pool - -Spawn separate child processes for true CPU parallelism. Each child has its own Python GIL, so CPU-bound tasks don't block each other. 
- -## When to Use - -| Workload | Recommended pool | Why | -|----------|-----------------|-----| -| I/O-bound (HTTP calls, DB queries) | `thread` (default) | Threads release the GIL during I/O | -| CPU-bound (data processing, ML) | `prefork` | Each process owns its GIL | -| Mixed workloads | `prefork` | CPU tasks benefit; I/O tasks work fine too | - -## Getting Started - -```python -queue = Queue(db_path="myapp.db", workers=4) -queue.run_worker(pool="prefork", app="myapp:queue") -``` - -```bash -taskito worker --app myapp:queue --pool prefork -``` - -The `app` parameter tells each child process how to import your Queue instance. It must be an importable path in `module:attribute` format. - -## How It Works - -```mermaid -graph LR - S["Scheduler
(Rust)"] -->|"Job JSON"| P["PreforkPool"] - P -->|stdin| C1["Child 1
(own GIL)"] - P -->|stdin| C2["Child 2
(own GIL)"] - P -->|stdin| CN["Child N
(own GIL)"] - - C1 -->|stdout| R1["Reader 1"] - C2 -->|stdout| R2["Reader 2"] - CN -->|stdout| RN["Reader N"] - - R1 -->|JobResult| RCH["Result Channel"] - R2 -->|JobResult| RCH - RN -->|JobResult| RCH - - RCH --> ML["Result Handler"] - ML -->|"complete / retry / DLQ"| DB[("Storage")] -``` - -1. The Rust scheduler dequeues jobs from storage -2. `PreforkPool` serializes each job as JSON and writes it to the least-loaded child's stdin pipe -3. Each child deserializes the job, executes the task wrapper (with middleware, resources, proxies), and writes the result as JSON to stdout -4. Reader threads parse results and feed them back to the scheduler -5. The scheduler updates job status in storage - -## Configuration - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `pool` | `str` | `"thread"` | Set to `"prefork"` to enable | -| `app` | `str` | — | Import path to Queue instance (required) | -| `workers` | `int` | CPU count | Number of child processes | - -## Migrating from Thread Pool - -The thread pool is the default. To switch to prefork: - -=== "Before (thread pool)" - - ```python - queue.run_worker() - ``` - -=== "After (prefork)" - - ```python - queue.run_worker(pool="prefork", app="myapp:queue") - ``` - -Everything else stays the same — task decorators, middleware, resources, events, and the scheduler all work identically. The only difference is where task code executes (child process vs. worker thread). - -## Debugging Child Processes - -Children inherit the parent's stderr, so `print()` statements and Python logging appear in the parent's terminal. 
- -Enable debug logging to see child lifecycle events: - -```python -import logging -logging.getLogger("taskito.prefork.child").setLevel(logging.DEBUG) -``` - -Log output includes: -- `child ready (app=..., pid=...)` — child initialized and waiting for jobs -- `executing task_name[job_id]` — job received (DEBUG level) -- `task task_name[job_id] failed: ...` — task error -- `shutdown received` — clean shutdown -- `resource teardown error` — resource cleanup failure - -## Limitations - -- **Tasks must be importable**: Each child process imports the app module independently. Tasks defined inside functions or closures cannot be imported. -- **No shared state**: Children are separate processes. In-memory caches, globals, or module-level state are not shared between children. -- **Startup cost**: Each child imports the full app module on start. This happens once per child, not per job. -- **Resource re-initialization**: Worker resources (DB connections, etc.) are initialized independently in each child. diff --git a/docs/guide/execution/streaming.md b/docs/guide/execution/streaming.md deleted file mode 100644 index f1ed540..0000000 --- a/docs/guide/execution/streaming.md +++ /dev/null @@ -1,126 +0,0 @@ -# Result Streaming - -Stream intermediate results from long-running tasks. Instead of waiting for the final result, consumers receive partial data as it becomes available. - -## Publishing Partial Results - -Inside a task, call `current_job.publish(data)` to emit a partial result: - -```python -from taskito import current_job - -@queue.task() -def process_batch(items): - results = [] - for i, item in enumerate(items): - result = process(item) - results.append(result) - current_job.publish({"item_id": item.id, "status": "ok", "result": result}) - current_job.update_progress(int((i + 1) / len(items) * 100)) - return {"total": len(items), "results": results} -``` - -`publish()` accepts any JSON-serializable value — dicts, lists, strings, numbers. 
- -## Consuming with `stream()` - -The caller iterates over partial results as they arrive: - -```python -job = process_batch.delay(items) - -for partial in job.stream(timeout=120, poll_interval=0.5): - print(f"Processed item {partial['item_id']}: {partial['status']}") - -# After stream ends, get the final result -final = job.result(timeout=5) -``` - -`stream()` polls the database for new partial results, yields each one, and stops when the job reaches a terminal state (complete, failed, dead, cancelled). - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `timeout` | `float` | `60.0` | Maximum seconds to wait | -| `poll_interval` | `float` | `0.5` | Seconds between polls | - -## Async Streaming - -Use `astream()` in async contexts: - -```python -async for partial in job.astream(timeout=120, poll_interval=0.5): - print(f"Got: {partial}") -``` - -## FastAPI SSE - -The built-in FastAPI progress endpoint supports streaming partial results: - -``` -GET /jobs/{job_id}/progress?include_results=true -``` - -Events include partial results alongside progress: - -``` -data: {"status": "running", "progress": 25} -data: {"status": "running", "progress": 25, "partial_result": {"item_id": 1, "status": "ok"}} -data: {"status": "running", "progress": 50} -data: {"status": "complete", "progress": 100} -``` - -## Patterns - -### ETL Pipeline - -```python -@queue.task() -def etl_pipeline(source_tables): - for table in source_tables: - rows = extract(table) - transformed = transform(rows) - load(transformed) - current_job.publish({ - "table": table, - "rows_processed": len(rows), - "status": "loaded", - }) - return {"tables": len(source_tables)} -``` - -### ML Training - -```python -@queue.task() -def train_model(config): - model = build_model(config) - for epoch in range(config["epochs"]): - loss = train_epoch(model) - current_job.publish({ - "epoch": epoch + 1, - "loss": float(loss), - "lr": optimizer.param_groups[0]["lr"], - }) - 
save_model(model) - return {"final_loss": float(loss)} -``` - -### Batch Processing with Error Tracking - -```python -@queue.task() -def process_orders(order_ids): - for oid in order_ids: - try: - process_order(oid) - current_job.publish({"order_id": oid, "status": "ok"}) - except Exception as e: - current_job.publish({"order_id": oid, "status": "error", "error": str(e)}) - return {"total": len(order_ids)} -``` - -## How It Works - -`publish()` stores data as a task log entry with `level="result"`, reusing the existing `task_logs` table. No new tables or Rust changes are needed. - -`stream()` polls `get_task_logs(job_id)`, filters for `level == "result"`, tracks the last-seen timestamp, and yields only new entries. It stops when the job's status becomes terminal. diff --git a/docs/guide/execution/unique-tasks.md b/docs/guide/execution/unique-tasks.md deleted file mode 100644 index a0c9ea2..0000000 --- a/docs/guide/execution/unique-tasks.md +++ /dev/null @@ -1,14 +0,0 @@ -# Unique Tasks - -Deduplicate active jobs by key — if a job with the same `unique_key` is already pending or running, the existing job is returned instead of creating a new one: - -```python -job1 = process.apply_async(args=("report",), unique_key="daily-report") -job2 = process.apply_async(args=("report",), unique_key="daily-report") -assert job1.id == job2.id # Same job, not duplicated -``` - -Once the original job completes (or fails to DLQ), the key is released and a new job can be created with the same key. - -!!! info "Implementation" - Deduplication uses a partial unique index: `CREATE UNIQUE INDEX ... ON jobs(unique_key) WHERE unique_key IS NOT NULL AND status IN (0, 1)`. Only pending and running jobs participate. The check-and-insert is atomic (transaction-protected), so concurrent calls with the same `unique_key` are handled gracefully without race conditions. 
diff --git a/docs/guide/extensibility/events-webhooks.md b/docs/guide/extensibility/events-webhooks.md deleted file mode 100644 index a68225b..0000000 --- a/docs/guide/extensibility/events-webhooks.md +++ /dev/null @@ -1,259 +0,0 @@ -# Events & Webhooks - -taskito includes an in-process event bus and webhook delivery system for reacting to job lifecycle events. - -## Event Types - -The `EventType` enum defines all available lifecycle events: - -| Event | Fired when | Payload fields | -|-------|------------|----------------| -| `JOB_ENQUEUED` | A job is added to the queue | `job_id`, `task_name`, `queue` | -| `JOB_COMPLETED` | A job finishes successfully | `job_id`, `task_name`, `queue` | -| `JOB_FAILED` | A job raises an exception (before retry) | `job_id`, `task_name`, `queue`, `error` | -| `JOB_RETRYING` | A failed job will be retried | `job_id`, `task_name`, `error`, `retry_count` | -| `JOB_DEAD` | A job exhausts all retries and enters the DLQ | `job_id`, `task_name`, `error` | -| `JOB_CANCELLED` | A job is cancelled | `job_id`, `task_name` | -| `WORKER_STARTED` | A worker process/thread comes online | `worker_id`, `hostname` | -| `WORKER_STOPPED` | A worker process/thread shuts down | `worker_id`, `hostname` | -| `WORKER_ONLINE` | Worker registered in storage (visible to fleet) | `worker_id`, `queues`, `pool` | -| `WORKER_OFFLINE` | Dead worker reaped (no heartbeat for 30s) | `worker_id` | -| `WORKER_UNHEALTHY` | Resource health transitions to unhealthy | `worker_id`, `resources` | -| `QUEUE_PAUSED` | A named queue is paused | `queue` | -| `QUEUE_RESUMED` | A paused queue is resumed | `queue` | - -`JOB_RETRYING`, `JOB_DEAD`, and `JOB_CANCELLED` are emitted by the Rust result handler immediately after the scheduler records the outcome. Middleware hooks (`on_retry`, `on_dead_letter`, `on_cancel`) are called in the same result-handling pass, after the event fires. 
- -`QUEUE_PAUSED` and `QUEUE_RESUMED` are emitted synchronously by `queue.pause()` and `queue.resume()` after the queue state is written to storage. - -## Registering Listeners - -Use `queue.on_event()` to subscribe a callback to a specific event type: - -```python -from taskito import Queue -from taskito.events import EventType - -queue = Queue(db_path="tasks.db") - -def on_failure(event_type: EventType, payload: dict): - print(f"Job {payload['job_id']} failed: {payload.get('error')}") - -queue.on_event(EventType.JOB_FAILED, on_failure) -``` - -### Callback Signature - -All callbacks receive two arguments: - -- `event_type` (`EventType`) — the event that occurred -- `payload` (`dict`) — event details including `job_id`, `task_name`, `queue`, and event-specific fields - -### Async Delivery - -Callbacks are dispatched asynchronously in a `ThreadPoolExecutor`. The thread pool size defaults to 4 and can be configured via `Queue(event_workers=N)`. This means: - -- Callbacks never block the worker -- Exceptions in callbacks are logged but do not affect job processing -- Callbacks may execute slightly after the event occurs - -## Webhooks - -For external systems, register webhook URLs to receive HTTP POST requests on job events. - -### Registering a Webhook - -```python -queue.add_webhook( - url="https://example.com/hooks/taskito", - events=[EventType.JOB_FAILED, EventType.JOB_DEAD], - headers={"Authorization": "Bearer mytoken"}, - secret="my-signing-secret", -) -``` - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `url` | `str` | — | URL to POST event payloads to (must be `http://` or `https://`) | -| `events` | `list[EventType] | None` | `None` | Event types to subscribe to. 
`None` means all events | -| `headers` | `dict[str, str] | None` | `None` | Extra HTTP headers to include in requests | -| `secret` | `str | None` | `None` | HMAC-SHA256 signing secret | -| `max_retries` | `int` | `3` | Maximum delivery attempts | -| `timeout` | `float` | `10.0` | HTTP request timeout in seconds | -| `retry_backoff` | `float` | `2.0` | Base for exponential backoff between retries | - -### HMAC Signing - -When a `secret` is provided, each webhook request includes an `X-Taskito-Signature` header: - -``` -X-Taskito-Signature: sha256= -``` - -The signature is computed over the JSON request body using HMAC-SHA256. Verify it on the receiving end: - -```python -import hashlib -import hmac - -def verify_signature(body: bytes, signature: str, secret: str) -> bool: - expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest() - return hmac.compare_digest(f"sha256={expected}", signature) -``` - -### Retry Behavior - -Failed webhook deliveries are retried with exponential backoff. The number of attempts, request timeout, and backoff base are configurable per webhook via `max_retries`, `timeout`, and `retry_backoff`. With the defaults (`max_retries=3`, `retry_backoff=2.0`): - -| Attempt | Delay before next retry | -|---------|------------------------| -| 1st retry | 1 second (`2.0 ** 0`) | -| 2nd retry | 2 seconds (`2.0 ** 1`) | -| 3rd retry | — (final) | - -4xx responses are not retried. If all attempts fail, a warning is logged and the event is dropped. 
- -### Event Filtering - -Subscribe to specific events or all events: - -```python -# Only failure events -queue.add_webhook( - url="https://slack.example.com/webhook", - events=[EventType.JOB_FAILED, EventType.JOB_DEAD], -) - -# All events -queue.add_webhook(url="https://monitoring.example.com/events") -``` - -## Examples - -### Slack Notification on Job Failure - -```python -import requests -from taskito.events import EventType - -def notify_slack(event_type: EventType, payload: dict): - requests.post( - "https://hooks.slack.com/services/T.../B.../xxx", - json={ - "text": f":x: Task `{payload['task_name']}` failed\n" - f"Job ID: `{payload['job_id']}`\n" - f"Error: {payload.get('error', 'unknown')}" - }, - ) - -queue.on_event(EventType.JOB_FAILED, notify_slack) -queue.on_event(EventType.JOB_DEAD, notify_slack) -``` - -### Webhook to External Monitoring - -```python -queue.add_webhook( - url="https://monitoring.example.com/api/taskito-events", - events=[EventType.JOB_COMPLETED, EventType.JOB_FAILED, EventType.JOB_DEAD], - secret="whsec_abc123", - headers={"X-Source": "taskito-prod"}, -) -``` - -The monitoring service receives JSON payloads like: - -```json -{ - "event": "job.failed", - "job_id": "01H5K6X...", - "task_name": "myapp.tasks.process", - "queue": "default", - "error": "ConnectionError: ..." 
-} -``` - -### Job Completion Tracking - -```python -from taskito.events import EventType - -completed_count = 0 - -def track_completion(event_type: EventType, payload: dict): - global completed_count - completed_count += 1 - if completed_count % 100 == 0: - print(f"Milestone: {completed_count} jobs completed") - -queue.on_event(EventType.JOB_COMPLETED, track_completion) -``` - -### Database Logging for Audit Trail - -```python -from taskito.events import EventType - -def audit_log(event_type: EventType, payload: dict): - db.execute( - "INSERT INTO audit_log (event, job_id, task_name, timestamp) VALUES (?, ?, ?, ?)", - (event_type.value, payload["job_id"], payload["task_name"], time.time()), - ) - -# Subscribe to all important events -for event in [EventType.JOB_ENQUEUED, EventType.JOB_COMPLETED, EventType.JOB_FAILED, EventType.JOB_DEAD]: - queue.on_event(event, audit_log) -``` - -## Event Ordering - -Events fire in the order the scheduler processes results — typically the order jobs complete. For jobs that complete nearly simultaneously, ordering is **not guaranteed** across different workers or threads. - -Within a single job's lifecycle, events always fire in this order: - -1. `JOB_ENQUEUED` (at enqueue time) -2. `JOB_COMPLETED` / `JOB_FAILED` / `JOB_CANCELLED` (at completion) -3. `JOB_RETRYING` (if retried, before the next attempt) -4. `JOB_DEAD` (if all retries exhausted) - -## Backpressure - -Events are dispatched to a thread pool (default size: 4, configurable via `event_workers=N`). If callbacks are slow and events arrive faster than they can be processed, they queue in memory. - -For high-volume event scenarios: - -```python -queue = Queue(event_workers=16) # More threads for slow callbacks -``` - -If a callback raises an exception, it is logged and the event is dropped — it does not retry or block other callbacks. - -## Webhook Failure - -Webhooks retry with exponential backoff (up to `max_retries`). 
After all retries are exhausted, the webhook delivery is **logged and dropped** — there is no dead-letter queue for webhooks. Monitor webhook failures via the `on_failure` callback or structured logging. - -### Webhook Receiver (Flask) - -A minimal Flask app that receives and verifies taskito webhooks: - -```python -from flask import Flask, request, abort -import hashlib, hmac - -app = Flask(__name__) -WEBHOOK_SECRET = "my-signing-secret" - -@app.route("/hooks/taskito", methods=["POST"]) -def receive_webhook(): - signature = request.headers.get("X-Taskito-Signature", "") - expected = hmac.new( - WEBHOOK_SECRET.encode(), request.data, hashlib.sha256 - ).hexdigest() - - if not hmac.compare_digest(f"sha256={expected}", signature): - abort(401) - - event = request.json - print(f"Received event: {event['event']} for job {event['job_id']}") - return "", 204 -``` diff --git a/docs/guide/extensibility/index.md b/docs/guide/extensibility/index.md deleted file mode 100644 index b231646..0000000 --- a/docs/guide/extensibility/index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Extensibility - -Extend taskito with custom behavior at every stage of the task lifecycle. - -| Guide | Description | -|-------|-------------| -| [Middleware](middleware.md) | Hook into task execution with `before`, `after`, `on_retry`, and more | -| [Serializers](serializers.md) | Custom payload serialization — msgpack, orjson, encryption | -| [Events & Webhooks](events-webhooks.md) | React to queue events and push notifications to external services | diff --git a/docs/guide/extensibility/middleware.md b/docs/guide/extensibility/middleware.md deleted file mode 100644 index dd7a42c..0000000 --- a/docs/guide/extensibility/middleware.md +++ /dev/null @@ -1,195 +0,0 @@ -# Per-Task Middleware - -taskito supports a middleware system that lets you run code at key points in the task lifecycle. Middleware can be applied globally (to all tasks) or per-task. 
- -## TaskMiddleware Base Class - -Create middleware by subclassing `TaskMiddleware` and overriding the hooks you need: - -```python -from taskito import TaskMiddleware - -class LoggingMiddleware(TaskMiddleware): - def before(self, ctx): - print(f"[START] {ctx.task_name} (job {ctx.id})") - - def after(self, ctx, result, error): - status = "OK" if error is None else f"FAILED: {error}" - print(f"[END] {ctx.task_name}: {status}") - - def on_retry(self, ctx, error, retry_count): - print(f"[RETRY] {ctx.task_name} attempt {retry_count}: {error}") -``` - -### Hook Signatures - -| Hook | Called when | -|---|---| -| `before(ctx)` | Before task execution | -| `after(ctx, result, error)` | After task execution (success or failure) | -| `on_retry(ctx, error, retry_count)` | A job fails and will be retried | -| `on_enqueue(task_name, args, kwargs, options)` | A job is about to be enqueued | -| `on_dead_letter(ctx, error)` | A job exhausts all retries and moves to the DLQ | -| `on_timeout(ctx)` | A job hits its timeout limit | -| `on_cancel(ctx)` | A job is cancelled during execution | - -The `ctx` parameter is a `JobContext` — the same object as `current_job` — providing `ctx.id`, `ctx.task_name`, `ctx.retry_count`, and `ctx.queue_name`. - -!!! note "Lifecycle hooks dispatched from Rust" - `on_retry`, `on_dead_letter`, `on_timeout`, and `on_cancel` are called by the Rust result handler after the scheduler records the outcome. They fire after `after()` and after the corresponding event is emitted on the event bus. Exceptions raised inside these hooks are logged and do not affect job processing. - -### `on_timeout` — Handling Timed-Out Jobs - -`on_timeout` fires when the Rust scheduler detects a stale job that exceeded its hard `timeout`. Detection happens in the maintenance reaper, which periodically scans for jobs still marked as running past their deadline. 
- -When a job times out, `on_timeout` is called **before** `on_retry` (if the job will be retried) or `on_dead_letter` (if retries are exhausted). This lets you react to the timeout itself independently of whether the job will be retried: - -```python -class TimeoutAlerter(TaskMiddleware): - def on_timeout(self, ctx): - # Fires for every timed-out job, regardless of retry/DLQ outcome - logger.error("Job %s (%s) timed out", ctx.id, ctx.task_name) - - def on_retry(self, ctx, error, retry_count): - # Fires after on_timeout when the job will be retried - logger.warning("Retrying %s (attempt %d)", ctx.task_name, retry_count) - - def on_dead_letter(self, ctx, error): - # Fires after on_timeout when retries are exhausted - logger.critical("Job %s exhausted retries after timeout", ctx.id) -``` - -!!! tip - Use `on_timeout` to update dashboards, fire alerts, or record SLA violations. Combine with `on_retry` and `on_dead_letter` for full visibility into the job's fate after the timeout. - -### `on_enqueue` — Modifying Enqueue Parameters - -`on_enqueue` is unique: it fires before the job is written to the database, and the `options` dict is **mutable**. Modify it to change how the job is enqueued: - -```python -class PriorityBoostMiddleware(TaskMiddleware): - def on_enqueue(self, task_name, args, kwargs, options): - # Bump priority for urgent tasks during business hours - import datetime - hour = datetime.datetime.now().hour - if 9 <= hour < 18 and task_name.startswith("alerts."): - options["priority"] = max(options.get("priority", 0), 50) -``` - -Keys present in `options`: `priority`, `delay`, `queue`, `max_retries`, `timeout`, `unique_key`, `metadata`. 
- -## Queue-Level Middleware - -Apply middleware to **all tasks** by passing it to the `Queue` constructor: - -```python -from taskito import Queue - -queue = Queue(middleware=[LoggingMiddleware()]) -``` - -## Per-Task Middleware - -Apply middleware to a **specific task** using the `middleware` parameter: - -```python -@queue.task(middleware=[MetricsMiddleware()]) -def process(data): - ... -``` - -Per-task middleware runs **after** global middleware, in registration order. - -## Example: Metrics Middleware - -```python -import time -from taskito import TaskMiddleware - -class MetricsMiddleware(TaskMiddleware): - def before(self, ctx): - ctx._start_time = time.monotonic() - - def after(self, ctx, result, error): - elapsed = time.monotonic() - ctx._start_time - status = "success" if error is None else "failure" - metrics.histogram("task.duration_seconds", elapsed, tags={ - "task": ctx.task_name, - "status": status, - }) -``` - -## Composition and Ordering - -### Multiple middleware on the same task - -```python -import time -from taskito import TaskMiddleware - -class TimingMiddleware(TaskMiddleware): - def before(self, ctx): - ctx._start = time.monotonic() - def after(self, ctx, result, error): - elapsed = time.monotonic() - ctx._start - print(f"{ctx.task_name} took {elapsed:.3f}s") - -class LoggingMiddleware(TaskMiddleware): - def before(self, ctx): - print(f"Starting {ctx.task_name}[{ctx.id}]") - def after(self, ctx, result, error): - print(f"Finished {ctx.task_name}[{ctx.id}]") - -@queue.task(middleware=[TimingMiddleware(), LoggingMiddleware()]) -def process(data): - ... -``` - -### Execution order - -1. **Global middleware** (registered via `Queue(middleware=[...])`) runs first -2. **Per-task middleware** (via `@queue.task(middleware=[...])`) runs second -3. Within each group, middleware runs in **registration order** -4. 
`after()` hooks run in **reverse order** (like a stack) - -### Exception handling - -If a middleware hook raises an exception: - -- **`before()`**: The exception is logged, but subsequent middleware `before()` hooks still run. The task executes normally. -- **`after()`**: The exception is logged. Other `after()` hooks still run. -- **`on_retry()` / `on_dead_letter()`**: Logged and swallowed — these are notification hooks, not control flow. - -Middleware exceptions never prevent task execution or result handling. - -## Middleware vs Hooks - -taskito has two systems for running code around tasks: - -| | Hooks (`@queue.on_failure`, etc.) | Middleware (`TaskMiddleware`) | -|---|---|---| -| **Scope** | Queue-level only | Queue-level or per-task | -| **Interface** | Decorated functions | Class with up to 7 hooks | -| **Context** | Receives `task_name, args, kwargs` | Receives `JobContext` | -| **Enqueue hook** | No | Yes (`on_enqueue`, can mutate options) | -| **Retry hook** | No | Yes (`on_retry`) | -| **DLQ / timeout / cancel hooks** | No | Yes | -| **Execution order** | After middleware | Before hooks | - -Middleware runs **inside** the task wrapper (closer to the task function), while hooks run **outside**. In practice, middleware `before()` fires first, then `before_task` hooks. On completion, `on_success`/`on_failure` hooks fire, then middleware `after()`. - -## Combining with OpenTelemetry - -The built-in `OpenTelemetryMiddleware` is itself a `TaskMiddleware`, so it composes naturally with your own middleware: - -```python -from taskito import Queue -from taskito.contrib.otel import OpenTelemetryMiddleware - -queue = Queue(middleware=[ - OpenTelemetryMiddleware(), - LoggingMiddleware(), -]) -``` - -See the [OpenTelemetry guide](../../integrations/otel.md) for setup details. 
diff --git a/docs/guide/extensibility/serializers.md b/docs/guide/extensibility/serializers.md deleted file mode 100644 index fd6ecab..0000000 --- a/docs/guide/extensibility/serializers.md +++ /dev/null @@ -1,129 +0,0 @@ -# Pluggable Serializers - -taskito uses a pluggable serializer for task arguments and results. By default, it uses `CloudpickleSerializer`, but you can switch to `JsonSerializer` or provide your own. - -## Built-in Serializers - -### CloudpickleSerializer (default) - -Handles lambdas, closures, and complex Python objects. This is the default — no configuration needed. - -```python -from taskito import Queue - -queue = Queue() # uses CloudpickleSerializer -``` - -### JsonSerializer - -Produces human-readable JSON payloads. Useful for debugging, cross-language interop, or when you only pass simple types (strings, numbers, dicts, lists). - -```python -from taskito import Queue, JsonSerializer - -queue = Queue(serializer=JsonSerializer()) -``` - -### MsgPackSerializer - -MessagePack serialization: faster than cloudpickle, produces smaller payloads, and is cross-language compatible. Requires the `msgpack` package. - -```bash -pip install msgpack -``` - -```python -from taskito.serializers import MsgPackSerializer - -queue = Queue(serializer=MsgPackSerializer()) -``` - -!!! note "Type restrictions" - `MsgPackSerializer` only handles basic types: dicts, lists, strings, numbers, booleans, and `None`. It does not support lambdas, closures, or arbitrary Python objects. Use `CloudpickleSerializer` when you need to pass complex objects. - -### EncryptedSerializer - -AES-256-GCM encryption for task arguments and results. Payloads stored in the database are opaque ciphertext — only the key holder can read them. Requires the `cryptography` package. 
- -```bash -pip install cryptography -``` - -```python -import os -from taskito.serializers import EncryptedSerializer - -queue = Queue(serializer=EncryptedSerializer(key=os.environ["QUEUE_KEY"])) -``` - -The key must be exactly 32 bytes, base64-encoded. Generate one with: - -```bash -python -c "import os, base64; print(base64.b64encode(os.urandom(32)).decode())" -``` - -By default, `EncryptedSerializer` wraps `CloudpickleSerializer`. To wrap a different serializer: - -```python -from taskito.serializers import EncryptedSerializer, MsgPackSerializer - -queue = Queue(serializer=EncryptedSerializer(key=key, inner=MsgPackSerializer())) -``` - -## When to Use Each - -| | CloudpickleSerializer | JsonSerializer | MsgPackSerializer | EncryptedSerializer | -|---|---|---|---|---| -| **Complex objects** | Yes | No | No | Depends on inner serializer | -| **Debugging** | Binary payloads (opaque) | Human-readable JSON | Binary (opaque) | Ciphertext (opaque) | -| **Cross-language** | Python only | Any language | Any language | Python only (by default) | -| **Performance** | Good | Good for simple types | Best | Adds encryption overhead | -| **Security** | None | None | None | AES-256-GCM | -| **Extra dependency** | No | No | `msgpack` | `cryptography` | -| **Default** | Yes | No | No | No | - -**Rule of thumb**: Use `CloudpickleSerializer` (default) unless you have a specific reason to switch. Use `EncryptedSerializer` when tasks carry sensitive data that must not be readable in the database. 
- -## Custom Serializers - -Implement the `Serializer` protocol with two methods: - -```python -from taskito import Serializer - -class MsgpackSerializer: - def dumps(self, obj) -> bytes: - import msgpack - return msgpack.packb(obj) - - def loads(self, data: bytes): - import msgpack - return msgpack.unpackb(data, raw=False) - -queue = Queue(serializer=MsgpackSerializer()) -``` - -The protocol requires: - -| Method | Signature | Description | -|---|---|---| -| `dumps` | `(obj: Any) -> bytes` | Serialize a Python object to bytes | -| `loads` | `(data: bytes) -> Any` | Deserialize bytes back to a Python object | - -The serializer is used for both task arguments (`(args, kwargs)` tuple) and return values. - -!!! note "Result deserialization" - `job.result()` uses the queue's configured serializer for deserialization. If you're using `JsonSerializer` or a custom serializer, results are correctly deserialized with that serializer — not hardcoded cloudpickle. - -## Configuration - -Pass the serializer to the `Queue` constructor: - -```python -queue = Queue( - db_path="myapp.db", - serializer=JsonSerializer(), -) -``` - -All tasks on the queue use the same serializer. The serializer must be consistent between the enqueuing code and the worker — using different serializers will cause deserialization failures. diff --git a/docs/guide/index.md b/docs/guide/index.md deleted file mode 100644 index 37acf78..0000000 --- a/docs/guide/index.md +++ /dev/null @@ -1,55 +0,0 @@ -# User Guide - -Everything you need to build, run, and operate taskito in production. - -
- -- **Core** - - --- - - Tasks, workers, queues, scheduling, and workflows — the fundamentals. - - [:octicons-arrow-right-24: Core concepts](core/index.md) - -- **Reliability** - - --- - - Retries, error handling, rate limiting, circuit breakers, and distributed locking. - - [:octicons-arrow-right-24: Reliability](reliability/index.md) - -- **Advanced Execution** - - --- - - Prefork pool, native async, result streaming, dependencies, and batching. - - [:octicons-arrow-right-24: Execution](execution/index.md) - -- **Extensibility** - - --- - - Middleware, custom serializers, and event hooks. - - [:octicons-arrow-right-24: Extensibility](extensibility/index.md) - -- **Observability** - - --- - - Monitoring, structured logging, and the web dashboard. - - [:octicons-arrow-right-24: Observability](observability/index.md) - -- **Operations** - - --- - - Testing, deployment, troubleshooting, and migration. - - [:octicons-arrow-right-24: Operations](operations/index.md) - -
diff --git a/docs/guide/observability/dashboard-api.md b/docs/guide/observability/dashboard-api.md deleted file mode 100644 index 2891728..0000000 --- a/docs/guide/observability/dashboard-api.md +++ /dev/null @@ -1,228 +0,0 @@ -# Dashboard REST API - -The dashboard exposes a JSON API you can use independently of the UI. All endpoints return `application/json` with `Access-Control-Allow-Origin: *`. - -## Stats - -### `GET /api/stats` - -Queue statistics snapshot. - -```json -{ - "pending": 12, - "running": 3, - "completed": 450, - "failed": 2, - "dead": 1, - "cancelled": 0 -} -``` - -### `GET /api/stats/queues` - -Per-queue statistics. Pass `?queue=name` for a single queue, or omit for all queues. - -```bash -curl http://localhost:8080/api/stats/queues -curl http://localhost:8080/api/stats/queues?queue=emails -``` - -## Jobs - -### `GET /api/jobs` - -Paginated list of jobs with filtering. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `status` | `string` | all | Filter by status | -| `queue` | `string` | all | Filter by queue name | -| `task` | `string` | all | Filter by task name | -| `metadata` | `string` | — | Search metadata (LIKE) | -| `error` | `string` | — | Search error text (LIKE) | -| `created_after` | `int` | — | Unix ms timestamp | -| `created_before` | `int` | — | Unix ms timestamp | -| `limit` | `int` | `20` | Page size | -| `offset` | `int` | `0` | Pagination offset | - -```bash -curl http://localhost:8080/api/jobs?status=running&limit=10 -``` - -### `GET /api/jobs/{id}` - -Full detail for a single job. - -### `GET /api/jobs/{id}/errors` - -Error history for a job (one entry per failed attempt). - -### `GET /api/jobs/{id}/logs` - -Task execution logs for a specific job. - -### `GET /api/jobs/{id}/replay-history` - -Replay history for a job that has been replayed. - -### `GET /api/jobs/{id}/dag` - -Dependency graph for a job (nodes and edges). - -### `POST /api/jobs/{id}/cancel` - -Cancel a pending job. 
- -```json -{ "cancelled": true } -``` - -### `POST /api/jobs/{id}/replay` - -Replay a completed or failed job with the same payload. - -```json -{ "replay_job_id": "01H5K7Y..." } -``` - -## Dead Letters - -### `GET /api/dead-letters` - -Paginated list of dead letter entries. Supports `limit` and `offset` parameters. - -### `POST /api/dead-letters/{id}/retry` - -Re-enqueue a dead letter job. - -```json -{ "new_job_id": "01H5K7Y..." } -``` - -### `POST /api/dead-letters/purge` - -Purge all dead letters. - -```json -{ "purged": 42 } -``` - -## Metrics - -### `GET /api/metrics` - -Per-task execution metrics. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `task` | `string` | all | Filter by task name | -| `since` | `int` | `3600` | Lookback window in seconds | - -### `GET /api/metrics/timeseries` - -Time-bucketed metrics for charts. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `task` | `string` | all | Filter by task name | -| `since` | `int` | `3600` | Lookback window in seconds | -| `bucket` | `int` | `60` | Bucket size in seconds | - -## Logs - -### `GET /api/logs` - -Query task execution logs across all jobs. - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `task` | `string` | all | Filter by task name | -| `level` | `string` | all | Filter by log level | -| `since` | `int` | `3600` | Lookback window in seconds | -| `limit` | `int` | `100` | Max entries | - -## Infrastructure - -### `GET /api/workers` - -List registered workers with heartbeat status. - -### `GET /api/circuit-breakers` - -Current state of all circuit breakers. - -### `GET /api/resources` - -Worker resource health and pool status. - -### `GET /api/queues/paused` - -List paused queue names. - -### `POST /api/queues/{name}/pause` - -Pause a queue (jobs stop being dequeued). - -### `POST /api/queues/{name}/resume` - -Resume a paused queue. - -## Observability - -### `GET /api/proxy-stats` - -Per-handler proxy reconstruction metrics. 
- -### `GET /api/interception-stats` - -Interception strategy performance metrics. - -### `GET /api/scaler` - -KEDA-compatible autoscaler payload. Pass `?queue=name` for a specific queue. - -### `GET /health` - -Liveness check. Always returns `{"status": "ok"}`. - -### `GET /readiness` - -Readiness check with storage, worker, and resource health. - -### `GET /metrics` - -Prometheus metrics endpoint (requires `prometheus-client` package). - -## Using the API Programmatically - -```python -import requests - -# Health check script -stats = requests.get("http://localhost:8080/api/stats").json() - -if stats["dead"] > 0: - print(f"WARNING: {stats['dead']} dead letter(s)") - -if stats["running"] > 100: - print(f"WARNING: {stats['running']} jobs running, possible backlog") -``` - -```python -# Pause a queue during deployment -requests.post("http://localhost:8080/api/queues/default/pause") - -# ... deploy ... - -# Resume after deployment -requests.post("http://localhost:8080/api/queues/default/resume") -``` - -```python -# Retry all dead letters -dead = requests.get("http://localhost:8080/api/dead-letters?limit=100").json() -for entry in dead: - requests.post(f"http://localhost:8080/api/dead-letters/{entry['id']}/retry") - print(f"Retried {entry['task_name']}") -``` diff --git a/docs/guide/observability/dashboard.md b/docs/guide/observability/dashboard.md deleted file mode 100644 index be7f626..0000000 --- a/docs/guide/observability/dashboard.md +++ /dev/null @@ -1,333 +0,0 @@ -# Web Dashboard - -taskito ships with a built-in web dashboard for monitoring jobs, inspecting dead letters, and managing your task queue in real time. The dashboard is a single-page application served directly from the Python package -- **zero extra dependencies required**. - -## Launching the Dashboard - -=== "CLI" - - ```bash - taskito dashboard --app myapp:queue - ``` - - The `--app` argument uses the same `module:attribute` format as the worker. 
- -=== "Programmatic" - - ```python - from taskito.dashboard import serve_dashboard - from myapp import queue - - serve_dashboard(queue, host="0.0.0.0", port=8000) - ``` - -By default the dashboard starts on `http://localhost:8080`. - -### CLI Options - -| Flag | Default | Description | -|---|---|---| -| `--app` | *required* | Module path to your `Queue` instance, e.g. `myapp:queue` | -| `--host` | `127.0.0.1` | Bind address | -| `--port` | `8080` | Bind port | - -```bash -# Bind to all interfaces on port 9000 -taskito dashboard --app myapp:queue --host 0.0.0.0 --port 9000 -``` - -!!! tip "Running alongside the worker" - The dashboard reads directly from the same SQLite database as the worker. You can run them side by side without any coordination: - - ```bash - # Terminal 1 - taskito worker --app myapp:queue - - # Terminal 2 - taskito dashboard --app myapp:queue - ``` - -## Dashboard Features - -The dashboard is a React + Vite + TypeScript SPA routed via TanStack Router, styled with Tailwind v4 and shadcn/ui, and shipped as hash-busted multi-file assets under `py_src/taskito/static/dashboard/`. - -### Design - -- **Dark and light mode** -- Toggle between themes via the sun/moon button in the header. Preference is stored in `localStorage` and follows the system scheme by default. -- **Auto-refresh** -- Configurable refresh interval (2s, 5s, 10s, or off) via the header dropdown. All pages auto-refresh at the selected interval; TanStack Query handles caching and background revalidation. -- **Command palette** -- `⌘K` / `Ctrl+K` opens a cmdk palette for route navigation and common actions. -- **Icons** -- Lucide icons throughout for visual clarity — every nav item, stat card, and action button has a meaningful icon. -- **Toast notifications** -- Every action (cancel, retry, replay, pause, resume, purge) shows a success or error toast via sonner. Optimistic mutations update the UI immediately and roll back on error. 
-- **Destructive confirms** -- Irreversible actions (purge, retry all) use a type-to-confirm dialog. -- **Loading states** -- Skeleton screens for tables and cards, error boundaries with retry. -- **Responsive layout** -- Sidebar navigation with grouped sections (Monitoring, Infrastructure, Advanced). The main content area scrolls independently. - -### Pages - -| Page | Description | -|---|---| -| **Overview** | Stats cards with status icons, throughput sparkline chart, recent jobs table | -| **Jobs** | Filterable job listing (status, queue, task, metadata, error, date range) with pagination | -| **Job Detail** | Full job info, error history, task logs, replay history, dependency DAG visualization | -| **Metrics** | Per-task performance table (avg, P50, P95, P99) with timeseries chart and time range selector | -| **Logs** | Structured task execution logs with task/level filters | -| **Workers** | Worker cards with heartbeat status, queue assignments, and tags | -| **Queues** | Per-queue stats (pending/running), pause and resume controls | -| **Resources** | Worker DI runtime status -- health, scope, init duration, pool stats, dependencies | -| **Circuit Breakers** | Automatic failure protection state (closed/open/half_open), thresholds, cooldowns | -| **Dead Letters** | Failed jobs that exhausted retries -- retry individual entries or purge all | -| **System** | Proxy reconstruction and interception strategy metrics | - -!!! info "Zero extra dependencies at runtime" - The built SPA ships inside the Python wheel under `py_src/taskito/static/dashboard/` and is served by the Python dashboard process. No Node.js, no pnpm, no CDN at runtime -- just `pip install taskito`. Node.js and pnpm are only needed by contributors rebuilding the dashboard source in `dashboard/`. - -## Tutorial - -This walkthrough covers every dashboard page and how to use it. 
- -### Step 1: Start the Dashboard - -Start a worker and the dashboard in two terminals: - -```bash -# Terminal 1 — start the worker -taskito worker --app myapp:queue - -# Terminal 2 — start the dashboard -taskito dashboard --app myapp:queue -``` - -You should see: - -``` -taskito dashboard → http://127.0.0.1:8080 -Press Ctrl+C to stop -``` - -Open `http://localhost:8080` in your browser. - -### Step 2: Overview Page - -The first page you see is the **Overview**. It shows: - -- **Stats cards** -- Six cards at the top showing pending, running, completed, failed, dead, and cancelled job counts. Each card has a colored icon matching its status. -- **Throughput chart** -- A green sparkline showing jobs processed per second over the last 60 refresh intervals. The current throughput value is displayed in the top-right. -- **Recent jobs table** -- The 10 most recent jobs. Click any row to open its detail view. - -The stats update automatically based on the refresh interval you select in the header (default: 5 seconds). - -### Step 3: Browsing and Filtering Jobs - -Click **Jobs** in the sidebar. This page shows: - -- **Stats grid** -- Same six stat cards as the overview. -- **Filter panel** -- A card with seven filter fields: - - **Status dropdown** -- Filter by pending, running, complete, failed, dead, or cancelled. - - **Queue** -- Text input to filter by queue name. - - **Task** -- Text input to filter by task name. - - **Metadata** -- Search within job metadata (uses SQL LIKE). - - **Error text** -- Search within error messages. - - **Created after / before** -- Date pickers for time-range filtering. -- **Results table** -- Paginated list showing ID, task, queue, status, priority, progress, retries, and created time. Click any row to see the full job detail. - -Use the **Prev / Next** buttons at the bottom to paginate. The current page range is shown (e.g., "Showing 1–20 items"). - -### Step 4: Inspecting a Job - -Click any job row to open the **Job Detail** page. 
The detail card shows: - -- A colored top border matching the job status (green for complete, red for failed, etc.) -- Full job ID, status badge, task name, queue, priority, progress bar, retries, timestamps -- **Error** field (if the job failed) displayed in a red-highlighted box -- Unique key and metadata (if set) - -**Actions:** - -- **Cancel Job** -- Visible only for pending jobs. Sends a cancel request and shows a toast. -- **Replay** -- Re-enqueue the job with the same payload. Navigates to the new job's detail page. - -**Sections below the detail card:** - -- **Error History** -- One row per failed attempt, showing the attempt number, error message, and timestamp. -- **Task Logs** -- Structured log entries emitted during task execution (time, level, message, extra data). -- **Replay History** -- If the job has been replayed, shows each replay with the original and replay errors. -- **Dependency Graph** -- If the job has dependencies, renders an SVG visualization with colored nodes (click a node to navigate to that job). - -Click **← Back to jobs** at the bottom to return. - -### Step 5: Monitoring Metrics - -Click **Metrics** in the sidebar. This page shows: - -- **Time range selector** -- Three buttons in the top-right: **1h**, **6h**, **24h**. Controls the lookback window for both the chart and the table. -- **Timeseries chart** -- Stacked bar chart showing success (green) and failure (red) counts per time bucket. X-axis shows timestamps, Y-axis shows job counts. -- **Per-task table** -- One row per task name with columns: - - Total, Success (green), Failures (red if > 0) - - Avg, P50, P95, P99, Min, Max latency — color-coded green/yellow/red based on thresholds - -### Step 6: Viewing Logs - -Click **Logs** in the sidebar. This page shows structured task execution logs from the last hour: - -- **Filter by task** -- Text input to narrow logs to a specific task name. -- **Filter by level** -- Dropdown to show only error, warning, info, or debug logs. 
-- **Log table** -- Time, level badge (colored), task name, job ID (clickable link), message, and extra data. - -### Step 7: Checking Workers - -Click **Workers** in the sidebar. Each active worker is displayed as a card showing: - -- **Green dot** -- Indicates the worker is alive. -- **Worker ID** -- The unique identifier in monospace text. -- **Queues** -- Which queues the worker consumes from. -- **Last heartbeat** -- When the worker last reported in. If this is stale (many seconds old), the worker may be hung or dead. -- **Registered at** -- When the worker connected. -- **Tags** -- Custom worker tags (if configured). - -The header shows the total number of active workers and currently running jobs. - -### Step 8: Managing Queues - -Click **Queues** in the sidebar. This page shows: - -- **Per-queue table** -- Each queue with its pending count (yellow), running count (blue), and status badge. -- **Pause button** -- Pauses the queue so no new jobs are dequeued. Workers currently running jobs on that queue will finish, but no new work starts. A toast confirms the action. -- **Resume button** -- Resumes a paused queue. Processing starts again immediately. - -!!! note "What pausing does" - Pausing a queue prevents the scheduler from dequeuing new jobs from it. Jobs already running will complete normally. Enqueuing new jobs still works — they'll be picked up when the queue is resumed. - -### Step 9: Inspecting Resources - -Click **Resources** in the sidebar. This page shows the worker dependency injection runtime: - -- **Name** -- The resource name as registered with `@queue.worker_resource()`. -- **Scope** -- WORKER, TASK, THREAD, or REQUEST. -- **Health** -- Badge showing healthy (green), unhealthy (red), or degraded (yellow). -- **Init (ms)** -- How long the resource took to initialize. -- **Recreations** -- Number of times the resource was re-created (e.g., after a health check failure). -- **Dependencies** -- Other resources this one depends on. 
-- **Pool** -- For pooled resources: active/total count and idle workers. - -### Step 10: Circuit Breakers - -Click **Circuit Breakers** in the sidebar. Each circuit breaker shows: - -- **State** -- Badge: closed (green, normal), open (red, tripping), or half_open (yellow, testing recovery). -- **Failure count** -- Current failures within the window. -- **Threshold** -- Failures needed to trip open. -- **Window / Cooldown** -- Time windows for failure counting and recovery. - -### Step 11: Dead Letter Queue - -Click **Dead Letters** in the sidebar. This page shows jobs that failed all retry attempts: - -- **Retry button** -- Re-enqueue an individual dead letter as a new job. A toast shows the result. -- **Purge All** -- Button in the top-right header. Opens a confirmation dialog before deleting all dead letters permanently. -- **Error column** -- Shows the final error message (truncated, hover for full text). -- **Original Job** -- Clickable link to the job that originally failed. - -### Step 12: System Internals - -Click **System** in the sidebar. Two tables: - -- **Proxy Reconstruction** -- Per-handler metrics for non-serializable object proxying (reconstructions, average duration, errors). -- **Interception** -- Per-strategy metrics for argument interception (count, average duration). - -These are empty unless your app uses the proxy or interception features. - -### Step 13: Switching Themes - -Click the **sun icon** (in dark mode) or **moon icon** (in light mode) in the top-right of the header. The theme switches immediately and persists in `localStorage`. - -### Step 14: Changing Refresh Rate - -Use the **Refresh** dropdown in the header to change how often all data refreshes: - -- **2s** -- Near-real-time monitoring. -- **5s** -- Default, good balance. -- **10s** -- Low-frequency polling. -- **Off** -- Manual only (reload the page to refresh). - -!!! tip "REST API" - The dashboard also exposes a full JSON API. 
See the [Dashboard REST API](dashboard-api.md) reference for all endpoints. - -## Development - -Contributors who want to modify the dashboard source: - -```bash -# Install dependencies (pnpm is pinned via the `packageManager` field) -cd dashboard && pnpm install - -# Start Vite dev server (proxies /api/* to localhost:8080) -pnpm run dev - -# In another terminal, start the backend -taskito dashboard --app myapp:queue - -# Build and copy to Python package -pnpm run build -``` - -!!! tip "Don't have pnpm?" - Run `corepack enable` once (Node 16+) and pnpm will be provisioned automatically from the version pinned in `dashboard/package.json`. - -The build produces a static `index.html` plus hashed JS/CSS chunks under `py_src/taskito/static/dashboard/`. The built assets aren't committed — release tooling runs `pnpm -C dashboard build` before packaging so the wheel ships them. - -### How the build works - -``` -dashboard/src/ (React + TypeScript) - ↓ pnpm run build - ↓ Vite compiles, tree-shakes, minifies, code-splits - ↓ Outputs to py_src/taskito/static/dashboard/ - ↓ -py_src/taskito/static/dashboard/ - index.html + assets/index-.{js,css} - + lazy chunks (metrics, logs) - ↓ pip install / maturin develop / uv sync - ↓ Bundled in the Python wheel - ↓ -dashboard.py serves assets via importlib.resources + http.server - ↓ /api/* → Queue methods · /assets/* → hashed bundles · else → index.html - ↓ -Browser loads index.html → React SPA boots → fetches /api/* -``` - -### Project structure - -``` -dashboard/ -├── package.json # Dependencies: react, @tanstack/*, tailwindcss, radix-ui, recharts, cmdk -├── vite.config.ts # Vite + TanStack Router + React + Tailwind, API proxy -├── vitest.config.ts # Unit-test config -├── biome.json # Lint + format config -├── components.json # shadcn config -├── tsconfig.json # TypeScript config (strict, React JSX) -├── index.html # Vite entry point -└── src/ - ├── main.tsx # Mount point + router bootstrap - ├── globals.css # Tailwind + design tokens 
- ├── routes/ # TanStack file-based routes (11 pages; metrics + logs lazy-loaded) - ├── components/ - │ ├── layout/ # AppShell, Sidebar, Header, CommandPalette, RouteErrorBoundary - │ └── ui/ # Button, Dialog, DataTable, Badge, Toaster, etc. (shadcn-style) - ├── features/ # Per-feature api / hooks / components / utils - │ ├── overview/ # Stats grid, throughput sparkline, recent jobs - │ ├── jobs/ # List with URL-synced filters + detail tabs + DAG - │ ├── queues/ # Pause/resume mutations - │ ├── workers/ # Worker table with heartbeat freshness - │ ├── metrics/ # Recharts-backed throughput + latency (lazy) - │ ├── logs/ # Virtualized live tail (lazy) - │ ├── circuit-breakers/ - │ ├── resources/ - │ ├── dead-letters/ # Group by task + exception class, typed-purge - │ └── system/ # Proxy + interception stats - ├── hooks/ # Shared hooks (useDebouncedValue, useMediaQuery, useLastRefreshed) - ├── lib/ # api-client, cn, time, number, status, api-types - └── providers/ # Query, Theme, RefreshInterval, CommandPalette -``` - -!!! warning "Authentication" - The dashboard does not include authentication. If you expose it beyond `localhost`, place it behind a reverse proxy with authentication (e.g. nginx with basic auth, or an OAuth2 proxy). diff --git a/docs/guide/observability/index.md b/docs/guide/observability/index.md deleted file mode 100644 index 83a13ed..0000000 --- a/docs/guide/observability/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Observability - -Monitor, log, and inspect your task queue in real time. 
- -| Guide | Description | -|-------|-------------| -| [Monitoring & Hooks](monitoring.md) | Queue stats, progress tracking, worker heartbeat, and alerting hooks | -| [Structured Logging](logging.md) | Per-task structured logs with automatic context | -| [Web Dashboard](dashboard.md) | Built-in web UI for browsing jobs, metrics, and worker status | -| [Dashboard REST API](dashboard-api.md) | Programmatic access to all dashboard data via REST endpoints | diff --git a/docs/guide/observability/logging.md b/docs/guide/observability/logging.md deleted file mode 100644 index 2a18741..0000000 --- a/docs/guide/observability/logging.md +++ /dev/null @@ -1,126 +0,0 @@ -# Structured Task Logging - -taskito provides structured logging from within tasks via `current_job.log()`. Logs are stored in the database alongside job data, making them queryable and visible in the dashboard. - -## Writing Logs - -Use `current_job.log()` inside any task: - -```python -from taskito import current_job - -@queue.task() -def process_order(order_id: int): - current_job.log("Starting order processing", extra={"order_id": order_id}) - - items = fetch_items(order_id) - current_job.log(f"Found {len(items)} items", level="debug") - - for item in items: - try: - process_item(item) - except ValueError as e: - current_job.log(f"Skipping invalid item: {e}", level="warning", extra={"item": item}) - - current_job.log("Order processing complete") -``` - -### Parameters - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `message` | `str` | *required* | The log message | -| `level` | `str` | `"info"` | Log level: `"debug"`, `"info"`, `"warning"`, `"error"` | -| `extra` | `dict | None` | `None` | Structured data to attach as JSON | - -## Querying Logs - -### Per-Job Logs - -```python -logs = queue.task_logs(job_id) -for log in logs: - print(f"[{log['level']}] {log['message']}") -``` - -### Cross-Job Log Query - -```python -logs = queue.query_logs( - 
task_name="myapp.tasks.process_order", - level="error", - since=1700000000, - limit=50, -) -``` - -| Parameter | Type | Description | -|-----------|------|-------------| -| `task_name` | `str | None` | Filter by task name | -| `level` | `str | None` | Filter by log level | -| `since` | `int | None` | Unix timestamp — only logs after this time | -| `limit` | `int` | Maximum number of logs to return | - -## Dashboard - -Logs are accessible via the dashboard REST API: - -- **`GET /api/jobs/{id}/logs`** — logs for a specific job -- **`GET /api/logs`** — query logs across all jobs (supports `limit` and `offset` parameters) - -```bash -# Logs for a specific job -curl http://localhost:8080/api/jobs/01H5K6X.../logs - -# Recent logs across all jobs -curl http://localhost:8080/api/logs?limit=20 -``` - -## Examples - -### ETL Pipeline with Progress Logging - -```python -from taskito import current_job - -@queue.task() -def etl_pipeline(source: str, destination: str): - current_job.log("Starting extraction", extra={"source": source}) - - records = extract(source) - current_job.log(f"Extracted {len(records)} records", level="info") - current_job.update_progress(33) - - transformed = [] - for i, record in enumerate(records): - try: - transformed.append(transform(record)) - except Exception as e: - current_job.log( - f"Transform failed for record {i}", - level="warning", - extra={"record_id": record.get("id"), "error": str(e)}, - ) - current_job.update_progress(66) - - loaded = load(destination, transformed) - current_job.log( - "Pipeline complete", - extra={"extracted": len(records), "loaded": loaded, "skipped": len(records) - loaded}, - ) - current_job.update_progress(100) -``` - -### Debugging Failed Jobs - -```python -# After a job fails, inspect its logs to understand what happened -job = queue.get_job(failed_job_id) -logs = queue.task_logs(failed_job_id) - -print(f"Job {job.id} ({job.task_name}): {job.status}") -for log in logs: - print(f" [{log['level'].upper()}] 
{log['message']}") - if log.get("extra"): - print(f" {log['extra']}") -``` diff --git a/docs/guide/observability/monitoring.md b/docs/guide/observability/monitoring.md deleted file mode 100644 index f144e3b..0000000 --- a/docs/guide/observability/monitoring.md +++ /dev/null @@ -1,248 +0,0 @@ -# Monitoring & Hooks - -## Queue Statistics - -Get a snapshot of job counts by status: - -```python -stats = queue.stats() -# {'pending': 12, 'running': 3, 'completed': 450, 'failed': 2, 'dead': 1, 'cancelled': 0} -``` - -Async variant: - -```python -stats = await queue.astats() -``` - -## CLI Monitoring - -### One-Shot Stats - -```bash -taskito info --app myapp:queue -``` - -``` -taskito queue statistics ------------------------------- - pending 12 - running 3 - completed 450 - failed 2 - dead 1 - cancelled 0 ------------------------------- - total 468 -``` - -### Live Dashboard - -```bash -taskito info --app myapp:queue --watch -``` - -Refreshes every 2 seconds with throughput calculation (completed jobs per second). 
- -## Progress Tracking - -Report progress from inside tasks using `current_job`: - -```python -from taskito import current_job - -@queue.task() -def process_batch(items): - total = len(items) - for i, item in enumerate(items): - process(item) - current_job.update_progress(int((i + 1) / total * 100)) - return f"Processed {total} items" -``` - -Read progress from outside: - -```python -job = process_batch.delay(items) - -# Poll progress -fetched = queue.get_job(job.id) -print(fetched.progress) # 0-100 or None -``` - -### Job Context - -Inside a running task, `current_job` provides: - -| Property | Type | Description | -|---|---|---| -| `current_job.id` | `str` | The current job ID | -| `current_job.task_name` | `str` | The registered task name | -| `current_job.retry_count` | `int` | Current retry attempt (0 = first run) | -| `current_job.queue_name` | `str` | The queue this job is running on | - -```python -from taskito import current_job - -@queue.task() -def my_task(): - print(f"Running job {current_job.id}") - print(f"Task: {current_job.task_name}") - print(f"Attempt: {current_job.retry_count}") - print(f"Queue: {current_job.queue_name}") -``` - -!!! warning - `current_job` properties raise `RuntimeError` when accessed outside of a running task. - -## Worker Heartbeat - -Monitor active workers and their health: - -```python -workers = queue.workers() -for w in workers: - print(f"Worker {w['worker_id']}: {w['status']} (last seen: {w['last_heartbeat']})") -``` - -Async variant: - -```python -workers = await queue.aworkers() -``` - -The worker heartbeat is also available via the dashboard REST API at `GET /api/workers`. See the [Dashboard](dashboard.md) guide for details. - -## Events System - -taskito includes an in-process event bus for reacting to job lifecycle events (`JOB_ENQUEUED`, `JOB_COMPLETED`, `JOB_FAILED`, `JOB_RETRYING`, `JOB_DEAD`, `JOB_CANCELLED`). Events can also be delivered as signed HTTP webhooks to external systems. 
- -[:octicons-arrow-right-24: Events & Webhooks guide](../extensibility/events-webhooks.md) - -## Prometheus Metrics - -For production monitoring, the optional Prometheus integration provides counters, histograms, and gauges for task execution: - -```bash -pip install taskito[prometheus] -``` - -[:octicons-arrow-right-24: Prometheus integration](../../integrations/prometheus.md) - -## Hooks - -Run code before/after every task, or on success/failure. - -### `@queue.before_task` - -Called before each task executes: - -```python -@queue.before_task -def log_start(task_name, args, kwargs): - print(f"[START] {task_name}") -``` - -### `@queue.after_task` - -Called after each task, regardless of success or failure: - -```python -@queue.after_task -def log_end(task_name, args, kwargs, result, error): - status = "OK" if error is None else f"FAILED: {error}" - print(f"[END] {task_name} - {status}") -``` - -### `@queue.on_success` - -Called only when a task succeeds: - -```python -@queue.on_success -def track_metrics(task_name, args, kwargs, result): - metrics.increment(f"task.{task_name}.success") -``` - -### `@queue.on_failure` - -Called only when a task raises an exception: - -```python -@queue.on_failure -def alert_on_error(task_name, args, kwargs, error): - sentry_sdk.capture_exception(error) -``` - -### Hook Signatures - -| Hook | Signature | -|---|---| -| `before_task` | `fn(task_name, args, kwargs)` | -| `after_task` | `fn(task_name, args, kwargs, result, error)` | -| `on_success` | `fn(task_name, args, kwargs, result)` | -| `on_failure` | `fn(task_name, args, kwargs, error)` | - -!!! tip "Multiple hooks" - You can register multiple hooks of the same type. They execute in registration order. 
- -## Grafana Setup - -A minimal Prometheus + Grafana stack for monitoring taskito: - -```yaml -# docker-compose.monitoring.yml -services: - prometheus: - image: prom/prometheus - volumes: - - ./prometheus.yml:/etc/prometheus/prometheus.yml - ports: - - "9090:9090" - - grafana: - image: grafana/grafana - ports: - - "3000:3000" - environment: - - GF_SECURITY_ADMIN_PASSWORD=admin -``` - -```yaml -# prometheus.yml -scrape_configs: - - job_name: taskito - static_configs: - - targets: ["host.docker.internal:8080"] - metrics_path: /metrics -``` - -### Essential Grafana Panels - -**Queue Depth** (gauge): -```promql -taskito_queue_depth{queue="default"} -``` - -**Job Processing Rate** (rate): -```promql -rate(taskito_jobs_completed_total[5m]) -``` - -**Job Duration p99** (histogram): -```promql -histogram_quantile(0.99, rate(taskito_job_duration_seconds_bucket[5m])) -``` - -### Alert Rules - -```yaml -# Alert if queue depth stays above 1000 for 5 minutes -- alert: TaskitoQueueBacklog - expr: taskito_queue_depth > 1000 - for: 5m - -# Alert if p99 latency exceeds 5 seconds -- alert: TaskitoHighLatency - expr: histogram_quantile(0.99, rate(taskito_job_duration_seconds_bucket[5m])) > 5 -``` diff --git a/docs/guide/operations/deployment.md b/docs/guide/operations/deployment.md deleted file mode 100644 index 6156a8e..0000000 --- a/docs/guide/operations/deployment.md +++ /dev/null @@ -1,357 +0,0 @@ -# Deployment - -This guide covers running taskito in production environments. 
- -## SQLite File Location - -Choose a persistent, backed-up location for your database: - -```python -queue = Queue(db_path="/var/lib/myapp/taskito.db") -``` - -**Best practices:** - -- Use an absolute path — relative paths depend on the working directory -- Place the database on local storage (not NFS or network mounts) — SQLite file locking doesn't work reliably over network filesystems -- Ensure the directory exists and the worker process has read/write permissions -- The database file, WAL file (`taskito.db-wal`), and shared memory file (`taskito.db-shm`) must all be on the same filesystem - -## systemd Service - -Create `/etc/systemd/system/taskito-worker.service`: - -```ini -[Unit] -Description=taskito worker -After=network.target - -[Service] -Type=simple -User=myapp -Group=myapp -WorkingDirectory=/opt/myapp -ExecStart=/opt/myapp/.venv/bin/taskito worker --app myapp:queue -Restart=always -RestartSec=5 - -# Graceful shutdown — taskito handles SIGINT -KillSignal=SIGINT -TimeoutStopSec=35 - -# Environment -Environment=PYTHONPATH=/opt/myapp - -[Install] -WantedBy=multi-user.target -``` - -```bash -sudo systemctl daemon-reload -sudo systemctl enable taskito-worker -sudo systemctl start taskito-worker - -# Check logs -journalctl -u taskito-worker -f -``` - -!!! tip - Set `TimeoutStopSec` to slightly longer than your longest task timeout (default graceful shutdown is 30s). This gives in-flight tasks time to complete before systemd force-kills the process. - -## Docker - -### Dockerfile - -```dockerfile -FROM python:3.12-slim - -WORKDIR /app -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY . . - -# Store the database in a volume -VOLUME /data -ENV TASKITO_DB_PATH=/data/taskito.db - -CMD ["taskito", "worker", "--app", "myapp:queue"] -``` - -### docker-compose.yml - -```yaml -services: - worker: - build: . - volumes: - - taskito-data:/data - stop_signal: SIGINT - stop_grace_period: 35s - - dashboard: - build: . 
- command: taskito dashboard --app myapp:queue --host 0.0.0.0 - volumes: - - taskito-data:/data - ports: - - "8080:8080" - -volumes: - taskito-data: -``` - -!!! warning "Shared volumes" - The worker and dashboard must access the **same SQLite file**. In Docker, use a named volume shared between containers. Do not use bind mounts on network storage. - -### Graceful Shutdown in Containers - -taskito handles `SIGINT` for graceful shutdown. Configure your container orchestrator to send `SIGINT` (not `SIGTERM`): - -- **Docker Compose**: `stop_signal: SIGINT` -- **Kubernetes**: Use a `preStop` hook or configure `STOPSIGNAL` in the Dockerfile: - -```dockerfile -STOPSIGNAL SIGINT -``` - -For Kubernetes, set `terminationGracePeriodSeconds` to match your longest task timeout: - -```yaml -spec: - terminationGracePeriodSeconds: 60 - containers: - - name: worker - ... -``` - -## WAL Mode and Backups - -taskito uses SQLite in WAL (Write-Ahead Logging) mode for concurrent read/write access. This affects how you back up the database. - -**Do NOT** simply copy the `.db` file while the worker is running — you may get a corrupted backup if the WAL hasn't been checkpointed. - -**Safe backup methods:** - -```bash -# Option 1: Use sqlite3 .backup command (safe, online) -sqlite3 /var/lib/myapp/taskito.db ".backup /backups/taskito-$(date +%Y%m%d).db" - -# Option 2: Use the SQLite VACUUM INTO command -sqlite3 /var/lib/myapp/taskito.db "VACUUM INTO '/backups/taskito-$(date +%Y%m%d).db';" -``` - -Both methods are safe while the worker is running. 
- -## Postgres Deployment - -If you're using the [Postgres backend](postgres.md), deployment is simpler in several ways: - -- **No shared-file constraints** — workers connect over the network, no need for shared volumes or local storage -- **Multi-machine workers** — run workers on separate hosts against the same database -- **Standard backups** — use `pg_dump` instead of `sqlite3 .backup` - -### Docker Compose with Postgres - -```yaml -services: - postgres: - image: postgres:16 - environment: - POSTGRES_DB: myapp - POSTGRES_USER: taskito - POSTGRES_PASSWORD: secret - volumes: - - pgdata:/var/lib/postgresql/data - - worker: - build: . - environment: - TASKITO_BACKEND: postgres - TASKITO_DB_URL: postgresql://taskito:secret@postgres:5432/myapp - depends_on: - - postgres - stop_signal: SIGINT - stop_grace_period: 35s - -volumes: - pgdata: -``` - -### Postgres Backups - -```bash -# Dump the taskito schema -pg_dump -h localhost -U taskito -d myapp -n taskito > backup.sql - -# Restore -psql -h localhost -U taskito -d myapp < backup.sql -``` - -See the [Postgres Backend guide](postgres.md) for full configuration details. - -## Database Maintenance - -### Auto-Cleanup - -Set `result_ttl` to automatically purge old completed jobs: - -```python -queue = Queue( - db_path="/var/lib/myapp/taskito.db", - result_ttl=86400, # Purge completed/dead jobs older than 24 hours -) -``` - -### Manual Cleanup - -```python -# Purge completed jobs older than 7 days -queue.purge_completed(older_than=604800) - -# Purge dead letters older than 30 days -queue.purge_dead(older_than=2592000) -``` - -### Database Size - -SQLite databases grow as jobs accumulate. Without cleanup, expect roughly: - -- ~1 KB per job (metadata + small payloads) -- ~1-10 KB per job with large arguments or results - -With `result_ttl` set, the database stays compact. You can also periodically run `VACUUM` to reclaim space: - -```bash -sqlite3 /var/lib/myapp/taskito.db "VACUUM;" -``` - -!!! 
note - `VACUUM` rewrites the entire database and requires exclusive access. Run it during low-traffic periods or during a maintenance window. - -## Monitoring in Production - -### Dashboard - -Run the built-in dashboard alongside the worker: - -```bash -taskito dashboard --app myapp:queue --host 0.0.0.0 --port 8080 -``` - -Place it behind a reverse proxy with authentication for production use — the dashboard has no built-in auth. - -### Programmatic Stats - -Poll `queue.stats()` and export to your monitoring system: - -```python -import time - -def export_metrics(): - while True: - stats = queue.stats() - # Export to Prometheus, Datadog, StatsD, etc. - gauge("taskito.pending", stats["pending"]) - gauge("taskito.running", stats["running"]) - gauge("taskito.dead", stats["dead"]) - time.sleep(15) -``` - -### Hooks for Alerting - -```python -@queue.on_failure -def alert_on_failure(task_name, args, kwargs, error): - # Send to PagerDuty, Slack, email, etc. - notify(f"Task {task_name} failed: {error}") -``` - -### Health Check Endpoint - -If you're using FastAPI: - -```python -from fastapi import FastAPI -from taskito.contrib.fastapi import TaskitoRouter - -app = FastAPI() -app.include_router(TaskitoRouter(queue), prefix="/tasks") - -# GET /tasks/stats returns queue health -# Use this as a health check endpoint in your load balancer -``` - -## Multiple Workers - -taskito is designed as a **single-process** task queue when using SQLite. 
Running multiple worker processes against the same SQLite file is possible (WAL mode allows concurrent access), but: - -- Only one process can write at a time — this limits throughput -- SQLite lock contention increases with more writers -- There is no distributed coordination between workers - -For most single-machine workloads, one worker process with multiple threads (the default) is sufficient: - -```python -queue = Queue( - db_path="myapp.db", - workers=8, # 8 OS threads in the worker pool -) -``` - -If you need distributed workers across multiple machines, use the [Postgres backend](postgres.md) which removes the single-writer constraint and supports multi-machine deployments. Alternatively, consider [Celery or Dramatiq](../../comparison.md). - -## SQLite Scaling Limits - -taskito uses SQLite as its storage backend. Understanding its limitations helps you plan for production: - -**Single-writer constraint.** SQLite allows only one write transaction at a time. WAL mode lets reads proceed concurrently with writes, but all writes are serialized. This is the primary throughput ceiling. - -**Expected throughput.** On modern hardware with an SSD, expect: - -- **1,000–5,000 jobs/second** for enqueue + dequeue cycles (small payloads) -- Throughput decreases with larger payloads, complex queries, or spinning disks -- The connection pool size (default: 8) controls read concurrency — tune it based on your read/write ratio - -**When to upgrade to Postgres:** - -- You need multi-machine distributed workers -- You consistently exceed ~5,000 jobs/second sustained throughput -- Multiple processes contend heavily for writes (high lock wait times) -- You need sub-millisecond dequeue latency under high load - -taskito's [Postgres backend](postgres.md) addresses all of these limitations while keeping the same API. See the [Postgres Backend guide](postgres.md) for setup instructions. 
- -**Connection pool tuning.** The default pool size of 8 connections works well for most workloads. If you're running many concurrent readers (e.g., a dashboard alongside workers), you can increase it: - -```python -# In Rust: SqliteStorage::with_pool_size("path.db", 16) -# Pool size is set at the Rust layer; the Python API uses the default (8) -``` - -Increasing the pool beyond ~16 typically doesn't help, since SQLite write serialization is the bottleneck. - -## Sizing Your Deployment - -| Throughput | Backend | Workers | Pool | Notes | -|-----------|---------|---------|------|-------| -| < 100 jobs/s | SQLite | 4 | thread | Default config works fine | -| 100–1K jobs/s | SQLite | 8–16 | thread or prefork | Increase `workers`, monitor WAL size | -| 1K–5K jobs/s | SQLite | 16 | prefork | Prefork for CPU-bound; SQLite handles this well with WAL | -| 5K–20K jobs/s | Postgres | 16–32 | prefork | Switch to Postgres for concurrent writers | -| 20K–50K jobs/s | Postgres | 32+ | prefork | Multiple worker processes, tune `pool_size` | -| > 50K jobs/s | — | — | — | Consider Celery + RabbitMQ for this scale | - -!!! note - These are rough guidelines for noop tasks. Real throughput depends on task duration, payload size, and I/O patterns. Run the [benchmark](../../examples/benchmark.md) on your hardware to get accurate numbers. 
- -## Checklist - -- [ ] Use an absolute path for `db_path` -- [ ] Place SQLite on local (not network) storage -- [ ] Set `result_ttl` to prevent unbounded database growth -- [ ] Set `timeout` on tasks to recover from worker crashes -- [ ] Configure `SIGINT` as the stop signal in your process manager -- [ ] Set up failure hooks or monitoring for alerting -- [ ] Back up the database using `sqlite3 .backup` (not file copy), or `pg_dump` for Postgres -- [ ] Place the dashboard behind a reverse proxy with authentication diff --git a/docs/guide/operations/index.md b/docs/guide/operations/index.md deleted file mode 100644 index d580852..0000000 --- a/docs/guide/operations/index.md +++ /dev/null @@ -1,13 +0,0 @@ -# Operations - -Run taskito reliably in production. - -| Guide | Description | -|-------|-------------| -| [Testing](testing.md) | Test mode, fixtures, mocking resources, and workflow testing | -| [Job Management](job-management.md) | Cancel, pause, archive, revoke, replay, and clean up jobs | -| [Troubleshooting](troubleshooting.md) | Diagnose stuck jobs, lock contention, and worker issues | -| [Deployment](deployment.md) | systemd, Docker, WAL mode, Postgres, and production checklists | -| [KEDA Autoscaling](keda.md) | Kubernetes event-driven autoscaling for workers | -| [Postgres Backend](postgres.md) | Set up and run taskito with PostgreSQL | -| [Migrating from Celery](migration.md) | Side-by-side comparison and step-by-step migration guide | diff --git a/docs/guide/operations/job-management.md b/docs/guide/operations/job-management.md deleted file mode 100644 index 834591e..0000000 --- a/docs/guide/operations/job-management.md +++ /dev/null @@ -1,189 +0,0 @@ -# Job Management - -Manage running jobs — cancel, pause queues, archive, revoke, replay, and clean up. 
- -## Job Cancellation - -Cancel a pending job before it starts: - -```python -job = send_email.delay("user@example.com", "Hello", "World") -cancelled = queue.cancel_job(job.id) # True if was pending -``` - -- Returns `True` if the job was pending and is now cancelled -- Returns `False` if the job was already running, completed, or in another non-pending state -- Cancelled jobs cannot be un-cancelled - -## Result TTL & Auto-Cleanup - -### Manual Cleanup - -```python -# Purge completed jobs older than 1 hour -deleted = queue.purge_completed(older_than=3600) - -# Purge dead letters older than 24 hours -deleted = queue.purge_dead(older_than=86400) -``` - -### Automatic Cleanup - -Set `result_ttl` on the Queue to automatically purge old jobs while the worker runs: - -```python -queue = Queue( - db_path="myapp.db", - result_ttl=3600, # Auto-purge completed/dead jobs older than 1 hour -) -``` - -The scheduler checks every ~60 seconds and purges: - -- Completed jobs older than `result_ttl` -- Dead letter entries older than `result_ttl` -- Error history records older than `result_ttl` - -Set to `None` (default) to disable auto-cleanup. - -### Cascade Cleanup - -When jobs are purged — either manually via `purge_completed()` or automatically via `result_ttl` — their related child records are also deleted: - -- Error history (`job_errors`) -- Task logs (`task_logs`) -- Task metrics (`task_metrics`) -- Job dependencies (`job_dependencies`) -- Replay history (`replay_history`) - -This prevents orphaned records from accumulating when parent jobs are removed. 
- -```python -# Manual purge — child records are cleaned up automatically -deleted = queue.purge_completed(older_than=3600) -print(f"Purged {deleted} jobs and their related records") - -# With per-job TTL — cascade cleanup still applies -job = resize_image.apply_async( - args=("photo.jpg",), - result_ttl=600, # This job's results expire after 10 minutes -) -# When this job is purged (after 10 min), its errors, logs, -# metrics, dependencies, and replay history are also removed. -``` - -!!! note - Dead letter entries are **not** cascade-deleted — they have their own lifecycle managed by `purge_dead()`. Timestamp-based cleanup (`result_ttl`) of error history, logs, and metrics also continues to run independently, catching old records regardless of whether the parent job still exists. - -## Queue Pause/Resume - -Temporarily pause job processing on a queue without stopping the worker: - -```python -# Pause the "emails" queue -queue.pause("emails") - -# Check which queues are paused -print(queue.paused_queues()) # ["emails"] - -# Resume processing -queue.resume("emails") -``` - -Paused queues still accept new jobs — they just won't be dequeued until resumed. - -### Maintenance Window Example - -```python -# Before maintenance: pause all queues -for q in ["default", "emails", "reports"]: - queue.pause(q) -print(f"Paused: {queue.paused_queues()}") - -# ... perform maintenance ... 
- -# After maintenance: resume all queues -for q in ["default", "emails", "reports"]: - queue.resume(q) -``` - -## Job Archival - -Move old completed jobs to an archive table to keep the main jobs table lean: - -```python -# Archive completed jobs older than 24 hours -archived_count = queue.archive(older_than=86400) -print(f"Archived {archived_count} jobs") - -# Browse archived jobs -archived = queue.list_archived(limit=50, offset=0) -for job in archived: - print(f"{job.id}: {job.task_name} ({job.status})") -``` - -Archived jobs are no longer returned by `queue.stats()` or `queue.list_jobs()`, but remain queryable via `queue.list_archived()`. - -### Scheduled Archival - -```python -@queue.periodic(cron="0 0 2 * * *") # Daily at 2 AM -def nightly_archival(): - archived = queue.archive(older_than=7 * 86400) # Archive jobs older than 7 days - current_job.log(f"Archived {archived} jobs") -``` - -## Task Revocation - -Cancel all pending jobs for a specific task: - -```python -# Revoke all pending "send_newsletter" jobs -cancelled = queue.revoke_task("myapp.tasks.send_newsletter") -print(f"Revoked {cancelled} jobs") -``` - -## Queue Purge - -Remove all pending jobs from a specific queue: - -```python -purged = queue.purge("emails") -print(f"Purged {purged} jobs from the emails queue") -``` - -## Job Replay - -Replay a completed or dead job with the same arguments: - -```python -new_job = queue.replay(job_id) -print(f"Replayed as {new_job.id}") - -# Check replay history -history = queue.replay_history(job_id) -``` - -### Retry from Dead Letter with Replay - -```python -# List dead letters and replay them -dead = queue.dead_letters() -for entry in dead: - print(f"Replaying dead job {entry['original_job_id']}: {entry['task_name']}") - new_id = queue.retry_dead(entry["id"]) - print(f" -> New job: {new_id}") -``` - -## SQLite Configuration - -taskito configures SQLite for optimal performance: - -| Pragma | Value | Purpose | -|---|---|---| -| `journal_mode` | WAL | Concurrent 
reads during writes | -| `busy_timeout` | 5000ms | Wait instead of failing on lock contention | -| `synchronous` | NORMAL | Balance between safety and speed | -| `journal_size_limit` | 64MB | Prevent unbounded WAL growth | - -The connection pool uses up to 8 connections via `r2d2`. diff --git a/docs/guide/operations/keda.md b/docs/guide/operations/keda.md deleted file mode 100644 index 180549a..0000000 --- a/docs/guide/operations/keda.md +++ /dev/null @@ -1,199 +0,0 @@ -# KEDA Autoscaling - -[KEDA](https://keda.sh) (Kubernetes Event-driven Autoscaling) can scale your taskito worker deployment up and down based on queue depth. taskito ships a dedicated scaler server that KEDA queries directly. - -## Scaler Server - -Start the scaler alongside your worker: - -```bash -taskito scaler --app myapp:queue --port 9091 -``` - -| Flag | Default | Description | -|---|---|---| -| `--app` | — | Python path to the `Queue` instance | -| `--host` | `0.0.0.0` | Bind address | -| `--port` | `9091` | Bind port | -| `--target-queue-depth` | `10` | Scaling target hint returned to KEDA | - -The scaler exposes three endpoints: - -| Endpoint | Description | -|---|---| -| `GET /api/scaler` | Returns current queue depth and scaling target for KEDA | -| `GET /metrics` | Prometheus text format (requires `prometheus-client`) | -| `GET /health` | Liveness check — always returns `{"status": "ok"}` | - -### `/api/scaler` Response - -```json -{ - "metricValue": 42, - "targetValue": 10, - "queueName": "default" -} -``` - -Filter to a specific queue: - -``` -GET /api/scaler?queue=emails -``` - -### Programmatic Usage - -```python -from taskito.scaler import serve_scaler - -serve_scaler(queue, host="0.0.0.0", port=9091, target_queue_depth=10) -``` - -## Kubernetes Deployment - -Deploy the scaler as a separate `Deployment` and expose it as a `ClusterIP` service: - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: taskito-scaler -spec: - replicas: 1 - selector: - matchLabels: - app: 
taskito-scaler - template: - metadata: - labels: - app: taskito-scaler - spec: - containers: - - name: scaler - image: your-image:latest - command: ["taskito", "scaler", "--app", "myapp:queue", "--port", "9091"] - ports: - - containerPort: 9091 ---- -apiVersion: v1 -kind: Service -metadata: - name: taskito-scaler -spec: - selector: - app: taskito-scaler - ports: - - port: 9091 - targetPort: 9091 -``` - -## ScaledObject (HTTP trigger) - -Scale a long-running worker `Deployment` based on pending job count: - -```yaml -apiVersion: keda.sh/v1alpha1 -kind: ScaledObject -metadata: - name: taskito-worker - namespace: default -spec: - scaleTargetRef: - name: taskito-worker # your worker Deployment name - pollingInterval: 15 # seconds between KEDA polls - cooldownPeriod: 60 # seconds before scaling to zero - minReplicaCount: 0 # scale to zero when idle - maxReplicaCount: 10 - triggers: - - type: metrics-api - metadata: - url: "http://taskito-scaler:9091/api/scaler" - valueLocation: "metricValue" - targetValue: "10" -``` - -Filter to a specific queue name: - -```yaml - url: "http://taskito-scaler:9091/api/scaler?queue=emails" -``` - -## ScaledJob (Ephemeral Batch Workers) - -For batch/ETL workloads, use `ScaledJob` to create short-lived Kubernetes Jobs — one pod per N pending tasks: - -```yaml -apiVersion: keda.sh/v1alpha1 -kind: ScaledJob -metadata: - name: taskito-batch-worker - namespace: default -spec: - jobTargetRef: - template: - spec: - containers: - - name: taskito-worker - image: your-image:latest - command: ["taskito", "worker", "--app", "myapp:queue"] - restartPolicy: Never - pollingInterval: 15 - successfulJobsHistoryLimit: 5 - failedJobsHistoryLimit: 5 - maxReplicaCount: 20 - scalingStrategy: - strategy: default # or "accurate" for 1:1 job-to-pod mapping - triggers: - - type: metrics-api - metadata: - url: "http://taskito-scaler:9091/api/scaler" - valueLocation: "metricValue" - targetValue: "5" # one pod per 5 pending jobs -``` - -## Scaling with Prometheus - 
-If you already have Prometheus scraping your workers, you can skip the scaler server and use the Prometheus KEDA trigger directly: - -```yaml -apiVersion: keda.sh/v1alpha1 -kind: ScaledObject -metadata: - name: taskito-worker-prometheus - namespace: default -spec: - scaleTargetRef: - name: taskito-worker - pollingInterval: 15 - cooldownPeriod: 60 - minReplicaCount: 0 - maxReplicaCount: 10 - triggers: - - type: prometheus - metadata: - serverAddress: "http://prometheus:9090" - metricName: taskito_queue_depth - query: sum(taskito_queue_depth{queue="default"}) - threshold: "10" - - type: prometheus - metadata: - serverAddress: "http://prometheus:9090" - metricName: taskito_worker_utilization - query: taskito_worker_utilization{queue="default"} - threshold: "0.8" -``` - -See the [Prometheus integration](../../integrations/prometheus.md) for setting up the metrics collector. - -## Deploy Templates - -Ready-to-use YAML templates are included in the repository under `deploy/keda/`: - -| File | Purpose | -|---|---| -| `scaled-object.yaml` | `ScaledObject` using the HTTP scaler endpoint | -| `scaled-object-prometheus.yaml` | `ScaledObject` using Prometheus metrics | -| `scaled-job.yaml` | `ScaledJob` for ephemeral batch workers | - -!!! tip - When using SQLite, all worker replicas must share the same database volume. For multi-replica Kubernetes deployments, use the [Postgres backend](postgres.md) — workers connect over the network and there's no shared-file constraint. diff --git a/docs/guide/operations/migration.md b/docs/guide/operations/migration.md deleted file mode 100644 index f8f9d6e..0000000 --- a/docs/guide/operations/migration.md +++ /dev/null @@ -1,300 +0,0 @@ -# Migrating from Celery - -This guide maps Celery concepts to their taskito equivalents. If you're coming from Celery, you'll find that most concepts translate directly — with less infrastructure and simpler configuration. 
- -## Concept Mapping - -| Celery | taskito | Notes | -|---|---|---| -| `Celery()` app | `Queue()` | No broker URL needed | -| `@app.task` | `@queue.task()` | Same decorator pattern | -| `.apply_async()` | `.apply_async()` | Same name, similar API | -| `.delay()` | `.delay()` | Identical | -| `AsyncResult` | `JobResult` | `.result()` instead of `.get()` | -| Canvas (`chain`, `group`, `chord`) | `chain`, `group`, `chord` | Same names, same concepts | -| `celery beat` | `@queue.periodic()` | Built-in, no separate process | -| Result backend (Redis/DB) | Built-in (SQLite) | No configuration needed | -| Broker (Redis/RabbitMQ) | Not needed | SQLite handles everything | -| `celery worker` | `taskito worker` | Similar CLI | -| `celery inspect` | `taskito info` | Similar CLI | - -## Side-by-Side Examples - -### App Setup - -=== "Celery" - - ```python - from celery import Celery - - app = Celery( - "myapp", - broker="redis://localhost:6379/0", - backend="redis://localhost:6379/1", - ) - app.conf.task_serializer = "json" - app.conf.result_serializer = "json" - ``` - -=== "taskito" - - ```python - from taskito import Queue - - queue = Queue(db_path="myapp.db") - # That's it. No broker, no backend, no serializer config. - ``` - -### Task Definition - -=== "Celery" - - ```python - @app.task(bind=True, max_retries=3) - def send_email(self, to, subject, body): - try: - do_send(to, subject, body) - except SMTPError as exc: - raise self.retry(exc=exc, countdown=60) - ``` - -=== "taskito" - - ```python - @queue.task(max_retries=3, retry_backoff=2.0, retry_on=[SMTPError]) - def send_email(to, subject, body): - do_send(to, subject, body) - # Retries happen automatically on matching exceptions. - # Use retry_on/dont_retry_on for selective retries. - ``` - -!!! note "Automatic retries" - In Celery, you must explicitly catch exceptions and call `self.retry()`. In taskito, any unhandled exception triggers a retry automatically (up to `max_retries`). 
- -### Enqueueing Tasks - -=== "Celery" - - ```python - # Simple - send_email.delay("user@example.com", "Hello", "World") - - # With options - send_email.apply_async( - args=("user@example.com", "Hello", "World"), - countdown=60, # delay in seconds - queue="emails", - priority=5, - ) - ``` - -=== "taskito" - - ```python - # Simple - send_email.delay("user@example.com", "Hello", "World") - - # With options - send_email.apply_async( - args=("user@example.com", "Hello", "World"), - delay=60, # delay in seconds - queue="emails", - priority=5, - ) - ``` - -The only change: `countdown` becomes `delay`. - -### Getting Results - -=== "Celery" - - ```python - result = send_email.delay("user@example.com", "Hi", "Body") - - # Block for result - value = result.get(timeout=30) - - # Check status - result.status # "PENDING", "SUCCESS", "FAILURE" - ``` - -=== "taskito" - - ```python - job = send_email.delay("user@example.com", "Hi", "Body") - - # Block for result - value = job.result(timeout=30) - - # Check status - job.status # "pending", "running", "complete", "failed", "dead" - ``` - -Key differences: -- `.get()` becomes `.result()` -- Status values are lowercase -- `"SUCCESS"` becomes `"complete"` - -### Workflows (Canvas) - -=== "Celery" - - ```python - from celery import chain, group, chord - - # Chain - chain(fetch.s(url), parse.s(), store.s()).apply_async() - - # Group - group(process.s(item) for item in items).apply_async() - - # Chord - chord( - [download.s(url) for url in urls], - merge.s() - ).apply_async() - ``` - -=== "taskito" - - ```python - from taskito import chain, group, chord - - # Chain - chain(fetch.s(url), parse.s(), store.s()).apply() - - # Group - group(process.s(item) for item in items).apply() - - # Chord - chord( - [download.s(url) for url in urls], - merge.s() - ).apply() - ``` - -Almost identical. The only change: `.apply_async()` becomes `.apply()`. 
- -### Periodic Tasks - -=== "Celery" - - ```python - # celery.py - app.conf.beat_schedule = { - "cleanup-every-hour": { - "task": "myapp.cleanup", - "schedule": crontab(minute=0), - }, - } - - # Requires a separate process: - # celery -A myapp beat - ``` - -=== "taskito" - - ```python - @queue.periodic(cron="0 0 * * * *") - def cleanup(): - ... - - # No separate process — the worker handles scheduling. - # taskito worker --app myapp:queue - ``` - -!!! tip - taskito uses 6-field cron expressions (with seconds). Celery's `crontab()` maps to the last 5 fields, with `0` prepended for seconds. - - | Celery `crontab()` | taskito cron | - |---|---| - | `crontab()` (every minute) | `0 * * * * *` | - | `crontab(minute=0)` (every hour) | `0 0 * * * *` | - | `crontab(minute=0, hour=0)` (daily) | `0 0 0 * * *` | - | `crontab(minute=30, hour=9, day_of_week='1-5')` | `0 30 9 * * 1-5` | - -### Rate Limiting - -=== "Celery" - - ```python - @app.task(rate_limit="100/m") - def call_api(endpoint): - ... - ``` - -=== "taskito" - - ```python - @queue.task(rate_limit="100/m") - def call_api(endpoint): - ... - ``` - -Identical syntax. - -### Worker - -=== "Celery" - - ```bash - celery -A myapp worker --loglevel=info -Q emails,default - ``` - -=== "taskito" - - ```bash - taskito worker --app myapp:queue --queues emails,default - ``` - -### Testing - -=== "Celery" - - ```python - # Celery has CELERY_ALWAYS_EAGER mode - app.conf.task_always_eager = True - app.conf.task_eager_propagates = True - - result = add.delay(2, 3) - assert result.get() == 5 - ``` - -=== "taskito" - - ```python - with queue.test_mode() as results: - add.delay(2, 3) - assert results[0].return_value == 5 - ``` - -taskito's test mode uses a context manager instead of a global setting, so it's safe to use in parallel test runs. 
-
-## What taskito Doesn't Have
-
-Some Celery features have no direct taskito equivalent, or work differently:
-
-| Celery Feature | Status in taskito |
-|---|---|
-| Distributed workers (multi-server) | Supported via the optional [Postgres backend](postgres.md); the default SQLite backend is single-machine |
-| Message routing (exchanges, topics) | Use named queues instead |
-| `celery multi` (process management) | Use systemd, supervisor, or Docker |
-| Custom serializers (JSON, msgpack) | `JsonSerializer`, `CloudpickleSerializer` (default), or custom `Serializer` protocol |
-| Task cancellation (mid-execution) | Cancel pending or running jobs (`cancel_running_job()` + `check_cancelled()`) |
-| ETA (absolute datetime scheduling) | Use `delay` (relative seconds) |
-| Custom result backends | Built-in SQLite (default) or Postgres only |
-| `bind=True` (self argument) | Use `current_job` context instead |
-
-## Migration Checklist
-
-- [ ] Replace `Celery()` with `Queue()`
-- [ ] Change `@app.task` to `@queue.task()`
-- [ ] Remove `self.retry()` calls — retries are automatic
-- [ ] Change `.get()` to `.result()` on job results
-- [ ] Change `countdown=` to `delay=` in `.apply_async()`
-- [ ] Replace celery beat schedule with `@queue.periodic()`
-- [ ] Update cron expressions to 6-field format (prepend seconds)
-- [ ] Remove broker and result backend configuration
-- [ ] Change `celery worker` to `taskito worker` in deployment scripts
-- [ ] Replace `task_always_eager` with `queue.test_mode()` in tests
diff --git a/docs/guide/operations/postgres.md b/docs/guide/operations/postgres.md
deleted file mode 100644
index 0a5e26d..0000000
--- a/docs/guide/operations/postgres.md
+++ /dev/null
@@ -1,226 +0,0 @@
-# Postgres Backend
-
-taskito supports PostgreSQL as an alternative storage backend for production deployments that need multi-machine workers or higher write throughput.
-
-## When to Use Postgres
-
-Choose Postgres over the default SQLite backend when you need:
-
-- **Multi-machine workers** — run workers on separate hosts against a shared database
-- **Higher write throughput** — Postgres handles concurrent writes without SQLite's single-writer constraint
-- **Existing Postgres infrastructure** — reuse your existing database server instead of managing SQLite files
-
-For single-machine workloads, SQLite remains the simpler choice — no external dependencies required.
-
-## Installation
-
-```bash
-pip install "taskito[postgres]"
-```
-
-## Configuration
-
-```python
-from taskito import Queue
-
-queue = Queue(
-    backend="postgres",
-    db_url="postgresql://user:password@localhost:5432/myapp",
-    schema="taskito",  # optional, default: "taskito"
-)
-```
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `backend` | `str` | `"sqlite"` | Set to `"postgres"` or `"postgresql"` |
-| `db_url` | `str` | `None` | PostgreSQL connection URL (required for Postgres) |
-| `schema` | `str` | `"taskito"` | PostgreSQL schema for all tables |
-| `workers` | `int` | `0` (auto) | Number of worker threads |
-
-All other `Queue` parameters (`default_retry`, `default_timeout`, `default_priority`, `result_ttl`) work identically to the SQLite backend.
- -## Django Integration - -Configure the Postgres backend via Django settings: - -```python -# settings.py -TASKITO_BACKEND = "postgres" -TASKITO_DB_URL = "postgresql://user:password@localhost:5432/myapp" -TASKITO_SCHEMA = "taskito" -``` - -Then use the Django integration as normal: - -```python -from taskito.contrib.django.settings import get_queue - -queue = get_queue() -``` - -All Django settings: - -| Setting | Default | Description | -|---------|---------|-------------| -| `TASKITO_BACKEND` | `"sqlite"` | Storage backend (`"sqlite"` or `"postgres"`) | -| `TASKITO_DB_URL` | `None` | PostgreSQL connection URL | -| `TASKITO_SCHEMA` | `"taskito"` | PostgreSQL schema name | -| `TASKITO_DB_PATH` | `".taskito/taskito.db"` | SQLite database path (ignored with Postgres) | -| `TASKITO_WORKERS` | `0` | Worker thread count (0 = auto-detect) | -| `TASKITO_DEFAULT_RETRY` | `3` | Default max retries | -| `TASKITO_DEFAULT_TIMEOUT` | `300` | Default task timeout in seconds | -| `TASKITO_DEFAULT_PRIORITY` | `0` | Default task priority | -| `TASKITO_RESULT_TTL` | `None` | Result TTL in seconds | - -## Schema Isolation - -taskito creates all tables inside a dedicated PostgreSQL schema (default: `taskito`). The schema is created automatically if it doesn't exist. - -```python -# Use a custom schema -queue = Queue(backend="postgres", db_url="postgresql://...", schema="myapp_tasks") -``` - -Schema names must contain only alphanumeric characters and underscores. Invalid names raise a `ConfigError` at startup. - -This lets you run multiple independent taskito instances in the same database by using different schemas, or keep taskito tables separate from your application tables. - -## Connection Pooling - -The Postgres backend uses Diesel's `r2d2` connection pool with a default size of **10 connections**. Each connection has the `search_path` set to the configured schema on acquisition. - -The pool size is configured at the Rust layer. 
For most workloads, the default of 10 connections is sufficient. - -## Migrations - -Migrations run automatically on first connection. taskito creates the following **11 tables** inside the configured schema: - -| Table | Purpose | -|-------|---------| -| `jobs` | Core job storage | -| `dead_letter` | Dead letter queue | -| `rate_limits` | Token bucket rate limiting state | -| `periodic_tasks` | Cron-scheduled task definitions | -| `job_errors` | Per-attempt error tracking | -| `job_dependencies` | Task dependency edges | -| `task_metrics` | Execution time and memory metrics | -| `replay_history` | Job replay audit trail | -| `task_logs` | Structured task log entries | -| `circuit_breakers` | Circuit breaker state | -| `workers` | Worker heartbeat tracking | - -All tables use PostgreSQL-native types (`TEXT`, `BYTEA`, `BIGINT`, `BOOLEAN`, `DOUBLE PRECISION`) rather than SQLite-compatible types. - -## Differences from SQLite - -| Aspect | SQLite | Postgres | -|--------|--------|----------| -| Connection model | Embedded, file-based | Client/server, networked | -| Write concurrency | Single writer (WAL mode) | Multiple concurrent writers | -| Distribution | Single machine only | Multi-machine workers | -| Setup | Zero config, bundled | Requires Postgres server | -| Connection pool default | 8 connections | 10 connections | -| Schema isolation | N/A (file per database) | Custom PostgreSQL schema | -| Tables | 6 tables | 11 tables (additional: `job_dependencies`, `task_metrics`, `replay_history`, `task_logs`, `circuit_breakers`) | -| Backup | `sqlite3 .backup` | `pg_dump` | - -## Deployment - -### Docker Compose - -```yaml -services: - postgres: - image: postgres:16 - environment: - POSTGRES_DB: myapp - POSTGRES_USER: taskito - POSTGRES_PASSWORD: secret - volumes: - - pgdata:/var/lib/postgresql/data - ports: - - "5432:5432" - - worker: - build: . 
- environment: - TASKITO_BACKEND: postgres - TASKITO_DB_URL: postgresql://taskito:secret@postgres:5432/myapp - depends_on: - - postgres - stop_signal: SIGINT - stop_grace_period: 35s - - dashboard: - build: . - command: taskito dashboard --app myapp:queue --host 0.0.0.0 - environment: - TASKITO_BACKEND: postgres - TASKITO_DB_URL: postgresql://taskito:secret@postgres:5432/myapp - depends_on: - - postgres - ports: - - "8080:8080" - -volumes: - pgdata: -``` - -With Postgres, there are no shared-file constraints — workers and dashboard connect over the network. You can run multiple worker containers across different hosts. - -### systemd - -```ini -[Unit] -Description=taskito worker -After=network.target postgresql.service - -[Service] -Type=simple -User=myapp -Group=myapp -WorkingDirectory=/opt/myapp -ExecStart=/opt/myapp/.venv/bin/taskito worker --app myapp:queue -Restart=always -RestartSec=5 -KillSignal=SIGINT -TimeoutStopSec=35 - -Environment=PYTHONPATH=/opt/myapp -Environment=TASKITO_BACKEND=postgres -Environment=TASKITO_DB_URL=postgresql://taskito:secret@db.internal:5432/myapp - -[Install] -WantedBy=multi-user.target -``` - -### Multi-Machine Workers - -With Postgres, you can run workers on multiple machines. Each worker connects to the same database and coordinates through PostgreSQL's row-level locking: - -```bash -# Machine 1 -taskito worker --app myapp:queue - -# Machine 2 -taskito worker --app myapp:queue - -# Machine 3 -taskito worker --app myapp:queue -``` - -All workers share the same job queue and dequeue work atomically. - -## Backups - -Use standard PostgreSQL backup tools instead of SQLite-specific commands: - -```bash -# Dump the taskito schema -pg_dump -h localhost -U taskito -d myapp -n taskito > backup.sql - -# Restore -psql -h localhost -U taskito -d myapp < backup.sql -``` - -For continuous backups, use PostgreSQL's built-in WAL archiving or a tool like [pgBackRest](https://pgbackrest.org/). 
diff --git a/docs/guide/operations/testing.md b/docs/guide/operations/testing.md deleted file mode 100644 index a6f4ffb..0000000 --- a/docs/guide/operations/testing.md +++ /dev/null @@ -1,414 +0,0 @@ -# Testing - -taskito includes a built-in test mode that runs tasks **synchronously** in the calling thread — no worker, no Rust scheduler, no SQLite. This makes tests fast, deterministic, and easy to write. - -## Quick Example - -```python -from taskito import Queue - -queue = Queue() - -@queue.task() -def add(a: int, b: int) -> int: - return a + b - -def test_add(): - with queue.test_mode() as results: - add.delay(2, 3) - - assert len(results) == 1 - assert results[0].return_value == 5 - assert results[0].succeeded -``` - -## How It Works - -When you enter `queue.test_mode()`, taskito patches the `enqueue()` method so that every `.delay()` or `.apply_async()` call: - -1. Looks up the task function in the registry -2. Calls it immediately in the current thread -3. Captures the return value (or exception) in a `TestResult` -4. Appends the result to the `TestResults` list - -No database is created. No worker threads are spawned. Tasks execute eagerly and synchronously. - -## `queue.test_mode()` - -```python -with queue.test_mode(propagate_errors=False, resources=None) as results: - # tasks run synchronously here - ... -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `propagate_errors` | `bool` | `False` | If `True`, task exceptions are re-raised immediately instead of being captured in `TestResult.error` | -| `resources` | `dict[str, Any] | None` | `None` | Map of resource name → mock instance or `MockResource` for injection. See [Resource System](../../resources/index.md#testing-with-resources). | - -The context manager yields a `TestResults` list that accumulates results as tasks execute. 
- -## `TestResult` - -Each executed task produces a `TestResult`: - -```python -with queue.test_mode() as results: - add.delay(2, 3) - - r = results[0] - r.job_id # "test-000001" - r.task_name # "mymodule.add" - r.args # (2, 3) - r.kwargs # {} - r.return_value # 5 - r.error # None - r.traceback # None - r.succeeded # True - r.failed # False -``` - -| Attribute | Type | Description | -|---|---|---| -| `job_id` | `str` | Synthetic ID like `"test-000001"` | -| `task_name` | `str` | Fully qualified task name | -| `args` | `tuple` | Positional arguments passed to the task | -| `kwargs` | `dict` | Keyword arguments passed to the task | -| `return_value` | `Any` | Return value on success, `None` on failure | -| `error` | `Exception | None` | The exception if the task failed | -| `traceback` | `str | None` | Formatted traceback if the task failed | -| `succeeded` | `bool` | `True` if no error | -| `failed` | `bool` | `True` if an error occurred | - -## `TestResults` - -`TestResults` is a list of `TestResult` with convenience methods: - -```python -with queue.test_mode() as results: - add.delay(2, 3) - failing_task.delay() - add.delay(10, 20) - - # Filter by outcome - results.succeeded # TestResults with 2 items - results.failed # TestResults with 1 item - - # Filter by task name - results.filter(task_name="mymodule.add") # 2 items - - # Combine filters - results.filter(task_name="mymodule.add", succeeded=True) # 2 items -``` - -### `.filter()` - -```python -results.filter(task_name=None, succeeded=None) -> TestResults -``` - -| Parameter | Type | Description | -|---|---|---| -| `task_name` | `str | None` | Filter by exact task name | -| `succeeded` | `bool | None` | `True` for successes, `False` for failures | - -## Testing Failures - -By default, task exceptions are captured — not raised: - -```python -@queue.task() -def risky(): - raise ValueError("something broke") - -def test_failure_captured(): - with queue.test_mode() as results: - risky.delay() - - assert 
len(results) == 1 - assert results[0].failed - assert isinstance(results[0].error, ValueError) - assert "something broke" in str(results[0].error) - assert results[0].traceback is not None -``` - -### Propagating Errors - -Use `propagate_errors=True` when you want exceptions to bubble up: - -```python -def test_failure_propagated(): - with queue.test_mode(propagate_errors=True) as results: - with pytest.raises(ValueError, match="something broke"): - risky.delay() -``` - -## Testing Workflows - -Chains, groups, and chords work in test mode because they call `enqueue()` internally, which is intercepted by the test mode patch. - -### Chains - -```python -from taskito import chain - -@queue.task() -def double(n: int) -> int: - return n * 2 - -@queue.task() -def add_ten(n: int) -> int: - return n + 10 - -def test_chain(): - with queue.test_mode() as results: - chain(double.s(5), add_ten.s()).apply() - - assert len(results) == 2 - assert results[0].return_value == 10 # double(5) - assert results[1].return_value == 20 # add_ten(10) -``` - -### Groups - -```python -from taskito import group - -def test_group(): - with queue.test_mode() as results: - group(double.s(1), double.s(2), double.s(3)).apply() - - assert len(results) == 3 - values = [r.return_value for r in results] - assert values == [2, 4, 6] -``` - -## Job Context in Tests - -`current_job` works inside test mode. 
The context is set up before each task runs: - -```python -from taskito import current_job - -@queue.task() -def context_aware(): - return { - "job_id": current_job.id, - "task_name": current_job.task_name, - "retry_count": current_job.retry_count, - "queue_name": current_job.queue_name, - } - -def test_context(): - with queue.test_mode() as results: - context_aware.delay() - - ctx = results[0].return_value - assert ctx["job_id"].startswith("test-") - assert ctx["retry_count"] == 0 - assert ctx["queue_name"] == "default" -``` - -## Pytest Integration - -### Fixture Pattern - -Create a reusable fixture for test mode: - -```python -# conftest.py -import pytest -from myapp import queue - -@pytest.fixture -def task_results(): - with queue.test_mode() as results: - yield results - -# test_tasks.py -def test_add(task_results): - add.delay(2, 3) - assert task_results[0].return_value == 5 - -def test_email(task_results): - send_email.delay("user@example.com", "Hello", "World") - assert task_results[0].succeeded -``` - -### Fixture with Error Propagation - -```python -@pytest.fixture -def strict_tasks(): - with queue.test_mode(propagate_errors=True) as results: - yield results -``` - -### Testing Async Code - -Test mode works with async test functions — the tasks still execute synchronously: - -```python -import pytest - -@pytest.mark.asyncio -async def test_async_enqueue(task_results): - add.delay(1, 2) - assert task_results[0].return_value == 3 -``` - -## Testing with Worker Resources - -If your tasks use [worker resources](../../resources/index.md) (injected via `inject=` or `Inject["name"]`), pass mock instances through `resources=`: - -```python -from unittest.mock import MagicMock - -@queue.worker_resource("db") -def create_db(): - return real_sessionmaker - -@queue.task(inject=["db"]) -def create_user(name: str, db): - session = db() - session.add(User(name=name)) - session.commit() - -def test_create_user(): - mock_db = MagicMock() - - with 
queue.test_mode(resources={"db": mock_db}) as results: - create_user.delay("Alice") - - assert results[0].succeeded - mock_db.return_value.add.assert_called_once() -``` - -### `MockResource` - -`MockResource` adds call tracking to a mock value: - -```python -from taskito import MockResource - -spy = MockResource("db", wraps=real_db, track_calls=True) - -with queue.test_mode(resources={"db": spy}) as results: - create_user.delay("Alice") - -assert spy.call_count == 1 -assert results[0].succeeded -``` - -| Parameter | Type | Description | -|---|---|---| -| `name` | `str` | Resource name (informational). | -| `return_value` | `Any` | Value returned when the resource is accessed. | -| `wraps` | `Any` | Wrap a real object — returned as-is when accessed. | -| `track_calls` | `bool` | Increment `call_count` each access. | - -#### `return_value` vs `wraps` - -Use `return_value` when you want a simple stub: - -```python -mock_cache = MockResource("cache", return_value={"key": "value"}) -``` - -Use `wraps` when you need the real object but want call tracking: - -```python -real_db = create_test_database() -spy_db = MockResource("db", wraps=real_db, track_calls=True) -``` - -#### Multiple resources - -Pass multiple resources to `test_mode`: - -```python -with queue.test_mode(resources={ - "db": MockResource("db", return_value=mock_db), - "cache": MockResource("cache", return_value={}), - "mailer": MockResource("mailer", return_value=mock_smtp), -}) as results: - process_order.delay(order_id=123) -``` - -#### Testing with `inject` - -Tasks that use `@queue.task(inject=["db"])` receive the mock resource automatically: - -```python -@queue.task(inject=["db"]) -def create_user(name, db=None): - db.execute("INSERT INTO users (name) VALUES (?)", (name,)) - -mock_db = MagicMock() -with queue.test_mode(resources={"db": mock_db}) as results: - create_user.delay("Alice") - -assert results[0].succeeded -mock_db.execute.assert_called_once() -``` - -!!! 
note - When `resources=` is provided, proxy reconstruction is bypassed automatically. Proxy markers in arguments are passed through as-is so tests don't fail due to missing files or network connections. - -## What Test Mode Does NOT Cover - -Test mode is designed for **unit and integration testing** of task logic. It does not exercise: - -- SQLite storage or queries -- Retry/backoff scheduling -- Rate limiting -- Timeout reaping -- Worker thread pool dispatch -- Priority ordering - -For end-to-end tests that exercise the full Rust scheduler, run a real worker in a background thread: - -```python -import threading -import time - -def test_e2e(): - queue_e2e = Queue(db_path=":memory:") - - @queue_e2e.task() - def add(a, b): - return a + b - - t = threading.Thread(target=queue_e2e.run_worker, daemon=True) - t.start() - - job = add.delay(2, 3) - result = job.result(timeout=10) - assert result == 5 -``` - -!!! info "Middleware in test mode" - Per-task and queue-level `TaskMiddleware` hooks (`before`, `after`, `on_retry`) **do fire** in test mode, since they run in the Python wrapper around your task function. This lets you verify middleware behavior in tests without running a real worker. - -## Running Tests Locally - -```bash -# Rust tests -cargo test --workspace - -# Rebuild the Python extension after Rust changes -uv run maturin develop - -# Python tests -uv run python -m pytest tests/python/ -v - -# Linting -uv run ruff check py_src/ tests/ -uv run mypy py_src/taskito/ --no-incremental -``` - -To build with native async support: - -```bash -uv run maturin develop --features native-async -``` diff --git a/docs/guide/operations/troubleshooting.md b/docs/guide/operations/troubleshooting.md deleted file mode 100644 index c8005f7..0000000 --- a/docs/guide/operations/troubleshooting.md +++ /dev/null @@ -1,224 +0,0 @@ -# Troubleshooting - -Common issues and how to fix them. 
- -## Jobs stuck in running - -**Symptom**: Jobs stay in `running` status long after they should have finished. - -**Diagnosis**: The worker process that picked up the job crashed before marking it complete. - -```python -# Check how many jobs are stuck -stats = queue.stats() -print(stats) # {'running': 47, 'pending': 0, ...} - -# See which jobs are stuck -stuck = queue.list_jobs(status="running", limit=20) -for job in stuck: - d = job.to_dict() - print(f"{d['id']} | {d['task_name']} | started {d['started_at']}") -``` - -**Fix**: The stale reaper handles this automatically — it detects jobs that have exceeded their `timeout_ms` and retries them. If a job has no timeout set, it stays stuck forever. - -To recover a stuck job manually: - -```python -import time - -# Mark the job as failed so it retries -queue._inner.retry(job_id, int(time.time() * 1000)) -``` - -To prevent this in future, always set a timeout on production tasks: - -```python -@queue.task(timeout=300) # 5 minutes max -def process_data(payload): - ... -``` - -!!! warning - Jobs without `timeout_ms` are never reaped. The stale reaper only detects jobs that have exceeded their deadline. - -## Worker is unresponsive - -**Symptom**: Worker process is alive but not processing jobs. Heartbeat is stale. - -**Diagnosis**: Check worker status via the heartbeat API. - -```python -workers = queue.workers() -for w in workers: - print(f"{w['worker_id']}: {w['status']} (last seen: {w['last_heartbeat']})") -``` - -**Possible causes**: - -1. **GIL-bound CPU task**: A long-running CPU task is holding the GIL, blocking the scheduler thread from dispatching new jobs. The scheduler runs in Rust, but it still needs the GIL to call Python functions. - - Fix: Switch to the prefork pool for CPU-bound tasks. - - ```bash - taskito worker --app myapp:queue --pool prefork - ``` - -2. **Deadlock**: A task is waiting on a resource held by another task in the same worker. Check for circular waits in your task code. - -3. 
**Infinite loop**: A task is looping without yielding. Add a timeout to detect this: - - ```python - @queue.task(timeout=60) - def risky_task(): - ... - ``` - -## Database growing too large - -**Symptom**: The SQLite file keeps growing; disk space is filling up. - -**Diagnosis**: Completed job records and their result payloads are accumulating. - -**Fix**: Set `result_ttl` to auto-purge old results. - -```python -queue = Queue( - db_path="myapp.db", - result_ttl=86400, # Purge completed/dead jobs older than 24 hours -) -``` - -Manually purge existing backlog: - -```python -# Purge completed jobs older than 7 days -queue.purge_completed(older_than=604800) - -# Purge dead-lettered jobs older than 30 days -queue.purge_dead(older_than=2592000) -``` - -After purging, reclaim disk space: - -```bash -sqlite3 myapp.db "VACUUM;" -``` - -!!! note - `VACUUM` rewrites the entire database and requires exclusive access. Run it during low-traffic periods. - -## High job latency - -**Symptom**: Jobs sit in `pending` for longer than expected before starting. - -**Diagnosis**: Check the queue depth and scheduler configuration. - -```python -stats = queue.stats() -print(f"Pending: {stats['pending']}, Running: {stats['running']}") -``` - -**Possible causes and fixes**: - -1. **Scheduler poll interval too high**: Default is 50ms. Jobs can wait up to one poll interval before being picked up. - - ```python - queue = Queue(scheduler_poll_interval_ms=10) # Poll every 10ms - ``` - - Lower values increase CPU/DB usage. Balance based on your latency requirements. - -2. **Not enough workers**: All workers are busy. Increase the worker count. - - ```python - queue = Queue(workers=16) - ``` - -3. **Rate limiting**: The task or queue has a rate limit active. 
- - ```python - # Check if rate limiting is the culprit - # Rate-limited jobs are rescheduled 1 second into the future - pending = queue.list_jobs(status="pending", limit=10) - for job in pending: - print(job.to_dict()["scheduled_at"]) - ``` - -4. **Database performance**: Slow dequeue queries. Check SQLite WAL size or Postgres query plans. - -## Memory usage growing - -**Symptom**: Worker process memory climbs over time. - -**Causes**: - -1. **Large result payloads**: Task return values are stored in the database but also held in the scheduler's result buffer briefly. If tasks return large objects (images, dataframes), memory spikes. - - Fix: Return a reference (file path, object key) instead of the data itself. - - ```python - # Bad — large result stored in memory and DB - @queue.task() - def process_image(path: str) -> bytes: - return open(path, "rb").read() - - # Good — return a path - @queue.task() - def process_image(path: str) -> str: - out = path + ".processed" - # ... write output to out ... - return out - ``` - -2. **Accumulated job records**: Without `result_ttl`, the database grows unbounded. See [Database growing too large](#database-growing-too-large). - -3. **Resource leaks in tasks**: A task opens a file or connection and never closes it. Use context managers. - -## Periodic task running twice - -**Symptom**: A periodic task fires more than once per interval, or appears to run on two workers simultaneously. - -**Behavior**: This is safe by design. Periodic tasks use `unique_key` deduplication — when a periodic task is due, each worker's scheduler checks and tries to enqueue it, but only one enqueue succeeds because the `unique_key` constraint prevents duplicates. 
- -If you see two completed jobs for the same periodic task in the same interval, check: - -```python -# Look for duplicate completions -jobs = queue.list_jobs(status="complete", limit=50) -periodic_jobs = [j for j in jobs if "daily_report" in j.to_dict()["task_name"]] -for j in periodic_jobs: - print(j.to_dict()["completed_at"]) -``` - -If you're genuinely seeing duplicate execution, ensure all workers use the same database (same SQLite file path or same Postgres DSN). - -## Task not found in worker - -**Symptom**: Worker logs `TaskNotFound` or jobs fail with an error like `unknown task: myapp.tasks.process`. - -**Cause**: The task name registered at enqueue time doesn't match what the worker has registered. - -Task names default to `module.function_name`. If you enqueue from one module path and run the worker with a different import path, the names won't match. - -**Diagnosis**: - -```python -# Check the task name stored in the job -job = queue.get_job(job_id) -print(job.to_dict()["task_name"]) # e.g. "myapp.tasks.process" - -# Check what the worker has registered -# (add this temporarily to your worker startup) -print(list(queue._task_registry.keys())) -``` - -**Fix**: Use consistent import paths. If the task is `myapp/tasks.py:process`, always import it as `myapp.tasks.process` — not `tasks.process` (relative) or `src.myapp.tasks.process` (with src prefix). - -You can also set an explicit name to decouple the task name from the module path: - -```python -@queue.task(name="process-data") -def process(payload): - ... -``` diff --git a/docs/guide/reliability/circuit-breakers.md b/docs/guide/reliability/circuit-breakers.md deleted file mode 100644 index fd8bf1b..0000000 --- a/docs/guide/reliability/circuit-breakers.md +++ /dev/null @@ -1,139 +0,0 @@ -# Circuit Breakers - -Circuit breakers prevent cascading failures by temporarily stopping task execution when a task fails repeatedly. This is especially useful for tasks that call external APIs or services. 
- -## How It Works - -A circuit breaker tracks failures within a time window and transitions through three states: - -```mermaid -stateDiagram-v2 - [*] --> Closed - Closed --> Open: failures >= threshold within window - Open --> HalfOpen: cooldown elapsed - HalfOpen --> Closed: success rate >= threshold - HalfOpen --> Open: success rate impossible OR timeout -``` - -- **Closed** — Normal operation. Tasks execute as usual. Failures are counted. -- **Open** — Too many failures. Tasks are immediately rejected without execution. -- **Half-Open** — After the cooldown period, up to N probe requests are allowed through (default 5). Success and failure counts are tracked. The circuit closes when the success rate meets the threshold (default 80%). If too many probes fail and the threshold becomes mathematically impossible, the circuit immediately re-opens. If probes don't complete within the cooldown period, the circuit re-opens as a safety valve. - -## Configuration - -Enable circuit breakers per task using the `circuit_breaker` parameter: - -```python -@queue.task( - circuit_breaker={ - "threshold": 5, # Open after 5 failures - "window": 60, # Within a 60-second window - "cooldown": 300, # Stay open for 5 minutes before half-open - "half_open_probes": 5, # Allow 5 probe requests in half-open - "half_open_success_rate": 0.8, # Close when 80% of probes succeed - } -) -def call_external_api(endpoint: str) -> dict: - return requests.get(endpoint).json() -``` - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `threshold` | `int` | `5` | Number of failures to trigger the breaker | -| `window` | `int` | `60` | Time window in seconds for counting failures | -| `cooldown` | `int` | `300` | Seconds to wait before allowing a test request | -| `half_open_probes` | `int` | `5` | Number of probe requests allowed in half-open | -| `half_open_success_rate` | `float` | `0.8` | Required success rate (0.0–1.0) to close from half-open | - -## 
Inspecting Circuit Breaker State - -### Python API - -```python -breakers = queue.circuit_breakers() -for cb in breakers: - print(f"{cb['task_name']}: {cb['state']} (failures: {cb['failure_count']})") -``` - -### Dashboard API - -```bash -curl http://localhost:8080/api/circuit-breakers -``` - -```json -[ - { - "task_name": "myapp.tasks.call_external_api", - "state": "open", - "failure_count": 5, - "last_failure": 1700000010000, - "cooldown_until": 1700000310000 - } -] -``` - -## When to Use - -Circuit breakers are most useful for tasks that interact with external systems: - -- **External API calls** — prevent hammering a down service -- **Database connections** — stop retrying when the database is unreachable -- **Third-party services** — email providers, payment gateways, etc. - -For purely internal computation tasks, circuit breakers are usually unnecessary — standard retries with backoff are sufficient. - -## Combining with Retries - -Circuit breakers and retries work together. A task with both will: - -1. Retry on failure up to `max_retries` times (with backoff) -2. Count each final failure toward the circuit breaker threshold -3. 
Once the breaker opens, new jobs for that task are rejected immediately - -```python -@queue.task( - max_retries=3, - retry_backoff=2.0, - circuit_breaker={"threshold": 5, "window": 120, "cooldown": 600}, -) -def send_email(to: str, subject: str, body: str): - smtp.send(to, subject, body) -``` - -## Examples - -### External Payment API - -```python -@queue.task( - max_retries=3, - circuit_breaker={"threshold": 3, "window": 60, "cooldown": 120}, -) -def charge_customer(customer_id: str, amount: float): - response = requests.post( - "https://api.payment-provider.com/charge", - json={"customer": customer_id, "amount": amount}, - timeout=10, - ) - response.raise_for_status() - return response.json() -``` - -If the payment API goes down, the circuit breaker opens after 3 failures within 60 seconds, preventing a flood of requests to the failing service. After 2 minutes, up to 5 probe requests are allowed through. If at least 80% succeed (4 out of 5), the circuit closes. - -### Health Check with Monitoring - -```python -from taskito.events import EventType - -# Log when circuit breakers change state -def monitor_breakers(event_type: EventType, payload: dict): - breakers = queue.circuit_breakers() - open_breakers = [b for b in breakers if b["state"] == "open"] - if open_breakers: - names = ", ".join(b["task_name"] for b in open_breakers) - print(f"WARNING: Open circuit breakers: {names}") - -queue.on_event(EventType.JOB_FAILED, monitor_breakers) -``` diff --git a/docs/guide/reliability/error-handling.md b/docs/guide/reliability/error-handling.md deleted file mode 100644 index cb89742..0000000 --- a/docs/guide/reliability/error-handling.md +++ /dev/null @@ -1,336 +0,0 @@ -# Error Handling & Troubleshooting - -This guide covers how to debug task failures, inspect error history, handle common problems, and understand what happens when things go wrong. - -## Task Failure Lifecycle - -When a task raises an exception: - -1. 
The error message is recorded in the `job_errors` table -2. If `retry_count < max_retries`, the job is rescheduled with exponential backoff -3. If all retries are exhausted, the job moves to the **dead letter queue** (status: `dead`) - -```mermaid -flowchart LR - A["Exception raised"] --> B["Error recorded"] - B --> C{"Retries left?"} - C -->|Yes| D["Reschedule with backoff"] - C -->|No| E["Move to DLQ"] -``` - -## Inspecting Error History - -Every failed attempt is recorded. Use `job.errors` to see the full history: - -```python -job = unreliable_task.delay() - -# After the job fails and retries... -for error in job.errors: - print(f"Attempt {error['attempt']}: {error['error']} at {error['failed_at']}") -``` - -Each error entry: - -| Field | Type | Description | -|---|---|---| -| `id` | `str` | Unique error record ID | -| `job_id` | `str` | The job this error belongs to | -| `attempt` | `int` | Attempt number (0-indexed) | -| `error` | `str` | Error message string | -| `failed_at` | `int` | Timestamp in milliseconds | - -## Diagnosing Dead Letters - -When a job exhausts all retries, it lands in the DLQ. Inspect dead letters to understand what went wrong: - -```python -dead = queue.dead_letters(limit=20) - -for d in dead: - print(f"Task: {d['task_name']}") - print(f"Error: {d['error']}") - print(f"Retries: {d['retry_count']}") - print(f"Queue: {d['queue']}") - print() -``` - -### Common Patterns - -**Same error on every attempt** — The failure is deterministic (e.g., bad arguments, missing dependency). Fix the root cause, then replay: - -```python -new_job_id = queue.retry_dead(dead[0]["id"]) -``` - -**Intermittent errors** — The failure is transient (e.g., network timeout). Replaying will likely succeed: - -```python -# Replay all dead letters -for d in dead: - queue.retry_dead(d["id"]) -``` - -!!! note "Config preservation" - Replayed jobs preserve the original job's `priority`, `max_retries`, `timeout`, and `result_ttl` settings — no need to re-specify them. 
- -**Error message mentions serialization** — See [Serialization Errors](#serialization-errors) below. - -## Common Error Scenarios - -### Serialization Errors - -taskito uses `cloudpickle` to serialize task arguments and return values. Serialization fails when: - -- **Passing unpicklable objects**: Open file handles, database connections, sockets, thread locks -- **Module-level objects that can't be resolved**: Dynamically generated classes without stable import paths -- **Very large objects**: While cloudpickle has no hard limit, extremely large payloads slow down SQLite writes - -**Fix**: Pass simple, serializable data (strings, numbers, dicts, lists) as task arguments. Reconstruct complex objects inside the task: - -```python -# Bad — passing a connection object -@queue.task() -def query(conn, sql): # conn can't be pickled - return conn.execute(sql) - -# Good — pass connection info, create inside the task -@queue.task() -def query(db_url, sql): - conn = create_connection(db_url) - return conn.execute(sql) -``` - -### Task Not Found in Registry - -``` -KeyError: "Task 'myapp.process_data' not found in registry" -``` - -This means the worker doesn't have the task registered. Causes: - -- The module defining the task wasn't imported before starting the worker -- The task name changed (function renamed or moved to a different module) -- The `--app` flag points to the wrong module - -**Fix**: Ensure all task modules are imported when your `Queue` instance is created. Typically, define tasks in the same module as the queue, or import them in your app's `__init__.py`. - -### SQLite Lock Contention - -``` -OperationalError: database is locked -``` - -SQLite allows concurrent reads (WAL mode), but only one writer at a time. taskito sets `busy_timeout=5000ms` to wait for locks, but heavy write loads can still cause contention. 
- -**Causes:** - -- Multiple processes writing to the same SQLite file simultaneously -- Very large batch inserts blocking the writer -- Long-running transactions from external tools (e.g., DB browser) holding locks - -**Fixes:** - -- Avoid opening the database file with other tools while the worker is running -- Use `enqueue_many()` / `task.map()` for batch inserts — they use a single transaction -- If running multiple processes, ensure only one worker process writes at a time - -### Timeout Reaping - -``` -Task timed out after 10s -``` - -If a task exceeds its `timeout`, the scheduler detects it (every ~5 seconds) and marks it as failed, triggering retry/DLQ logic. - -```python -@queue.task(timeout=30) # 30 second timeout -def long_task(): - ... -``` - -!!! warning - Timeout reaping marks the job as failed, but **does not kill the Python thread**. The task function continues running until it finishes — the result is simply discarded. For CPU-bound tasks that might hang, consider adding your own internal timeout logic. - -### Soft Timeouts - -While hard timeouts (above) are enforced by the scheduler, **soft timeouts** let the task itself react to time pressure: - -```python -from taskito import current_job - -@queue.task(soft_timeout=30) -def long_task(): - for chunk in data_chunks: - process(chunk) - current_job.check_timeout() # raises SoftTimeoutError after 30s -``` - -| Timeout Type | Mechanism | Exception | -|---|---|---| -| Hard timeout (`timeout`) | Scheduler reaps the job externally | `TaskTimeoutError` (internal) | -| Soft timeout (`soft_timeout`) | Task checks elapsed time via `check_timeout()` | `SoftTimeoutError` | - -Soft timeouts are **cooperative** — the task must call `check_timeout()` at safe points. 
- -### Worker Crash Behavior - -If the worker process crashes (kill -9, OOM, power loss): - -- **Running jobs** stay in `running` status in SQLite -- On the next worker start, the scheduler's **stale job reaper** detects jobs that have been `running` longer than their timeout and marks them as failed -- If no timeout is set, stale jobs with no timeout remain in `running` status until manually cleaned up - -**Recommendation**: Always set a `timeout` on tasks so that stale jobs are automatically recovered: - -```python -@queue.task(timeout=300) # 5 minute timeout -def process(data): - ... -``` - -## Debugging Tips - -### Check Queue Stats - -Quick health check: - -```python -stats = queue.stats() -print(stats) -# {'pending': 0, 'running': 0, 'completed': 450, 'failed': 2, 'dead': 1, 'cancelled': 0} -``` - -Or via CLI: - -```bash -taskito info --app myapp:queue --watch -``` - -### Use Hooks for Alerting - -Set up failure hooks to get notified immediately: - -```python -@queue.on_failure -def on_task_failure(task_name, args, kwargs, error): - print(f"FAILED: {task_name} — {error}") - # Send to Slack, PagerDuty, etc. -``` - -### Test Mode for Isolation - -Use [test mode](../operations/testing.md) to run tasks synchronously and inspect errors without a worker: - -```python -with queue.test_mode() as results: - risky_task.delay() - - if results[0].failed: - print(results[0].error) - print(results[0].traceback) -``` - -### Inspect the SQLite Database Directly - -For deep debugging, query the SQLite database: - -```bash -sqlite3 myapp.db "SELECT id, task_name, status, error FROM jobs WHERE status = 3 LIMIT 10;" -``` - -Status codes: 0=pending, 1=running, 2=complete, 3=failed, 4=dead, 5=cancelled. 
- -## Error Handling Patterns - -### Exception Filtering - -Use `retry_on` and `dont_retry_on` to control which exceptions trigger retries: - -```python -@queue.task( - max_retries=5, - retry_on=[ConnectionError, TimeoutError], - dont_retry_on=[ValueError], -) -def call_api(url): - resp = requests.get(url, timeout=10) - resp.raise_for_status() - return resp.json() -``` - -See [Retries — Exception Filtering](retries.md#exception-filtering) for details. - -### Cancelling Running Tasks - -Cancel a job that is already executing: - -```python -# Request cancellation -queue.cancel_running_job(job_id) -``` - -Inside the task, check for cancellation at safe points: - -```python -from taskito import current_job - -@queue.task() -def long_task(items): - for item in items: - process(item) - current_job.check_cancelled() # raises TaskCancelledError -``` - -`check_cancelled()` raises `TaskCancelledError` if cancellation was requested. Place these checks at natural breakpoints in long-running tasks. - -!!! note - Cancellation is **cooperative** — the task must call `check_cancelled()` to observe it. If the task never checks, it runs to completion. 
- -### Cleanup on Failure - -Use try/finally or hooks to clean up resources: - -```python -@queue.task() -def process_file(path): - tmp = download_to_temp(path) - try: - return parse(tmp) - finally: - os.unlink(tmp) -``` - -## Exception Hierarchy - -taskito defines a hierarchy of exceptions for precise error handling: - -``` -TaskitoError (base) -├── TaskTimeoutError — hard timeout exceeded -├── SoftTimeoutError — soft timeout exceeded (check_timeout) -├── TaskCancelledError — task cancelled (check_cancelled) -├── MaxRetriesExceededError — all retry attempts exhausted -├── SerializationError — serialization/deserialization failure -├── CircuitBreakerOpenError — circuit breaker is open -├── RateLimitExceededError — rate limit exceeded -├── JobNotFoundError — job ID not found (also a KeyError) -└── QueueError — queue-level operational error -``` - -All exceptions inherit from `TaskitoError`, so you can catch the base class for broad handling: - -```python -from taskito import TaskitoError, SoftTimeoutError, TaskCancelledError - -try: - result = job.result(timeout=30) -except TaskitoError as e: - print(f"Taskito error: {e}") -``` - -Import any exception directly from the `taskito` package: - -```python -from taskito import TaskCancelledError, SoftTimeoutError, SerializationError -``` diff --git a/docs/guide/reliability/guarantees.md b/docs/guide/reliability/guarantees.md deleted file mode 100644 index 38ae4ab..0000000 --- a/docs/guide/reliability/guarantees.md +++ /dev/null @@ -1,128 +0,0 @@ -# Delivery Guarantees - -Taskito provides **at-least-once delivery**. Every enqueued job will be executed at least once, but may be executed more than once if a worker crashes mid-execution. 
- -## What This Means - -- A job **will not be lost** — if a worker dies, the scheduler detects the stale job and retries it -- A job **may run twice** — if a worker crashes after starting but before marking the job complete -- A job **will not run concurrently** — `claim_execution` prevents two workers from picking up the same job - -## Why Not Exactly-Once? - -Exactly-once delivery is [impossible in distributed systems](https://bravenewgeek.com/you-cannot-have-exactly-once-delivery/) without two-phase commit. Taskito's approach matches Celery, SQS, and most production job systems: deliver at least once, design tasks to handle duplicates. - -## How Recovery Works - -```mermaid -sequenceDiagram - participant S as Scheduler - participant W as Worker - participant DB as Database - - S->>DB: dequeue + claim_execution - S->>W: dispatch job - W->>W: execute task - Note over W: Worker crashes here - Note over S: timeout_ms elapses... - S->>DB: reap_stale_jobs detects stuck job - S->>DB: mark failed + schedule retry - S->>W: dispatch again (new attempt) - W->>DB: complete + clear claim -``` - -The `claim_execution` mechanism prevents two workers from executing the same job simultaneously. But it cannot prevent re-execution after a crash — the claim is cleared when the stale reaper detects the timeout. - -## Writing Idempotent Tasks - -Since tasks may run more than once, design them to be safe on re-execution: - -### Use database upserts - -```python -@queue.task() -def create_user(email, name): - # UPSERT — safe to run twice - db.execute( - "INSERT INTO users (email, name) VALUES (?, ?) 
" - "ON CONFLICT (email) DO UPDATE SET name = ?", - (email, name, name), - ) -``` - -### Use idempotency keys - -```python -@queue.task() -def charge_customer(order_id, amount): - # Check if already charged - if db.execute("SELECT 1 FROM charges WHERE order_id = ?", (order_id,)).fetchone(): - return # Already processed - - payment_provider.charge(amount, idempotency_key=f"order-{order_id}") - db.execute("INSERT INTO charges (order_id, amount) VALUES (?, ?)", (order_id, amount)) -``` - -### Use unique tasks for deduplication - -```python -# Only one pending/running instance per key -job = send_report.apply_async( - args=(user_id,), - unique_key=f"report-{user_id}", -) -``` - -If a job with the same `unique_key` is already pending or running, the duplicate is silently dropped. See [Unique Tasks](../execution/unique-tasks.md) for details. - -### Avoid side effects that can't be undone - -```python -# Bad — sends duplicate emails on retry -@queue.task() -def notify(user_id): - send_email(user_id, "Your order shipped") - -# Good — check before sending -@queue.task() -def notify(user_id): - if not db.execute("SELECT notified FROM orders WHERE user_id = ?", (user_id,)).fetchone()[0]: - send_email(user_id, "Your order shipped") - db.execute("UPDATE orders SET notified = 1 WHERE user_id = ?", (user_id,)) -``` - -## Deduplication Window - -`unique_key` prevents duplicate enqueue only while a job with that key is **pending or running**. Once the job completes (or is dead-lettered/cancelled), the same `unique_key` can be enqueued again. - -```python -job1 = task.apply_async(args=(1,), unique_key="order-123") # Enqueued -job2 = task.apply_async(args=(1,), unique_key="order-123") # Skipped (job1 pending) -# ... job1 completes ... -job3 = task.apply_async(args=(1,), unique_key="order-123") # Enqueued (new job) -``` - -## How Claim Execution Works - -Before dispatching a job to a worker thread, the scheduler calls `claim_execution(job_id, worker_id)`. 
This is an atomic `SET NX` (SQLite: `INSERT OR IGNORE`, Postgres: `INSERT ... ON CONFLICT DO NOTHING`, Redis: `SET NX`). If another scheduler instance already claimed the job, the claim fails and the job is skipped. - -This prevents **duplicate dispatch** (two workers picking up the same job). It does NOT prevent **duplicate execution** after a crash — the claim is cleared by the stale reaper when it detects the timeout. - -## Framework vs Task Responsibility - -| Concern | Who handles it | -|---------|---------------| -| Job dispatch deduplication | Framework (`claim_execution`) | -| Job enqueue deduplication | Framework (`unique_key`) | -| Crash recovery | Framework (stale reaper) | -| Idempotent execution | **You** (task code) | -| Side-effect safety | **You** (task code) | - -## Summary - -| Guarantee | Taskito | Celery | SQS | -|-----------|---------|--------|-----| -| Delivery | At-least-once | At-least-once | At-least-once | -| Duplicate prevention | `claim_execution` (dispatch-level) | Visibility timeout | Visibility timeout | -| Deduplication | `unique_key` (enqueue-level) | Manual | Message dedup ID | -| Crash recovery | Stale reaper (timeout-based) | Worker ack timeout | Visibility timeout | diff --git a/docs/guide/reliability/index.md b/docs/guide/reliability/index.md deleted file mode 100644 index 9f7a898..0000000 --- a/docs/guide/reliability/index.md +++ /dev/null @@ -1,12 +0,0 @@ -# Reliability - -Harden your task queue for production workloads. 
- -| Guide | Description | -|-------|-------------| -| [Retries & Dead Letters](retries.md) | Automatic retries with exponential backoff, dead letter queue | -| [Error Handling](error-handling.md) | Task failure lifecycle, error inspection, debugging patterns | -| [Delivery Guarantees](guarantees.md) | At-least-once delivery, idempotency, and exactly-once patterns | -| [Rate Limiting](rate-limiting.md) | Throttle task execution with token bucket rate limits | -| [Circuit Breakers](circuit-breakers.md) | Protect downstream services from cascading failures | -| [Distributed Locking](locking.md) | Mutual exclusion across workers with database-backed locks | diff --git a/docs/guide/reliability/locking.md b/docs/guide/reliability/locking.md deleted file mode 100644 index 3297587..0000000 --- a/docs/guide/reliability/locking.md +++ /dev/null @@ -1,124 +0,0 @@ -# Distributed Locking - -taskito provides a distributed lock primitive backed by the same database used for the task queue. Locks work across multiple worker processes and machines sharing the same database. - -## Overview - -Use distributed locks when multiple workers or processes must not execute the same critical section at the same time — for example, refreshing a shared cache, running a singleton periodic task, or accessing an external API with a single-writer constraint. - -## Sync Context Manager - -```python -with queue.lock("cache-refresh"): - refresh_cache() -``` - -The lock is automatically released when the `with` block exits, even if an exception is raised. - -### Parameters - -```python -queue.lock( - name: str, - ttl: int = 30, - auto_extend: bool = True, - owner_id: str | None = None, - timeout: float | None = None, - retry_interval: float = 0.1, -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `name` | `str` | — | Lock name. All processes using the same name compete for the same lock. | -| `ttl` | `int` | `30` | Lock TTL in seconds. Auto-extended if `auto_extend=True`. 
| -| `auto_extend` | `bool` | `True` | Automatically extend the lock before it expires (background thread). | -| `owner_id` | `str | None` | `None` | Custom owner identifier. Defaults to a random UUID per acquisition. | -| `timeout` | `float | None` | `None` | Max seconds to wait for the lock. `None` raises immediately if unavailable. | -| `retry_interval` | `float` | `0.1` | Seconds between retry attempts when waiting for the lock. | - -## Async Context Manager - -```python -async with queue.alock("cache-refresh"): - await refresh_cache() -``` - -`alock()` accepts the same parameters as `lock()` and is safe to use inside async functions and FastAPI/Django async views. - -## Auto-Extension - -When `auto_extend=True` (the default), a background thread extends the lock's TTL at roughly half the TTL interval. This prevents the lock from expiring during a long-running operation without requiring you to set an artificially large TTL. - -```python -# This lock will stay alive for as long as the block runs, -# even if it takes several minutes. -with queue.lock("long-job", ttl=30, auto_extend=True): - run_slow_operation() -``` - -## Acquisition Timeout - -By default, `lock()` raises `LockNotAcquired` immediately if the lock is held by another process. Pass `timeout` to wait: - -```python -try: - with queue.lock("resource", timeout=5.0): - do_work() -except LockNotAcquired: - print("Could not acquire lock within 5 seconds") -``` - -The lock is retried every `retry_interval` seconds until `timeout` is exceeded. - -## Cross-Process Locking - -Because lock state is stored in the database, locks are effective across multiple worker processes on the same machine or different machines sharing the same database: - -```python -# Process A (machine 1) -with queue.lock("billing-run"): - run_billing() - -# Process B (machine 2) — will wait or raise while process A holds the lock -with queue.lock("billing-run"): - run_billing() -``` - -!!! 
note "SQLite vs Postgres" - On SQLite, cross-process locking works via WAL mode and exclusive transactions. For multi-machine deployments, use the PostgreSQL backend where `SELECT FOR UPDATE SKIP LOCKED` provides true distributed semantics. - -## Error Handling - -```python -from taskito import LockNotAcquired - -try: - with queue.lock("my-lock", timeout=2.0): - critical_section() -except LockNotAcquired: - # Another process holds the lock; handle gracefully - log.warning("Skipping — another process is running the critical section") -``` - -## Low-Level API - -For advanced use cases, you can manage locks manually without the context manager: - -```python -# Acquire -lock_id = queue._inner.acquire_lock("my-lock", ttl=30) - -# Extend -queue._inner.extend_lock("my-lock", lock_id, ttl=30) - -# Inspect -info = queue._inner.get_lock_info("my-lock") -# {"name": "my-lock", "owner_id": "...", "expires_at": 1710000000} - -# Release -queue._inner.release_lock("my-lock", lock_id) -``` - -!!! warning - The low-level API skips auto-extension and does not release on exception. Prefer the context manager (`lock()` / `alock()`) for production code. diff --git a/docs/guide/reliability/rate-limiting.md b/docs/guide/reliability/rate-limiting.md deleted file mode 100644 index e3674af..0000000 --- a/docs/guide/reliability/rate-limiting.md +++ /dev/null @@ -1,67 +0,0 @@ -# Rate Limiting - -taskito uses a **token bucket** algorithm to limit how fast tasks execute. Rate limits are per-task and persisted in SQLite. - -## Usage - -```python -@queue.task(rate_limit="100/m") # 100 per minute -def send_email(to, subject, body): - ... - -@queue.task(rate_limit="10/s") # 10 per second -def api_call(endpoint): - ... - -@queue.task(rate_limit="3600/h") # 3600 per hour -def generate_report(report_id): - ... 
-``` - -## Syntax - -Rate limits use the format `count/period`: - -| Format | Meaning | -|---|---| -| `"10/s"` | 10 per second | -| `"100/m"` | 100 per minute | -| `"3600/h"` | 3600 per hour | - -## How It Works - -The token bucket algorithm: - -1. Each task name has a bucket with `max_tokens = count` and a `refill_rate = count / period` -2. Before dispatching a job, the scheduler checks if a token is available -3. If a token is available, it's consumed and the job is dispatched -4. If no tokens are available, the job is **rescheduled** 1 second in the future - -!!! info "Rate limit state is persisted" - Token bucket state (current tokens, last refill time) is stored in the `rate_limits` SQLite table. This means rate limits survive worker restarts. - -## Per-Task, Not Per-Queue - -Rate limits apply to the **task name**, regardless of which queue the job is in: - -```python -@queue.task(rate_limit="10/s", queue="emails") -def send_email(to, subject, body): - ... - -# Both of these are rate-limited together (same task name) -send_email.delay("alice@example.com", "Hi", "Body") -send_email.apply_async(args=("bob@example.com", "Hi", "Body"), queue="urgent") -``` - -## Combining with Retries - -Rate limiting and retries work together seamlessly. If a rate-limited task fails and retries, the retry attempt is also subject to the rate limit: - -```python -@queue.task(rate_limit="5/s", max_retries=3, retry_backoff=2.0) -def external_api(url): - response = requests.get(url) - response.raise_for_status() - return response.json() -``` diff --git a/docs/guide/reliability/retries.md b/docs/guide/reliability/retries.md deleted file mode 100644 index 8de16e4..0000000 --- a/docs/guide/reliability/retries.md +++ /dev/null @@ -1,158 +0,0 @@ -# Retries & Dead Letters - -taskito automatically retries failed tasks with exponential backoff and moves permanently failed jobs to a dead letter queue. 
- -## Retry Policy - -Configure retries at the task level: - -```python -@queue.task(max_retries=5, retry_backoff=2.0) -def flaky_api_call(url): - response = requests.get(url) - response.raise_for_status() - return response.json() -``` - -| Parameter | Default | Description | -|---|---|---| -| `max_retries` | `3` | Maximum retry attempts before DLQ | -| `retry_backoff` | `1.0` | Base delay in seconds for exponential backoff | - -### Backoff Formula - -``` -delay = min(max_delay, base_delay * 2^retry_count) + jitter -``` - -- `base_delay` = `retry_backoff` (in seconds) -- `max_delay` = 300 seconds (5 minutes) -- `jitter` = random 0–500ms to prevent thundering herd - -**Example with `retry_backoff=2.0`:** - -| Attempt | Delay | -|---|---| -| 1st retry | ~2s | -| 2nd retry | ~4s | -| 3rd retry | ~8s | -| 4th retry | ~16s | -| 5th retry | ~32s | - -## Exception Filtering - -Control which exceptions trigger retries with `retry_on` and `dont_retry_on`: - -```python -@queue.task( - max_retries=5, - retry_on=[ConnectionError, TimeoutError], - dont_retry_on=[ValueError], -) -def fetch_data(url): - response = requests.get(url) - response.raise_for_status() - return response.json() -``` - -| Parameter | Description | -|---|---| -| `retry_on` | Whitelist — only retry on these exception types. All others skip straight to DLQ. | -| `dont_retry_on` | Blacklist — never retry on these exception types, even if retries remain. | - -If neither is set, all exceptions trigger retries (default behavior). - -!!! note - `retry_on` and `dont_retry_on` are mutually exclusive in practice — if `retry_on` is set, only those exceptions are retried regardless of `dont_retry_on`. - -## Retry Flow - -```mermaid -flowchart TD - A["Task Execution"] --> B{Success?} - B -->|Yes| C["Status: Complete
Store result"] - B -->|No| D["Record error in
job_errors table"] - D --> SR{"Exception passes
retry_on / dont_retry_on?"} - SR -->|No| I["Move to Dead Letter Queue
Status: Dead"] - SR -->|Yes| E{"retry_count < max_retries?"} - E -->|Yes| F["Calculate backoff delay"] - F --> G["Status: Pending
retry_count += 1"] - G --> H["Wait for scheduled time"] - H --> A - E -->|No| I -``` - -## Dead Letter Queue - -Jobs that exhaust all retries are moved to the DLQ for inspection and manual replay. - -### Inspect Dead Letters - -```python -# List the 10 most recent dead letters -dead = queue.dead_letters(limit=10, offset=0) - -for d in dead: - print(f"Job: {d['original_job_id']}") - print(f"Task: {d['task_name']}") - print(f"Error: {d['error']}") - print(f"Retries: {d['retry_count']}") - print() -``` - -### Replay Dead Letters - -```python -# Re-enqueue a dead letter job (creates a new job) -new_job_id = queue.retry_dead(dead[0]["id"]) -``` - -!!! note "Config preservation" - Replayed jobs preserve the original job's `priority`, `max_retries`, `timeout`, and `result_ttl` settings. You don't need to re-specify them — the DLQ stores the full configuration. - -### Purge Old Dead Letters - -```python -# Delete dead letters older than 24 hours -deleted = queue.purge_dead(older_than=86400) -print(f"Purged {deleted} dead letter(s)") -``` - -## Error History - -Every failed attempt is recorded with the error message. Access the full history via `job.errors`: - -```python -@queue.task(max_retries=3) -def unreliable(): - raise ConnectionError("timeout") - -job = unreliable.delay() - -# After the job fails and retries... -for error in job.errors: - print(f"Attempt {error['attempt']}: {error['error']}") - # Attempt 0: timeout - # Attempt 1: timeout - # Attempt 2: timeout -``` - -Each error entry contains: - -| Field | Type | Description | -|---|---|---| -| `id` | `str` | Unique error record ID | -| `job_id` | `str` | The job this error belongs to | -| `attempt` | `int` | Attempt number (0-indexed) | -| `error` | `str` | Error message | -| `failed_at` | `int` | Timestamp in milliseconds | - -## Timeout Reaping - -If a task exceeds its `timeout`, the scheduler automatically detects it (checking every ~5 seconds) and treats it as a failure — triggering the retry/DLQ logic. 
- -```python -@queue.task(timeout=10) # 10 second timeout -def slow_task(): - time.sleep(60) # Will be reaped after 10s -``` diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 507065e..0000000 --- a/docs/index.md +++ /dev/null @@ -1,236 +0,0 @@ -# taskito - -**A brokerless, Rust-powered task queue for Python. Replace Celery without Redis.** - -Start with SQLite, scale to Postgres. No broker to install, configure, or manage. - -```bash -pip install taskito -``` - ---- - -## 5-Minute Quickstart - -```python -from taskito import Queue - -queue = Queue(db_path="tasks.db") - -@queue.task() -def add(a: int, b: int) -> int: - return a + b - -job = add.delay(2, 3) - -# Start worker (in production, use the CLI instead) -import threading -t = threading.Thread(target=queue.run_worker, daemon=True) -t.start() - -print(job.result(timeout=10)) # 5 -``` - -[:octicons-arrow-right-24: Get started](getting-started/quickstart.md) - ---- - -## Why taskito? - -Most Python task queues require a separate broker (Redis, RabbitMQ) that you need to install, configure, monitor, and keep running. **taskito** embeds everything into a single SQLite file — the queue, the results, the rate limits, the schedules. Just `pip install` and go. - -The core engine is written in **Rust** for performance: job dispatch, retry scheduling, rate limiting, and storage all happen in compiled native code. Python only runs during actual task execution. - ---- - -## Features - -
- -- :material-lightning-bolt:{ .lg .middle } **Zero Infrastructure** - - --- - - No Redis, no RabbitMQ — just a SQLite file. Install and start queuing in seconds. - -- :material-sort-ascending:{ .lg .middle } **Priority Queues** - - --- - - Higher priority jobs run first. Override at enqueue time for urgent work. - -- :material-refresh:{ .lg .middle } **Retry with Backoff** - - --- - - Automatic exponential backoff with jitter. Failed jobs land in a dead letter queue for inspection. - -- :material-speedometer:{ .lg .middle } **Rate Limiting** - - --- - - Token bucket rate limiting per task. `"100/m"`, `"10/s"`, `"3600/h"`. - -- :material-link-variant:{ .lg .middle } **Task Workflows** - - --- - - Compose pipelines with `chain`, parallelize with `group`, aggregate with `chord`. - -- :material-clock-outline:{ .lg .middle } **Cron Scheduling** - - --- - - `@queue.periodic(cron="0 */5 * * * *")` for recurring tasks with 6-field cron expressions. - -- :material-progress-check:{ .lg .middle } **Progress Tracking** - - --- - - Report progress from inside tasks. Monitor completion percentage in real time. - -- :material-language-rust:{ .lg .middle } **Rust-Powered** - - --- - - Scheduling, storage, and dispatch in native Rust. Python only runs your task code. - -- :material-database:{ .lg .middle } **Postgres Backend** - - --- - - Optional PostgreSQL storage for multi-machine workers with the same API. - -- :material-layers-outline:{ .lg .middle } **Per-task Middleware** - - --- - - `TaskMiddleware` with `before`/`after`/`on_retry` hooks for cross-cutting concerns. - -- :material-bell-outline:{ .lg .middle } **Events & Webhooks** - - --- - - Subscribe to job lifecycle events in-process or deliver them as signed HTTP webhooks. - -
- ---- - -## Integrations - -Install optional extras to unlock additional capabilities: - -
- -- :material-language-python:{ .lg .middle } **Flask** - - --- - - `pip install taskito[flask]` — `Taskito(app)` extension with CLI commands - -- :material-api:{ .lg .middle } **FastAPI** - - --- - - `pip install taskito[fastapi]` — `TaskitoRouter` for instant REST API - -- :material-cube-outline:{ .lg .middle } **Django** - - --- - - `pip install taskito[django]` — Admin integration and management commands - -- :material-chart-line:{ .lg .middle } **Prometheus** - - --- - - `pip install taskito[prometheus]` — Metrics middleware and `/metrics` endpoint - -- :material-radar:{ .lg .middle } **Sentry** - - --- - - `pip install taskito[sentry]` — Auto error capture with task context tags - -- :material-telescope:{ .lg .middle } **OpenTelemetry** - - --- - - `pip install taskito[otel]` — Distributed tracing with span-per-task - -- :material-database:{ .lg .middle } **Postgres** - - --- - - `pip install taskito[postgres]` — Multi-machine workers via PostgreSQL - -- :material-lock:{ .lg .middle } **Encryption** - - --- - - `pip install taskito[encryption]` — `EncryptedSerializer` for payload encryption - -
- ---- - -## Architecture - -```mermaid -graph TB - subgraph Python ["Python Layer"] - A["Queue / TaskWrapper"] - D["JobResult"] - end - - subgraph Rust ["Rust Core · PyO3"] - F["PyQueue"] - G["Scheduler · Tokio"] - H["Worker Pool · OS Threads"] - I["Rate Limiter"] - end - - subgraph Storage ["Storage Backend"] - J[("SQLite · WAL mode
Diesel ORM")] - K[("PostgreSQL
Diesel ORM")] - end - - A -->|enqueue| F - F -->|INSERT| J - G -->|poll & dequeue| J - G -->|crossbeam channel| H - H -->|acquire GIL · run task| A - H -->|result| G - G -->|UPDATE status| J - D -->|poll status| F - F -->|SELECT| J - G -.->|check limit| I - I -.->|token state| J -``` - -[:octicons-arrow-right-24: Architecture deep dive](architecture/index.md) - ---- - -## Comparison - -| Feature | taskito | Celery | RQ | Dramatiq | Huey | -|---|---|---|---|---|---| -| Broker required | **No** | Redis/RabbitMQ | Redis | Redis/RabbitMQ | Redis | -| Core language | Rust + Python | Python | Python | Python | Python | -| Priority queues | :white_check_mark: | :white_check_mark: | :x: | :x: | :white_check_mark: | -| Rate limiting | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | :x: | -| Dead letter queue | :white_check_mark: | :x: | :white_check_mark: | :x: | :x: | -| Task dependencies | :white_check_mark: | :x: | :x: | :x: | :x: | -| Task workflows | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | :x: | -| Per-task middleware | :white_check_mark: | :x: | :x: | :white_check_mark: | :x: | -| Job cancellation | :white_check_mark: | :white_check_mark: | :x: | :x: | :white_check_mark: | -| Cancel running tasks | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | -| Progress tracking | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | -| Unique tasks | :white_check_mark: | :x: | :x: | :x: | :white_check_mark: | -| Custom serializers | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | -| Postgres backend | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | -| Setup complexity | `pip install` | Broker + backend | Redis server | Broker | Redis server | - -[:octicons-arrow-right-24: Full comparison](comparison.md) diff --git a/docs/integrations/django.md b/docs/integrations/django.md deleted file mode 100644 index bf51075..0000000 --- a/docs/integrations/django.md +++ /dev/null @@ -1,149 +0,0 @@ -# 
Django Integration - -taskito provides Django admin views for browsing jobs, inspecting dead letters, and viewing queue statistics — all without Django ORM models. - -## Installation - -```bash -pip install taskito[django] -``` - -## Setup - -### Option 1: Register on the Default Admin Site - -In your project's `admin.py` or `urls.py`: - -```python -from taskito.contrib.django.admin import register_taskito_admin - -register_taskito_admin() -``` - -This adds taskito views to the default `admin.site`. - -### Option 2: Custom Admin Site - -Use `TaskitoAdminSite` for a dedicated admin: - -```python -from taskito.contrib.django.admin import TaskitoAdminSite - -admin_site = TaskitoAdminSite(name="taskito_admin") -``` - -## Django Settings - -The following settings can be defined in your Django `settings.py`: - -| Setting | Default | Description | -|---------|---------|-------------| -| `TASKITO_AUTODISCOVER_MODULE` | `"tasks"` | Module name auto-discovered in each installed app on startup. | -| `TASKITO_ADMIN_PER_PAGE` | `50` | Rows per page in the admin jobs and dead letters views. | -| `TASKITO_ADMIN_TITLE` | `"Taskito"` | Browser tab title for `TaskitoAdminSite`. | -| `TASKITO_ADMIN_HEADER` | `"Taskito Admin"` | Site header shown in `TaskitoAdminSite`. | -| `TASKITO_WATCH_INTERVAL` | `2` | Polling interval in seconds for `manage.py taskito_info --watch`. | -| `TASKITO_DASHBOARD_HOST` | `"127.0.0.1"` | Default bind host for `manage.py taskito_dashboard`. | -| `TASKITO_DASHBOARD_PORT` | `8080` | Default bind port for `manage.py taskito_dashboard`. | - -Example: - -```python -# settings.py -TASKITO_AUTODISCOVER_MODULE = "jobs" # import myapp.jobs instead of myapp.tasks -TASKITO_ADMIN_PER_PAGE = 25 -TASKITO_ADMIN_TITLE = "MyApp Tasks" -TASKITO_ADMIN_HEADER = "MyApp Task Queue" -TASKITO_DASHBOARD_HOST = "0.0.0.0" -TASKITO_DASHBOARD_PORT = 9000 -``` - -## Queue Configuration - -Create a `taskito` queue instance in your Django project. 
The `get_queue()` function in `taskito.contrib.django.settings` is used to retrieve the queue instance. - -```python -# myproject/tasks.py -from taskito import Queue - -queue = Queue(db_path="taskito.db") - -@queue.task() -def send_welcome_email(user_id: int): - from myapp.models import User - user = User.objects.get(id=user_id) - user.email_user("Welcome!", "Thanks for signing up.") -``` - -!!! tip "Lazy imports" - Import Django models inside the task function body to avoid app registry issues during startup. - -## Admin Views - -The integration provides the following views under `/admin/taskito/`: - -- **Dashboard** — Queue statistics overview (pending, running, completed, failed, dead, cancelled) -- **Jobs** — Paginated job list with status, queue, and task name filters -- **Job Detail** — Full job payload, error history, retry count, and metadata -- **Dead Letters** — Browse and retry dead letter entries - -## Running the Worker - -```bash -DJANGO_SETTINGS_MODULE=myproject.settings taskito worker --app myproject.tasks:queue -``` - -## Full Example - -### Project Structure - -``` -myproject/ - settings.py - urls.py - tasks.py # Queue + task definitions - myapp/ - admin.py # Register taskito admin views - views.py - models.py -``` - -### `tasks.py` - -```python -from taskito import Queue - -queue = Queue(db_path="taskito.db") - -@queue.task(max_retries=3) -def send_welcome_email(user_id: int): - from myapp.models import User - user = User.objects.get(id=user_id) - user.email_user("Welcome!", "Thanks for signing up.") - -@queue.task(rate_limit="60/h") -def generate_monthly_report(month: int, year: int): - from myapp.reports import build_report - return build_report(month, year) -``` - -### `myapp/admin.py` - -```python -from django.contrib import admin -from taskito.contrib.django.admin import register_taskito_admin - -register_taskito_admin() -``` - -### `myapp/views.py` - -```python -from django.http import JsonResponse -from myproject.tasks import 
send_welcome_email - -def signup_view(request): - user = create_user(request.POST) - send_welcome_email.delay(user.id) - return JsonResponse({"status": "ok"}) -``` diff --git a/docs/integrations/fastapi.md b/docs/integrations/fastapi.md deleted file mode 100644 index ab08939..0000000 --- a/docs/integrations/fastapi.md +++ /dev/null @@ -1,195 +0,0 @@ -# FastAPI Integration - -taskito provides a pre-built `APIRouter` for FastAPI with endpoints for job management, progress streaming via SSE, and dead letter queue operations. - -## Installation - -```bash -pip install taskito[fastapi] -``` - -This installs `fastapi` and `pydantic` as extras. - -## Quick Setup - -```python -from fastapi import FastAPI -from taskito import Queue -from taskito.contrib.fastapi import TaskitoRouter - -queue = Queue(db_path="myapp.db") - -@queue.task() -def process_data(payload: dict) -> str: - return "done" - -app = FastAPI() -app.include_router(TaskitoRouter(queue), prefix="/tasks") -``` - -Run with: - -```bash -uvicorn myapp:app --reload -``` - -## Endpoints - -| Method | Path | Description | -|--------|------|-------------| -| `GET` | `/stats` | Queue statistics | -| `GET` | `/stats/queues` | Per-queue statistics | -| `GET` | `/jobs/{job_id}` | Job status, progress, and metadata | -| `GET` | `/jobs/{job_id}/errors` | Error history for a job | -| `GET` | `/jobs/{job_id}/result` | Job result (optional `?timeout=N` for blocking) | -| `GET` | `/jobs/{job_id}/progress` | SSE stream of progress updates | -| `POST` | `/jobs/{job_id}/cancel` | Cancel a pending job | -| `GET` | `/dead-letters` | List dead letter entries (paginated) | -| `POST` | `/dead-letters/{dead_id}/retry` | Re-enqueue a dead letter | -| `GET` | `/health` | Liveness check | -| `GET` | `/readiness` | Readiness check | -| `GET` | `/resources` | Worker resource status | - -## Configuration - -`TaskitoRouter` accepts options to control which routes are registered, how results are serialized, and page sizes: - -```python -from 
fastapi import Depends, HTTPException -from taskito.contrib.fastapi import TaskitoRouter - -def require_api_key(x_api_key: str = Header(...)): - if x_api_key != "secret": - raise HTTPException(status_code=403) - -app.include_router( - TaskitoRouter( - queue, - include_routes={"stats", "jobs", "dead-letters", "retry-dead"}, - dependencies=[Depends(require_api_key)], - sse_poll_interval=1.0, - result_timeout=5.0, - default_page_size=25, - max_page_size=200, - result_serializer=lambda v: v if isinstance(v, (str, int, float, bool, None)) else str(v), - ), - prefix="/tasks", -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `include_routes` | `set[str] | None` | `None` | If set, only register these route names. Cannot be combined with `exclude_routes`. | -| `exclude_routes` | `set[str] | None` | `None` | If set, skip these route names. Cannot be combined with `include_routes`. | -| `dependencies` | `Sequence[Depends] | None` | `None` | FastAPI dependencies applied to every route (e.g. auth). | -| `sse_poll_interval` | `float` | `0.5` | Seconds between SSE progress polls. | -| `result_timeout` | `float` | `1.0` | Default timeout for non-blocking result fetch. | -| `default_page_size` | `int` | `20` | Default page size for paginated endpoints. | -| `max_page_size` | `int` | `100` | Maximum allowed page size. | -| `result_serializer` | `Callable[[Any], Any] | None` | `None` | Custom result serializer. Receives any value, must return a JSON-serializable value. | - -Valid route names: `stats`, `jobs`, `job-errors`, `job-result`, `job-progress`, `cancel`, `dead-letters`, `retry-dead`, `health`, `readiness`, `resources`, `queue-stats`. - -## Blocking Result Fetch - -The `/jobs/{job_id}/result` endpoint supports an optional `timeout` query parameter (0–300 seconds). 
When `timeout > 0`, the request blocks until the job completes or the timeout elapses: - -```bash -# Non-blocking (default) -curl http://localhost:8000/tasks/jobs/01H5K6X.../result - -# Block up to 30 seconds for the result -curl http://localhost:8000/tasks/jobs/01H5K6X.../result?timeout=30 -``` - -## SSE Progress Streaming - -Stream real-time progress for a running job using Server-Sent Events: - -```python -import httpx - -with httpx.stream("GET", "http://localhost:8000/tasks/jobs/01H5K6X.../progress") as r: - for line in r.iter_lines(): - print(line) - # data: {"progress": 25, "status": "running"} - # data: {"progress": 50, "status": "running"} - # data: {"progress": 100, "status": "completed"} -``` - -From the browser: - -```javascript -const source = new EventSource("/tasks/jobs/01H5K6X.../progress"); -source.onmessage = (event) => { - const data = JSON.parse(event.data); - console.log(`Progress: ${data.progress}%`); - if (data.status === "completed" || data.status === "failed") { - source.close(); - } -}; -``` - -The stream sends a JSON event every 0.5 seconds while the job is active, then a final event when the job reaches a terminal state. - -## Pydantic Response Models - -All endpoints return validated Pydantic models with clean OpenAPI docs. 
You can import them for type-safe client code: - -```python -from taskito.contrib.fastapi import ( - StatsResponse, - JobResponse, - JobErrorResponse, - JobResultResponse, - CancelResponse, - DeadLetterResponse, - RetryResponse, -) -``` - -## Full Example - -```python -from fastapi import FastAPI, Header, HTTPException, Depends -from taskito import Queue, current_job -from taskito.contrib.fastapi import TaskitoRouter - -queue = Queue(db_path="myapp.db") - -@queue.task() -def resize_image(image_url: str, sizes: list[int]) -> dict: - results = {} - for i, size in enumerate(sizes): - results[size] = do_resize(image_url, size) - current_job.update_progress(int((i + 1) / len(sizes) * 100)) - return results - -async def require_auth(authorization: str = Header(...)): - if not authorization.startswith("Bearer "): - raise HTTPException(401) - -app = FastAPI(title="Image Service") -app.include_router( - TaskitoRouter(queue, dependencies=[Depends(require_auth)]), - prefix="/tasks", - tags=["tasks"], -) - -# Start worker in a separate process: -# taskito worker --app myapp:queue -``` - -```bash -# Check job status -curl http://localhost:8000/tasks/jobs/01H5K6X... \ - -H "Authorization: Bearer mytoken" - -# Stream progress -curl -N http://localhost:8000/tasks/jobs/01H5K6X.../progress \ - -H "Authorization: Bearer mytoken" - -# Block for result (up to 60s) -curl http://localhost:8000/tasks/jobs/01H5K6X.../result?timeout=60 \ - -H "Authorization: Bearer mytoken" -``` diff --git a/docs/integrations/flask.md b/docs/integrations/flask.md deleted file mode 100644 index 4454ae9..0000000 --- a/docs/integrations/flask.md +++ /dev/null @@ -1,172 +0,0 @@ -# Flask Integration - -taskito provides a first-class Flask extension that configures a `Queue` from your app config and registers CLI commands. 
- -## Installation - -```bash -pip install taskito[flask] -``` - -## Basic Setup - -```python -from flask import Flask -from taskito.contrib.flask import Taskito - -app = Flask(__name__) -app.config["TASKITO_DB_PATH"] = "myapp.db" - -taskito = Taskito(app) - -@taskito.queue.task() -def send_email(to: str, subject: str): - ... -``` - -## Factory Pattern - -```python -from taskito.contrib.flask import Taskito - -taskito = Taskito() - -def create_app(): - app = Flask(__name__) - app.config["TASKITO_DB_PATH"] = "myapp.db" - taskito.init_app(app) - return app -``` - -## Configuration - -All configuration is read from `app.config`: - -| Config Key | Default | Description | -|------------|---------|-------------| -| `TASKITO_DB_PATH` | `.taskito/taskito.db` | SQLite database path | -| `TASKITO_BACKEND` | `"sqlite"` | Storage backend: `"sqlite"` or `"postgres"` | -| `TASKITO_DB_URL` | `None` | PostgreSQL connection URL (when backend is `"postgres"`) | -| `TASKITO_WORKERS` | `0` (auto) | Number of worker threads | -| `TASKITO_SCHEMA` | `"taskito"` | PostgreSQL schema name | -| `TASKITO_DEFAULT_RETRY` | `3` | Default retry count for tasks | -| `TASKITO_DEFAULT_TIMEOUT` | `300` | Default timeout in seconds | -| `TASKITO_DEFAULT_PRIORITY` | `0` | Default task priority | -| `TASKITO_RESULT_TTL` | `None` | Auto-purge completed jobs after N seconds | -| `TASKITO_DRAIN_TIMEOUT` | `30` | Seconds to wait for running tasks on shutdown | - -## Extension Options - -The `Taskito` constructor accepts a `cli_group` parameter to rename the CLI command group: - -```python -# Commands will be under `flask tasks worker`, `flask tasks info`, etc. 
-taskito = Taskito(app, cli_group="tasks") -``` - -## CLI Commands - -The extension registers commands under the `flask taskito` group (configurable via `cli_group`): - -### `flask taskito worker` - -Start a taskito worker: - -```bash -flask taskito worker -flask taskito worker --queues default,emails -``` - -### `flask taskito info` - -Show queue statistics. Supports `--format table` (default) and `--format json`: - -```bash -flask taskito info -flask taskito info --format json -``` - -``` -taskito queue statistics ------------------------------- - pending 12 - running 3 - completed 450 - failed 2 - dead 1 - cancelled 0 ------------------------------- - total 468 -``` - -## Accessing the Queue - -```python -# Via the extension instance -taskito.queue.stats() - -# Via app extensions -app.extensions["taskito"].queue.stats() -``` - -## Full Example - -A complete Flask application with task definitions and routes: - -```python -from flask import Flask, jsonify, request -from taskito.contrib.flask import Taskito - -app = Flask(__name__) -app.config["TASKITO_DB_PATH"] = "myapp.db" -app.config["TASKITO_DEFAULT_RETRY"] = 3 -app.config["TASKITO_RESULT_TTL"] = 86400 # 24h auto-cleanup - -taskito = Taskito(app) - -@taskito.queue.task() -def send_welcome_email(user_id: int): - """Send a welcome email to a new user.""" - user = get_user(user_id) - send_email(user.email, "Welcome!", "Thanks for signing up.") - -@taskito.queue.task(rate_limit="10/m") -def generate_report(report_type: str, params: dict): - """Generate a report (rate-limited to 10/minute).""" - return create_report(report_type, params) - -@app.route("/api/users", methods=["POST"]) -def create_user(): - user = create_user_in_db(request.json) - send_welcome_email.delay(user.id) - return jsonify({"id": user.id}), 201 - -@app.route("/api/reports", methods=["POST"]) -def request_report(): - job = generate_report.delay( - request.json["type"], - request.json.get("params", {}), - ) - return jsonify({"job_id": job.id}), 
202 - -@app.route("/api/reports/") -def report_status(job_id: str): - job = taskito.queue.get_job(job_id) - if job is None: - return jsonify({"error": "Not found"}), 404 - return jsonify({"status": job.status, "progress": job.progress}) - -@app.route("/api/queue/stats") -def queue_stats(): - return jsonify(taskito.queue.stats()) -``` - -Run the app and worker: - -```bash -# Terminal 1: Flask app -flask run - -# Terminal 2: Worker -flask taskito worker -``` diff --git a/docs/integrations/index.md b/docs/integrations/index.md deleted file mode 100644 index 15525d6..0000000 --- a/docs/integrations/index.md +++ /dev/null @@ -1,50 +0,0 @@ -# Integrations - -taskito offers optional extras for popular frameworks and observability tools. Install only what you need. - -## Available Integrations - -| Extra | Install | What you get | -|-------|---------|--------------| -| **Flask** | `pip install taskito[flask]` | `Taskito(app)` extension, `flask taskito worker` CLI | -| **FastAPI** | `pip install taskito[fastapi]` | `TaskitoRouter` for instant REST API over the queue | -| **Django** | `pip install taskito[django]` | Admin integration, management commands | -| **OpenTelemetry** | `pip install taskito[otel]` | Distributed tracing with span-per-task | -| **Prometheus** | `pip install taskito[prometheus]` | `PrometheusMiddleware`, queue depth gauges, `/metrics` server | -| **Sentry** | `pip install taskito[sentry]` | `SentryMiddleware` with auto error capture and task tags | -| **Encryption** | `pip install taskito[encryption]` | `EncryptedSerializer` for at-rest payload encryption | -| **MsgPack** | `pip install taskito[msgpack]` | `MsgpackSerializer` for compact binary serialization | -| **Postgres** | `pip install taskito[postgres]` | Multi-machine workers via PostgreSQL backend | -| **Redis** | `pip install taskito[redis]` | Redis storage backend | - -## Framework Integrations - -- **[Flask](flask.md)** — Full Flask extension with app config, factory pattern, and CLI 
commands -- **[FastAPI](fastapi.md)** — Pre-built `APIRouter` with job status, SSE progress, and DLQ management -- **[Django](django.md)** — Admin views for browsing jobs, dead letters, and queue stats - -## Observability Integrations - -- **[OpenTelemetry](otel.md)** — Distributed tracing with per-task spans -- **[Prometheus](prometheus.md)** — Counters, histograms, and gauges for task execution -- **[Sentry](sentry.md)** — Automatic error capture with task context - -## Combining Integrations - -All middleware-based integrations (`OpenTelemetryMiddleware`, `PrometheusMiddleware`, `SentryMiddleware`) compose together: - -```python -from taskito import Queue -from taskito.contrib.otel import OpenTelemetryMiddleware -from taskito.contrib.prometheus import PrometheusMiddleware -from taskito.contrib.sentry import SentryMiddleware - -queue = Queue( - db_path="myapp.db", - middleware=[ - OpenTelemetryMiddleware(), - PrometheusMiddleware(), - SentryMiddleware(), - ], -) -``` diff --git a/docs/integrations/otel.md b/docs/integrations/otel.md deleted file mode 100644 index 2d89819..0000000 --- a/docs/integrations/otel.md +++ /dev/null @@ -1,97 +0,0 @@ -# OpenTelemetry Integration - -taskito provides optional OpenTelemetry support for distributed tracing of task execution. - -## Installation - -Install with the `otel` extra: - -```bash -pip install taskito[otel] -``` - -This installs `opentelemetry-api` as a dependency. 
- -## Setup - -Add `OpenTelemetryMiddleware` to your queue: - -```python -from taskito import Queue -from taskito.contrib.otel import OpenTelemetryMiddleware - -queue = Queue(middleware=[OpenTelemetryMiddleware()]) -``` - -## What Gets Traced - -Each task execution produces a span with: - -- **Span name**: `taskito.execute.` (customizable) -- **Attributes**: - - `taskito.job_id` — the job ID - - `taskito.task_name` — the registered task name - - `taskito.queue` — the queue name - - `taskito.retry_count` — current retry attempt -- **Status**: `OK` on success, `ERROR` on failure (with exception recorded) -- **Events**: A `retry` event is added when a task is about to be retried - -## Configuration with Exporters - -`OpenTelemetryMiddleware` uses the standard OpenTelemetry API, so configure exporters as you normally would: - -```python -from opentelemetry import trace -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter - -# Set up the tracer provider with an OTLP exporter -provider = TracerProvider() -provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) -trace.set_tracer_provider(provider) - -# Now create your queue — spans will be exported automatically -from taskito import Queue -from taskito.contrib.otel import OpenTelemetryMiddleware - -queue = Queue(middleware=[OpenTelemetryMiddleware()]) -``` - -## Configuration - -`OpenTelemetryMiddleware` accepts several options to customize how spans are created: - -```python -OpenTelemetryMiddleware( - tracer_name="my-service", - span_name_fn=lambda ctx: f"task/{ctx.task_name}", - attribute_prefix="myapp", - extra_attributes_fn=lambda ctx: {"deployment.env": "prod"}, - task_filter=lambda name: not name.startswith("internal."), -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `tracer_name` | `str` | `"taskito"` | OpenTelemetry tracer 
name. | -| `span_name_fn` | `Callable[[JobContext], str] | None` | `None` | Custom span name builder. Receives `JobContext`, returns a string. Defaults to `.execute.`. | -| `attribute_prefix` | `str` | `"taskito"` | Prefix for all span attribute keys. | -| `extra_attributes_fn` | `Callable[[JobContext], dict] | None` | `None` | Returns extra attributes to add to each span. Receives `JobContext`. | -| `task_filter` | `Callable[[str], bool] | None` | `None` | Predicate that receives a task name. Return `True` to trace, `False` to skip. `None` traces all tasks. | - -## Combining with Other Middleware - -`OpenTelemetryMiddleware` is a standard `TaskMiddleware`, so it composes with other middleware: - -```python -queue = Queue(middleware=[ - OpenTelemetryMiddleware(), - MyLoggingMiddleware(), -]) -``` - -!!! note "Thread safety" - `OpenTelemetryMiddleware` is thread-safe and can be used with multi-worker configurations. Internal span tracking is protected by a lock. - -See the [Middleware guide](../guide/extensibility/middleware.md) for more on combining middleware. diff --git a/docs/integrations/prometheus.md b/docs/integrations/prometheus.md deleted file mode 100644 index 94a9e63..0000000 --- a/docs/integrations/prometheus.md +++ /dev/null @@ -1,191 +0,0 @@ -# Prometheus Metrics - -taskito provides Prometheus metrics via a middleware and an optional stats collector thread. - -## Installation - -```bash -pip install taskito[prometheus] -``` - -This installs `prometheus-client` as a dependency. 
- -## PrometheusMiddleware - -Add `PrometheusMiddleware` to your queue to track per-task execution metrics: - -```python -from taskito import Queue -from taskito.contrib.prometheus import PrometheusMiddleware - -queue = Queue(db_path="myapp.db", middleware=[PrometheusMiddleware()]) -``` - -### Configuration - -```python -PrometheusMiddleware( - namespace="myapp", - extra_labels_fn=lambda ctx: {"env": "prod", "region": "us-east-1"}, - disabled_metrics={"resource", "proxy"}, - task_filter=lambda name: not name.startswith("internal."), -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `namespace` | `str` | `"taskito"` | Prefix for all metric names. | -| `extra_labels_fn` | `Callable[[JobContext], dict[str, str]] | None` | `None` | Returns extra labels to add to job metrics. Receives `JobContext`. | -| `disabled_metrics` | `set[str] | None` | `None` | Metric groups or individual names to skip. Groups: `"jobs"`, `"queue"`, `"resource"`, `"proxy"`, `"intercept"`. | -| `task_filter` | `Callable[[str], bool] | None` | `None` | Predicate that receives a task name. Return `True` to export metrics for the task, `False` to skip it. `None` exports all tasks. | - -### Metrics Tracked - -| Metric | Type | Labels | Description | -|--------|------|--------|-------------| -| `taskito_jobs_total` | Counter | `task`, `status` | Total jobs processed (status: `completed` or `failed`) | -| `taskito_job_duration_seconds` | Histogram | `task` | Job execution duration | -| `taskito_active_workers` | Gauge | — | Number of currently executing workers | -| `taskito_retries_total` | Counter | `task` | Total retry attempts | - -## PrometheusStatsCollector - -For queue-level metrics, use the stats collector. 
It polls `queue.stats()` on a background thread: - -```python -from taskito.contrib.prometheus import PrometheusStatsCollector - -collector = PrometheusStatsCollector(queue, interval=10) -collector.start() -``` - -### Configuration - -```python -PrometheusStatsCollector( - queue, - interval=10, - namespace="myapp", - disabled_metrics={"intercept"}, -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `queue` | `Queue` | — | The Queue instance to poll. | -| `interval` | `float` | `10.0` | Seconds between polls. | -| `namespace` | `str` | `"taskito"` | Prefix for metric names. Must match `PrometheusMiddleware` namespace to share metric objects. | -| `disabled_metrics` | `set[str] | None` | `None` | Metric groups or names to skip. Same groups as `PrometheusMiddleware`. | - -### Metrics Tracked - -| Metric | Type | Labels | Description | -|--------|------|--------|-------------| -| `taskito_queue_depth` | Gauge | `queue` | Number of pending jobs | -| `taskito_dlq_size` | Gauge | — | Number of dead-letter jobs | -| `taskito_worker_utilization` | Gauge | — | Ratio of running jobs to total workers (0.0–1.0) | - -## Metrics Server - -Start a standalone `/metrics` endpoint for Prometheus to scrape: - -```python -from taskito.contrib.prometheus import start_metrics_server - -start_metrics_server(port=9090) -``` - -This uses `prometheus_client.start_http_server` under the hood. 
- -## Full Example - -```python -from taskito import Queue -from taskito.contrib.prometheus import ( - PrometheusMiddleware, - PrometheusStatsCollector, - start_metrics_server, -) - -queue = Queue(db_path="myapp.db", middleware=[PrometheusMiddleware()]) - -# Start metrics endpoint -start_metrics_server(port=9090) - -# Start queue stats polling -collector = PrometheusStatsCollector(queue, interval=10) -collector.start() -``` - -Prometheus scrape config: - -```yaml -scrape_configs: - - job_name: taskito - static_configs: - - targets: ["localhost:9090"] -``` - -## Grafana Dashboard Tips - -Useful panels for a taskito Grafana dashboard: - -- **Throughput** — `rate(taskito_jobs_total[5m])` by `task` and `status` -- **Duration p95** — `histogram_quantile(0.95, rate(taskito_job_duration_seconds_bucket[5m]))` -- **Queue depth** — `taskito_queue_depth` by `queue` -- **DLQ size** — `taskito_dlq_size` with alert threshold -- **Worker utilization** — `taskito_worker_utilization` as a gauge - -## Combining with Other Middleware - -`PrometheusMiddleware` composes with other middleware: - -```python -from taskito.contrib.otel import OpenTelemetryMiddleware -from taskito.contrib.sentry import SentryMiddleware - -queue = Queue( - db_path="myapp.db", - middleware=[ - OpenTelemetryMiddleware(), - PrometheusMiddleware(), - SentryMiddleware(), - ], -) -``` - -See the [Middleware guide](../guide/extensibility/middleware.md) for more on combining middleware. 
- -## Example: Alert on High DLQ Size - -```python -from taskito.contrib.prometheus import PrometheusMiddleware, PrometheusStatsCollector, start_metrics_server - -queue = Queue(db_path="myapp.db", middleware=[PrometheusMiddleware()]) - -# Start metrics and collector -start_metrics_server(port=9090) -PrometheusStatsCollector(queue, interval=10).start() -``` - -Prometheus alerting rule: - -```yaml -groups: - - name: taskito - rules: - - alert: HighDLQSize - expr: taskito_dlq_size > 10 - for: 5m - labels: - severity: warning - annotations: - summary: "taskito dead letter queue has {{ $value }} entries" - - alert: HighErrorRate - expr: rate(taskito_jobs_total{status="failed"}[5m]) > 0.1 - for: 2m - labels: - severity: critical - annotations: - summary: "High task failure rate: {{ $value }} failures/sec" -``` diff --git a/docs/integrations/sentry.md b/docs/integrations/sentry.md deleted file mode 100644 index effa922..0000000 --- a/docs/integrations/sentry.md +++ /dev/null @@ -1,125 +0,0 @@ -# Sentry Integration - -taskito provides a `SentryMiddleware` that automatically captures task errors and sets rich context for Sentry. - -## Installation - -```bash -pip install taskito[sentry] -``` - -This installs `sentry-sdk` as a dependency. 
- -## Setup - -Initialize the Sentry SDK as usual, then add `SentryMiddleware` to your queue: - -```python -import sentry_sdk -from taskito import Queue -from taskito.contrib.sentry import SentryMiddleware - -sentry_sdk.init(dsn="https://examplePublicKey@o0.ingest.sentry.io/0") - -queue = Queue(db_path="myapp.db", middleware=[SentryMiddleware()]) -``` - -## What It Does - -### Scope Tags - -Each task execution gets a Sentry scope with the following tags (prefix customizable via `tag_prefix`): - -| Tag | Value | -|-----|-------| -| `taskito.task_name` | The registered task name | -| `taskito.job_id` | The job ID | -| `taskito.queue` | The queue name | -| `taskito.retry_count` | Current retry attempt | - -### Transaction Name - -The Sentry transaction is set to `taskito:` by default. Customizable via `transaction_name_fn`. - -### Automatic Error Capture - -When a task raises an exception, `SentryMiddleware` calls `sentry_sdk.capture_exception()` automatically. The exception appears in Sentry with all the context tags attached. - -### Retry Breadcrumbs - -When a task is retried, a breadcrumb is added with: - -- **Category**: `taskito` (matches `tag_prefix`) -- **Level**: `warning` -- **Message**: `Retrying (attempt ): ` - -This gives you a trail of retry attempts leading up to a final failure. - -## Configuration - -```python -SentryMiddleware( - tag_prefix="myapp", - transaction_name_fn=lambda ctx: f"task-{ctx.task_name}", - task_filter=lambda name: not name.startswith("internal."), - extra_tags_fn=lambda ctx: {"worker.host": socket.gethostname()}, -) -``` - -| Parameter | Type | Default | Description | -|---|---|---|---| -| `tag_prefix` | `str` | `"taskito"` | Prefix for Sentry tag keys and breadcrumb category. | -| `transaction_name_fn` | `Callable[[JobContext], str] | None` | `None` | Custom transaction name builder. Receives `JobContext`. Defaults to `:`. | -| `task_filter` | `Callable[[str], bool] | None` | `None` | Predicate on task name. 
Return `True` to report, `False` to skip. `None` reports all tasks. | -| `extra_tags_fn` | `Callable[[JobContext], dict[str, str]] | None` | `None` | Returns extra Sentry tags to set. Receives `JobContext`. | - -## Combining with Other Middleware - -`SentryMiddleware` composes with other observability middleware: - -```python -from taskito.contrib.otel import OpenTelemetryMiddleware -from taskito.contrib.prometheus import PrometheusMiddleware - -queue = Queue( - db_path="myapp.db", - middleware=[ - OpenTelemetryMiddleware(), - PrometheusMiddleware(), - SentryMiddleware(), - ], -) -``` - -See the [Middleware guide](../guide/extensibility/middleware.md) for more on combining middleware. - -## Full Example - -```python -import sentry_sdk -from taskito import Queue -from taskito.contrib.sentry import SentryMiddleware - -# Initialize Sentry first -sentry_sdk.init( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - traces_sample_rate=1.0, -) - -# Create queue with Sentry middleware -queue = Queue(db_path="myapp.db", middleware=[SentryMiddleware()]) - -@queue.task(max_retries=3) -def process_payment(order_id: str, amount: float): - """Process a payment — errors are automatically reported to Sentry.""" - result = payment_gateway.charge(order_id, amount) - if not result.success: - raise PaymentError(f"Payment failed: {result.error}") - return result.transaction_id -``` - -When `process_payment` fails: - -1. The error appears in Sentry with tags `taskito.task_name=myapp.tasks.process_payment`, `taskito.job_id=...`, `taskito.queue=default` -2. If the task retries, each retry is recorded as a breadcrumb -3. 
The final failure (after all retries) includes the full breadcrumb trail diff --git a/docs-next/next.config.mjs b/docs/next.config.mjs similarity index 100% rename from docs-next/next.config.mjs rename to docs/next.config.mjs diff --git a/docs-next/package.json b/docs/package.json similarity index 97% rename from docs-next/package.json rename to docs/package.json index 90dde7c..de0bae5 100644 --- a/docs-next/package.json +++ b/docs/package.json @@ -1,5 +1,5 @@ { - "name": "docs-next", + "name": "docs", "version": "0.0.0", "private": true, "scripts": { diff --git a/docs-next/pnpm-lock.yaml b/docs/pnpm-lock.yaml similarity index 100% rename from docs-next/pnpm-lock.yaml rename to docs/pnpm-lock.yaml diff --git a/docs-next/postcss.config.mjs b/docs/postcss.config.mjs similarity index 100% rename from docs-next/postcss.config.mjs rename to docs/postcss.config.mjs diff --git a/docs/resources/configuration.md b/docs/resources/configuration.md deleted file mode 100644 index 82535d1..0000000 --- a/docs/resources/configuration.md +++ /dev/null @@ -1,165 +0,0 @@ -# Configuration - -Resources can be declared in code with `@queue.worker_resource()` or loaded from a TOML file. Both approaches support the same options. 
- -## TOML configuration file - -Define resources in a TOML file and load them at startup: - -```toml -# resources.toml - -[resources.config] -factory = "myapp.resources:load_config" -scope = "worker" - -[resources.db] -factory = "myapp.resources:create_engine" -teardown = "myapp.resources:close_engine" -health_check = "myapp.resources:check_db" -health_check_interval = 30.0 -max_recreation_attempts = 3 -scope = "worker" -depends_on = ["config"] - -[resources.session] -factory = "myapp.resources:create_session" -scope = "task" -pool_size = 20 -pool_min = 5 -acquire_timeout = 5.0 -depends_on = ["db"] -``` - -Load before starting the worker: - -```python -queue.load_resources("resources.toml") -``` - -The `factory`, `teardown`, and `health_check` values are import paths. Both formats are accepted: - -- `"myapp.resources:create_engine"` — colon separator (preferred) -- `"myapp.resources.create_engine"` — dot separator - -On Python 3.11+ the TOML parser is built-in. On earlier versions, install `tomli`: - -```bash -pip install tomli -``` - -## TOML resource options - -| Key | Type | Default | Description | -|---|---|---|---| -| `factory` | string | required | Import path to the factory callable. | -| `teardown` | string | — | Import path to the teardown callable. | -| `health_check` | string | — | Import path to the health check callable. | -| `health_check_interval` | float | `0.0` | Seconds between health checks. `0` disables. | -| `max_recreation_attempts` | int | `3` | Max recreation attempts on health failure. | -| `scope` | string | `"worker"` | `"worker"`, `"task"`, `"thread"`, or `"request"`. | -| `depends_on` | list[string] | `[]` | Resource names this one depends on. | -| `pool_size` | int | `4` | Task scope: max concurrent instances. | -| `pool_min` | int | `0` | Task scope: pre-warmed instances at startup. | -| `acquire_timeout` | float | `10.0` | Task scope: seconds to wait for a pool instance. 
| -| `max_lifetime` | float | `3600.0` | Task scope: max seconds an instance lives. | -| `idle_timeout` | float | `300.0` | Task scope: max idle seconds before eviction. | -| `reloadable` | bool | `false` | Allow hot reload via SIGHUP or CLI. | -| `frozen` | bool | `false` | Wrap instance in a read-only proxy. | - -## Pool configuration - -Pool parameters apply only to task-scoped resources (`scope = "task"`). They control the bounded pool that manages concurrent instances. - -| Parameter | Default | Description | -|---|---|---| -| `pool_size` | `4` | Max concurrent instances. Tasks block if the pool is exhausted. | -| `pool_min` | `0` | Instances pre-warmed at startup. `0` means lazy creation. | -| `acquire_timeout` | `10.0` | Seconds to wait for an available instance before raising `ResourceUnavailableError`. | -| `max_lifetime` | `3600.0` | Max seconds a pooled instance can live before it is replaced. | -| `idle_timeout` | `300.0` | Max seconds an instance can sit idle in the pool. | - -```python -@queue.worker_resource( - "session", - scope="task", - pool_size=20, - pool_min=5, - acquire_timeout=5.0, - max_lifetime=1800.0, -) -def create_session(db): - return db() -``` - -Setting `pool_min > 0` causes the pool to prewarm instances at worker startup. This avoids the cold-start latency on the first burst of tasks. - -## Frozen resources - -Wrap a resource in a read-only proxy to prevent accidental mutation: - -```python -@queue.worker_resource("config", frozen=True) -def load_config(): - return AppConfig.from_env() -``` - -Attempts to set attributes on a frozen resource raise `AttributeError`. This is useful for configuration objects that should be treated as immutable after initialization. 
- -## Hot reload - -Mark resources as reloadable to update them without restarting the worker: - -```python -@queue.worker_resource("feature_flags", reloadable=True) -def load_flags(): - return FeatureFlags.from_remote() -``` - -Trigger a reload by sending `SIGHUP` to the worker process: - -```bash -kill -HUP -``` - -Or via the CLI: - -```bash -taskito reload --pid -taskito reload --pid --resource feature_flags # reload one resource -``` - -Or programmatically from application code: - -```python -results = queue._resource_runtime.reload() -# {"feature_flags": True} -``` - -Only resources declared with `reloadable=True` are affected. Non-reloadable resources are left running — no teardown or reconnection. - -Resources are reloaded in the same topological order as initialization. If a reloadable resource depends on another reloadable resource, both are reloaded in dependency order. - -!!! note - SIGHUP is not available on Windows. Use the programmatic API instead. - -## Programmatic resource registration - -`load_resources()` and `@worker_resource()` both call `register_resource()` internally. You can call it directly for full control: - -```python -from taskito.resources.definition import ResourceDefinition, ResourceScope - -queue.register_resource(ResourceDefinition( - name="db", - factory=create_db, - teardown=close_db, - health_check=check_db, - health_check_interval=30.0, - scope=ResourceScope.WORKER, - depends_on=["config"], - reloadable=True, -)) -``` - -`register_resource()` must be called before `run_worker()`. diff --git a/docs/resources/dependency-injection.md b/docs/resources/dependency-injection.md deleted file mode 100644 index 48ad914..0000000 --- a/docs/resources/dependency-injection.md +++ /dev/null @@ -1,250 +0,0 @@ -# Dependency Injection - -Worker resources are long-lived objects initialized once at worker startup and injected into tasks by name. 
No serialization is involved — they live entirely in the worker process and are never put in the queue. - -## Declaring resources - -```python -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker - -@queue.worker_resource("db") -def create_db(): - engine = create_engine("postgresql://localhost/myapp") - return sessionmaker(engine) -``` - -The factory runs once when the worker starts. The return value is the resource instance. - -The factory can be async: - -```python -@queue.worker_resource("redis") -async def create_redis(): - import redis.asyncio as aioredis - return await aioredis.from_url("redis://localhost") -``` - -Taskito runs the async factory on the worker's event loop before accepting tasks. - -## `worker_resource()` parameters - -| Parameter | Default | Description | -|---|---|---| -| `name` | required | Resource name used in `inject=["name"]` and `Inject["name"]`. | -| `depends_on` | `[]` | Names of resources this factory receives as arguments. | -| `teardown` | `None` | Callable invoked with the instance on graceful shutdown. | -| `health_check` | `None` | Callable invoked periodically; returns truthy if healthy. | -| `health_check_interval` | `0.0` | Seconds between health checks. `0` disables checking. | -| `max_recreation_attempts` | `3` | Max times to recreate after consecutive health failures. | -| `scope` | `"worker"` | Lifetime scope — see [Resource scopes](#resource-scopes). | -| `pool_size` | `None` | Task scope: max concurrent instances (default: 4). | -| `pool_min` | `0` | Task scope: pre-warmed instances at startup. | -| `acquire_timeout` | `10.0` | Task scope: seconds to wait for an available instance. | -| `max_lifetime` | `3600.0` | Task scope: max seconds an instance can live. | -| `idle_timeout` | `300.0` | Task scope: max idle seconds before eviction. | -| `reloadable` | `False` | Allow hot reload via SIGHUP or CLI. | -| `frozen` | `False` | Wrap instance in a read-only proxy. 
| - -## Injecting resources into tasks - -=== "inject= parameter" - - ```python - @queue.task(inject=["db"]) - def process_order(order_id: int, db): - session = db() - order = session.get(Order, order_id) - ... - ``` - -=== "Inject annotation" - - ```python - from taskito import Inject - - @queue.task() - def process_order(order_id: int, db: Inject["db"]): - session = db() - order = session.get(Order, order_id) - ... - ``` - -Both syntaxes are equivalent. `Inject["name"]` is a type annotation — it works with any type checker and makes the dependency explicit in the function signature. The worker reads the annotation at task registration time and injects the resource automatically. - -If a caller explicitly passes a `db` kwarg to `.delay()`, that value wins over injection. - -## Resource scopes - -| Scope | Lifetime | Use case | -|---|---|---| -| `"worker"` (default) | Entire worker process | Database connection pools, shared caches | -| `"task"` | Acquired per-task from a pool, returned after | Short-lived connections with limited concurrency | -| `"thread"` | One instance per worker thread, created lazily | Thread-unsafe objects that must not be shared | -| `"request"` | Fresh instance per task, torn down after | Stateful per-request objects | - -```python -# Task scope: each task gets its own session from a pool of up to 10 -@queue.worker_resource("db_session", scope="task", pool_size=10) -def create_session(db): - return db() # db must be a worker-scoped resource - -# Thread scope: one cache per worker thread -@queue.worker_resource("local_cache", scope="thread") -def create_cache(): - return {} -``` - -Pool configuration parameters (`pool_size`, `pool_min`, `acquire_timeout`, `max_lifetime`, `idle_timeout`) only apply to task-scoped resources. See [Configuration](configuration.md#pool-configuration) for details. - -## Dependencies - -Resources can declare other resources they depend on. 
Taskito resolves the dependency graph and initializes in topological order, injecting dependencies as keyword arguments to the factory: - -```python -@queue.worker_resource("config") -def load_config(): - return Config.from_env() - - -@queue.worker_resource("db", depends_on=["config"]) -def create_db(config): - return create_engine(config.db_url, pool_size=10) - - -@queue.worker_resource("cache", depends_on=["config"]) -def create_cache(config): - return Redis.from_url(config.redis_url) -``` - -On shutdown, resources are torn down in reverse initialization order — `cache` and `db` before `config`. - -Cycles are detected eagerly at registration time and raise `CircularDependencyError`. - -## Teardown - -Supply a teardown callable to clean up the resource on graceful shutdown: - -```python -@queue.worker_resource( - "db", - teardown=lambda engine: engine.dispose(), -) -def create_db(): - return create_engine("postgresql://localhost/myapp") -``` - -Or use `register_resource()` for the programmatic API: - -```python -from taskito.resources.definition import ResourceDefinition - -queue.register_resource(ResourceDefinition( - name="db", - factory=create_db, - teardown=close_db, - depends_on=["config"], -)) -``` - -Teardown callables can be async — Taskito awaits them if they return a coroutine. - -## Health checking - -Resources can declare a health check function that runs on a background thread. If the check returns falsy, the worker attempts to recreate the resource: - -```python -def check_db(engine): - with engine.connect() as conn: - conn.execute(text("SELECT 1")) - return True - - -@queue.worker_resource( - "db", - health_check=check_db, - health_check_interval=30.0, # check every 30 seconds - max_recreation_attempts=3, # mark permanently unhealthy after 3 failures -) -def create_db(): - return create_engine("postgresql://localhost/myapp") -``` - -The health checker runs in a single daemon thread. 
Each resource with a non-zero `health_check_interval` is checked independently on its own schedule. - -Run a health check manually from application code: - -```python -is_healthy = queue.health_check("db") -``` - -If a resource fails all recreation attempts, it is marked permanently unhealthy. Subsequent tasks that depend on it raise `ResourceUnavailableError`. - -## Resource status - -```python -status = queue.resource_status() -# [ -# { -# "name": "config", -# "scope": "worker", -# "health": "healthy", -# "init_duration_ms": 12.4, -# "recreations": 0, -# "depends_on": [], -# }, -# { -# "name": "db", -# "scope": "worker", -# "health": "healthy", -# "init_duration_ms": 45.2, -# "recreations": 0, -# "depends_on": ["config"], -# }, -# ] -``` - -Task-scoped resources include a `"pool"` key with pool statistics. See [Observability](observability.md) for details. - -## Full example - -```python -from taskito import Queue, Inject -from sqlalchemy import create_engine, text -from sqlalchemy.orm import sessionmaker, Session - -queue = Queue(db_path="tasks.db", interception="strict") - - -@queue.worker_resource("config") -def load_config(): - return Config.from_env() - - -def check_db(engine): - with engine.connect() as conn: - conn.execute(text("SELECT 1")) - return True - - -@queue.worker_resource( - "db", - depends_on=["config"], - teardown=lambda engine: engine.dispose(), - health_check=check_db, - health_check_interval=60.0, -) -def create_db(config): - return create_engine(config.database_url, pool_size=10) - - -@queue.task() -def process_order(order_id: int, db: Inject["db"]): - session: Session = db() - try: - order = session.get(Order, order_id) - order.status = "processed" - session.commit() - finally: - session.close() -``` diff --git a/docs/resources/index.md b/docs/resources/index.md deleted file mode 100644 index 02f30f4..0000000 --- a/docs/resources/index.md +++ /dev/null @@ -1,81 +0,0 @@ -# Resource System - -The resource system gives tasks clean access to 
external dependencies — database connections, HTTP clients, cloud clients — without passing live objects through the queue. It operates in three layers that together solve a fundamental distributed systems problem: task arguments must be serializable, but most real-world dependencies are not. - -```mermaid -graph TD - A["task.delay(session, file)"] --> B["Layer 1: Argument Interception"] - B -->|"REDIRECT"| C["DI marker"] - B -->|"PROXY"| D["Serializable recipe"] - B -->|"PASS / CONVERT"| E["Serialized payload"] - C --> F["Queue"] - D --> F - E --> F - F --> G["Worker"] - G --> H["Layer 2: Resource Runtime"] - G --> I["Layer 3: Proxy Reconstruction"] - H --> J["Inject 'db' resource"] - I --> K["Rebuild file handle"] - J --> L["task(order_id, db, file)"] - K --> L -``` - -**Layer 1 — Argument Interception** classifies each value passed to `.delay()` before serialization. Database sessions become DI markers, file handles become recipes, safe primitives pass through unchanged, and non-serializable types like locks are rejected with a helpful error. - -**Layer 2 — Worker Resource Runtime** manages long-lived objects initialized once at worker startup. Resources are injected into tasks by name — no serialization needed, no connection per task. - -**Layer 3 — Resource Proxies** handles objects that have capturable state: file handles, HTTP sessions, cloud clients. The interceptor extracts a recipe; the worker rebuilds the live object before the task runs. 
- -## Minimal end-to-end example - -```python -from taskito import Queue, Inject -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker - -queue = Queue(db_path="tasks.db", interception="strict") - - -@queue.worker_resource("db") -def create_db(): - engine = create_engine("postgresql://localhost/myapp") - return sessionmaker(engine) - - -@queue.task() -def process_order(order_id: int, db: Inject["db"]): - session = db() - try: - order = session.get(Order, order_id) - order.status = "processed" - session.commit() - finally: - session.close() -``` - -Enqueue from anywhere in your application: - -```python -process_order.delay(42) -# The integer 42 passes through serialization normally. -# 'db' is injected by the worker — no session is ever put in the queue. -``` - -Start the worker: - -```bash -taskito worker --app myapp.tasks:queue -# [taskito] Initialized 1 resource(s): db -# [taskito] Worker started with 8 threads -``` - -## Section overview - -| Page | What it covers | -|---|---| -| [Argument Interception](interception.md) | Modes, strategies, custom types, `analyze()`, metrics | -| [Dependency Injection](dependency-injection.md) | `worker_resource()`, scopes, dependencies, teardown, health checks | -| [Resource Proxies](proxies.md) | Built-in handlers, HMAC signing, security, `NoProxy`, cloud handlers | -| [Configuration](configuration.md) | TOML config, pool tuning, frozen and reloadable resources, hot reload | -| [Testing](testing.md) | `test_mode(resources=)`, `MockResource`, pytest fixtures | -| [Observability](observability.md) | Prometheus metrics, dashboard endpoints, CLI commands | diff --git a/docs/resources/interception.md b/docs/resources/interception.md deleted file mode 100644 index 91e238c..0000000 --- a/docs/resources/interception.md +++ /dev/null @@ -1,179 +0,0 @@ -# Argument Interception - -Argument interception classifies every value passed to `.delay()` or `.apply_async()` before serialization. 
Enable it on the `Queue` constructor: - -```python -queue = Queue(db_path="tasks.db", interception="strict") -``` - -Without interception, values are passed directly to the serializer. A SQLAlchemy session or file handle would either raise a serialization error at enqueue time or produce a broken payload that fails on the worker. - -## Modes - -| Mode | Behavior | -|---|---| -| `"off"` | Disabled (default). All arguments pass through to the serializer unchanged. | -| `"strict"` | Raises `InterceptionError` immediately when a rejected type is detected. | -| `"lenient"` | Logs a warning and drops the rejected argument instead of raising. | - -`"strict"` is recommended for production — it surfaces problems at call time rather than causing silent task failures. - -## Classification strategies - -Every argument gets one of five strategies: - -| Strategy | What happens | Examples | -|---|---|---| -| `PASS` | Sent as-is to the serializer | `int`, `str`, `bool`, `bytes` | -| `CONVERT` | Transformed to a serializable form, reconstructed on the worker | `UUID`, `datetime`, `Decimal`, `Path`, `Enum`, Pydantic models, dataclasses | -| `REDIRECT` | Replaced with a DI marker; the worker injects the named resource | SQLAlchemy sessions, Redis clients, MongoDB clients | -| `PROXY` | Deconstructed to a recipe; reconstructed as a live object on the worker | File handles, loggers, `requests.Session`, `httpx.Client`, boto3 clients | -| `REJECT` | Raises `InterceptionError` in strict mode, dropped in lenient mode | Thread locks, generators, coroutines, sockets | - -## Built-in CONVERT types - -These are converted automatically when interception is enabled: - -| Type | Notes | -|---|---| -| `uuid.UUID` | Stored as `"uuid:"` | -| `datetime.datetime` / `date` / `time` / `timedelta` | ISO format | -| `decimal.Decimal` | Stored as string to preserve precision | -| `pathlib.Path` / `PurePath` | Stored as POSIX string | -| `re.Pattern` | Pattern string + flags | -| `collections.OrderedDict` | 
Preserves insertion order | -| `pydantic.BaseModel` | Via `.model_dump()` (if pydantic is installed) | -| `enum.Enum` subclasses | Class path + value | -| Dataclasses | Auto-detected via `dataclasses.is_dataclass()` | -| `NamedTuple` subclasses | Auto-detected | - -## Built-in REDIRECT types - -These connectors are automatically detected and replaced with a resource injection marker. The worker injects the named resource instead of attempting to deserialize a live connection object: - -| Type | Default resource name | -|---|---| -| `sqlalchemy.orm.Session` | `"db"` | -| `sqlalchemy.ext.asyncio.AsyncSession` | `"db"` | -| `sqlalchemy.engine.Engine` | `"db"` | -| `sqlalchemy.ext.asyncio.AsyncEngine` | `"db"` | -| `redis.Redis` | `"redis"` | -| `redis.asyncio.Redis` | `"redis"` | -| `pymongo.MongoClient` | `"mongo"` | -| `motor.motor_asyncio.AsyncIOMotorClient` | `"mongo"` | -| `psycopg2.extensions.connection` | `"db"` | -| `asyncpg.connection.Connection` | `"db"` | -| `django.db.backends.base.base.BaseDatabaseWrapper` | `"db"` | -| `aiohttp.ClientSession` | `"aiohttp_session"` | - -The resource name is the key you use in `@queue.worker_resource("name")`. If your resource has a different name, register a custom redirect with `register_type()`. - -## Built-in PROXY types - -These objects are deconstructed to a recipe dict and rebuilt by the worker: - -| Type | Handler name | -|---|---| -| `io.TextIOWrapper`, `io.BufferedReader`, `io.BufferedWriter`, `io.FileIO` | `"file"` | -| `logging.Logger` | `"logger"` | -| `requests.Session` | `"requests_session"` | -| `httpx.Client` / `httpx.AsyncClient` | `"httpx_client"` | -| boto3 clients (via `botocore.client.BaseClient`) | `"boto3_client"` | -| `google.cloud.storage.Client` / `Bucket` / `Blob` | `"gcs_client"` | - -See [Resource Proxies](proxies.md) for security options and handler details. 
- -## Built-in REJECT types - -These are always rejected because they cannot cross process or serialization boundaries: - -- Thread synchronization primitives (`Lock`, `RLock`, `Semaphore`, `Event`) -- `socket.socket` -- Generator objects -- Coroutine objects -- `subprocess.Popen` -- `asyncio.Task` / `asyncio.Future` -- `contextvars.Context` -- Multiprocessing `Lock` and `Queue` - -Each rejection includes a message explaining why and suggests alternatives. - -## Registering custom types - -Add custom rules for types not covered by the built-ins: - -```python -from myapp import MyDBClient, MoneyAmount, APIConnection - -# Treat a custom DB client as a worker resource (worker must have "my_db" registered) -queue.register_type(MyDBClient, "redirect", resource="my_db") - -# Convert a custom value type to something serializable -queue.register_type( - MoneyAmount, - "convert", - converter=lambda m: {"__type__": "money", "value": str(m.value), "currency": m.currency}, - type_key="money", -) - -# Reject with a helpful message -queue.register_type( - APIConnection, - "reject", - message="API connections are process-local. Register it as a worker resource instead.", -) -``` - -`register_type()` requires interception to be enabled (`"strict"` or `"lenient"`). Calling it when interception is `"off"` raises `RuntimeError`. - -| Parameter | Description | -|---|---| -| `python_type` | The type to register. | -| `strategy` | `"pass"`, `"convert"`, `"redirect"`, `"reject"`, or `"proxy"`. | -| `resource` | Resource name for `"redirect"`. | -| `message` | Rejection reason for `"reject"`. | -| `converter` | Converter callable for `"convert"`. | -| `type_key` | Dispatch key for the converter reconstructor. | -| `proxy_handler` | Handler name for `"proxy"`. | - -## Constructor parameters - -| Parameter | Default | Description | -|---|---|---| -| `interception` | `"off"` | Interception mode: `"strict"`, `"lenient"`, or `"off"`. 
| -| `max_intercept_depth` | `10` | Maximum depth the walker recurses into nested containers. | - -## Analyzing arguments - -Inspect how interception would classify arguments without actually transforming them: - -```python -from myapp.tasks import queue - -report = queue._interceptor.analyze( - args=(user_session, "Hello"), - kwargs={"attachment": open("file.pdf", "rb")}, -) -print(report) -# Argument Analysis: -# args[0] (Session) → REDIRECT (redirect to worker resource 'db') -# args[1] (str) → PASS -# kwargs.attachment (BufferedReader) → PROXY (handler=file) -``` - -`analyze()` is a development and debugging tool. It reads the registry but makes no changes to arguments. - -## Interception metrics - -```python -stats = queue.interception_stats() -# { -# "total_intercepts": 1200, -# "total_duration_ms": 216.0, -# "avg_duration_ms": 0.18, -# "strategy_counts": {"pass": 2800, "convert": 450, "redirect": 200, "proxy": 30, "reject": 0}, -# "max_depth_reached": 3, -# } -``` - -See [Observability](observability.md) for Prometheus metrics and dashboard endpoints. diff --git a/docs/resources/observability.md b/docs/resources/observability.md deleted file mode 100644 index 0cd63ab..0000000 --- a/docs/resources/observability.md +++ /dev/null @@ -1,203 +0,0 @@ -# Observability - -The resource system exposes metrics through three channels: the `Queue` API, the built-in dashboard, and Prometheus. 
- -## Queue API - -### `resource_status()` - -Returns a snapshot of every registered resource: - -```python -status = queue.resource_status() -# [ -# { -# "name": "config", -# "scope": "worker", -# "health": "healthy", -# "init_duration_ms": 12.4, -# "recreations": 0, -# "depends_on": [], -# }, -# { -# "name": "db", -# "scope": "worker", -# "health": "healthy", -# "init_duration_ms": 45.2, -# "recreations": 1, -# "depends_on": ["config"], -# }, -# { -# "name": "session", -# "scope": "task", -# "health": "healthy", -# "init_duration_ms": 0.0, -# "recreations": 0, -# "depends_on": ["db"], -# "pool": { -# "size": 20, -# "active": 3, -# "idle": 5, -# "total_acquisitions": 1542, -# "total_timeouts": 0, -# "avg_acquire_ms": 0.4, -# }, -# }, -# ] -``` - -Task-scoped resources include a `"pool"` key with pool statistics. The `"health"` field is `"healthy"`, `"unhealthy"`, or `"unknown"` (resource not yet initialized). - -### `interception_stats()` - -Returns aggregate metrics from the argument interceptor: - -```python -stats = queue.interception_stats() -# { -# "total_intercepts": 1200, -# "total_duration_ms": 216.0, -# "avg_duration_ms": 0.18, -# "strategy_counts": { -# "pass": 2800, -# "convert": 450, -# "redirect": 200, -# "proxy": 30, -# "reject": 0, -# }, -# "max_depth_reached": 3, -# } -``` - -Returns an empty dict if interception is disabled (`"off"`). - -### `proxy_stats()` - -Returns per-handler reconstruction metrics: - -```python -stats = queue.proxy_stats() -# [ -# { -# "handler": "file", -# "total_reconstructions": 42, -# "total_errors": 0, -# "total_cleanup_errors": 0, -# "total_checksum_failures": 0, -# "total_duration_ms": 50.4, -# "avg_duration_ms": 1.2, -# "max_duration_ms": 8.1, -# "p95_duration_ms": 3.4, -# }, -# { -# "handler": "boto3_client", -# "total_reconstructions": 310, -# "total_errors": 2, -# ... 
-# }, -# ] -``` - -## Dashboard endpoints - -The built-in dashboard exposes three JSON endpoints for the resource system: - -| Endpoint | Description | -|---|---| -| `GET /api/resources` | Same data as `resource_status()` | -| `GET /api/proxy-stats` | Same data as `proxy_stats()` | -| `GET /api/interception-stats` | Same data as `interception_stats()` | - -Start the dashboard: - -```bash -taskito dashboard --app myapp.tasks:queue -``` - -See the [Web Dashboard](../guide/observability/dashboard.md) guide for full dashboard documentation. - -## CLI commands - -### `taskito resources` - -Print a formatted table of all registered resources and their current status: - -```bash -taskito resources --app myapp.tasks:queue -# RESOURCE SCOPE HEALTH INIT (ms) RECREATIONS DEPENDS ON -# ----------------------------------------------------------------------- -# config worker healthy 12.40 0 - -# db worker healthy 45.21 1 config -# session task healthy 0.00 0 db -# pool: active=3 idle=5 max=20 timeouts=0 -``` - -### `taskito reload` - -Send `SIGHUP` to a running worker to reload all reloadable resources: - -```bash -taskito reload --pid 12345 -# Sent SIGHUP to worker (PID 12345) - -# Reload a specific resource only: -taskito reload --pid 12345 --resource feature_flags -``` - -!!! note - `taskito reload` sends `SIGHUP` — it does not wait for the reload to complete. Check `taskito resources` or logs to confirm the reload succeeded. 
- -## Prometheus metrics - -Install the Prometheus integration: - -```bash -pip install taskito[prometheus] -``` - -```python -from taskito.contrib.prometheus import PrometheusMiddleware, PrometheusStatsCollector - -queue = Queue(db_path="tasks.db", middleware=[PrometheusMiddleware()]) - -# Poll resource, proxy, and interception stats periodically -collector = PrometheusStatsCollector(queue, interval=10.0) -collector.start() -``` - -### Resource metrics - -| Metric | Type | Labels | Description | -|---|---|---|---| -| `taskito_resource_health_status` | Gauge | `resource` | `1` if healthy, `0` if unhealthy | -| `taskito_resource_recreation_total` | Gauge | `resource` | Total recreation count | -| `taskito_resource_init_duration_seconds` | Gauge | `resource` | Initialization duration | -| `taskito_resource_pool_size` | Gauge | `resource` | Pool max size (task scope) | -| `taskito_resource_pool_active` | Gauge | `resource` | Active pool instances | -| `taskito_resource_pool_idle` | Gauge | `resource` | Idle pool instances | -| `taskito_resource_pool_timeout_total` | Counter | `resource` | Pool acquisition timeouts | - -### Proxy metrics - -| Metric | Type | Labels | Description | -|---|---|---|---| -| `taskito_proxy_reconstruct_total` | Counter | `handler` | Total reconstructions | -| `taskito_proxy_reconstruct_errors_total` | Counter | `handler` | Reconstruction errors | -| `taskito_proxy_reconstruct_duration_seconds` | Histogram | `handler` | Reconstruction duration | - -### Interception metrics - -| Metric | Type | Labels | Description | -|---|---|---|---| -| `taskito_intercept_strategy_total` | Counter | `strategy` | Count per strategy (`pass`, `convert`, `redirect`, `proxy`, `reject`) | -| `taskito_intercept_duration_seconds` | Histogram | — | Interception pass duration | - -Metrics are exposed at `/metrics` on the dashboard server or via a standalone metrics server: - -```python -from taskito.contrib.prometheus import start_metrics_server - 
-start_metrics_server(port=9090) -``` - -See the [Prometheus integration](../integrations/prometheus.md) page for full setup instructions. diff --git a/docs/resources/proxies.md b/docs/resources/proxies.md deleted file mode 100644 index 9612b60..0000000 --- a/docs/resources/proxies.md +++ /dev/null @@ -1,154 +0,0 @@ -# Resource Proxies - -Proxies handle objects that are neither serializable primitives nor DI-injectable — things like file handles, HTTP sessions, and cloud clients that have capturable state. - -When interception detects a proxy-able argument, the handler's `deconstruct()` method extracts a JSON-serializable recipe. The worker calls `reconstruct()` to rebuild the live object before invoking the task. After the task completes, `cleanup()` is called on the reconstructed object. - -Proxies require interception to be enabled: - -```python -queue = Queue(db_path="tasks.db", interception="strict") -``` - -## Built-in handlers - -| Handler name | Handled types | Notes | -|---|---|---| -| `"file"` | `io.TextIOWrapper`, `io.BufferedReader`, `io.BufferedWriter`, `io.FileIO` | Stores path + mode; worker reopens the file | -| `"logger"` | `logging.Logger` | Stores logger name; worker resolves `logging.getLogger(name)` | -| `"requests_session"` | `requests.Session` | Stores headers, auth, timeout, verify; worker creates a new session | -| `"httpx_client"` | `httpx.Client`, `httpx.AsyncClient` | Stores base_url, headers, timeout, verify | -| `"boto3_client"` | boto3 clients (`botocore.client.BaseClient`) | Stores service name, region, endpoint_url; credentials are NOT included | -| `"gcs_client"` | `google.cloud.storage.Client`, `Bucket`, `Blob` | Stores project and resource identifiers; credentials are NOT included | - -`requests`, `httpx`, `boto3`, and `google-cloud-storage` are optional. Their handlers register automatically when the library is installed. 
- -## HMAC signing - -Proxy recipes are signed with HMAC-SHA256 to prevent recipe tampering between enqueue and execution: - -```python -queue = Queue( - db_path="tasks.db", - interception="strict", - recipe_signing_key="your-secret-key", -) -``` - -If `recipe_signing_key` is not set on the constructor, it falls back to the `TASKITO_RECIPE_SECRET` environment variable. Signed recipes are verified at reconstruction time — a modified or forged recipe raises `ProxyReconstructionError`. - -!!! warning - Omitting a signing key means recipes are not verified. Use a signing key in production. - -## Security options - -### Reconstruction timeout - -Limit how long reconstruction can take before raising `ProxyReconstructionError`: - -```python -queue = Queue( - db_path="tasks.db", - max_reconstruction_timeout=5.0, # seconds, default 5.0 -) -``` - -### File path allowlist - -Restrict which file paths the file proxy handler is allowed to reconstruct: - -```python -queue = Queue( - db_path="tasks.db", - file_path_allowlist=["/data/uploads/", "/tmp/taskito/"], -) -``` - -Paths outside the allowlist raise `ProxyReconstructionError` during reconstruction. Without an allowlist, any path is permitted. - -### Disabling specific handlers - -Disable individual handlers by name: - -```python -queue = Queue( - db_path="tasks.db", - disabled_proxies=["requests_session", "gcs_client"], -) -``` - -Disabled handlers are not registered. Arguments of those types fall through to the serializer — or are rejected if they would otherwise be PROXY-classified and interception is strict. - -## Cloud handlers - -### AWS (boto3) - -```bash -pip install taskito[aws] # adds boto3>=1.20 -``` - -The `boto3_client` handler stores the service name, region, and optional endpoint URL. **Credentials are not stored in the recipe.** The worker uses its own ambient credentials — IAM role, environment variables, or `~/.aws/credentials`. 
- -```python -import boto3 - -s3 = boto3.client("s3", region_name="us-east-1") -process_upload.delay(s3, "my-bucket/key") -# Recipe: {"service_name": "s3", "region_name": "us-east-1", "endpoint_url": null} -# Worker recreates: boto3.client("s3", region_name="us-east-1") -``` - -### Google Cloud Storage - -```bash -pip install taskito[gcs] # adds google-cloud-storage>=2.0 -``` - -The `gcs_client` handler stores the project and resource identifiers for `Client`, `Bucket`, and `Blob` objects. **Credentials are not stored.** The worker uses Application Default Credentials. - -```python -from google.cloud import storage - -client = storage.Client(project="my-project") -blob = client.bucket("my-bucket").blob("file.parquet") -process_file.delay(blob) -# Recipe: {"type": "blob", "project": "my-project", "bucket_name": "my-bucket", "blob_name": "file.parquet"} -``` - -## `NoProxy` wrapper - -Opt out of proxy handling for a specific argument. The value is passed through to the serializer as-is: - -```python -from taskito import NoProxy - -session = requests.Session() -session.headers["Authorization"] = "Bearer token" - -# Pass to cloudpickle instead of the proxy system -process.delay(NoProxy(session)) -``` - -Use `NoProxy` when the serializer can handle the value directly (e.g., with cloudpickle) or when you want to suppress proxy handling for a specific call without disabling the handler globally. - -## Proxy metrics - -```python -stats = queue.proxy_stats() -# [ -# { -# "handler": "file", -# "total_reconstructions": 42, -# "total_errors": 0, -# "total_cleanup_errors": 0, -# "total_checksum_failures": 0, -# "total_duration_ms": 50.4, -# "avg_duration_ms": 1.2, -# "max_duration_ms": 8.1, -# "p95_duration_ms": 3.4, -# }, -# ... -# ] -``` - -See [Observability](observability.md) for Prometheus metrics and dashboard endpoints. 
diff --git a/docs/resources/testing.md b/docs/resources/testing.md deleted file mode 100644 index 4aa8d51..0000000 --- a/docs/resources/testing.md +++ /dev/null @@ -1,123 +0,0 @@ -# Testing with Resources - -The `test_mode()` context manager runs tasks synchronously in the calling thread without starting a worker. Pass mock resources to override real factories during tests. - -## Injecting mock resources - -```python -from unittest.mock import MagicMock -from myapp.tasks import queue, process_order - -def test_process_order(): - mock_db = MagicMock() - mock_db.return_value.get.return_value = Order(id=42, total=99.0) - - with queue.test_mode(resources={"db": mock_db}) as results: - process_order.delay(42) - - assert results[0].succeeded - mock_db.return_value.get.assert_called_once_with(Order, 42) -``` - -The `resources=` dict maps resource names to mock values. Any plain Python object works — `MagicMock`, a real instance, a simple dict, whatever your test needs. - -When `test_mode(resources=...)` is active: -- Resources are taken directly from the dict — no factories are called. -- Proxy reconstruction is bypassed. Proxy markers in arguments are passed through unchanged, so tests don't fail because of missing files or network connections. -- The previous resource runtime is restored on context exit. - -## `MockResource` - -`MockResource` adds call tracking on top of a plain mock value: - -```python -from taskito import MockResource - -def test_with_spy(): - spy_db = MockResource("db", wraps=real_session_factory, track_calls=True) - - with queue.test_mode(resources={"db": spy_db}) as results: - process_order.delay(42) - - assert spy_db.call_count == 1 - assert results[0].succeeded -``` - -| Parameter | Description | -|---|---| -| `name` | Resource name (informational, used in repr). | -| `return_value` | Value returned when the resource is accessed. | -| `wraps` | Wrap a real object — it is returned as-is when the resource is accessed. 
| -| `track_calls` | If `True`, increment `call_count` each time the resource is accessed. | - -`MockResource` attributes: - -| Attribute | Description | -|---|---| -| `call_count` | Number of times the resource was accessed during the test. | -| `calls` | List of call tuples (currently `[]` — tracking is count-only). | - -!!! note - `MockResource` wraps a value — it is not callable by default. If your task calls `db()` to obtain a session, your `return_value` or `wraps` must be callable (e.g., a `MagicMock` or a real session factory). - -## Explicit kwargs override injection - -If a test calls `.delay()` with an explicit kwarg that matches an injected resource name, the explicit value wins: - -```python -@queue.task() -def my_task(db: Inject["db"]): - db.do_something() - -# Override injection for this one call: -mock_db = MagicMock() -my_task.delay(db=mock_db) -``` - -This also works inside `test_mode()` — the resource dict is the default, but explicit call-site kwargs always take precedence. - -## pytest fixture pattern - -```python -# conftest.py -import pytest -from unittest.mock import MagicMock -from myapp.tasks import queue - - -@pytest.fixture -def mock_db(): - session = MagicMock() - session.return_value.get.return_value = None - return session - - -@pytest.fixture -def task_results(mock_db): - with queue.test_mode(resources={"db": mock_db}) as results: - yield results, mock_db -``` - -```python -# test_orders.py -def test_order_processed(task_results): - results, mock_db = task_results - mock_db.return_value.get.return_value = Order(id=1, status="pending") - - process_order.delay(1) - - assert results[0].succeeded - order = mock_db.return_value.get.return_value - assert order.status == "processed" -``` - -## Propagating errors - -By default, task exceptions are captured in `TestResult.error`. 
To re-raise them immediately: - -```python -with queue.test_mode(propagate_errors=True) as results: - process_order.delay(999) # raises if the task raises -``` - -Use `propagate_errors=False` (the default) when you want to test error handling — check `results[0].failed` and `results[0].error`. diff --git a/docs-next/source.config.ts b/docs/source.config.ts similarity index 100% rename from docs-next/source.config.ts rename to docs/source.config.ts diff --git a/docs-next/src/app/(home)/comparison.tsx b/docs/src/app/(home)/comparison.tsx similarity index 100% rename from docs-next/src/app/(home)/comparison.tsx rename to docs/src/app/(home)/comparison.tsx diff --git a/docs-next/src/app/(home)/layout.tsx b/docs/src/app/(home)/layout.tsx similarity index 100% rename from docs-next/src/app/(home)/layout.tsx rename to docs/src/app/(home)/layout.tsx diff --git a/docs-next/src/app/(home)/page.tsx b/docs/src/app/(home)/page.tsx similarity index 100% rename from docs-next/src/app/(home)/page.tsx rename to docs/src/app/(home)/page.tsx diff --git a/docs-next/src/app/api/search/route.ts b/docs/src/app/api/search/route.ts similarity index 100% rename from docs-next/src/app/api/search/route.ts rename to docs/src/app/api/search/route.ts diff --git a/docs-next/src/app/docs/[[...slug]]/page.tsx b/docs/src/app/docs/[[...slug]]/page.tsx similarity index 100% rename from docs-next/src/app/docs/[[...slug]]/page.tsx rename to docs/src/app/docs/[[...slug]]/page.tsx diff --git a/docs-next/src/app/docs/layout.tsx b/docs/src/app/docs/layout.tsx similarity index 100% rename from docs-next/src/app/docs/layout.tsx rename to docs/src/app/docs/layout.tsx diff --git a/docs-next/src/app/global.css b/docs/src/app/global.css similarity index 100% rename from docs-next/src/app/global.css rename to docs/src/app/global.css diff --git a/docs-next/src/app/layout.tsx b/docs/src/app/layout.tsx similarity index 100% rename from docs-next/src/app/layout.tsx rename to docs/src/app/layout.tsx diff --git 
a/docs-next/src/app/llms-full.txt/route.ts b/docs/src/app/llms-full.txt/route.ts similarity index 100% rename from docs-next/src/app/llms-full.txt/route.ts rename to docs/src/app/llms-full.txt/route.ts diff --git a/docs-next/src/app/llms.mdx/docs/[[...slug]]/route.ts b/docs/src/app/llms.mdx/docs/[[...slug]]/route.ts similarity index 100% rename from docs-next/src/app/llms.mdx/docs/[[...slug]]/route.ts rename to docs/src/app/llms.mdx/docs/[[...slug]]/route.ts diff --git a/docs-next/src/app/llms.txt/route.ts b/docs/src/app/llms.txt/route.ts similarity index 100% rename from docs-next/src/app/llms.txt/route.ts rename to docs/src/app/llms.txt/route.ts diff --git a/docs-next/src/app/og/docs/[...slug]/route.tsx b/docs/src/app/og/docs/[...slug]/route.tsx similarity index 100% rename from docs-next/src/app/og/docs/[...slug]/route.tsx rename to docs/src/app/og/docs/[...slug]/route.tsx diff --git a/docs-next/src/components/mdx.tsx b/docs/src/components/mdx.tsx similarity index 100% rename from docs-next/src/components/mdx.tsx rename to docs/src/components/mdx.tsx diff --git a/docs-next/src/components/mermaid.tsx b/docs/src/components/mermaid.tsx similarity index 100% rename from docs-next/src/components/mermaid.tsx rename to docs/src/components/mermaid.tsx diff --git a/docs-next/src/components/provider.tsx b/docs/src/components/provider.tsx similarity index 100% rename from docs-next/src/components/provider.tsx rename to docs/src/components/provider.tsx diff --git a/docs-next/src/components/search.tsx b/docs/src/components/search.tsx similarity index 100% rename from docs-next/src/components/search.tsx rename to docs/src/components/search.tsx diff --git a/docs-next/src/lib/cn.ts b/docs/src/lib/cn.ts similarity index 100% rename from docs-next/src/lib/cn.ts rename to docs/src/lib/cn.ts diff --git a/docs-next/src/lib/layout.shared.tsx b/docs/src/lib/layout.shared.tsx similarity index 100% rename from docs-next/src/lib/layout.shared.tsx rename to docs/src/lib/layout.shared.tsx 
diff --git a/docs-next/src/lib/shared.ts b/docs/src/lib/shared.ts similarity index 100% rename from docs-next/src/lib/shared.ts rename to docs/src/lib/shared.ts diff --git a/docs-next/src/lib/source.ts b/docs/src/lib/source.ts similarity index 100% rename from docs-next/src/lib/source.ts rename to docs/src/lib/source.ts diff --git a/docs-next/tsconfig.json b/docs/tsconfig.json similarity index 100% rename from docs-next/tsconfig.json rename to docs/tsconfig.json diff --git a/docs/workflows/analysis.md b/docs/workflows/analysis.md deleted file mode 100644 index 83a33bf..0000000 --- a/docs/workflows/analysis.md +++ /dev/null @@ -1,140 +0,0 @@ -# Analysis & Visualization - -Analyze the workflow DAG before execution and render diagrams with live status. - -## Graph inspection - -```python -wf = Workflow(name="pipeline") -wf.step("a", task_a) -wf.step("b", task_b, after="a") -wf.step("c", task_c, after="a") -wf.step("d", task_d, after=["b", "c"]) - -wf.ancestors("d") # ["a", "b", "c"] -wf.descendants("a") # ["b", "c", "d"] -wf.topological_levels() # [["a"], ["b", "c"], ["d"]] -wf.stats() -# {"nodes": 4, "edges": 4, "depth": 3, "width": 2, "density": 0.6667} -``` - -| Method | Returns | Description | -|--------|---------|-------------| -| `ancestors(node)` | `list[str]` | All transitive predecessors | -| `descendants(node)` | `list[str]` | All transitive successors | -| `topological_levels()` | `list[list[str]]` | Nodes grouped by depth | -| `stats()` | `dict` | Node count, edge count, depth, width, density | - -## Critical path - -Find the longest-weighted path through the DAG: - -```python -path, cost = wf.critical_path({ - "a": 2.0, - "b": 7.0, - "c": 1.0, - "d": 3.0, -}) -# path = ["a", "b", "d"], cost = 12.0 -``` - -Pass estimated durations per step. The critical path determines the minimum total execution time. 
- -## Execution plan - -Generate a step-by-step schedule respecting worker limits: - -```python -plan = wf.execution_plan(max_workers=2) -# [["a"], ["b", "c"], ["d"]] - -plan = wf.execution_plan(max_workers=1) -# [["a"], ["b"], ["c"], ["d"]] -``` - -Each stage contains up to `max_workers` nodes. Nodes in the same topological level are batched together. - -## Bottleneck analysis - -Identify the most expensive step on the critical path: - -```python -result = wf.bottleneck_analysis({ - "a": 2.0, "b": 7.0, "c": 1.0, "d": 3.0 -}) -# { -# "node": "b", -# "cost": 7.0, -# "percentage": 58.3, -# "critical_path": ["a", "b", "d"], -# "total_cost": 12.0, -# "suggestion": "b is the bottleneck (58.3% of total time). ..." -# } -``` - -## Visualization - -Render the DAG as a diagram string: - -=== "Mermaid" - - ```python - print(wf.visualize("mermaid")) - ``` - - ``` - graph LR - a[a] - b[b] - c[c] - d[d] - a --> b - a --> c - b --> d - c --> d - ``` - -=== "Graphviz DOT" - - ```python - print(wf.visualize("dot")) - ``` - - ``` - digraph workflow { - rankdir=LR; - a [label="a" style=filled fillcolor=white]; - b [label="b" style=filled fillcolor=white]; - ... - } - ``` - -### Live status visualization - -`WorkflowRun.visualize()` includes status colors: - -```python -run = queue.submit_workflow(wf) -run.wait() - -print(run.visualize("mermaid")) -``` - -``` -graph LR - a[a ✓] - b[b ✓] - a --> b - style a fill:#90EE90 - style b fill:#90EE90 -``` - -| Status | Color | -|--------|-------| -| Completed | Green `#90EE90` | -| Failed | Red `#FFB6C1` | -| Running | Blue `#87CEEB` | -| Pending | Gray `#D3D3D3` | -| Skipped | Light gray `#F5F5F5` | -| Waiting Approval | Yellow `#FFFACD` | diff --git a/docs/workflows/building.md b/docs/workflows/building.md deleted file mode 100644 index 6241bfc..0000000 --- a/docs/workflows/building.md +++ /dev/null @@ -1,134 +0,0 @@ -# Building Workflows - -A workflow is a DAG of steps. Each step wraps a registered task. 
The engine creates jobs in topological order with `depends_on` chains so the existing scheduler handles execution. - -## Defining steps - -```python -from taskito.workflows import Workflow - -wf = Workflow(name="etl", version=1) -wf.step("extract", extract_task) -wf.step("transform", transform_task, after="extract") -wf.step("load", load_task, after="transform") -``` - -Steps are added in order. The `after` parameter declares predecessors — a step won't run until all its predecessors complete. - -### Multiple predecessors - -```python -wf.step("merge", merge_task, after=["branch_a", "branch_b"]) -``` - -### Step arguments - -```python -wf.step("fetch", fetch_task, args=("https://api.example.com",)) -wf.step("process", process_task, after="fetch", kwargs={"mode": "strict"}) -``` - -Arguments are serialized at submission time using the queue's serializer. - -## Step configuration - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `name` | `str` | required | Unique step name within the workflow | -| `task` | `TaskWrapper` | required | Registered `@queue.task()` function | -| `after` | `str \| list[str]` | `None` | Predecessor step(s) | -| `args` | `tuple` | `()` | Positional arguments | -| `kwargs` | `dict` | `None` | Keyword arguments | -| `queue` | `str` | `None` | Override queue name | -| `max_retries` | `int` | `None` | Override retry count | -| `timeout_ms` | `int` | `None` | Override timeout (milliseconds) | -| `priority` | `int` | `None` | Override priority | - -## Workflow decorator - -Register reusable workflow factories with `@queue.workflow()`: - -```python -@queue.workflow("nightly_etl") -def etl_pipeline(): - wf = Workflow() - wf.step("extract", extract) - wf.step("load", load, after="extract") - return wf - -# Build and submit -run = etl_pipeline.submit() -run.wait() - -# Or build without submitting -wf = etl_pipeline.build() -print(wf.step_names) # ["extract", "load"] -``` - -## Submitting - -```python -run 
= queue.submit_workflow(wf) -``` - -This creates a `WorkflowRun` handle. Under the hood: - -1. A `WorkflowDefinition` is stored (or reused by name + version) -2. A `WorkflowRun` record is created -3. For each step in topological order, a job is enqueued with `depends_on` chains -4. The run transitions to `RUNNING` - -```mermaid -sequenceDiagram - participant P as Python - participant R as Rust Engine - participant S as Scheduler - - P->>R: submit_workflow(dag, payloads) - R->>R: Store definition + run - loop Each step in topo order - R->>S: enqueue(job, depends_on=[pred_ids]) - end - R-->>P: WorkflowRun handle - S->>S: Dequeue jobs as deps satisfied -``` - -## Workflow parameters - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `name` | `str` | `"workflow"` | Workflow name | -| `version` | `int` | `1` | Version number | -| `on_failure` | `str` | `"fail_fast"` | Error strategy: `"fail_fast"` or `"continue"` | -| `cache_ttl` | `float` | `None` | Cache TTL in seconds for [incremental runs](caching.md) | - -## Node statuses - -Each step transitions through these states: - -```mermaid -stateDiagram-v2 - [*] --> Pending - Pending --> Running : job picked up - Running --> Completed : success - Running --> Failed : error (retries exhausted) - Pending --> Skipped : cascade / condition - Pending --> WaitingApproval : gate reached - WaitingApproval --> Completed : approved - WaitingApproval --> Failed : rejected - Pending --> CacheHit : incremental reuse - Completed --> [*] - Failed --> [*] - Skipped --> [*] - CacheHit --> [*] -``` - -| Status | Terminal | Meaning | -|--------|----------|---------| -| `PENDING` | No | Waiting for predecessors or job creation | -| `RUNNING` | No | Job is executing | -| `COMPLETED` | Yes | Step succeeded | -| `FAILED` | Yes | Step failed after retries exhausted | -| `SKIPPED` | Yes | Skipped due to failure cascade or unmet condition | -| `WAITING_APPROVAL` | No | Gate awaiting approve/reject | -| 
`CACHE_HIT` | Yes | Reused result from a prior run | diff --git a/docs/workflows/caching.md b/docs/workflows/caching.md deleted file mode 100644 index dcb37c5..0000000 --- a/docs/workflows/caching.md +++ /dev/null @@ -1,69 +0,0 @@ -# Incremental Runs - -Skip unchanged steps by reusing results from a prior run. When a node completed successfully in the base run, it gets `CACHE_HIT` status instead of re-executing. - -## Basic usage - -```python -# First run: everything executes, results hashed (SHA-256) -run1 = queue.submit_workflow(wf) -run1.wait() - -# Second run: skip completed nodes from run1 -run2 = queue.submit_workflow(wf, incremental=True, base_run=run1.id) -run2.wait() -``` - -Nodes that completed in `run1` with a stored result hash become `CACHE_HIT` in `run2`. Nodes that failed or are missing re-execute. - -## Dirty-set propagation - -If a node is dirty (failed or missing in the base run), all its downstream nodes are also dirty — even if they had cached results: - -```mermaid -graph LR - A["a — dirty (missing)"] --> B["b — dirty (propagated)"] - B --> C["c — dirty (propagated)"] - style A fill:#FFB6C1 - style B fill:#FFB6C1 - style C fill:#FFB6C1 -``` - -```mermaid -graph LR - A["a — CACHE_HIT ✓"] --> B["b — dirty (failed in base)"] - B --> C["c — dirty (propagated)"] - style A fill:#90EE90 - style B fill:#FFB6C1 - style C fill:#FFB6C1 -``` - -## Cache TTL - -Set a time-to-live on cached results: - -```python -wf = Workflow(name="pipeline", cache_ttl=3600) # 1 hour -``` - -If the base run completed more than `cache_ttl` seconds ago, all nodes are treated as dirty (full re-execution). - -## How it works - -At submit time with `incremental=True`: - -1. Fetch the base run's node data: `{name: (status, result_hash)}` -2. For each node in the new run: - - Base node completed + has result_hash → `CACHE_HIT` - - Base node failed / missing → dirty - - Any predecessor dirty → also dirty (propagated) -3. 
`CACHE_HIT` nodes are created with `status=cache_hit` and `completed_at` set — no job enqueued -4. Dirty nodes get normal jobs - -## Parameters - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `incremental` | `bool` | `False` | Enable cache comparison | -| `base_run` | `str` | `None` | Run ID to compare against | -| `cache_ttl` | `float` | `None` | TTL in seconds (on `Workflow`) | diff --git a/docs/workflows/canvas.md b/docs/workflows/canvas.md deleted file mode 100644 index ff191cb..0000000 --- a/docs/workflows/canvas.md +++ /dev/null @@ -1,118 +0,0 @@ -# Canvas Primitives - -For simpler pipelines without DAG features, taskito provides **chain**, **group**, and **chord** — lightweight composition that doesn't require the workflow engine. - -## Signatures - -A `Signature` wraps a task call for deferred execution: - -```python -from taskito import chain, group, chord - -sig = add.s(1, 2) # Mutable — receives previous result as first arg -sig = add.si(1, 2) # Immutable — ignores previous result -``` - -## Chain - -Execute tasks sequentially, piping each result to the next: - -```mermaid -graph LR - S1["extract.s(url)"] -->|result| S2["transform.s()"] - S2 -->|result| S3["load.s()"] -``` - -```python -result = chain( - extract.s("https://api.example.com/users"), - transform.s(), - load.s(), -).apply(queue) - -print(result.result(timeout=30)) -``` - -!!! 
tip - Use `.si()` when a step should **not** receive the previous result: - - ```python - chain( - step_a.s(input_data), - step_b.si(independent_data), - step_c.s(), - ).apply(queue) - ``` - -## Group - -Execute tasks in parallel (fan-out): - -```python -jobs = group( - process.s(1), - process.s(2), - process.s(3), -).apply(queue) - -results = [j.result(timeout=30) for j in jobs] -``` - -### Concurrency limits - -```python -jobs = group( - *[fetch.s(url) for url in urls], - max_concurrency=5, -).apply(queue) -``` - -## Chord - -Fan-out with a callback — run tasks in parallel, then pass all results to a final task: - -```python -result = chord( - group( - fetch.s("https://api1.example.com"), - fetch.s("https://api2.example.com"), - fetch.s("https://api3.example.com"), - ), - merge.s(), -).apply(queue) -``` - -## chunks / starmap - -```python -from taskito import chunks, starmap - -# Batch processing — split 1000 items into groups of 100 -results = chunks(process_batch, items, chunk_size=100).apply(queue) - -# Map-reduce pattern -result = chord( - chunks(process_batch, items, chunk_size=100), - merge_results.s(), -).apply(queue) - -# Tuple unpacking -results = starmap(add, [(1, 2), (3, 4), (5, 6)]).apply(queue) -``` - -## When to use canvas vs DAG workflows - -| Feature | Canvas | DAG Workflows | -|---------|--------|---------------| -| Setup | No imports needed | `from taskito.workflows import Workflow` | -| Topology | Linear chains, flat groups | Arbitrary DAGs | -| Fan-out | Static (known at build time) | Dynamic (from return values) | -| Conditions | None | `on_success`, `on_failure`, `always`, callables | -| Error handling | Per-task retries only | Workflow-level strategies | -| Approval gates | No | Yes | -| Sub-workflows | No | Yes | -| Incremental runs | No | Yes | -| Status tracking | Per-job only | Per-workflow + per-node | -| Visualization | No | Mermaid / DOT | - -Use canvas for quick one-off pipelines. 
Use DAG workflows for production pipelines that need monitoring, conditions, or complex topologies. diff --git a/docs/workflows/composition.md b/docs/workflows/composition.md deleted file mode 100644 index 4aafe4b..0000000 --- a/docs/workflows/composition.md +++ /dev/null @@ -1,85 +0,0 @@ -# Sub-Workflows & Scheduling - -Nest workflows for composition, and schedule workflows on a cron. - -## Sub-workflows - -Use `WorkflowProxy.as_step()` to embed one workflow inside another: - -```python -@queue.workflow("etl") -def etl_pipeline(region): - wf = Workflow() - wf.step("extract", extract, args=[region]) - wf.step("load", load, after="extract") - return wf - -@queue.workflow("daily") -def daily_pipeline(): - wf = Workflow() - wf.step("eu_etl", etl_pipeline.as_step(region="eu")) - wf.step("us_etl", etl_pipeline.as_step(region="us")) - wf.step("reconcile", reconcile, after=["eu_etl", "us_etl"]) - return wf - -run = daily_pipeline.submit() -``` - -```mermaid -graph TD - subgraph Parent["daily_pipeline"] - eu["eu_etl"] --> reconcile - us["us_etl"] --> reconcile - end - - subgraph Child1["etl (region=eu)"] - e1["extract"] --> l1["load"] - end - - subgraph Child2["etl (region=us)"] - e2["extract"] --> l2["load"] - end - - eu -.->|"submits"| Child1 - us -.->|"submits"| Child2 -``` - -### How it works - -1. Parent workflow submits the child workflow via `queue.submit_workflow()` with `parent_run_id` -2. The child runs independently with its own nodes and status -3. When the child completes/fails, the tracker updates the parent node -4. Downstream steps in the parent evaluate normally - -### Cancellation cascade - -Cancelling the parent cascades to all active child workflows: - -```python -run.cancel() # Cancels parent + all child sub-workflows -``` - -### Failure - -If a child workflow fails at runtime, the parent node is marked `FAILED`. Downstream steps follow the parent's `on_failure` strategy. 
- -The same holds for failures at *submission* time -- if the child's factory raises or the DAG fails to compile when the parent node becomes evaluable, the parent node is marked `FAILED` immediately (rather than leaving the outer run hanging), and the parent run finalizes normally. - -## Cron-scheduled workflows - -Stack `@queue.periodic()` on top of `@queue.workflow()`: - -```python -@queue.periodic(cron="0 0 2 * * *") # 2:00 AM daily -@queue.workflow("nightly_analytics") -def nightly(): - wf = Workflow() - wf.step("extract", extract_clickstream) - wf.step("aggregate", build_dashboards, after="extract") - return wf -``` - -Each cron trigger submits a new workflow run. Under the hood, a bridge task `_wf_launcher_nightly_analytics` is registered that calls `proxy.submit()`. - -!!! note - The `@queue.periodic()` decorator must be the **outer** decorator (applied second, listed first). diff --git a/docs/workflows/conditions.md b/docs/workflows/conditions.md deleted file mode 100644 index 1e6d1d4..0000000 --- a/docs/workflows/conditions.md +++ /dev/null @@ -1,119 +0,0 @@ -# Conditions & Error Handling - -Control which steps execute based on predecessor outcomes, and configure how the workflow responds to failures. 
- -## Step conditions - -```python -wf.step("deploy", deploy, after="test") # default: on_success -wf.step("rollback", rollback, after="deploy", condition="on_failure") -wf.step("notify", send_slack, after="deploy", condition="always") -``` - -| Condition | Runs when | -|-----------|-----------| -| `None` / `"on_success"` | All predecessors completed successfully | -| `"on_failure"` | Any predecessor failed | -| `"always"` | Predecessors are terminal (regardless of outcome) | -| `callable` | `condition(ctx)` returns `True` | - -## Callable conditions - -Pass a function that receives a `WorkflowContext`: - -```python -from taskito.workflows import WorkflowContext - -def high_score(ctx: WorkflowContext) -> bool: - return ctx.results["validate"]["score"] > 0.95 - -wf.step("deploy", deploy, after="validate", condition=high_score) -``` - -`WorkflowContext` fields: - -| Field | Type | Description | -|-------|------|-------------| -| `run_id` | `str` | Workflow run ID | -| `results` | `dict[str, Any]` | Deserialized return values of completed nodes | -| `statuses` | `dict[str, str]` | Status strings for all terminal nodes | -| `failure_count` | `int` | Number of failed nodes | -| `success_count` | `int` | Number of completed nodes | - -## Error strategies - -Set the workflow-level error strategy: - -=== "Fail Fast (default)" - - ```python - wf = Workflow(name="strict", on_failure="fail_fast") - ``` - - One failure skips **all** pending steps. The workflow transitions to `FAILED`. - - ```mermaid - graph LR - A["a ✓"] --> B["b ✗"] - B --> C["c ━ SKIPPED"] - B --> D["d ━ SKIPPED"] - style B fill:#FFB6C1 - style C fill:#F5F5F5 - style D fill:#F5F5F5 - ``` - -=== "Continue" - - ```python - wf = Workflow(name="resilient", on_failure="continue") - ``` - - Failed steps skip their `on_success` dependents, but **independent branches keep running**. 
- - ```mermaid - graph TD - root["root ✓"] --> fail_branch["fail ✗"] - root --> ok_branch["ok ✓"] - fail_branch --> after_fail["after_fail ━ SKIPPED"] - ok_branch --> after_ok["after_ok ✓"] - style fail_branch fill:#FFB6C1 - style after_fail fill:#F5F5F5 - style after_ok fill:#90EE90 - ``` - -## Skip propagation - -When a node is skipped, its successors are evaluated recursively: - -- `on_success` successors → **SKIPPED** (predecessor didn't succeed) -- `on_failure` successors → evaluated (predecessor is terminal) -- `always` successors → **run** regardless of how the predecessor ended - -```python -wf = Workflow(name="cleanup_pipeline") -wf.step("a", risky_task) -wf.step("b", next_step, after="a") # SKIPPED if a fails -wf.step("cleanup", cleanup, after="b", condition="always") # runs even if b is skipped -``` - -```mermaid -graph LR - A["a ✗ FAILED"] --> B["b ━ SKIPPED"] - B --> C["cleanup ✓ ALWAYS"] - style A fill:#FFB6C1 - style B fill:#F5F5F5 - style C fill:#90EE90 -``` - -## Combining conditions with fan-out - -Conditions work with fan-out nodes. If a fan-out child fails: - -```python -wf.step("fetch", fetch_data) -wf.step("process", process, after="fetch", fan_out="each") -wf.step("aggregate", aggregate, after="process", fan_in="all") -wf.step("on_error", alert, after="process", condition="on_failure") -``` - -If any `process[i]` child fails, the fan-out parent is marked `FAILED`, `aggregate` is skipped, and `on_error` runs. diff --git a/docs/workflows/fan-out.md b/docs/workflows/fan-out.md deleted file mode 100644 index 99645f0..0000000 --- a/docs/workflows/fan-out.md +++ /dev/null @@ -1,108 +0,0 @@ -# Fan-Out & Fan-In - -Split a step's result into parallel child jobs, then collect all results into a downstream step. 
- -```mermaid -graph LR - fetch --> process_0["process[0]"] - fetch --> process_1["process[1]"] - fetch --> process_2["process[2]"] - process_0 --> aggregate - process_1 --> aggregate - process_2 --> aggregate - - style fetch fill:#90EE90 - style aggregate fill:#87CEEB -``` - -## Fan-out with `"each"` - -The predecessor's return value must be iterable. Each element becomes a separate child job: - -```python -@queue.task() -def fetch() -> list[int]: - return [10, 20, 30] - -@queue.task() -def process(item: int) -> int: - return item * 2 - -@queue.task() -def aggregate(results: list[int]) -> int: - return sum(results) # receives [20, 40, 60] - -wf = Workflow(name="map_reduce") -wf.step("fetch", fetch) -wf.step("process", process, after="fetch", fan_out="each") -wf.step("aggregate", aggregate, after="process", fan_in="all") -``` - -Child nodes are named `process[0]`, `process[1]`, `process[2]` and appear in status queries. - -## How it works - -1. `fetch` completes — the tracker reads its return value -2. `apply_fan_out("each", result)` splits the list into individual items -3. `expand_fan_out()` creates N child nodes + N jobs (no `depends_on` — they're ready immediately) -4. Each child runs independently in parallel -5. When all children complete, `check_fan_out_completion()` marks the parent -6. The tracker collects all child results in index order -7. 
The fan-in job is created with `((results_list,), {})` as its payload - -```mermaid -sequenceDiagram - participant F as fetch job - participant T as Tracker - participant R as Rust Engine - - F->>T: JOB_COMPLETED(fetch) - T->>R: get_job(fetch_id).result_bytes - T->>R: expand_fan_out(3 children) - R-->>T: [child_job_ids] - Note over R: Children execute in parallel - R->>T: JOB_COMPLETED(process[0]) - R->>T: JOB_COMPLETED(process[1]) - R->>T: JOB_COMPLETED(process[2]) - T->>R: check_fan_out_completion → all done - T->>R: create_deferred_job(aggregate) -``` - -## Empty fan-out - -If the predecessor returns an empty list, the fan-out parent is marked `COMPLETED` immediately with zero children, and the fan-in receives an empty list: - -```python -@queue.task() -def fetch() -> list: - return [] # nothing to process - -# aggregate receives [] -``` - -## Fan-out with downstream steps - -Steps after the fan-in work normally: - -```python -wf = Workflow(name="full_pipeline") -wf.step("fetch", fetch) -wf.step("process", process, after="fetch", fan_out="each") -wf.step("aggregate", aggregate, after="process", fan_in="all") -wf.step("report", send_report, after="aggregate") # runs after aggregate -``` - -## Failure handling - -By default (`on_failure="fail_fast"`), if any fan-out child fails: - -- Remaining pending children are cancelled -- The fan-out parent is marked `FAILED` -- The fan-in and downstream steps are `SKIPPED` -- The workflow transitions to `FAILED` - -Combine with [conditions](conditions.md) for more control: - -```python -wf.step("handle_error", alert, after="process", condition="on_failure") -``` diff --git a/docs/workflows/gates.md b/docs/workflows/gates.md deleted file mode 100644 index de2eda1..0000000 --- a/docs/workflows/gates.md +++ /dev/null @@ -1,73 +0,0 @@ -# Approval Gates - -Pause a workflow for human review. The gate enters `WAITING_APPROVAL` status until explicitly approved or rejected — or until a timeout fires. 
- -```mermaid -graph LR - train["train ✓"] --> eval["evaluate ✓"] - eval --> gate["approve ⏸"] - gate -->|approved| deploy["deploy"] - gate -->|rejected| skip["deploy ━ SKIPPED"] - style gate fill:#FFFACD -``` - -## Adding a gate - -```python -wf = Workflow(name="ml_deploy") -wf.step("train", train_model) -wf.step("evaluate", evaluate, after="train") -wf.gate("approve", after="evaluate") -wf.step("deploy", deploy, after="approve") -``` - -When the workflow reaches the gate, it pauses. Downstream steps won't execute until the gate is resolved. - -## Resolving a gate - -```python -run = queue.submit_workflow(wf) - -# Later, after review: -queue.approve_gate(run.id, "approve") # → gate COMPLETED, deploy runs -# or: -queue.reject_gate(run.id, "approve") # → gate FAILED, deploy SKIPPED -``` - -## Timeout - -Auto-resolve after a deadline: - -```python -wf.gate("approve", after="evaluate", - timeout=86400, # 24 hours - on_timeout="reject") # or "approve" -``` - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `timeout` | `float` | `None` | Seconds until auto-resolve. `None` waits forever | -| `on_timeout` | `str` | `"reject"` | Action on expiry: `"approve"` or `"reject"` | -| `message` | `str` | `None` | Human-readable message for approvers | - -## Gate with conditions - -Gates respect step conditions: - -```python -wf.step("test", run_tests) -wf.gate("approve", after="test", condition="on_success") -wf.step("deploy", deploy, after="approve") -``` - -If `test` fails, the gate is skipped (condition not met), and `deploy` is also skipped. 
- -## Events - -When a gate enters `WAITING_APPROVAL`, a `WORKFLOW_GATE_REACHED` event fires: - -```python -@queue.on(EventType.WORKFLOW_GATE_REACHED) -def notify_team(event_type, payload): - send_slack(f"Workflow {payload['run_id']} needs approval at {payload['node_name']}") -``` diff --git a/docs/workflows/index.md b/docs/workflows/index.md deleted file mode 100644 index 2b9ca89..0000000 --- a/docs/workflows/index.md +++ /dev/null @@ -1,54 +0,0 @@ -# Workflows - -Build multi-step pipelines as directed acyclic graphs. Define steps, wire dependencies, and let taskito handle execution order, parallelism, failure propagation, and state tracking — all backed by a Rust engine with dagron-core for graph algorithms. - -```mermaid -flowchart TD - WB["Workflow Builder\nstep() · gate() · fan_out"] - WB -->|"_compile()"| TE["Topology Engine\ndagron-core DAG"] - TE --> WS["Workflow Storage\nSQLite — definitions, runs, nodes"] - WS -->|"enqueue jobs"| SC["Scheduler\ndepends_on chains"] - SC -->|"JOB_COMPLETED event"| WT["Workflow Tracker\nevent-driven orchestration"] - WT -->|"evaluate successors"| WS - WT --> WR["WorkflowRun\nstatus() · wait() · cancel()"] -``` - -## Quick start - -```python -from taskito import Queue -from taskito.workflows import Workflow - -queue = Queue(db_path="tasks.db") - -@queue.task() -def extract(): return fetch_data() - -@queue.task() -def transform(data): return clean(data) - -@queue.task() -def load(data): db.insert(data) - -wf = Workflow(name="etl_pipeline") -wf.step("extract", extract) -wf.step("transform", transform, after="extract") -wf.step("load", load, after="transform") - -run = queue.submit_workflow(wf) -result = run.wait(timeout=60) -print(result.state) # WorkflowState.COMPLETED -``` - -## Section overview - -| Page | What it covers | -|---|---| -| [Building Workflows](building.md) | `Workflow.step()`, decorator pattern, step configuration, DAG structure | -| [Fan-Out & Fan-In](fan-out.md) | Splitting results into parallel jobs, 
collecting with aggregation | -| [Conditions & Error Handling](conditions.md) | `on_success`, `on_failure`, `always`, callable conditions, `on_failure` modes | -| [Approval Gates](gates.md) | Human-in-the-loop pause/resume, timeout, approve/reject API | -| [Sub-Workflows & Scheduling](composition.md) | Nesting workflows, cron-scheduled runs | -| [Incremental Runs](caching.md) | Result hashing, `CACHE_HIT`, dirty-set propagation, TTL | -| [Analysis & Visualization](analysis.md) | Critical path, bottleneck analysis, Mermaid/DOT rendering | -| [Canvas Primitives](canvas.md) | Chain, group, chord — simple composition without DAGs | diff --git a/pyproject.toml b/pyproject.toml index 48b9b4c..72fd3f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = ["cloudpickle>=3.0"] Homepage = "https://github.com/ByteVeda/taskito" Documentation = "https://docs.byteveda.org/taskito" Repository = "https://github.com/ByteVeda/taskito" -Changelog = "https://github.com/ByteVeda/taskito/blob/master/docs/changelog.md" +Changelog = "https://docs.byteveda.org/taskito/docs/more/changelog" Issues = "https://github.com/ByteVeda/taskito/issues" [project.optional-dependencies] @@ -44,7 +44,6 @@ encryption = ["cryptography"] flask = ["flask>=3.0"] aws = ["boto3>=1.34"] gcs = ["google-cloud-storage>=2.10"] -docs = ["zensical"] [tool.maturin] manifest-path = "crates/taskito-python/Cargo.toml" diff --git a/zensical.toml b/zensical.toml deleted file mode 100644 index 7aca009..0000000 --- a/zensical.toml +++ /dev/null @@ -1,213 +0,0 @@ -[project] -site_name = "taskito" -site_description = "Rust-powered task queue for Python. No broker required." 
-site_url = "https://docs.byteveda.org/taskito" -repo_url = "https://github.com/ByteVeda/taskito" -repo_name = "ByteVeda/taskito" - -extra_javascript = ["assets/js/copy-markdown.js"] -extra_css = ["assets/css/custom.css"] - -nav = [ - { "Home" = "index.md" }, - { "Getting Started" = [ - { "Overview" = "getting-started/index.md" }, - { "Installation" = "getting-started/installation.md" }, - { "Quickstart" = "getting-started/quickstart.md" }, - ] }, - { "User Guide" = [ - { "Overview" = "guide/index.md" }, - { "Core" = [ - { "Overview" = "guide/core/index.md" }, - { "Tasks" = "guide/core/tasks.md" }, - { "Workers" = "guide/core/workers.md" }, - { "Execution Models" = "guide/core/execution-model.md" }, - { "Queues & Priority" = "guide/core/queues.md" }, - { "Scheduling" = "guide/core/scheduling.md" }, - { "Workflows" = "guide/core/workflows.md" }, - ] }, - { "Reliability" = [ - { "Overview" = "guide/reliability/index.md" }, - { "Retries & Dead Letters" = "guide/reliability/retries.md" }, - { "Error Handling" = "guide/reliability/error-handling.md" }, - { "Delivery Guarantees" = "guide/reliability/guarantees.md" }, - { "Rate Limiting" = "guide/reliability/rate-limiting.md" }, - { "Circuit Breakers" = "guide/reliability/circuit-breakers.md" }, - { "Distributed Locking" = "guide/reliability/locking.md" }, - ] }, - { "Advanced Execution" = [ - { "Overview" = "guide/execution/index.md" }, - { "Prefork Pool" = "guide/execution/prefork.md" }, - { "Native Async Tasks" = "guide/execution/async-tasks.md" }, - { "Result Streaming" = "guide/execution/streaming.md" }, - { "Dependencies" = "guide/execution/dependencies.md" }, - { "Batch Enqueue" = "guide/execution/batch-enqueue.md" }, - { "Unique Tasks" = "guide/execution/unique-tasks.md" }, - ] }, - { "Extensibility" = [ - { "Overview" = "guide/extensibility/index.md" }, - { "Middleware" = "guide/extensibility/middleware.md" }, - { "Serializers" = "guide/extensibility/serializers.md" }, - { "Events & Webhooks" = 
"guide/extensibility/events-webhooks.md" }, - ] }, - { "Observability" = [ - { "Overview" = "guide/observability/index.md" }, - { "Monitoring & Hooks" = "guide/observability/monitoring.md" }, - { "Structured Logging" = "guide/observability/logging.md" }, - { "Web Dashboard" = "guide/observability/dashboard.md" }, - { "Dashboard REST API" = "guide/observability/dashboard-api.md" }, - ] }, - { "Operations" = [ - { "Overview" = "guide/operations/index.md" }, - { "Testing" = "guide/operations/testing.md" }, - { "Job Management" = "guide/operations/job-management.md" }, - { "Troubleshooting" = "guide/operations/troubleshooting.md" }, - { "Deployment" = "guide/operations/deployment.md" }, - { "KEDA Autoscaling" = "guide/operations/keda.md" }, - { "Postgres Backend" = "guide/operations/postgres.md" }, - { "Migrating from Celery" = "guide/operations/migration.md" }, - ] }, - ] }, - { "Integrations" = [ - { "Overview" = "integrations/index.md" }, - { "FastAPI" = "integrations/fastapi.md" }, - { "Flask" = "integrations/flask.md" }, - { "Django" = "integrations/django.md" }, - { "OpenTelemetry" = "integrations/otel.md" }, - { "Prometheus" = "integrations/prometheus.md" }, - { "Sentry" = "integrations/sentry.md" }, - ] }, - { "Resource System" = [ - { "Overview" = "resources/index.md" }, - { "Argument Interception" = "resources/interception.md" }, - { "Dependency Injection" = "resources/dependency-injection.md" }, - { "Resource Proxies" = "resources/proxies.md" }, - { "Configuration" = "resources/configuration.md" }, - { "Testing" = "resources/testing.md" }, - { "Observability" = "resources/observability.md" }, - ] }, - { "Workflows" = [ - { "Overview" = "workflows/index.md" }, - { "Building Workflows" = "workflows/building.md" }, - { "Fan-Out & Fan-In" = "workflows/fan-out.md" }, - { "Conditions & Error Handling" = "workflows/conditions.md" }, - { "Approval Gates" = "workflows/gates.md" }, - { "Sub-Workflows & Scheduling" = "workflows/composition.md" }, - { "Incremental Runs" 
= "workflows/caching.md" }, - { "Analysis & Visualization" = "workflows/analysis.md" }, - { "Canvas Primitives" = "workflows/canvas.md" }, - ] }, - { "Architecture" = [ - { "Overview" = "architecture/index.md" }, - { "Job Lifecycle" = "architecture/job-lifecycle.md" }, - { "Worker Pool" = "architecture/worker-pool.md" }, - { "Storage Layer" = "architecture/storage.md" }, - { "Scheduler" = "architecture/scheduler.md" }, - { "Resource System" = "architecture/resources.md" }, - { "Failure Model" = "architecture/failure-model.md" }, - { "Serialization" = "architecture/serialization.md" }, - ] }, - { "API Reference" = [ - { "Overview" = "api/index.md" }, - { "Queue" = [ - { "Constructor & Registration" = "api/queue/index.md" }, - { "Job Management" = "api/queue/jobs.md" }, - { "Queue & Stats" = "api/queue/queues.md" }, - { "Workers & Hooks" = "api/queue/workers.md" }, - { "Resources & Locking" = "api/queue/resources.md" }, - { "Events & Logs" = "api/queue/events.md" }, - ] }, - { "TaskWrapper" = "api/task.md" }, - { "JobResult" = "api/result.md" }, - { "JobContext" = "api/context.md" }, - { "Canvas" = "api/canvas.md" }, - { "Workflows" = "api/workflows.md" }, - { "Testing" = "api/testing.md" }, - { "CLI" = "api/cli.md" }, - ] }, - { "Examples" = [ - { "Overview" = "examples/index.md" }, - { "FastAPI Service" = "examples/fastapi-service.md" }, - { "Notification Service" = "examples/notifications.md" }, - { "Web Scraper Pipeline" = "examples/web-scraper.md" }, - { "Data Pipeline" = "examples/data-pipeline.md" }, - { "DAG Workflows" = "examples/workflows.md" }, - { "Benchmark" = "examples/benchmark.md" }, - ] }, - { "Comparison" = "comparison.md" }, - { "FAQ" = "faq.md" }, - { "Changelog" = "changelog.md" }, - { "Changelog Archive" = "changelog/archive.md" }, -] - -[project.theme] -name = "material" -features = [ - "navigation.instant", - "navigation.tracking", - "navigation.tabs", - "navigation.sections", - "navigation.indexes", - "navigation.path", - "navigation.prune", 
- "navigation.top", - "content.code.copy", - "content.code.annotate", - "content.tabs.link", - "search.highlight", - "search.suggest", - "toc.follow", -] - -[[project.theme.palette]] -scheme = "default" -primary = "deep purple" -accent = "amber" -toggle = { icon = "material/brightness-7", name = "Switch to dark mode" } - -[[project.theme.palette]] -scheme = "slate" -primary = "deep purple" -accent = "amber" -toggle = { icon = "material/brightness-4", name = "Switch to light mode" } - -[project.theme.font] -text = "Inter" -code = "JetBrains Mono" - -[project.theme.icon] -repo = "fontawesome/brands/github" - -[project.markdown_extensions] -admonition = {} -"pymdownx.details" = {} -"pymdownx.superfences" = { custom_fences = [ - { name = "mermaid", class = "mermaid", format = "pymdownx.superfences.fence_code_format" }, -] } -"pymdownx.tabbed" = { alternate_style = true } -"pymdownx.highlight" = { anchor_linenums = true, line_spans = "__span", pygments_lang_class = true } -"pymdownx.inlinehilite" = {} -"pymdownx.snippets" = {} -"pymdownx.mark" = {} -"pymdownx.keys" = {} -"pymdownx.emoji" = { emoji_index = "zensical.extensions.emoji.twemoji", emoji_generator = "zensical.extensions.emoji.to_svg" } -attr_list = {} -md_in_html = {} -tables = {} -toc = { permalink = true } -def_list = {} -"pymdownx.tasklist" = { custom_checkbox = true } - -[project.plugins] -search = {} - -[project.extra] -generator = false - -[[project.extra.social]] -icon = "fontawesome/brands/github" -link = "https://github.com/ByteVeda/taskito" - -[[project.extra.social]] -icon = "fontawesome/brands/python" -link = "https://pypi.org/project/taskito/"