From 0e2e7bf36a98a4974381ac556ed66d580f7465de Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Thu, 14 May 2026 14:26:18 +1000
Subject: [PATCH 1/5] starting on variant catalogue

---
 docs/assay-schema.md                          |  16 +
 docs/variant-catalogue-schema.md              | 154 +++++++
 docs/variant-catalogue-schema.yaml            |  26 ++
 docs/variant-catalogue.md                     | 428 ++++++++++++++++++
 rust/bioscript-cli/src/cli_bootstrap.rs       |  47 +-
 rust/bioscript-cli/src/report_review.rs       |  11 +-
 rust/bioscript-reporting/src/manifest.rs      |   1 +
 .../src/manifest_provenance.rs                |  75 +++
 rust/bioscript-runtime/src/runtime.rs         |  42 +-
 rust/bioscript-runtime/src/runtime/methods.rs |  83 ++++
 rust/bioscript-runtime/src/runtime/objects.rs |   6 +-
 rust/bioscript-runtime/src/runtime/state.rs   |   2 +
 rust/bioscript-schema/src/lib.rs              |   8 +-
 rust/bioscript-schema/src/remote_resource.rs  |   6 +-
 rust/bioscript-schema/src/validator.rs        |   1 +
 .../src/validator_catalogue.rs                | 176 +++++++
 rust/bioscript-schema/src/validator_load.rs   |   8 +
 rust/bioscript-schema/src/validator_panel.rs  |  45 ++
 rust/bioscript-schema/src/validator_parse.rs  |  37 ++
 rust/bioscript-schema/src/validator_roots.rs  |   5 +-
 rust/bioscript-schema/src/validator_types.rs  |   7 +
 .../tests/validate_variants.rs                |  72 +++
 .../src/report_workspace/analysis.rs          |   1 +
 23 files changed, 1235 insertions(+), 22 deletions(-)
 create mode 100644 docs/variant-catalogue-schema.md
 create mode 100644 docs/variant-catalogue-schema.yaml
 create mode 100644 docs/variant-catalogue.md
 create mode 100644 rust/bioscript-reporting/src/manifest_provenance.rs
 create mode 100644 rust/bioscript-schema/src/validator_catalogue.rs

diff --git a/docs/assay-schema.md b/docs/assay-schema.md
index a6d34bc..3deec23 100644
--- a/docs/assay-schema.md
+++ b/docs/assay-schema.md
@@ -43,6 +43,11 @@ analyses:
       - "g1-site-1.yaml"
       - "g1-site-2.yaml"
       - "g2-site.yaml"
+    assets:
+      - id: "variants"
+        path: "variants.tsv"
+      - id: "findings"
+        path: "findings.tsv"
     emits:
       - key: "apol1_status"
         label: "APOL1 status"
@@ -84,10 +89,21 @@ Rules:
 - `path` points to a BioScript-compatible Python file
 - `output_format` is optional and defaults to `tsv`; use `json` or `jsonl` when the script writes structured JSON output
 - `derived_from` lists the variant YAML files used by the interpretation
+- `assets` is optional and lists local files the analysis script can read through the injected `asset_paths` dict
 - `emits` is optional but recommended so report generators know which output columns to display and how to label them
 - `logic` is optional; use `logic.description` and `logic.source.url` to document where the script's derivation rules came from
 - Analysis rows may emit `notes` or `report_notes` as a reporting convention. HTML reports render those notes below the analysis table and omit them from the table columns; this avoids a manifest-level template language while still letting the script build human-readable text from computed values.
 
+Analysis scripts receive these injected variables:
+
+- `input_file`: input genotype/VCF/CRAM path
+- `output_file`: path the script must write
+- `participant_id`: current participant/sample identifier
+- `observations_file`: TSV containing the variant observations already gathered from assay/panel members for this participant
+- `asset_paths`: dict from each `assets[].id` to the resolved local path
+
+This supports large catalogues where Rust/BioScript first gathers all variant observations, then Python joins those observations to attached TSV assets such as `findings.tsv`, `conditions.tsv`, or `rules.tsv` and emits derived classification rows.
+
 ## Findings
 
 Use `findings` for evidence that binds either to a variant observation or an emitted analysis value. Keep the executable logic in `analyses`; keep PGx evidence and reporting semantics in YAML.
diff --git a/docs/variant-catalogue-schema.md b/docs/variant-catalogue-schema.md
new file mode 100644
index 0000000..759d986
--- /dev/null
+++ b/docs/variant-catalogue-schema.md
@@ -0,0 +1,154 @@
+# Variant Catalogue Schema
+
+For the assay/runtime design, including direct catalogue members and Python
+analysis assets, see [variant-catalogue.md](variant-catalogue.md).
+
+This schema describes a compact, auditable source catalogue for projects with
+many variants. It is intended for curation and packaging. Current BioScript
+runtime lookup still consumes normal `bioscript:variant:1.0` variant manifests;
+a packaging step can expand this catalogue into per-variant YAML files.
+
+Use `bioscript:variant-catalogue:1.0` when a project has more than about 10
+variants, or when repeated provenance and per-file boilerplate make review
+harder than a tabular source.
+
+## Shape
+
+```yaml
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "thalassemia-variants"
+
+variants:
+  source: "variants.tsv"
+  format: "tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "id"
+      required: true
+    rsid:
+      role: "identifier.rsid"
+    alts:
+      role: "alleles.alts"
+      list_separator: "|"
+    grch38_pos:
+      role: "coordinates.grch38.pos"
+      type: "integer"
+
+findings:
+  source: "findings.tsv"
+  format: "tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "variant.id"
+      required: true
+    finding_id:
+      role: "finding.id"
+    alt:
+      role: "finding.alt"
+    notes:
+      role: "finding.notes"
+
+provenance:
+  sources:
+    - id: "ithagenes"
+      kind: "database"
+      label: "IthaGenes"
+      url: "https://www.ithanet.eu/db/ithagenes?action=list"
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+      url_template: "https://www.ncbi.nlm.nih.gov/snp/{rsid}"
+```
+
+## Required Fields
+
+- `schema`: must be `bioscript:variant-catalogue:1.0`
+- `version`: must be `1.0`
+- `name`
+- `variants.source`
+
+`variants.format` and `findings.format`, when present, must be `tsv`.
+`variants.columns` and `findings.columns` are recommended for real catalogues so
+tools can validate and interpret TSV columns without relying on hardcoded column
+names.
+
+## Recommended Files
+
+Keep files separate during curation:
+
+- `variants.yaml`: catalogue manifest and shared provenance
+- `variants.tsv`: biological identity and matching fields
+- `findings.tsv`: one interpretation row per finding
+
+Generate per-variant `bioscript:variant:1.0` YAML as a packaging/build artifact.
+
+## `variants.tsv`
+
+Recommended columns:
+
+```text
+variant_id	name	gene	rsid	aliases	kind	ref	alts	observed_alts	grch37_chrom	grch37_pos	grch37_start	grch37_end	grch38_chrom	grch38_pos	grch38_start	grch38_end
+```
+
+Use `|` inside list cells such as `aliases`, `alts`, and `observed_alts`.
+Use `pos` columns for SNVs and `start`/`end` columns for spans.
+
+Declare the semantic mapping in `variants.columns`. Recommended roles include:
+
+```text
+id
+name
+gene
+identifier.rsid
+identifier.aliases
+alleles.kind
+alleles.ref
+alleles.alts
+alleles.observed_alts
+coordinates.grch37.chrom
+coordinates.grch37.pos
+coordinates.grch37.start
+coordinates.grch37.end
+coordinates.grch38.chrom
+coordinates.grch38.pos
+coordinates.grch38.start
+coordinates.grch38.end
+```
+
+## `findings.tsv`
+
+Recommended columns:
+
+```text
+variant_id	finding_id	schema	alt	label	summary	notes
+```
+
+Additional source-specific columns such as `itha_id`, `functionality`,
+`phenotype`, `transcript_hgvs`, `genomic_hgvs`, and `ncbi_spdi` are allowed as
+curation data. The packaging tool can combine those atoms into standard variant
+finding entries.
+
+Each `findings.tsv.variant_id` should match a row in `variants.tsv`. Each
+allele-specific `alt` should be present in the corresponding variant row's
+`alts`, unless `alt` is `*`.
+
+Declare the semantic mapping in `findings.columns`. Recommended roles include:
+
+```text
+variant.id
+finding.id
+finding.schema
+finding.alt
+finding.label
+finding.summary
+finding.notes
+source.itha_id
+source.functionality
+source.phenotype
+source.transcript_hgvs
+source.genomic_hgvs
+source.ncbi_spdi
+```
diff --git a/docs/variant-catalogue-schema.yaml b/docs/variant-catalogue-schema.yaml
new file mode 100644
index 0000000..cfe5870
--- /dev/null
+++ b/docs/variant-catalogue-schema.yaml
@@ -0,0 +1,26 @@
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "example-variant-catalogue"
+label: "Example Variant Catalogue"
+summary: "Compact source catalogue for projects with many variants."
+
+variants:
+  source: "variants.tsv"
+  format: "tsv"
+  key: "variant_id"
+
+findings:
+  source: "findings.tsv"
+  format: "tsv"
+  key: "variant_id"
+
+provenance:
+  sources:
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+      url_template: "https://www.ncbi.nlm.nih.gov/snp/{rsid}"
+    - id: "ncbi_variation"
+      kind: "database"
+      label: "NCBI Variation Services"
+      url_template: "https://api.ncbi.nlm.nih.gov/variation/v0/hgvs/{genomic_hgvs}/contextuals"
diff --git a/docs/variant-catalogue.md b/docs/variant-catalogue.md
new file mode 100644
index 0000000..ec133fd
--- /dev/null
+++ b/docs/variant-catalogue.md
@@ -0,0 +1,428 @@
+# Variant Catalogue Design Note
+
+Variant catalogues are for assays and panels that need to test many variants
+without making reviewers inspect hundreds of near-identical YAML files. The
+curated source stays as plain text TSV plus a small YAML manifest. Build tools
+can still expand the catalogue into normal `bioscript:variant:1.0` YAML files
+when a package or older runtime path needs per-variant manifests.
+
+This is a design note for the intended BioScript flow. The catalogue manifest
+schema already exists as `bioscript:variant-catalogue:1.0`; direct assay member
+support for catalogues is the next BioScript schema/runtime change.
+
+## Why This Exists
+
+Small assays are easiest to review as hand-written variant YAML:
+
+```yaml
+members:
+  - kind: "variant"
+    path: "variants/rs73885319.yaml"
+    version: "1.0"
+```
+
+That breaks down for catalogues such as thalassemia, where IthaGenes/dbSNP style
+sources can produce hundreds of rows. Repeating the same provenance, source
+URLs, and schema boilerplate in every YAML file makes review noisier. A TSV is
+more compact, can be sorted, can be checked with command-line tools, and keeps
+the source curation auditable.
+
+The runtime still needs structured observations and controlled file access. The
+intended flow is:
+
+1. BioScript reads the assay.
+2. BioScript expands catalogue members into variant lookup work.
+3. BioScript gathers one observation row per variant for the participant.
+4. BioScript mounts a small virtual filesystem for the analysis script.
+5. BioScript writes observations to a conventional work path.
+6. BioScript runs the analysis Python with a structured `bioscript.context`.
+7. Python reads TSV inputs with `bioscript.read_tsv(path)`.
+8. Python joins observations to attached TSV assets and emits report rows.
+
+## Required BioScript Schema Change
+
+Assay and panel `members` should accept a catalogue member:
+
+```yaml
+members:
+  - kind: "variant-catalogue"
+    path: "catalogue/variants.yaml"
+    version: "1.0"
+```
+
+Today, `bioscript:assay:1.0` assay members accept `kind: "variant"` only, so the
+direct member above is the required next schema change. The member should point
+to the catalogue YAML manifest, not directly to `variants.tsv`, because the YAML
+manifest holds schema identity, version, table locations, keys, and shared
+provenance.
+
+The catalogue manifest shape is:
+
+```yaml
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "thalassemia-variants"
+
+variants:
+  source: "variants.tsv"
+  format: "tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "id"
+      required: true
+    name:
+      role: "name"
+    gene:
+      role: "gene"
+    rsid:
+      role: "identifier.rsid"
+    aliases:
+      role: "identifier.aliases"
+      list_separator: "|"
+    kind:
+      role: "alleles.kind"
+    ref:
+      role: "alleles.ref"
+    alts:
+      role: "alleles.alts"
+      list_separator: "|"
+    observed_alts:
+      role: "alleles.observed_alts"
+      list_separator: "|"
+    grch37_chrom:
+      role: "coordinates.grch37.chrom"
+    grch37_pos:
+      role: "coordinates.grch37.pos"
+      type: "integer"
+    grch38_chrom:
+      role: "coordinates.grch38.chrom"
+    grch38_pos:
+      role: "coordinates.grch38.pos"
+      type: "integer"
+
+findings:
+  source: "findings.tsv"
+  format: "tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "variant.id"
+      required: true
+    finding_id:
+      role: "finding.id"
+    schema:
+      role: "finding.schema"
+    alt:
+      role: "finding.alt"
+    label:
+      role: "finding.label"
+    summary:
+      role: "finding.summary"
+    notes:
+      role: "finding.notes"
+
+provenance:
+  sources:
+    - id: "ithagenes"
+      kind: "database"
+      label: "IthaGenes"
+      url: "https://www.ithanet.eu/db/ithagenes?action=list"
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+      url_template: "https://www.ncbi.nlm.nih.gov/snp/{rsid}"
+```
+
+## Column Mapping
+
+The manifest should declare how TSV columns map to BioScript concepts. This
+keeps the TSV compact while avoiding hidden assumptions about column names.
+Tools may provide default mappings for conventional columns, but real catalogues
+should encode the mapping explicitly.
+
+Each entry under `variants.columns` or `findings.columns` is keyed by the TSV
+column name:
+
+```yaml
+variants:
+  source: "variants.tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "id"
+      required: true
+    rsid:
+      role: "identifier.rsid"
+    alts:
+      role: "alleles.alts"
+      list_separator: "|"
+    grch38_pos:
+      role: "coordinates.grch38.pos"
+      type: "integer"
+```
+
+Column mapping fields:
+
+- `role`: semantic target for this TSV column
+- `required`: whether validation should fail if the column is missing or empty
+- `type`: optional primitive validation, such as `string` or `integer`
+- `list_separator`: separator for list-valued cells, normally `|`
+
+Recommended `variants.columns` roles:
+
+- `id`
+- `name`
+- `gene`
+- `identifier.rsid`
+- `identifier.aliases`
+- `alleles.kind`
+- `alleles.ref`
+- `alleles.alts`
+- `alleles.observed_alts`
+- `coordinates.grch37.chrom`
+- `coordinates.grch37.pos`
+- `coordinates.grch37.start`
+- `coordinates.grch37.end`
+- `coordinates.grch38.chrom`
+- `coordinates.grch38.pos`
+- `coordinates.grch38.start`
+- `coordinates.grch38.end`
+
+Recommended `findings.columns` roles:
+
+- `variant.id`
+- `finding.id`
+- `finding.schema`
+- `finding.alt`
+- `finding.label`
+- `finding.summary`
+- `finding.notes`
+- `source.itha_id`
+- `source.functionality`
+- `source.phenotype`
+- `source.transcript_hgvs`
+- `source.genomic_hgvs`
+- `source.ncbi_spdi`
+
+Unknown columns may still exist as source curation data, but executable tools
+should only depend on columns declared in the manifest. Validation should check
+that required mapped columns are present in the TSV header, integer columns parse
+as integers when populated, and list-valued columns are split consistently.
+
+The assay should also allow analysis assets, which are files made available to
+the Python analysis by id:
+
+```yaml
+analyses:
+  - id: "thalassemia_status"
+    kind: "bioscript"
+    path: "thalassemia.py"
+    output_format: "tsv"
+    derived_from:
+      - "catalogue/variants.yaml"
+    assets:
+      - id: "variants"
+        path: "catalogue/variants.tsv"
+      - id: "findings"
+        path: "catalogue/findings.tsv"
+    emits:
+      - key: "thalassemia_status"
+        label: "Thalassemia status"
+        value_type: "string"
+        format: "badge"
+```
+
+`assets` are not variant members. They are analysis inputs. The variant
+catalogue member tells BioScript what to look up; the pipeline files and assets
+give Python the extra curation tables it needs to classify the observed
+variants.
+
+## Analysis File Sandbox
+
+Analysis scripts should not receive arbitrary host filesystem paths. BioScript
+should mount a small virtual filesystem with conventional paths:
+
+```text
+/input
+/input/pipeline
+/work
+/output
+```
+
+Root meanings:
+
+- `/input`: read-only run inputs, starting with the genotype/sample input file
+- `/input/pipeline`: read-only assay package files, including YAML, TSV assets,
+  and analysis code
+- `/work`: read/write runtime workspace for BioScript-generated intermediate
+  files such as observations
+- `/output`: write/preserved output files and auxiliary artifacts
+
+Initial conventional paths:
+
+```text
+/input/genotypes
+/input/participant.tsv
+/input/pipeline/assay.yaml
+/input/pipeline/catalogue.yaml
+/input/pipeline/variants.tsv
+/input/pipeline/findings.tsv
+/work/observations.tsv
+/work/analysis.jsonl
+/output/results.tsv
+```
+
+`/input/participant.tsv` and `/work/analysis.jsonl` are optional future
+conventions. They can be absent; scripts should check with `bioscript.exists()`.
+
+Access rules:
+
+- `/input` and `/input/pipeline` are read-only
+- `/work` is read/write and intended for intermediate files
+- `/output` is write/preserved; files here are report artifacts
+- host paths, path traversal, and network paths are not available to analysis
+  scripts
+
+`/output/results.tsv` is the canonical report output when the script writes a
+file. Other files under `/output` are auxiliary artifacts. If a future runtime
+captures returned rows/dicts directly, it should reject ambiguous scripts that
+both return a primary result and write `/output/results.tsv`.
+
+## `bioscript.context`
+
+BioScript should expose a structured context object so scripts can interrogate
+the run without hardcoding every path:
+
+```python
+bioscript.context["participant_id"]
+bioscript.context["input_files"]
+bioscript.context["pipeline_files"]
+bioscript.context["assets"]
+bioscript.context["observations_file"]
+bioscript.context["output_file"]
+```
+
+Example context shape:
+
+```python
+{
+    "participant_id": "genome_hu50B3F5_v5_Full",
+    "input_files": {
+        "genotypes": "/input/genotypes",
+    },
+    "pipeline_files": {
+        "assay": "/input/pipeline/assay.yaml",
+        "catalogue": "/input/pipeline/catalogue.yaml",
+    },
+    "assets": {
+        "variants": "/input/pipeline/variants.tsv",
+        "findings": "/input/pipeline/findings.tsv",
+    },
+    "observations_file": "/work/observations.tsv",
+    "output_file": "/output/results.tsv",
+}
+```
+
+Compatibility variables may still be injected during migration:
+
+```python
+participant_id = bioscript.context["participant_id"]
+observations_file = bioscript.context["observations_file"]
+output_file = bioscript.context["output_file"]
+asset_paths = bioscript.context["assets"]
+```
+
+The context object should be read-only from the script's point of view. It can
+also expose helper methods later, but a dictionary shape is enough for the first
+implementation.
+
+## Thalassemia Use Case
+
+The thalassemia project is the motivating case:
+
+- `catalogue/variants.yaml` stores shared catalogue metadata and provenance.
+- `catalogue/variants.tsv` stores variant identity, rsIDs, aliases, alleles,
+  coordinates, and source curation columns.
+- `catalogue/findings.tsv` stores what a variant contributes to: for example
+  alpha-globin, beta-globin, or other globin/modifier categories, with source
+  notes such as IthaGenes functionality and phenotype.
+- `thalassemia.py` reads `observations_file` plus the TSV assets and emits a
+  compact classification row.
+
+This lets the assay report a derived classification such as:
+
+- no catalogue thalassemia variant observed
+- possible beta carrier
+- possible biallelic HBB risk
+- possible alpha silent carrier or trait
+- possible HbH/Hb Bart spectrum
+
+Those labels require careful clinical curation. The important first step is the
+data path: variants become observations, then Python joins observations to
+attached TSV assets and emits rows. The exact classification rules need separate
+review and should not be inferred from row count alone.
+
+## Minimal Python Analysis
+
+This is intentionally a hello-world style analysis. It proves how the injected
+files work, but the real thalassemia logic needs proper allele, phase, gene, and
+evidence handling.
+
+```python
+import bioscript
+
+
+observations = bioscript.read_tsv(bioscript.context["observations_file"])
+variants = bioscript.read_tsv(bioscript.context["assets"]["variants"])
+findings = bioscript.read_tsv(bioscript.context["assets"]["findings"])
+
+# Minimal proof of shape only. Real work should:
+# - match observations to variant_id via rsid, aliases, and coordinates
+# - verify the observed allele matches the finding allele
+# - handle deletions, insertions, complex alleles, no-calls, and phase
+# - classify alpha-globin, beta-globin, and modifier findings separately
+# - preserve provenance and source notes in emitted rows
+
+if len(observations) > 0 and len(variants) > 0 and len(findings) > 0:
+    status = "catalogue_assets_loaded"
+else:
+    status = "catalogue_assets_missing_or_empty"
+
+bioscript.write_tsv(bioscript.context["output_file"], [
+    {
+        "participant_id": bioscript.context["participant_id"],
+        "thalassemia_status": status,
+    }
+])
+```
+
+The analysis script receives virtual file paths as strings through
+`bioscript.context`. `observations_file` is a TSV created by BioScript for the
+current participant. `assets` is a dictionary whose keys are the ids from
+`analyses[].assets`.
+
+BioScript should provide `bioscript.read_tsv(path)` as a built-in helper. TSV is
+part of the catalogue asset contract, so every analysis should not need to carry
+its own parser. The helper should return a list of dictionaries keyed by the TSV
+header row, use tab as the delimiter, preserve empty cells as empty strings, and
+skip empty lines. Later versions may add typed parsing based on catalogue column
+metadata, but the default helper should stay predictable and string-based.
+
+## Packaging And Review
+
+Keep these files separate during curation:
+
+```text
+catalogue/variants.yaml
+catalogue/variants.tsv
+catalogue/findings.tsv
+```
+
+Generated per-variant YAML files are useful as a packaging artifact, especially
+while older runtime paths still expect `kind: "variant"` members. They should not
+be the primary review surface for large catalogues.
+
+The long-term runtime path should be better than generated YAML for large
+catalogues: BioScript can stream or batch the TSV-backed catalogue directly into
+lookups, then pass observations and assets to the analysis script.
diff --git a/rust/bioscript-cli/src/cli_bootstrap.rs b/rust/bioscript-cli/src/cli_bootstrap.rs
index 63bae41..7e7024a 100644
--- a/rust/bioscript-cli/src/cli_bootstrap.rs
+++ b/rust/bioscript-cli/src/cli_bootstrap.rs
@@ -57,13 +57,15 @@ fn run_cli() -> Result<(), String> {
     Ok(())
 }
 
-const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n       bioscript report <manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--allow-md5-mismatch] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n       bioscript review <manifest.yaml|package.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n       bioscript import-package <package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n       bioscript validate-variants <path> [--report <file>]\n       bioscript validate-panels <path> [--report <file>]\n       bioscript validate-assays <path> [--report <file>]\n       bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|cram] [--cache-dir <path>]\n       bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]";
+const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--observations-file <path>] [--asset id=path] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n       bioscript report <manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--allow-md5-mismatch] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n       bioscript review <manifest.yaml|package.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n       bioscript import-package <package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n       bioscript validate-variants <path> [--report <file>]\n       bioscript validate-panels <path> [--report <file>]\n       bioscript validate-assays <path> [--report <file>]\n       bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|cram] [--cache-dir <path>]\n       bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]";
 
 struct CliOptions {
     script_path: Option<PathBuf>,
     root: Option<PathBuf>,
     input_file: Option<String>,
     output_file: Option<String>,
+    observations_file: Option<String>,
+    asset_paths: BTreeMap<String, String>,
     participant_id: Option<String>,
     trace_report: Option<PathBuf>,
     timing_report: Option<PathBuf>,
@@ -107,6 +109,8 @@ fn default_cli_options() -> CliOptions {
         root: None,
         input_file: None,
         output_file: None,
+        observations_file: None,
+        asset_paths: BTreeMap::new(),
         participant_id: None,
         trace_report: None,
         timing_report: None,
@@ -167,6 +171,24 @@ fn parse_cli_path_arg(
                 return Err("--output-file requires a path".to_owned());
             };
         options.output_file = Some(value);
+    } else if arg == "--observations-file" {
+            let Some(value) = args.next() else {
+                return Err("--observations-file requires a path".to_owned());
+            };
+        options.observations_file = Some(value);
+    } else if arg == "--asset" {
+            let Some(value) = args.next() else {
+                return Err("--asset requires id=path".to_owned());
+            };
+        let Some((id, path)) = value.split_once('=') else {
+            return Err("--asset requires id=path".to_owned());
+        };
+        let id = id.trim();
+        let path = path.trim();
+        if id.is_empty() || path.is_empty() {
+            return Err("--asset requires non-empty id=path".to_owned());
+        }
+        options.asset_paths.insert(id.to_owned(), path.to_owned());
     } else if arg == "--participant-id" {
             let Some(value) = args.next() else {
                 return Err("--participant-id requires a value".to_owned());
@@ -380,6 +402,15 @@ fn run_cli_script(
     if let Some(output_file) = options.output_file {
         inputs.push(("output_file", monty::MontyObject::String(output_file)));
     }
+    if let Some(observations_file) = options.observations_file {
+        inputs.push((
+            "observations_file",
+            monty::MontyObject::String(observations_file),
+        ));
+    }
+    if !options.asset_paths.is_empty() {
+        inputs.push(("asset_paths", monty_string_dict(&options.asset_paths)));
+    }
     if let Some(participant_id) = options.participant_id {
         inputs.push(("participant_id", monty::MontyObject::String(participant_id)));
     }
@@ -395,6 +426,20 @@ fn run_cli_script(
     Ok(())
 }
 
+fn monty_string_dict(values: &BTreeMap<String, String>) -> monty::MontyObject {
+    monty::MontyObject::Dict(
+        values
+            .iter()
+            .map(|(key, value)| {
+                (
+                    monty::MontyObject::String(key.clone()),
+                    monty::MontyObject::String(value.clone()),
+                )
+            })
+            .collect(),
+    )
+}
+
 fn write_timing_report(path: &PathBuf, timings: &[StageTiming]) -> Result<(), String> {
     if let Some(parent) = path.parent() {
         fs::create_dir_all(parent).map_err(|err| {
diff --git a/rust/bioscript-cli/src/report_review.rs b/rust/bioscript-cli/src/report_review.rs
index d10932c..ce413bc 100644
--- a/rust/bioscript-cli/src/report_review.rs
+++ b/rust/bioscript-cli/src/report_review.rs
@@ -94,13 +94,14 @@ fn generate_review_report(options: &ReviewReportOptions) -> Result<(), String> {
         let input_bytes = review_case_genotype_text(&case);
         let store = GenotypeStore::from_bytes(&format!("{}.txt", case.id), input_bytes.as_bytes())
             .map_err(|err| err.to_string())?;
-        let input_observations = run_manifest_rows_with_store(
+        let manifest_rows = run_manifest_rows_with_store(
             &options.root,
             &options.manifest_path,
             &store,
             &case.id,
             &options.filters,
-        )?
+        )?;
+        let input_observations = manifest_rows
         .iter()
         .map(|row| {
             app_observation_from_manifest_row(
@@ -114,7 +115,7 @@ fn generate_review_report(options: &ReviewReportOptions) -> Result<(), String> {
         .collect::<Result<Vec<_>, _>>()?;
         observations.extend(input_observations.clone());
 
-        let input_analyses = run_review_analyses(options, &case, &input_bytes)?;
+        let input_analyses = run_review_analyses(options, &case, &input_bytes, &manifest_rows)?;
         analyses.extend(input_analyses.clone());
         let synthetic_input = PathBuf::from(format!("review://{}", case.id));
         let synthetic_input_name = synthetic_input
@@ -206,6 +207,7 @@ fn run_review_analyses(
     options: &ReviewReportOptions,
     case: &ReviewCase,
     input_bytes: &str,
+    observation_rows: &[BTreeMap<String, String>],
 ) -> Result<Vec<serde_json::Value>, String> {
     let temp_dir = options.output_dir.join(".review-temp");
     fs::create_dir_all(&temp_dir).map_err(|err| {
@@ -221,14 +223,13 @@ fn run_review_analyses(
         format: Some(GenotypeSourceFormat::Text),
         ..GenotypeLoadOptions::default()
     };
-    let observation_rows = Vec::new();
     let analysis_options = ReportAnalysisOptions {
         runtime_root: &options.root,
         input_file: &temp_path,
         participant_id: &case.id,
         loader: &loader,
         output_dir: &options.output_dir,
-        observation_rows: &observation_rows,
+        observation_rows,
         filters: &options.filters,
         max_duration_ms: 1_000,
     };
diff --git a/rust/bioscript-reporting/src/manifest.rs b/rust/bioscript-reporting/src/manifest.rs
index 87baadc..06e0645 100644
--- a/rust/bioscript-reporting/src/manifest.rs
+++ b/rust/bioscript-reporting/src/manifest.rs
@@ -8,6 +8,7 @@ use bioscript_schema::{
     load_variant_manifest_text,
 };
 
+#[path = "manifest_provenance.rs"]
 mod provenance;
 
 pub use provenance::{collect_manifest_provenance_entries, load_manifest_provenance_links};
diff --git a/rust/bioscript-reporting/src/manifest_provenance.rs b/rust/bioscript-reporting/src/manifest_provenance.rs
new file mode 100644
index 0000000..02d81c1
--- /dev/null
+++ b/rust/bioscript-reporting/src/manifest_provenance.rs
@@ -0,0 +1,75 @@
+use std::collections::BTreeMap;
+
+use super::{
+    ManifestWorkspace, manifest_supports_findings, traversable_manifest_member_paths, yaml_string,
+    yaml_to_json,
+};
+
+pub fn load_manifest_provenance_links(
+    workspace: &impl ManifestWorkspace,
+    path: &str,
+) -> Result<Vec<serde_json::Value>, String> {
+    let value = workspace.load_yaml(path)?;
+    let schema = yaml_string(&value, "schema").unwrap_or_default();
+    let mut links = BTreeMap::new();
+
+    collect_manifest_provenance_entries(&value, &mut links)?;
+
+    if manifest_supports_findings(&schema)
+        && let Some(items) = value
+            .get("findings")
+            .and_then(serde_yaml::Value::as_sequence)
+    {
+        for item in items {
+            if let Some(include) = item.get("include").and_then(serde_yaml::Value::as_str) {
+                let include_path = workspace.resolve(path, include)?;
+                let included = load_manifest_provenance_links(workspace, &include_path)?;
+                for source in included {
+                    insert_provenance_link(&mut links, source);
+                }
+            }
+        }
+    }
+
+    for member_path in traversable_manifest_member_paths(&schema, &value) {
+        let resolved = workspace.resolve(path, member_path)?;
+        let member_links = load_manifest_provenance_links(workspace, &resolved)?;
+        for source in member_links {
+            insert_provenance_link(&mut links, source);
+        }
+    }
+
+    Ok(links.into_values().collect())
+}
+
+pub fn collect_manifest_provenance_entries(
+    value: &serde_yaml::Value,
+    links: &mut BTreeMap<String, serde_json::Value>,
+) -> Result<(), String> {
+    let Some(sources) = value
+        .get("provenance")
+        .and_then(|provenance| provenance.get("sources"))
+        .and_then(serde_yaml::Value::as_sequence)
+    else {
+        return Ok(());
+    };
+
+    for source in sources {
+        let source = yaml_to_json(source.clone())?;
+        insert_provenance_link(links, source);
+    }
+    Ok(())
+}
+
+fn insert_provenance_link(
+    links: &mut BTreeMap<String, serde_json::Value>,
+    source: serde_json::Value,
+) {
+    let key = source
+        .get("url")
+        .or_else(|| source.get("url_template"))
+        .or_else(|| source.get("label"))
+        .and_then(serde_json::Value::as_str)
+        .map_or_else(|| source.to_string(), ToOwned::to_owned);
+    links.entry(key).or_insert(source);
+}
diff --git a/rust/bioscript-runtime/src/runtime.rs b/rust/bioscript-runtime/src/runtime.rs
index a5e0e11..69f98a1 100644
--- a/rust/bioscript-runtime/src/runtime.rs
+++ b/rust/bioscript-runtime/src/runtime.rs
@@ -134,7 +134,16 @@ impl BioscriptRuntime {
             "__file__",
             MontyObject::String(script_path.display().to_string()),
         ));
-        extra_inputs.push(("bioscript", bioscript_object()));
+        extra_inputs.push((
+            "bioscript",
+            bioscript_object(MontyObject::Dict(
+                self.config
+                    .context
+                    .iter()
+                    .map(|(key, value)| (MontyObject::String(key.clone()), value.clone()))
+                    .collect(),
+            )),
+        ));
 
         let result = self.run_script(
             &instrumented,
@@ -247,8 +256,10 @@ impl BioscriptRuntime {
             ("Bioscript", "variant") => self.method_variant(args, kwargs),
             ("Bioscript", "query_plan") => self.method_query_plan(args, kwargs),
             ("Bioscript", "write_tsv") => self.method_write_tsv(args, kwargs),
+            ("Bioscript", "read_tsv") => self.method_read_tsv(args, kwargs),
             ("Bioscript", "read_text") => self.method_read_text(args, kwargs),
             ("Bioscript", "write_text") => self.method_write_text(args, kwargs),
+            ("Bioscript", "exists") => self.method_exists(args, kwargs),
             ("GenotypeFile", "get") => self.method_genotype_get(args, kwargs),
             ("GenotypeFile", "lookup_variant") => self.method_genotype_lookup_variant(args, kwargs),
             ("GenotypeFile", "lookup_variant_details") => {
@@ -281,6 +292,19 @@ impl BioscriptRuntime {
     fn resolve_user_path(&self, raw_path: &str) -> Result<PathBuf, RuntimeError> {
         let path = Path::new(raw_path);
         if path.is_absolute() {
+            if self.uses_virtual_files() {
+                for component in path.components() {
+                    match component {
+                        Component::ParentDir | Component::Prefix(_) => {
+                            return Err(RuntimeError::InvalidArguments(format!(
+                                "path escapes bioscript root: {raw_path}"
+                            )));
+                        }
+                        Component::RootDir | Component::CurDir | Component::Normal(_) => {}
+                    }
+                }
+                return Ok(path.to_path_buf());
+            }
             return Err(RuntimeError::InvalidArguments(format!(
                 "absolute paths are not allowed: {raw_path}"
             )));
@@ -303,6 +327,11 @@ impl BioscriptRuntime {
         if self.virtual_file_exists(raw_path) {
             return Ok(path);
         }
+        if self.uses_virtual_files() && Path::new(raw_path).is_absolute() {
+            return Err(RuntimeError::Io(format!(
+                "virtual file does not exist: {raw_path}"
+            )));
+        }
         let canonical = path.canonicalize().map_err(|err| {
             RuntimeError::Io(format!("failed to resolve {}: {err}", path.display()))
         })?;
@@ -313,6 +342,13 @@ impl BioscriptRuntime {
     fn resolve_user_write_path(&self, raw_path: &str) -> Result<PathBuf, RuntimeError> {
         let path = self.resolve_user_path(raw_path)?;
         if self.uses_virtual_files() {
+            if Path::new(raw_path).is_absolute()
+                && !(raw_path.starts_with("/output/") || raw_path.starts_with("/work/"))
+            {
+                return Err(RuntimeError::InvalidArguments(format!(
+                    "virtual write path must be under /work or /output: {raw_path}"
+                )));
+            }
             return Ok(path);
         }
         if path.exists() {
@@ -773,7 +809,7 @@ mod tests {
         )
         .unwrap();
         let runtime = BioscriptRuntime::new(&root).unwrap();
-        let bioscript = bioscript_object();
+        let bioscript = bioscript_object(MontyObject::Dict(Vec::new().into()));
 
         let genotype = runtime
             .method_load_genotypes(
@@ -853,7 +889,7 @@ mod tests {
     fn runtime_private_methods_cover_successful_text_tsv_and_trace_paths() {
         let root = temp_dir("host-output");
         let runtime = BioscriptRuntime::new(&root).unwrap();
-        let bioscript = bioscript_object();
+        let bioscript = bioscript_object(MontyObject::Dict(Vec::new().into()));
         let rows = MontyObject::List(vec![MontyObject::Dict(
             vec![
                 (
diff --git a/rust/bioscript-runtime/src/runtime/methods.rs b/rust/bioscript-runtime/src/runtime/methods.rs
index d9490d3..1a48a7a 100644
--- a/rust/bioscript-runtime/src/runtime/methods.rs
+++ b/rust/bioscript-runtime/src/runtime/methods.rs
@@ -358,6 +358,60 @@ impl BioscriptRuntime {
         host_read_text(self, &args[1..], kwargs)
     }
 
+    pub(super) fn method_read_tsv(
+        &self,
+        args: &[MontyObject],
+        kwargs: &[(MontyObject, MontyObject)],
+    ) -> Result<MontyObject, RuntimeError> {
+        reject_kwargs(kwargs, "bioscript.read_tsv")?;
+        if args.len() != 2 {
+            return Err(RuntimeError::InvalidArguments(
+                "bioscript.read_tsv expects self and path".to_owned(),
+            ));
+        }
+        let path =
+            self.resolve_existing_user_path(&expect_string_arg(args, 1, "bioscript.read_tsv")?)?;
+        let text = if let Some(content) = self.read_virtual_text_file(&path) {
+            content
+        } else {
+            fs::read_to_string(&path).map_err(|err| {
+                RuntimeError::Io(format!("failed to read {}: {err}", path.display()))
+            })?
+        };
+        Ok(tsv_rows_object(&text))
+    }
+
+    pub(super) fn method_exists(
+        &self,
+        args: &[MontyObject],
+        kwargs: &[(MontyObject, MontyObject)],
+    ) -> Result<MontyObject, RuntimeError> {
+        reject_kwargs(kwargs, "bioscript.exists")?;
+        if args.len() != 2 {
+            return Err(RuntimeError::InvalidArguments(
+                "bioscript.exists expects self and path".to_owned(),
+            ));
+        }
+        let raw_path = expect_string_arg(args, 1, "bioscript.exists")?;
+        if self.config.virtual_text_files.contains_key(&raw_path)
+            || self.config.virtual_binary_files.contains_key(&raw_path)
+            || self
+                .state
+                .virtual_written_text_files
+                .lock()
+                .expect("virtual file mutex poisoned")
+                .contains_key(&raw_path)
+        {
+            return Ok(MontyObject::Bool(true));
+        }
+        if self.config.virtual_text_files.is_empty() && self.config.virtual_binary_files.is_empty()
+        {
+            let path = self.resolve_user_path(&raw_path)?;
+            return Ok(MontyObject::Bool(path.exists()));
+        }
+        Ok(MontyObject::Bool(false))
+    }
+
     pub(super) fn method_write_text(
         &self,
         args: &[MontyObject],
@@ -371,3 +425,32 @@ impl BioscriptRuntime {
         host_write_text(self, &args[1..], kwargs)
     }
 }
+
+fn tsv_rows_object(text: &str) -> MontyObject {
+    let mut lines = text.lines().filter(|line| !line.trim().is_empty());
+    let Some(header_line) = lines.next() else {
+        return MontyObject::List(Vec::new());
+    };
+    let headers: Vec<&str> = header_line.split('\t').collect();
+    MontyObject::List(
+        lines
+            .map(|line| {
+                let cells: Vec<&str> = line.split('\t').collect();
+                MontyObject::Dict(
+                    headers
+                        .iter()
+                        .enumerate()
+                        .map(|(idx, header)| {
+                            (
+                                MontyObject::String((*header).to_owned()),
+                                MontyObject::String(
+                                    cells.get(idx).copied().unwrap_or_default().to_owned(),
+                                ),
+                            )
+                        })
+                        .collect(),
+                )
+            })
+            .collect(),
+    )
+}
diff --git a/rust/bioscript-runtime/src/runtime/objects.rs b/rust/bioscript-runtime/src/runtime/objects.rs
index 818ccdb..ae30b06 100644
--- a/rust/bioscript-runtime/src/runtime/objects.rs
+++ b/rust/bioscript-runtime/src/runtime/objects.rs
@@ -1,12 +1,12 @@
 use bioscript_core::{VariantKind, VariantSpec};
 use monty::MontyObject;
 
-pub(crate) fn bioscript_object() -> MontyObject {
+pub(crate) fn bioscript_object(context: MontyObject) -> MontyObject {
     MontyObject::Dataclass {
         name: "Bioscript".to_owned(),
         type_id: 1,
-        field_names: vec![],
-        attrs: vec![].into(),
+        field_names: vec!["context".to_owned()],
+        attrs: vec![(MontyObject::String("context".to_owned()), context)].into(),
         frozen: true,
     }
 }
diff --git a/rust/bioscript-runtime/src/runtime/state.rs b/rust/bioscript-runtime/src/runtime/state.rs
index 94adc87..222ded0 100644
--- a/rust/bioscript-runtime/src/runtime/state.rs
+++ b/rust/bioscript-runtime/src/runtime/state.rs
@@ -17,6 +17,7 @@ use bioscript_core::RuntimeError;
 pub struct RuntimeConfig {
     pub limits: ResourceLimits,
     pub loader: GenotypeLoadOptions,
+    pub context: BTreeMap<String, monty::MontyObject>,
     pub virtual_binary_files: BTreeMap<String, Vec<u8>>,
     pub virtual_text_files: BTreeMap<String, String>,
     /// Observations the host has already resolved before invoking the
@@ -38,6 +39,7 @@ impl Default for RuntimeConfig {
         Self {
             limits,
             loader: GenotypeLoadOptions::default(),
+            context: BTreeMap::new(),
             virtual_binary_files: BTreeMap::new(),
             virtual_text_files: BTreeMap::new(),
             preloaded_observations: Vec::new(),
diff --git a/rust/bioscript-schema/src/lib.rs b/rust/bioscript-schema/src/lib.rs
index b2d70c1..2cb05da 100644
--- a/rust/bioscript-schema/src/lib.rs
+++ b/rust/bioscript-schema/src/lib.rs
@@ -5,10 +5,10 @@ pub use remote_resource::{
     RemoteDependency, RemoteResourceKind, RemoteResourceResolution, resolve_remote_resource_text,
 };
 pub use validator::{
-    AssayManifest, Download, FileReport, Issue, PanelInterpretation, PanelInterpretationLogic,
-    PanelInterpretationLogicSource, PanelManifest, PanelMember, Permissions, Severity,
-    ValidationReport, VariantManifest, load_assay_manifest, load_assay_manifest_text,
-    load_panel_manifest, load_panel_manifest_text, load_variant_manifest,
+    AssayManifest, Download, FileReport, Issue, PanelInterpretation, PanelInterpretationAsset,
+    PanelInterpretationLogic, PanelInterpretationLogicSource, PanelManifest, PanelMember,
+    Permissions, Severity, ValidationReport, VariantManifest, load_assay_manifest,
+    load_assay_manifest_text, load_panel_manifest, load_panel_manifest_text, load_variant_manifest,
     load_variant_manifest_text, load_variant_manifest_text_for_lookup, validate_assays_path,
     validate_panels_path, validate_variants_path,
 };
diff --git a/rust/bioscript-schema/src/remote_resource.rs b/rust/bioscript-schema/src/remote_resource.rs
index 40ad8d1..33d0b1b 100644
--- a/rust/bioscript-schema/src/remote_resource.rs
+++ b/rust/bioscript-schema/src/remote_resource.rs
@@ -104,15 +104,15 @@ fn infer_kind(name: &str, schema: Option<&str>, parsed: Option<&Value>) -> Remot
     }
     if let Some(schema) = schema {
         let lower = schema.to_ascii_lowercase();
+        if lower.contains("catalogue") || lower.contains("catalog") || lower.contains("index") {
+            return RemoteResourceKind::Catalogue;
+        }
         if lower.contains("variant") {
             return RemoteResourceKind::Variant;
         }
         if lower.contains("panel") {
             return RemoteResourceKind::Panel;
         }
-        if lower.contains("catalogue") || lower.contains("catalog") || lower.contains("index") {
-            return RemoteResourceKind::Catalogue;
-        }
         if lower.contains("assay") {
             return RemoteResourceKind::Assay;
         }
diff --git a/rust/bioscript-schema/src/validator.rs b/rust/bioscript-schema/src/validator.rs
index 0bae7f6..278685f 100644
--- a/rust/bioscript-schema/src/validator.rs
+++ b/rust/bioscript-schema/src/validator.rs
@@ -5,6 +5,7 @@ include!("validator_types.rs");
 include!("validator_load.rs");
 include!("validator_roots.rs");
 include!("validator_alleles_findings.rs");
+include!("validator_catalogue.rs");
 include!("validator_panel.rs");
 include!("validator_parse.rs");
 include!("validator_helpers.rs");
diff --git a/rust/bioscript-schema/src/validator_catalogue.rs b/rust/bioscript-schema/src/validator_catalogue.rs
new file mode 100644
index 0000000..b348ad9
--- /dev/null
+++ b/rust/bioscript-schema/src/validator_catalogue.rs
@@ -0,0 +1,176 @@
+fn validate_variant_catalogue_root(root: &Value, issues: &mut Vec<Issue>) {
+    require_const(root, &["schema"], "bioscript:variant-catalogue:1.0", issues);
+    require_const(root, &["version"], "1.0", issues);
+    validate_optional_strings(root, &["name", "label", "summary"], issues);
+    require_path(root, &["name"], issues);
+    validate_tags(root, issues);
+    validate_catalogue_table(root, "variants", true, issues);
+    validate_catalogue_table(root, "findings", false, issues);
+    validate_catalogue_provenance(root, issues);
+}
+
+fn validate_catalogue_table(
+    root: &Value,
+    field: &str,
+    required: bool,
+    issues: &mut Vec<Issue>,
+) {
+    let Some(value) = value_at(root, &[field]) else {
+        if required {
+            issues.push(Issue {
+                severity: Severity::Error,
+                path: field.to_owned(),
+                message: "missing required table declaration".to_owned(),
+            });
+        }
+        return;
+    };
+    let Some(mapping) = value.as_mapping() else {
+        issues.push(Issue {
+            severity: Severity::Error,
+            path: field.to_owned(),
+            message: "expected mapping".to_owned(),
+        });
+        return;
+    };
+    validate_required_non_empty_mapping_string(mapping, field, "source", issues);
+    validate_optional_catalogue_mapping_string(mapping, field, "format", issues);
+    validate_optional_catalogue_mapping_string(mapping, field, "key", issues);
+
+    if let Some(source) = mapping
+        .get(Value::String("source".to_owned()))
+        .and_then(Value::as_str)
+        && !std::path::Path::new(source)
+            .extension()
+            .is_some_and(|ext| ext.eq_ignore_ascii_case("tsv"))
+    {
+        issues.push(Issue {
+            severity: Severity::Warning,
+            path: format!("{field}.source"),
+            message: "expected a .tsv source for auditable tabular catalogue data".to_owned(),
+        });
+    }
+    if let Some(format) = mapping
+        .get(Value::String("format".to_owned()))
+        .and_then(Value::as_str)
+        && format != "tsv"
+    {
+        issues.push(Issue {
+            severity: Severity::Error,
+            path: format!("{field}.format"),
+            message: "expected 'tsv'".to_owned(),
+        });
+    }
+}
+
+fn validate_catalogue_provenance(root: &Value, issues: &mut Vec<Issue>) {
+    let Some(sources) = value_at(root, &["provenance", "sources"]).and_then(Value::as_sequence)
+    else {
+        return;
+    };
+    let mut ids = BTreeSet::new();
+    for (idx, source) in sources.iter().enumerate() {
+        let Some(mapping) = source.as_mapping() else {
+            issues.push(Issue {
+                severity: Severity::Error,
+                path: format!("provenance.sources[{idx}]"),
+                message: "expected mapping".to_owned(),
+            });
+            continue;
+        };
+        for field in ["id", "kind", "label"] {
+            validate_required_non_empty_mapping_string(
+                mapping,
+                &format!("provenance.sources[{idx}]"),
+                field,
+                issues,
+            );
+        }
+        if let Some(id) = mapping
+            .get(Value::String("id".to_owned()))
+            .and_then(Value::as_str)
+            && !ids.insert(id.to_owned())
+        {
+            issues.push(Issue {
+                severity: Severity::Error,
+                path: format!("provenance.sources[{idx}].id"),
+                message: format!("duplicate provenance source id '{id}'"),
+            });
+        }
+
+        let url = mapping
+            .get(Value::String("url".to_owned()))
+            .and_then(Value::as_str);
+        let url_template = mapping
+            .get(Value::String("url_template".to_owned()))
+            .and_then(Value::as_str);
+        match (url, url_template) {
+            (Some(value), _) => validate_url_string(
+                value,
+                &format!("provenance.sources[{idx}].url"),
+                false,
+                issues,
+            ),
+            (None, Some(value)) => validate_url_template(
+                value,
+                &format!("provenance.sources[{idx}].url_template"),
+                issues,
+            ),
+            (None, None) => issues.push(Issue {
+                severity: Severity::Error,
+                path: format!("provenance.sources[{idx}]"),
+                message: "expected url or url_template".to_owned(),
+            }),
+        }
+    }
+}
+
+fn validate_url_template(value: &str, path: &str, issues: &mut Vec<Issue>) {
+    let sample = value
+        .replace("{rsid}", "rs1")
+        .replace("{genomic_hgvs}", "NG_000001.1%3Ag.1A%3ET")
+        .replace("{variant_id}", "rs1");
+    validate_url_string(&sample, path, false, issues);
+}
+
+fn validate_required_non_empty_mapping_string(
+    mapping: &Mapping,
+    parent: &str,
+    field: &str,
+    issues: &mut Vec<Issue>,
+) {
+    match mapping
+        .get(Value::String(field.to_owned()))
+        .and_then(Value::as_str)
+    {
+        Some(text) if !text.trim().is_empty() => {}
+        Some(_) => issues.push(Issue {
+            severity: Severity::Error,
+            path: format!("{parent}.{field}"),
+            message: "empty string".to_owned(),
+        }),
+        None => issues.push(Issue {
+            severity: Severity::Error,
+            path: format!("{parent}.{field}"),
+            message: "missing required field".to_owned(),
+        }),
+    }
+}
+
+fn validate_optional_catalogue_mapping_string(
+    mapping: &Mapping,
+    parent: &str,
+    field: &str,
+    issues: &mut Vec<Issue>,
+) {
+    let Some(value) = mapping.get(Value::String(field.to_owned())) else {
+        return;
+    };
+    if !matches!(value, Value::String(text) if !text.trim().is_empty()) {
+        issues.push(Issue {
+            severity: Severity::Error,
+            path: format!("{parent}.{field}"),
+            message: "expected non-empty string".to_owned(),
+        });
+    }
+}
diff --git a/rust/bioscript-schema/src/validator_load.rs b/rust/bioscript-schema/src/validator_load.rs
index 776314f..699e9ee 100644
--- a/rust/bioscript-schema/src/validator_load.rs
+++ b/rust/bioscript-schema/src/validator_load.rs
@@ -329,6 +329,14 @@ fn validate_variant_file(path: &Path) -> Result<FileReport, String> {
             }],
         });
     };
+    if schema == "bioscript:variant-catalogue:1.0" {
+        let mut issues = Vec::new();
+        validate_variant_catalogue_root(&value, &mut issues);
+        return Ok(FileReport {
+            file: path.to_path_buf(),
+            issues,
+        });
+    }
     if !schema.contains("variant") {
         if schema == "bioscript:pgx-findings:1.0" {
             let mut issues = Vec::new();
diff --git a/rust/bioscript-schema/src/validator_panel.rs b/rust/bioscript-schema/src/validator_panel.rs
index ba6f9e5..4c23b12 100644
--- a/rust/bioscript-schema/src/validator_panel.rs
+++ b/rust/bioscript-schema/src/validator_panel.rs
@@ -250,9 +250,54 @@ fn validate_panel_interpretation(
         }
     }
     validate_panel_interpretation_logic(key, idx, mapping, issues);
+    validate_panel_interpretation_assets(key, idx, mapping, issues);
     validate_panel_interpretation_emits(key, idx, mapping, issues);
 }
 
+fn validate_panel_interpretation_assets(
+    key: &str,
+    idx: usize,
+    mapping: &Mapping,
+    issues: &mut Vec<Issue>,
+) {
+    let Some(assets) = mapping
+        .get(Value::String("assets".to_owned()))
+        .and_then(Value::as_sequence)
+    else {
+        return;
+    };
+    let mut ids = BTreeSet::new();
+    for (asset_idx, asset) in assets.iter().enumerate() {
+        let Some(asset) = asset.as_mapping() else {
+            issues.push(Issue {
+                severity: Severity::Error,
+                path: format!("{key}[{idx}].assets[{asset_idx}]"),
+                message: "expected mapping".to_owned(),
+            });
+            continue;
+        };
+        for field in ["id", "path"] {
+            validate_required_mapping_string(
+                asset,
+                field,
+                &format!("{key}[{idx}].assets[{asset_idx}]"),
+                issues,
+            );
+        }
+        if let Some(id) = asset
+            .get(Value::String("id".to_owned()))
+            .and_then(Value::as_str)
+            && !ids.insert(id.to_owned())
+        {
+            issues.push(Issue {
+                severity: Severity::Error,
+                path: format!("{key}[{idx}].assets[{asset_idx}].id"),
+                message: format!("duplicate asset id '{id}'"),
+            });
+        }
+    }
+}
+
 fn validate_panel_interpretation_logic(
     key: &str,
     idx: usize,
diff --git a/rust/bioscript-schema/src/validator_parse.rs b/rust/bioscript-schema/src/validator_parse.rs
index b959518..f7f4f16 100644
--- a/rust/bioscript-schema/src/validator_parse.rs
+++ b/rust/bioscript-schema/src/validator_parse.rs
@@ -82,6 +82,7 @@ fn parse_panel_interpretations(root: &Value) -> Result<Vec<PanelInterpretation>,
                 .and_then(Value::as_str)
                 .map(ToOwned::to_owned),
             derived_from: mapping_sequence_of_strings(mapping, "derived_from", idx, key)?,
+            assets: parse_panel_interpretation_assets(mapping, idx, key)?,
             emits: parse_panel_interpretation_emits(mapping, idx)?,
             logic: parse_panel_interpretation_logic(mapping)?,
         });
@@ -89,6 +90,42 @@ fn parse_panel_interpretations(root: &Value) -> Result<Vec<PanelInterpretation>,
     Ok(interpretations)
 }
 
+fn parse_panel_interpretation_assets(
+    mapping: &Mapping,
+    interpretation_idx: usize,
+    key: &str,
+) -> Result<Vec<PanelInterpretationAsset>, String> {
+    let Some(items) = mapping
+        .get(Value::String("assets".to_owned()))
+        .and_then(Value::as_sequence)
+    else {
+        return Ok(Vec::new());
+    };
+    let mut assets = Vec::new();
+    for (idx, item) in items.iter().enumerate() {
+        let Some(mapping) = item.as_mapping() else {
+            return Err(format!(
+                "{key}[{interpretation_idx}].assets[{idx}] must be a mapping"
+            ));
+        };
+        assets.push(PanelInterpretationAsset {
+            id: mapping_required_string(
+                mapping,
+                "id",
+                idx,
+                &format!("{key}[{interpretation_idx}].assets"),
+            )?,
+            path: mapping_required_string(
+                mapping,
+                "path",
+                idx,
+                &format!("{key}[{interpretation_idx}].assets"),
+            )?,
+        });
+    }
+    Ok(assets)
+}
+
 fn parse_panel_interpretation_logic(
     mapping: &Mapping,
 ) -> Result<Option<PanelInterpretationLogic>, String> {
diff --git a/rust/bioscript-schema/src/validator_roots.rs b/rust/bioscript-schema/src/validator_roots.rs
index 9600eb7..c9d6f73 100644
--- a/rust/bioscript-schema/src/validator_roots.rs
+++ b/rust/bioscript-schema/src/validator_roots.rs
@@ -34,7 +34,7 @@ fn validate_panel_root(root: &Value, issues: &mut Vec<Issue>) {
     validate_tags(root, issues);
     validate_permissions(root, issues);
     validate_downloads(root, issues);
-    validate_panel_members(root, &["variant", "assay"], issues);
+    validate_panel_members(root, &["variant", "variant-catalogue", "assay"], issues);
     validate_panel_interpretations(root, issues);
     validate_findings(root, issues);
 }
@@ -43,7 +43,7 @@ fn validate_assay_root(root: &Value, issues: &mut Vec<Issue>) {
     validate_schema_and_identity(root, "bioscript:assay:1.0", None, issues);
     validate_optional_strings(root, &["name", "label", "summary"], issues);
     validate_tags(root, issues);
-    validate_panel_members(root, &["variant"], issues);
+    validate_panel_members(root, &["variant", "variant-catalogue"], issues);
     validate_panel_interpretations(root, issues);
     validate_findings(root, issues);
 }
@@ -331,4 +331,3 @@ fn validate_coordinate_range_values(start: i64, end: i64, assembly: &str, issues
         });
     }
 }
-
diff --git a/rust/bioscript-schema/src/validator_types.rs b/rust/bioscript-schema/src/validator_types.rs
index a9466eb..a96a5a8 100644
--- a/rust/bioscript-schema/src/validator_types.rs
+++ b/rust/bioscript-schema/src/validator_types.rs
@@ -157,10 +157,17 @@ pub struct PanelInterpretation {
     pub path: String,
     pub output_format: Option<String>,
     pub derived_from: Vec<String>,
+    pub assets: Vec<PanelInterpretationAsset>,
     pub emits: Vec<PanelInterpretationEmit>,
     pub logic: Option<PanelInterpretationLogic>,
 }
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PanelInterpretationAsset {
+    pub id: String,
+    pub path: String,
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct PanelInterpretationLogic {
     pub source: Option<PanelInterpretationLogicSource>,
diff --git a/rust/bioscript-schema/tests/validate_variants.rs b/rust/bioscript-schema/tests/validate_variants.rs
index 82a0fa0..3733cda 100644
--- a/rust/bioscript-schema/tests/validate_variants.rs
+++ b/rust/bioscript-schema/tests/validate_variants.rs
@@ -106,6 +106,73 @@ provenance:
     assert_eq!(report.total_warnings(), 0);
 }
 
+#[test]
+fn validate_variants_accepts_variant_catalogue_shape() {
+    let dir = temp_dir("validate-catalogue");
+    let fixture = dir.join("variants.yaml");
+    fs::write(
+        &fixture,
+        r#"
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "thalassemia-variants"
+variants:
+  source: "variants.tsv"
+  format: "tsv"
+  key: "variant_id"
+findings:
+  source: "findings.tsv"
+  format: "tsv"
+  key: "variant_id"
+provenance:
+  sources:
+    - id: "ithagenes"
+      kind: "database"
+      label: "IthaGenes"
+      url: "https://www.ithanet.eu/db/ithagenes?action=list"
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+      url_template: "https://www.ncbi.nlm.nih.gov/snp/{rsid}"
+"#,
+    )
+    .unwrap();
+
+    let report = validate_variants_path(&fixture).unwrap();
+    assert_eq!(report.total_errors(), 0);
+    assert_eq!(report.total_warnings(), 0);
+}
+
+#[test]
+fn validate_variants_reports_variant_catalogue_shape_issues() {
+    let dir = temp_dir("validate-catalogue-errors");
+    let fixture = dir.join("variants.yaml");
+    fs::write(
+        &fixture,
+        r#"
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "broken-catalogue"
+variants:
+  source: "variants.csv"
+  format: "csv"
+provenance:
+  sources:
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+"#,
+    )
+    .unwrap();
+
+    let report = validate_variants_path(&fixture).unwrap();
+    let text = report.render_text();
+    assert_eq!(report.total_errors(), 2);
+    assert_eq!(report.total_warnings(), 1);
+    assert!(text.contains("expected 'tsv'"));
+    assert!(text.contains("expected url or url_template"));
+}
+
 #[test]
 fn load_variant_manifest_text_accepts_start_end_coordinates() {
     let manifest = load_variant_manifest_text(
@@ -260,6 +327,11 @@ interpretations:
     path: "apol1.py"
     derived_from:
       - "g1-site-1.yaml"
+    assets:
+      - id: "variants"
+        path: "variants.tsv"
+      - id: "findings"
+        path: "findings.tsv"
     emits:
       - key: "apol1_status"
         label: "APOL1 status"
diff --git a/rust/bioscript-wasm/src/report_workspace/analysis.rs b/rust/bioscript-wasm/src/report_workspace/analysis.rs
index c67ae08..e63f248 100644
--- a/rust/bioscript-wasm/src/report_workspace/analysis.rs
+++ b/rust/bioscript-wasm/src/report_workspace/analysis.rs
@@ -96,6 +96,7 @@ impl PackageWorkspace {
                 RuntimeConfig {
                     limits,
                     loader: loader.clone(),
+                    context: BTreeMap::new(),
                     virtual_binary_files,
                     virtual_text_files: std::mem::take(&mut virtual_text_files),
                     preloaded_observations: preloaded_observations.to_vec(),

From 1ba3f8768c8b7e2fe9915e639e63daebbb704401 Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Thu, 14 May 2026 15:29:48 +1000
Subject: [PATCH 2/5] Adding initial variant catalogue plus controlled file i/o

---
 docs/assay-schema.md                          |  17 +-
 docs/variant-catalogue.md                     |  17 +-
 rust/bioscript-cli/src/manifest_runner.rs     |  65 ++++
 rust/bioscript-cli/src/report_execution.rs    | 188 +++++++++--
 rust/bioscript-cli/src/report_observations.rs |  19 +-
 rust/bioscript-cli/tests/cli.rs               | 156 ++++++++-
 rust/bioscript-reporting/src/lib.rs           |   7 +-
 rust/bioscript-reporting/src/manifest.rs      | 121 +++----
 .../src/manifest_catalogue.rs                 | 302 ++++++++++++++++++
 .../src/manifest_members.rs                   |  91 ++++++
 .../tests/validate_variants.rs                |   3 +
 rust/bioscript-wasm/src/report_api.rs         |   2 +-
 rust/bioscript-wasm/src/report_workspace.rs   |  25 +-
 .../src/report_workspace/analysis.rs          | 132 +++++++-
 14 files changed, 982 insertions(+), 163 deletions(-)
 create mode 100644 rust/bioscript-reporting/src/manifest_catalogue.rs
 create mode 100644 rust/bioscript-reporting/src/manifest_members.rs

diff --git a/docs/assay-schema.md b/docs/assay-schema.md
index 3deec23..57dca64 100644
--- a/docs/assay-schema.md
+++ b/docs/assay-schema.md
@@ -88,19 +88,24 @@ Rules:
 - `kind` is currently `bioscript`
 - `path` points to a BioScript-compatible Python file
 - `output_format` is optional and defaults to `tsv`; use `json` or `jsonl` when the script writes structured JSON output
-- `derived_from` lists the variant YAML files used by the interpretation
-- `assets` is optional and lists local files the analysis script can read through the injected `asset_paths` dict
+- `derived_from` lists the variant YAML or variant catalogue files used by the interpretation
+- `assets` is optional and lists local files the analysis script can read through `bioscript.context["assets"]`; `asset_paths` is also injected during migration
 - `emits` is optional but recommended so report generators know which output columns to display and how to label them
 - `logic` is optional; use `logic.description` and `logic.source.url` to document where the script's derivation rules came from
 - Analysis rows may emit `notes` or `report_notes` as a reporting convention. HTML reports render those notes below the analysis table and omit them from the table columns; this avoids a manifest-level template language while still letting the script build human-readable text from computed values.
 
 Analysis scripts receive these injected variables:
 
-- `input_file`: input genotype/VCF/CRAM path
-- `output_file`: path the script must write
+- `input_file`: virtual input genotype/VCF/CRAM path, normally `/input/genotypes`
+- `output_file`: virtual output path the script must write, normally under `/output`
 - `participant_id`: current participant/sample identifier
-- `observations_file`: TSV containing the variant observations already gathered from assay/panel members for this participant
-- `asset_paths`: dict from each `assets[].id` to the resolved local path
+- `observations_file`: virtual TSV path containing the variant observations already gathered from assay/panel members for this participant
+- `asset_paths`: dict from each `assets[].id` to its virtual path
+
+The preferred API is `bioscript.context`, which includes `participant_id`,
+`input_files`, `pipeline_files`, `assets`, `observations_file`, and
+`output_file`. Scripts should use `bioscript.read_tsv(path)` for TSV assets and
+observation files.
 
 This supports large catalogues where Rust/BioScript first gathers all variant observations, then Python joins those observations to attached TSV assets such as `findings.tsv`, `conditions.tsv`, or `rules.tsv` and emits derived classification rows.
 
diff --git a/docs/variant-catalogue.md b/docs/variant-catalogue.md
index ec133fd..e250e3c 100644
--- a/docs/variant-catalogue.md
+++ b/docs/variant-catalogue.md
@@ -6,9 +6,9 @@ curated source stays as plain text TSV plus a small YAML manifest. Build tools
 can still expand the catalogue into normal `bioscript:variant:1.0` YAML files
 when a package or older runtime path needs per-variant manifests.
 
-This is a design note for the intended BioScript flow. The catalogue manifest
-schema already exists as `bioscript:variant-catalogue:1.0`; direct assay member
-support for catalogues is the next BioScript schema/runtime change.
+This is the BioScript flow for TSV-backed variant catalogues. The catalogue
+manifest schema is `bioscript:variant-catalogue:1.0`, and assay/panel members
+can point at catalogue manifests directly.
 
 ## Why This Exists
 
@@ -50,11 +50,9 @@ members:
     version: "1.0"
 ```
 
-Today, `bioscript:assay:1.0` assay members accept `kind: "variant"` only, so the
-direct member above is the required next schema change. The member should point
-to the catalogue YAML manifest, not directly to `variants.tsv`, because the YAML
-manifest holds schema identity, version, table locations, keys, and shared
-provenance.
+The member should point to the catalogue YAML manifest, not directly to
+`variants.tsv`, because the YAML manifest holds schema identity, version, table
+locations, keys, and shared provenance.
 
 The catalogue manifest shape is:
 
@@ -370,9 +368,6 @@ files work, but the real thalassemia logic needs proper allele, phase, gene, and
 evidence handling.
 
 ```python
-import bioscript
-
-
 observations = bioscript.read_tsv(bioscript.context["observations_file"])
 variants = bioscript.read_tsv(bioscript.context["assets"]["variants"])
 findings = bioscript.read_tsv(bioscript.context["assets"]["findings"])
diff --git a/rust/bioscript-cli/src/manifest_runner.rs b/rust/bioscript-cli/src/manifest_runner.rs
index eddac35..9786e29 100644
--- a/rust/bioscript-cli/src/manifest_runner.rs
+++ b/rust/bioscript-cli/src/manifest_runner.rs
@@ -43,6 +43,43 @@ fn run_manifest(
             )?;
             Ok(())
         }
+        bioscript_reporting::ReportManifestKind::VariantCatalogue => {
+            let input_file = resolved_input
+                .as_deref()
+                .ok_or("manifest execution requires --input-file")?;
+            let store = GenotypeStore::from_file_with_options(Path::new(input_file), options.loader)
+                .map_err(|err| err.to_string())?;
+            let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(runtime_root);
+            let tasks = bioscript_reporting::collect_variant_manifest_tasks(
+                &workspace,
+                &manifest_path.display().to_string(),
+                options.filters,
+            )?;
+            let observations = store
+                .lookup_variants(
+                    &tasks
+                        .iter()
+                        .map(|task| task.manifest.spec.clone())
+                        .collect::<Vec<_>>(),
+                )
+                .map_err(|err| err.to_string())?;
+            let rows = tasks
+                .into_iter()
+                .zip(observations)
+                .map(|(task, observation)| {
+                    variant_row(
+                        runtime_root,
+                        Path::new(&task.manifest_path),
+                        &task.manifest.name,
+                        &task.manifest.tags,
+                        &observation,
+                        options.participant_id,
+                    )
+                })
+                .collect::<Vec<_>>();
+            write_manifest_outputs(&rows, resolved_output.as_deref(), resolved_trace.as_deref())?;
+            Ok(())
+        }
         bioscript_reporting::ReportManifestKind::Panel => {
             let manifest = load_panel_manifest(manifest_path)?;
             let rows = run_panel_manifest(
@@ -138,6 +175,22 @@ fn run_panel_manifest_with_store(
                 }
                 variant_entries.push((member_index, resolved, manifest));
             }
+            bioscript_reporting::ExecutablePanelMember::VariantCatalogue(path) => {
+                let resolved = resolve_manifest_path(runtime_root, &panel.path, path)?;
+                let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(runtime_root);
+                let tasks = bioscript_reporting::collect_variant_manifest_tasks(
+                    &workspace,
+                    &resolved.display().to_string(),
+                    filters,
+                )?;
+                for task in tasks {
+                    variant_entries.push((
+                        member_index,
+                        PathBuf::from(&task.manifest_path),
+                        task.manifest,
+                    ));
+                }
+            }
             bioscript_reporting::ExecutablePanelMember::Assay(path) => {
                 let resolved = resolve_manifest_path(runtime_root, &panel.path, path)?;
                 let assay = load_assay_manifest(&resolved)?;
@@ -211,6 +264,18 @@ fn run_assay_manifest_with_store(
                 }
                 entries.push((resolved, manifest));
             }
+            bioscript_reporting::ExecutableAssayMember::VariantCatalogue(path) => {
+                let resolved = resolve_manifest_path(runtime_root, &assay.path, path)?;
+                let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(runtime_root);
+                let tasks = bioscript_reporting::collect_variant_manifest_tasks(
+                    &workspace,
+                    &resolved.display().to_string(),
+                    filters,
+                )?;
+                for task in tasks {
+                    entries.push((PathBuf::from(&task.manifest_path), task.manifest));
+                }
+            }
         }
     }
 
diff --git a/rust/bioscript-cli/src/report_execution.rs b/rust/bioscript-cli/src/report_execution.rs
index 227b257..293b410 100644
--- a/rust/bioscript-cli/src/report_execution.rs
+++ b/rust/bioscript-cli/src/report_execution.rs
@@ -114,6 +114,8 @@ fn run_interpretations_for_report(
         })?;
         run_bioscript_analysis_script(&BioscriptAnalysisScriptInput {
             runtime_root: options.runtime_root,
+            manifest_path,
+            interpretation,
             script_path: &script_path,
             input_file: options.input_file,
             output_file: &output_file,
@@ -163,6 +165,8 @@ fn run_interpretations_for_report(
 
 struct BioscriptAnalysisScriptInput<'a> {
     runtime_root: &'a Path,
+    manifest_path: &'a Path,
+    interpretation: &'a PanelInterpretation,
     script_path: &'a Path,
     input_file: &'a Path,
     output_file: &'a Path,
@@ -179,56 +183,182 @@ fn run_bioscript_analysis_script(input: &BioscriptAnalysisScriptInput<'_>) -> Re
         .max_allocations(400_000)
         .gc_interval(1000)
         .max_recursion_depth(Some(200));
+    let input_bytes = fs::read(input.input_file)
+        .map_err(|err| format!("failed to read analysis input {}: {err}", input.input_file.display()))?;
+    let observations_text = fs::read_to_string(input.observations_file).map_err(|err| {
+        format!(
+            "failed to read analysis observations {}: {err}",
+            input.observations_file.display()
+        )
+    })?;
+    let script_text = fs::read_to_string(input.script_path)
+        .map_err(|err| format!("failed to read analysis script {}: {err}", input.script_path.display()))?;
+    let virtual_input_file = "/input/genotypes".to_owned();
+    let virtual_observations_file = "/work/observations.tsv".to_owned();
+    let output_extension = input
+        .output_file
+        .extension()
+        .and_then(|value| value.to_str())
+        .unwrap_or("tsv");
+    let virtual_output_file = format!("/output/results.{output_extension}");
+    let script_virtual_path = virtual_pipeline_path(input.script_path, "analysis.py");
+    let manifest_virtual_path = virtual_pipeline_path(input.manifest_path, "manifest.yaml");
+    let mut virtual_text_files = BTreeMap::new();
+    virtual_text_files.insert(script_virtual_path.clone(), script_text);
+    virtual_text_files.insert(
+        manifest_virtual_path.clone(),
+        fs::read_to_string(input.manifest_path).map_err(|err| {
+            format!(
+                "failed to read analysis manifest {}: {err}",
+                input.manifest_path.display()
+            )
+        })?,
+    );
+    virtual_text_files.insert(virtual_observations_file.clone(), observations_text);
+    let mut asset_paths = BTreeMap::new();
+    for asset in &input.interpretation.assets {
+        let asset_path = resolve_manifest_path(input.runtime_root, input.manifest_path, &asset.path)?;
+        let virtual_asset_path = virtual_pipeline_path(&asset_path, &asset.path);
+        let text = fs::read_to_string(&asset_path)
+            .map_err(|err| format!("failed to read analysis asset {}: {err}", asset_path.display()))?;
+        virtual_text_files.insert(virtual_asset_path.clone(), text);
+        asset_paths.insert(asset.id.clone(), virtual_asset_path);
+    }
+    let mut virtual_binary_files = BTreeMap::new();
+    virtual_binary_files.insert(virtual_input_file.clone(), input_bytes);
+    let context = analysis_context(
+        input.participant_id,
+        &virtual_input_file,
+        &script_virtual_path,
+        &manifest_virtual_path,
+        &asset_paths,
+        &virtual_observations_file,
+        &virtual_output_file,
+    );
     let runtime = BioscriptRuntime::with_config(
         input.runtime_root.to_path_buf(),
         RuntimeConfig {
             limits,
             loader: input.loader.clone(),
+            context,
+            virtual_binary_files,
+            virtual_text_files,
             ..RuntimeConfig::default()
         },
     )
     .map_err(|err| err.to_string())?;
     runtime
         .run_file(
-            input.script_path,
+            &script_virtual_path,
             None,
             vec![
-                (
-                    "input_file",
-                    monty::MontyObject::String(runtime_path_string(
-                        input.runtime_root,
-                        input.input_file,
-                    )),
-                ),
-                (
-                    "output_file",
-                    monty::MontyObject::String(runtime_path_string(
-                        input.runtime_root,
-                        input.output_file,
-                    )),
-                ),
-                (
-                    "observations_file",
-                    monty::MontyObject::String(runtime_path_string(
-                        input.runtime_root,
-                        input.observations_file,
-                    )),
-                ),
+                ("input_file", monty::MontyObject::String(virtual_input_file)),
+                ("output_file", monty::MontyObject::String(virtual_output_file.clone())),
+                ("observations_file", monty::MontyObject::String(virtual_observations_file)),
+                ("asset_paths", monty_string_dict(&asset_paths)),
                 (
                     "participant_id",
                     monty::MontyObject::String(input.participant_id.to_owned()),
                 ),
             ],
         )
-        .map(|_| ())
-        .map_err(|err| err.to_string())
+        .map_err(|err| err.to_string())?;
+    let written = runtime.virtual_written_text_files();
+    let output_text = written
+        .get(&virtual_output_file)
+        .ok_or_else(|| format!("analysis did not write {virtual_output_file}"))?;
+    if let Some(parent) = input.output_file.parent() {
+        fs::create_dir_all(parent)
+            .map_err(|err| format!("failed to create analysis output dir {}: {err}", parent.display()))?;
+    }
+    fs::write(input.output_file, output_text).map_err(|err| {
+        format!(
+            "failed to write analysis output {}: {err}",
+            input.output_file.display()
+        )
+    })?;
+    persist_virtual_output_files(&written, &virtual_output_file, input.output_file)
 }
 
-fn runtime_path_string(runtime_root: &Path, path: &Path) -> String {
-    path.strip_prefix(runtime_root)
-        .unwrap_or(path)
-        .display()
-        .to_string()
+fn persist_virtual_output_files(
+    written: &BTreeMap<String, String>,
+    primary_virtual_output_file: &str,
+    primary_output_file: &Path,
+) -> Result<(), String> {
+    let Some(output_dir) = primary_output_file.parent() else {
+        return Ok(());
+    };
+    for (virtual_path, text) in written {
+        if virtual_path == primary_virtual_output_file {
+            continue;
+        }
+        let Some(relative) = virtual_path.strip_prefix("/output/") else {
+            continue;
+        };
+        let relative_path = Path::new(relative);
+        if relative_path
+            .components()
+            .any(|component| !matches!(component, std::path::Component::Normal(_)))
+        {
+            return Err(format!("analysis wrote invalid output path {virtual_path}"));
+        }
+        let output_path = output_dir.join(relative_path);
+        if let Some(parent) = output_path.parent() {
+            fs::create_dir_all(parent)
+                .map_err(|err| format!("failed to create output dir {}: {err}", parent.display()))?;
+        }
+        fs::write(&output_path, text)
+            .map_err(|err| format!("failed to write analysis output {}: {err}", output_path.display()))?;
+    }
+    Ok(())
+}
+
+fn virtual_pipeline_path(path: &Path, fallback: &str) -> String {
+    let name = path
+        .file_name()
+        .and_then(|value| value.to_str())
+        .unwrap_or(fallback);
+    format!("/input/pipeline/{name}")
+}
+
+fn analysis_context(
+    participant_id: &str,
+    input_file: &str,
+    script_path: &str,
+    manifest_path: &str,
+    asset_paths: &BTreeMap<String, String>,
+    observations_file: &str,
+    output_file: &str,
+) -> BTreeMap<String, monty::MontyObject> {
+    BTreeMap::from([
+        (
+            "participant_id".to_owned(),
+            monty::MontyObject::String(participant_id.to_owned()),
+        ),
+        (
+            "input_files".to_owned(),
+            monty_string_dict(&BTreeMap::from([(
+                "genotypes".to_owned(),
+                input_file.to_owned(),
+            )])),
+        ),
+        (
+            "pipeline_files".to_owned(),
+            monty_string_dict(&BTreeMap::from([
+                ("manifest".to_owned(), manifest_path.to_owned()),
+                ("analysis".to_owned(), script_path.to_owned()),
+            ])),
+        ),
+        ("assets".to_owned(), monty_string_dict(asset_paths)),
+        (
+            "observations_file".to_owned(),
+            monty::MontyObject::String(observations_file.to_owned()),
+        ),
+        (
+            "output_file".to_owned(),
+            monty::MontyObject::String(output_file.to_owned()),
+        ),
+    ])
 }
 
 fn parse_analysis_output(
diff --git a/rust/bioscript-cli/src/report_observations.rs b/rust/bioscript-cli/src/report_observations.rs
index 261dada..eb11db3 100644
--- a/rust/bioscript-cli/src/report_observations.rs
+++ b/rust/bioscript-cli/src/report_observations.rs
@@ -11,10 +11,21 @@ fn app_observation_from_manifest_row(
     } else {
         runtime_root.join(&row_path)
     };
-    let manifest = load_variant_manifest(&manifest_path)?;
-    let gene = variant_manifest_gene(&manifest_path)?;
-    let observed_alt_alleles = variant_observed_alt_alleles(&manifest_path)?;
-    let source = variant_primary_source(&manifest_path)?;
+    let workspace = bioscript_reporting::FilesystemManifestWorkspace::new(runtime_root);
+    let task = bioscript_reporting::load_variant_manifest_task_by_path(
+        &workspace,
+        &manifest_path.display().to_string(),
+    )?;
+    let manifest = task.manifest;
+    let (gene, observed_alt_alleles, source) = if row_path.contains('#') {
+        (String::new(), Vec::new(), serde_json::Value::Null)
+    } else {
+        (
+            variant_manifest_gene(&manifest_path)?,
+            variant_observed_alt_alleles(&manifest_path)?,
+            variant_primary_source(&manifest_path)?,
+        )
+    };
     Ok(bioscript_reporting::app_observation_from_manifest_row(
         bioscript_reporting::AppObservationInput {
             row,
diff --git a/rust/bioscript-cli/tests/cli.rs b/rust/bioscript-cli/tests/cli.rs
index 17613ed..c2215c5 100644
--- a/rust/bioscript-cli/tests/cli.rs
+++ b/rust/bioscript-cli/tests/cli.rs
@@ -1,6 +1,6 @@
 use std::{
     fs,
-    path::PathBuf,
+    path::{Path, PathBuf},
     process::Command,
     time::{SystemTime, UNIX_EPOCH},
 };
@@ -379,3 +379,157 @@ interpretations:
     assert!(stdout.contains("example-rs73885319"));
     assert!(stdout.contains("AG"));
 }
+
+#[test]
+fn report_runs_variant_catalogue_analysis_with_sandboxed_assets() {
+    let root = repo_root();
+    let dir = temp_dir("catalogue-report");
+    let (assay, output_dir) = write_catalogue_report_fixture(&dir);
+
+    let output = Command::new(env!("CARGO_BIN_EXE_bioscript"))
+        .current_dir(&root)
+        .arg("report")
+        .arg(&assay)
+        .arg("--input-file")
+        .arg("old/examples/apol1/test_snps.txt")
+        .arg("--output-dir")
+        .arg(&output_dir)
+        .output()
+        .unwrap();
+
+    assert!(
+        output.status.success(),
+        "stderr: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let participant = "test_snps";
+    let analysis_tsv =
+        fs::read_to_string(output_dir.join(format!("analysis/{participant}/catalogue_status.tsv")))
+            .unwrap();
+    assert!(analysis_tsv.contains("catalogue_assets_loaded"));
+    let observations = fs::read_to_string(output_dir.join("observations.tsv")).unwrap();
+    assert!(observations.contains("example-rs73885319"));
+    assert!(observations.contains("AG"));
+}
+
+fn write_catalogue_report_fixture(dir: &Path) -> (PathBuf, PathBuf) {
+    let catalogue_dir = dir.join("catalogue");
+    fs::create_dir_all(&catalogue_dir).unwrap();
+    write_catalogue_manifest(&catalogue_dir);
+    write_catalogue_tables(&catalogue_dir);
+    write_catalogue_analysis(dir);
+    let assay = write_catalogue_assay(dir);
+    (assay, dir.join("report"))
+}
+
+fn write_catalogue_manifest(catalogue_dir: &Path) {
+    fs::write(
+        catalogue_dir.join("variants.yaml"),
+        r#"
+schema: "bioscript:variant-catalogue:1.0"
+version: "1.0"
+name: "apol1-catalogue"
+variants:
+  source: "variants.tsv"
+  format: "tsv"
+  key: "variant_id"
+  columns:
+    variant_id:
+      role: "id"
+      required: true
+    name:
+      role: "name"
+    rsid:
+      role: "identifier.rsid"
+    kind:
+      role: "alleles.kind"
+    ref:
+      role: "alleles.ref"
+    alts:
+      role: "alleles.alts"
+    grch38_chrom:
+      role: "coordinates.grch38.chrom"
+    grch38_pos:
+      role: "coordinates.grch38.pos"
+      type: "integer"
+provenance:
+  sources:
+    - id: "dbsnp"
+      kind: "database"
+      label: "dbSNP"
+      url_template: "https://www.ncbi.nlm.nih.gov/snp/{rsid}"
+"#,
+    )
+    .unwrap();
+}
+
+fn write_catalogue_tables(catalogue_dir: &Path) {
+    fs::write(
+        catalogue_dir.join("variants.tsv"),
+        "variant_id\tname\trsid\tkind\tref\talts\tgrch38_chrom\tgrch38_pos\napol1-g1-site-1\texample-rs73885319\trs73885319\tsnv\tA\tG\t22\t36265860\n",
+    )
+    .unwrap();
+    fs::write(
+        catalogue_dir.join("findings.tsv"),
+        "variant_id\tlabel\napol1-g1-site-1\tcatalogue_assets_loaded\n",
+    )
+    .unwrap();
+}
+
+fn write_catalogue_analysis(dir: &Path) {
+    fs::write(
+        dir.join("analysis.py"),
+        r#"
+observations = bioscript.read_tsv(bioscript.context["observations_file"])
+variants = bioscript.read_tsv(bioscript.context["assets"]["variants"])
+findings = bioscript.read_tsv(bioscript.context["assets"]["findings"])
+
+status = "missing"
+if len(observations) > 0 and len(variants) > 0 and len(findings) > 0:
+    status = findings[0]["label"]
+
+bioscript.write_tsv(bioscript.context["output_file"], [
+    {
+        "participant_id": bioscript.context["participant_id"],
+        "status": status,
+    }
+])
+"#,
+    )
+    .unwrap();
+}
+
+fn write_catalogue_assay(dir: &Path) -> PathBuf {
+    let assay = dir.join("assay.yaml");
+    fs::write(
+        &assay,
+        r#"
+schema: "bioscript:assay:1.0"
+version: "1.0"
+name: "catalogue-assay"
+members:
+  - kind: "variant-catalogue"
+    path: "catalogue/variants.yaml"
+    version: "1.0"
+analyses:
+  - id: "catalogue_status"
+    kind: "bioscript"
+    path: "analysis.py"
+    output_format: "tsv"
+    derived_from:
+      - "catalogue/variants.yaml"
+    assets:
+      - id: "variants"
+        path: "catalogue/variants.tsv"
+      - id: "findings"
+        path: "catalogue/findings.tsv"
+    emits:
+      - key: "status"
+        label: "Status"
+        value_type: "string"
+        format: "badge"
+"#,
+    )
+    .unwrap();
+    assay
+}
diff --git a/rust/bioscript-reporting/src/lib.rs b/rust/bioscript-reporting/src/lib.rs
index 45d2f47..ea61414 100644
--- a/rust/bioscript-reporting/src/lib.rs
+++ b/rust/bioscript-reporting/src/lib.rs
@@ -26,9 +26,10 @@ pub use manifest::{
     VariantManifestTask, assay_executable_member, assay_executable_member_path,
     collect_analysis_manifest_tasks, collect_manifest_provenance_entries,
     collect_variant_manifest_tasks, load_manifest_findings, load_manifest_provenance_links,
-    load_report_manifest_context, matches_analysis_path_filters, matches_variant_manifest_filters,
-    panel_executable_member, panel_executable_member_path, report_assay_id, report_manifest_kind,
-    report_manifest_metadata, report_manifest_schema, resolve_filesystem_manifest_path,
+    load_report_manifest_context, load_variant_manifest_task_by_path,
+    matches_analysis_path_filters, matches_variant_manifest_filters, panel_executable_member,
+    panel_executable_member_path, report_assay_id, report_manifest_kind, report_manifest_metadata,
+    report_manifest_schema, resolve_filesystem_manifest_path,
 };
 pub use matching::match_app_findings;
 pub use observation::{AppObservationInput, app_observation_from_manifest_row};
diff --git a/rust/bioscript-reporting/src/manifest.rs b/rust/bioscript-reporting/src/manifest.rs
index 06e0645..6d30287 100644
--- a/rust/bioscript-reporting/src/manifest.rs
+++ b/rust/bioscript-reporting/src/manifest.rs
@@ -8,14 +8,25 @@ use bioscript_schema::{
     load_variant_manifest_text,
 };
 
+#[path = "manifest_catalogue.rs"]
+mod catalogue;
+#[path = "manifest_members.rs"]
+mod members;
 #[path = "manifest_provenance.rs"]
 mod provenance;
 
+use catalogue::load_variant_catalogue_tasks;
+pub(crate) use members::traversable_manifest_member_paths;
+pub use members::{
+    ExecutableAssayMember, ExecutablePanelMember, assay_executable_member,
+    assay_executable_member_path, panel_executable_member, panel_executable_member_path,
+};
 pub use provenance::{collect_manifest_provenance_entries, load_manifest_provenance_links};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ReportManifestKind {
     Variant,
+    VariantCatalogue,
     Panel,
     Assay,
 }
@@ -23,6 +34,7 @@ pub enum ReportManifestKind {
 pub fn report_manifest_kind(schema: &str) -> Result<ReportManifestKind, String> {
     match schema {
         "bioscript:variant:1.0" | "bioscript:variant" => Ok(ReportManifestKind::Variant),
+        "bioscript:variant-catalogue:1.0" => Ok(ReportManifestKind::VariantCatalogue),
         "bioscript:panel:1.0" => Ok(ReportManifestKind::Panel),
         "bioscript:assay:1.0" => Ok(ReportManifestKind::Assay),
         other => Err(format!("unsupported manifest schema '{other}'")),
@@ -220,7 +232,7 @@ pub fn collect_analysis_manifest_tasks(
                 interpretations: manifest.interpretations,
             }])
         }
-        ReportManifestKind::Variant => Ok(Vec::new()),
+        ReportManifestKind::Variant | ReportManifestKind::VariantCatalogue => Ok(Vec::new()),
     }
 }
 
@@ -244,6 +256,9 @@ pub fn collect_variant_manifest_tasks(
                 manifest,
             }])
         }
+        ReportManifestKind::VariantCatalogue => {
+            load_variant_catalogue_tasks(workspace, path, filters)
+        }
         ReportManifestKind::Panel => {
             let text = workspace.load_text(path)?;
             let manifest = load_panel_manifest_text(path, &text)?;
@@ -257,6 +272,10 @@ pub fn collect_variant_manifest_tasks(
                             tasks.push(variant);
                         }
                     }
+                    ExecutablePanelMember::VariantCatalogue(member_path) => {
+                        let resolved = workspace.resolve(path, member_path)?;
+                        tasks.extend(load_variant_catalogue_tasks(workspace, &resolved, filters)?);
+                    }
                     ExecutablePanelMember::Assay(member_path) => {
                         let resolved = workspace.resolve(path, member_path)?;
                         tasks.extend(collect_variant_manifest_tasks(
@@ -280,6 +299,10 @@ pub fn collect_variant_manifest_tasks(
                             tasks.push(variant);
                         }
                     }
+                    ExecutableAssayMember::VariantCatalogue(member_path) => {
+                        let resolved = workspace.resolve(path, member_path)?;
+                        tasks.extend(load_variant_catalogue_tasks(workspace, &resolved, filters)?);
+                    }
                 }
             }
             Ok(tasks)
@@ -299,6 +322,19 @@ fn load_variant_task(
     })
 }
 
+pub fn load_variant_manifest_task_by_path(
+    workspace: &impl ManifestWorkspace,
+    path: &str,
+) -> Result<VariantManifestTask, String> {
+    if let Some((catalogue_path, variant_id)) = path.rsplit_once('#') {
+        return load_variant_catalogue_tasks(workspace, catalogue_path, &[])?
+            .into_iter()
+            .find(|task| task.manifest_path == path || task.manifest.name == variant_id)
+            .ok_or_else(|| format!("variant catalogue entry not found: {path}"));
+    }
+    load_variant_task(workspace, path)
+}
+
 pub fn load_manifest_findings(
     workspace: &impl ManifestWorkspace,
     path: &str,
@@ -372,61 +408,6 @@ pub fn matches_analysis_path_filters(path: &str, filters: &[String]) -> bool {
     })
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum ExecutablePanelMember<'a> {
-    Variant(&'a str),
-    Assay(&'a str),
-}
-
-pub fn panel_executable_member<'a>(
-    kind: &str,
-    path: Option<&'a str>,
-) -> Result<ExecutablePanelMember<'a>, String> {
-    let path = panel_executable_member_path(kind, path)?;
-    match kind {
-        "variant" => Ok(ExecutablePanelMember::Variant(path)),
-        "assay" => Ok(ExecutablePanelMember::Assay(path)),
-        _ => unreachable!("panel_executable_member_path validates member kind"),
-    }
-}
-
-pub fn panel_executable_member_path<'a>(
-    kind: &str,
-    path: Option<&'a str>,
-) -> Result<&'a str, String> {
-    let Some(path) = path else {
-        return Err("remote panel members are not executable yet".to_owned());
-    };
-    if !matches!(kind, "variant" | "assay") {
-        return Err(format!("panel member kind '{kind}' is not executable"));
-    }
-    Ok(path)
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum ExecutableAssayMember<'a> {
-    Variant(&'a str),
-}
-pub fn assay_executable_member<'a>(
-    kind: &str,
-    path: Option<&'a str>,
-) -> Result<ExecutableAssayMember<'a>, String> {
-    let path = assay_executable_member_path(kind, path)?;
-    Ok(ExecutableAssayMember::Variant(path))
-}
-pub fn assay_executable_member_path<'a>(
-    kind: &str,
-    path: Option<&'a str>,
-) -> Result<&'a str, String> {
-    if kind != "variant" {
-        return Err(format!("assay member kind '{kind}' is not executable"));
-    }
-    let Some(path) = path else {
-        return Err("remote assay members are not executable yet".to_owned());
-    };
-    Ok(path)
-}
-
 pub(super) fn manifest_supports_findings(schema: &str) -> bool {
     matches!(
         schema,
@@ -438,34 +419,6 @@ pub(super) fn manifest_supports_findings(schema: &str) -> bool {
     )
 }
 
-fn manifest_has_traversable_members(schema: &str) -> bool {
-    matches!(schema, "bioscript:assay:1.0" | "bioscript:panel:1.0")
-}
-
-pub(super) fn traversable_manifest_member_paths<'a>(
-    schema: &str,
-    value: &'a serde_yaml::Value,
-) -> Vec<&'a str> {
-    if !manifest_has_traversable_members(schema) {
-        return Vec::new();
-    }
-    value
-        .get("members")
-        .and_then(serde_yaml::Value::as_sequence)
-        .into_iter()
-        .flatten()
-        .filter_map(traversable_manifest_member_path)
-        .collect()
-}
-
-fn traversable_manifest_member_path(member: &serde_yaml::Value) -> Option<&str> {
-    let kind = member.get("kind").and_then(serde_yaml::Value::as_str)?;
-    if !matches!(kind, "variant" | "assay") {
-        return None;
-    }
-    member.get("path").and_then(serde_yaml::Value::as_str)
-}
-
 pub(super) fn yaml_to_json(value: serde_yaml::Value) -> Result<serde_json::Value, String> {
     serde_json::to_value(value).map_err(|err| format!("failed to convert YAML to JSON: {err}"))
 }
diff --git a/rust/bioscript-reporting/src/manifest_catalogue.rs b/rust/bioscript-reporting/src/manifest_catalogue.rs
new file mode 100644
index 0000000..20666bb
--- /dev/null
+++ b/rust/bioscript-reporting/src/manifest_catalogue.rs
@@ -0,0 +1,302 @@
+use std::{collections::BTreeMap, path::PathBuf};
+
+use bioscript_core::{GenomicLocus, VariantKind, VariantSpec};
+use bioscript_schema::VariantManifest;
+
+use super::{
+    ManifestWorkspace, VariantManifestTask, matches_variant_manifest_filters, yaml_string,
+    yaml_string_sequence,
+};
+
+pub(super) fn load_variant_catalogue_tasks(
+    workspace: &impl ManifestWorkspace,
+    path: &str,
+    filters: &[String],
+) -> Result<Vec<VariantManifestTask>, String> {
+    let value = workspace.load_yaml(path)?;
+    let variants = value
+        .get("variants")
+        .and_then(serde_yaml::Value::as_mapping)
+        .ok_or_else(|| format!("{path} is missing variants table declaration"))?;
+    let source = variants
+        .get(serde_yaml::Value::String("source".to_owned()))
+        .and_then(serde_yaml::Value::as_str)
+        .ok_or_else(|| format!("{path} variants.source missing"))?;
+    let source_path = workspace.resolve(path, source)?;
+    let table_text = workspace.load_text(&source_path)?;
+    let rows = parse_tsv_rows(&table_text)?;
+    let columns = CatalogueColumns::from_manifest(variants);
+    let catalogue_name =
+        yaml_string(&value, "name").unwrap_or_else(|| "variant-catalogue".to_owned());
+    let catalogue_tags = yaml_string_sequence(&value, "tags")
+        .into_iter()
+        .filter_map(|value| value.as_str().map(ToOwned::to_owned))
+        .collect::<Vec<_>>();
+
+    rows.into_iter()
+        .enumerate()
+        .map(|(idx, row)| {
+            catalogue_row_task(path, &catalogue_name, &catalogue_tags, &columns, idx, &row)
+        })
+        .filter_map(|result| match result {
+            Ok(task)
+                if matches_variant_manifest_filters(
+                    &task.manifest,
+                    &task.manifest_path,
+                    filters,
+                ) =>
+            {
+                Some(Ok(task))
+            }
+            Ok(_) => None,
+            Err(err) => Some(Err(err)),
+        })
+        .collect()
+}
+
+fn catalogue_row_task(
+    catalogue_path: &str,
+    catalogue_name: &str,
+    catalogue_tags: &[String],
+    columns: &CatalogueColumns,
+    idx: usize,
+    row: &BTreeMap<String, String>,
+) -> Result<VariantManifestTask, String> {
+    let variant_id = columns
+        .value(row, "id")
+        .or_else(|| columns.value(row, "identifier.rsid"))
+        .filter(|value| !value.is_empty())
+        .ok_or_else(|| {
+            format!(
+                "{catalogue_path} variants.tsv row {} missing variant id",
+                idx + 2
+            )
+        })?;
+    let name = columns
+        .value(row, "name")
+        .filter(|value| !value.is_empty())
+        .map_or_else(
+            || format!("{catalogue_name}-{variant_id}"),
+            ToOwned::to_owned,
+        );
+    let mut rsids = split_list(
+        columns.value(row, "identifier.rsid"),
+        columns.separator("identifier.rsid"),
+    );
+    rsids.extend(split_list(
+        columns.value(row, "identifier.aliases"),
+        columns.separator("identifier.aliases"),
+    ));
+    rsids.sort();
+    rsids.dedup();
+    let spec = VariantSpec {
+        rsids,
+        grch37: catalogue_locus(columns, row, "grch37")?,
+        grch38: catalogue_locus(columns, row, "grch38")?,
+        reference: columns.value(row, "alleles.ref").map(ToOwned::to_owned),
+        alternate: split_list(
+            columns.value(row, "alleles.alts"),
+            columns.separator("alleles.alts"),
+        )
+        .into_iter()
+        .next(),
+        kind: columns
+            .value(row, "alleles.kind")
+            .map(catalogue_variant_kind),
+        ..VariantSpec::default()
+    };
+    if !spec.has_rsids() && !spec.has_coordinates() {
+        return Err(format!(
+            "{catalogue_path} variants.tsv row {} has no rsid or coordinates",
+            idx + 2
+        ));
+    }
+    let manifest = VariantManifest {
+        path: PathBuf::from(format!("{catalogue_path}#{variant_id}")),
+        name,
+        tags: catalogue_tags.to_vec(),
+        spec,
+    };
+    Ok(VariantManifestTask {
+        manifest_path: manifest.path.display().to_string(),
+        manifest,
+    })
+}
+
+fn catalogue_locus(
+    columns: &CatalogueColumns,
+    row: &BTreeMap<String, String>,
+    assembly: &str,
+) -> Result<Option<GenomicLocus>, String> {
+    let chrom_role = format!("coordinates.{assembly}.chrom");
+    let Some(chrom) = columns
+        .value(row, &chrom_role)
+        .filter(|value| !value.is_empty())
+    else {
+        return Ok(None);
+    };
+    let pos_role = format!("coordinates.{assembly}.pos");
+    if let Some(pos) = columns
+        .value(row, &pos_role)
+        .filter(|value| !value.is_empty())
+    {
+        let pos = parse_catalogue_i64(pos, &pos_role)?;
+        return Ok(Some(GenomicLocus {
+            chrom: chrom.to_owned(),
+            start: pos,
+            end: pos,
+        }));
+    }
+    let start_role = format!("coordinates.{assembly}.start");
+    let end_role = format!("coordinates.{assembly}.end");
+    match (
+        columns
+            .value(row, &start_role)
+            .filter(|value| !value.is_empty()),
+        columns
+            .value(row, &end_role)
+            .filter(|value| !value.is_empty()),
+    ) {
+        (Some(start), Some(end)) => Ok(Some(GenomicLocus {
+            chrom: chrom.to_owned(),
+            start: parse_catalogue_i64(start, &start_role)?,
+            end: parse_catalogue_i64(end, &end_role)?,
+        })),
+        (None, None) => Ok(None),
+        _ => Err(format!("expected both {start_role} and {end_role}")),
+    }
+}
+
+fn parse_catalogue_i64(value: &str, role: &str) -> Result<i64, String> {
+    value
+        .parse::<i64>()
+        .map_err(|err| format!("invalid integer for {role}: {value}: {err}"))
+}
+
+fn catalogue_variant_kind(value: &str) -> VariantKind {
+    match value {
+        "snv" | "snp" => VariantKind::Snp,
+        "deletion" => VariantKind::Deletion,
+        "insertion" => VariantKind::Insertion,
+        "indel" => VariantKind::Indel,
+        _ => VariantKind::Other,
+    }
+}
+
+fn split_list(value: Option<&str>, separator: Option<&str>) -> Vec<String> {
+    let Some(value) = value else {
+        return Vec::new();
+    };
+    let separator = separator.unwrap_or("|");
+    value
+        .split(separator)
+        .map(str::trim)
+        .filter(|item| !item.is_empty())
+        .map(ToOwned::to_owned)
+        .collect()
+}
+
+fn parse_tsv_rows(text: &str) -> Result<Vec<BTreeMap<String, String>>, String> {
+    let mut lines = text.lines().filter(|line| !line.trim().is_empty());
+    let Some(header_line) = lines.next() else {
+        return Ok(Vec::new());
+    };
+    let headers = header_line
+        .split('\t')
+        .map(ToOwned::to_owned)
+        .collect::<Vec<_>>();
+    if headers.is_empty() || headers.iter().any(|header| header.trim().is_empty()) {
+        return Err("TSV header contains an empty column name".to_owned());
+    }
+    Ok(lines
+        .map(|line| {
+            let cells = line.split('\t').collect::<Vec<_>>();
+            headers
+                .iter()
+                .enumerate()
+                .map(|(idx, header)| {
+                    (
+                        header.clone(),
+                        cells.get(idx).copied().unwrap_or_default().to_owned(),
+                    )
+                })
+                .collect()
+        })
+        .collect())
+}
+
+#[derive(Debug, Default)]
+struct CatalogueColumns {
+    role_to_column: BTreeMap<String, String>,
+    role_to_separator: BTreeMap<String, String>,
+}
+
+impl CatalogueColumns {
+    fn from_manifest(mapping: &serde_yaml::Mapping) -> Self {
+        let mut columns = Self::default();
+        if let Some(column_mappings) = mapping
+            .get(serde_yaml::Value::String("columns".to_owned()))
+            .and_then(serde_yaml::Value::as_mapping)
+        {
+            for (column, config) in column_mappings {
+                let Some(column_name) = column.as_str() else {
+                    continue;
+                };
+                let Some(config) = config.as_mapping() else {
+                    continue;
+                };
+                let Some(role) = config
+                    .get(serde_yaml::Value::String("role".to_owned()))
+                    .and_then(serde_yaml::Value::as_str)
+                else {
+                    continue;
+                };
+                columns
+                    .role_to_column
+                    .insert(role.to_owned(), column_name.to_owned());
+                if let Some(separator) = config
+                    .get(serde_yaml::Value::String("list_separator".to_owned()))
+                    .and_then(serde_yaml::Value::as_str)
+                {
+                    columns
+                        .role_to_separator
+                        .insert(role.to_owned(), separator.to_owned());
+                }
+            }
+        }
+        columns.insert_default("id", "variant_id");
+        columns.insert_default("name", "name");
+        columns.insert_default("gene", "gene");
+        columns.insert_default("identifier.rsid", "rsid");
+        columns.insert_default("identifier.aliases", "aliases");
+        columns.insert_default("alleles.kind", "kind");
+        columns.insert_default("alleles.ref", "ref");
+        columns.insert_default("alleles.alts", "alts");
+        columns.insert_default("coordinates.grch37.chrom", "grch37_chrom");
+        columns.insert_default("coordinates.grch37.pos", "grch37_pos");
+        columns.insert_default("coordinates.grch37.start", "grch37_start");
+        columns.insert_default("coordinates.grch37.end", "grch37_end");
+        columns.insert_default("coordinates.grch38.chrom", "grch38_chrom");
+        columns.insert_default("coordinates.grch38.pos", "grch38_pos");
+        columns.insert_default("coordinates.grch38.start", "grch38_start");
+        columns.insert_default("coordinates.grch38.end", "grch38_end");
+        columns
+    }
+
+    fn insert_default(&mut self, role: &str, column: &str) {
+        self.role_to_column
+            .entry(role.to_owned())
+            .or_insert_with(|| column.to_owned());
+    }
+
+    fn value<'a>(&self, row: &'a BTreeMap<String, String>, role: &str) -> Option<&'a str> {
+        self.role_to_column
+            .get(role)
+            .and_then(|column| row.get(column))
+            .map(String::as_str)
+    }
+
+    fn separator(&self, role: &str) -> Option<&str> {
+        self.role_to_separator.get(role).map(String::as_str)
+    }
+}
diff --git a/rust/bioscript-reporting/src/manifest_members.rs b/rust/bioscript-reporting/src/manifest_members.rs
new file mode 100644
index 0000000..149e711
--- /dev/null
+++ b/rust/bioscript-reporting/src/manifest_members.rs
@@ -0,0 +1,91 @@
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ExecutablePanelMember<'a> {
+    Variant(&'a str),
+    VariantCatalogue(&'a str),
+    Assay(&'a str),
+}
+
+pub fn panel_executable_member<'a>(
+    kind: &str,
+    path: Option<&'a str>,
+) -> Result<ExecutablePanelMember<'a>, String> {
+    let path = panel_executable_member_path(kind, path)?;
+    match kind {
+        "variant" => Ok(ExecutablePanelMember::Variant(path)),
+        "variant-catalogue" => Ok(ExecutablePanelMember::VariantCatalogue(path)),
+        "assay" => Ok(ExecutablePanelMember::Assay(path)),
+        _ => unreachable!("panel_executable_member_path validates member kind"),
+    }
+}
+
+pub fn panel_executable_member_path<'a>(
+    kind: &str,
+    path: Option<&'a str>,
+) -> Result<&'a str, String> {
+    let Some(path) = path else {
+        return Err("remote panel members are not executable yet".to_owned());
+    };
+    if !matches!(kind, "variant" | "variant-catalogue" | "assay") {
+        return Err(format!("panel member kind '{kind}' is not executable"));
+    }
+    Ok(path)
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ExecutableAssayMember<'a> {
+    Variant(&'a str),
+    VariantCatalogue(&'a str),
+}
+
+pub fn assay_executable_member<'a>(
+    kind: &str,
+    path: Option<&'a str>,
+) -> Result<ExecutableAssayMember<'a>, String> {
+    let path = assay_executable_member_path(kind, path)?;
+    match kind {
+        "variant" => Ok(ExecutableAssayMember::Variant(path)),
+        "variant-catalogue" => Ok(ExecutableAssayMember::VariantCatalogue(path)),
+        _ => unreachable!("assay_executable_member_path validates member kind"),
+    }
+}
+
+pub fn assay_executable_member_path<'a>(
+    kind: &str,
+    path: Option<&'a str>,
+) -> Result<&'a str, String> {
+    if !matches!(kind, "variant" | "variant-catalogue") {
+        return Err(format!("assay member kind '{kind}' is not executable"));
+    }
+    let Some(path) = path else {
+        return Err("remote assay members are not executable yet".to_owned());
+    };
+    Ok(path)
+}
+
+pub(crate) fn traversable_manifest_member_paths<'a>(
+    schema: &str,
+    value: &'a serde_yaml::Value,
+) -> Vec<&'a str> {
+    if !manifest_has_traversable_members(schema) {
+        return Vec::new();
+    }
+    value
+        .get("members")
+        .and_then(serde_yaml::Value::as_sequence)
+        .into_iter()
+        .flatten()
+        .filter_map(traversable_manifest_member_path)
+        .collect()
+}
+
+fn manifest_has_traversable_members(schema: &str) -> bool {
+    matches!(schema, "bioscript:assay:1.0" | "bioscript:panel:1.0")
+}
+
+fn traversable_manifest_member_path(member: &serde_yaml::Value) -> Option<&str> {
+    let kind = member.get("kind").and_then(serde_yaml::Value::as_str)?;
+    if !matches!(kind, "variant" | "variant-catalogue" | "assay") {
+        return None;
+    }
+    member.get("path").and_then(serde_yaml::Value::as_str)
+}
diff --git a/rust/bioscript-schema/tests/validate_variants.rs b/rust/bioscript-schema/tests/validate_variants.rs
index 3733cda..65030c2 100644
--- a/rust/bioscript-schema/tests/validate_variants.rs
+++ b/rust/bioscript-schema/tests/validate_variants.rs
@@ -321,6 +321,9 @@ members:
   - kind: "variant"
     path: "g1-site-1.yaml"
     version: "1.0"
+  - kind: "variant-catalogue"
+    path: "catalogue/variants.yaml"
+    version: "1.0"
 interpretations:
   - id: "apol1_status"
     kind: "bioscript"
diff --git a/rust/bioscript-wasm/src/report_api.rs b/rust/bioscript-wasm/src/report_api.rs
index 459c621..b797891 100644
--- a/rust/bioscript-wasm/src/report_api.rs
+++ b/rust/bioscript-wasm/src/report_api.rs
@@ -12,7 +12,7 @@ use bioscript_formats::{
     inspect_bytes as inspect_bytes_rs,
 };
 use bioscript_runtime::{BioscriptRuntime, RuntimeConfig};
-use bioscript_schema::{PanelInterpretation, VariantManifest, load_variant_manifest_text};
+use bioscript_schema::PanelInterpretation;
 use monty::{MontyObject, ResourceLimits};
 use serde::{Deserialize, Serialize};
 use wasm_bindgen::prelude::*;
diff --git a/rust/bioscript-wasm/src/report_workspace.rs b/rust/bioscript-wasm/src/report_workspace.rs
index 85451a5..a42141c 100644
--- a/rust/bioscript-wasm/src/report_workspace.rs
+++ b/rust/bioscript-wasm/src/report_workspace.rs
@@ -65,11 +65,6 @@ impl PackageWorkspace {
         normalize_package_path(&base.join(relative).display().to_string())
     }
 
-    pub(super) fn load_variant(&self, path: &str) -> Result<VariantManifest, JsError> {
-        load_variant_manifest_text(path, self.text(path)?)
-            .map_err(|err| JsError::new(&format!("load variant {path}: {err}")))
-    }
-
     pub(super) fn run_manifest_rows(
         &self,
         manifest_path: &str,
@@ -114,9 +109,19 @@ impl PackageWorkspace {
         fallback_assembly: Option<Assembly>,
     ) -> Result<serde_json::Value, JsError> {
         let row_path = row.get("path").cloned().unwrap_or_default();
-        let manifest = self.load_variant(&row_path)?;
-        let value = self.yaml(&row_path)?;
-        let gene = yaml_string(&value, "gene").unwrap_or_default();
+        let task = bioscript_reporting::load_variant_manifest_task_by_path(self, &row_path)
+            .map_err(|err| JsError::new(&err))?;
+        let manifest = task.manifest;
+        let (gene, source, observed_alt_alleles) = if row_path.contains('#') {
+            (String::new(), serde_json::Value::Null, Vec::new())
+        } else {
+            let value = self.yaml(&row_path)?;
+            (
+                yaml_string(&value, "gene").unwrap_or_default(),
+                variant_primary_source_from_yaml(&value),
+                variant_observed_alt_alleles_from_yaml(&value),
+            )
+        };
         Ok(bioscript_reporting::app_observation_from_manifest_row(
             bioscript_reporting::AppObservationInput {
                 row,
@@ -124,8 +129,8 @@ impl PackageWorkspace {
                 assay_id,
                 manifest,
                 gene,
-                source: variant_primary_source_from_yaml(&value),
-                observed_alt_alleles: variant_observed_alt_alleles_from_yaml(&value),
+                source,
+                observed_alt_alleles,
                 inferred_sex,
                 fallback_assembly,
             },
diff --git a/rust/bioscript-wasm/src/report_workspace/analysis.rs b/rust/bioscript-wasm/src/report_workspace/analysis.rs
index e63f248..bedca4b 100644
--- a/rust/bioscript-wasm/src/report_workspace/analysis.rs
+++ b/rust/bioscript-wasm/src/report_workspace/analysis.rs
@@ -41,7 +41,7 @@ impl PackageWorkspace {
         manifest_path: &str,
         manifest_name: &str,
         interpretations: &[PanelInterpretation],
-        input_name: &str,
+        _input_name: &str,
         input_bytes: &[u8],
         preloaded_observations: &[VariantObservation],
         participant_id: &str,
@@ -72,19 +72,46 @@ impl PackageWorkspace {
                 participant_id,
                 &interpretation.id,
             );
-            let observations_file = options
+            let _observations_file = options
                 .output_dir
                 .as_deref()
                 .filter(|dir| !dir.is_empty())
                 .map(|dir| format!("{}/{}", dir.trim_end_matches('/'), observations_output_file))
                 .unwrap_or(observations_output_file);
-            let mut virtual_text_files = self.files.clone();
+            let output_extension = Path::new(&output_file)
+                .extension()
+                .and_then(|value| value.to_str())
+                .unwrap_or(analysis_format.extension);
+            let virtual_input_file = "/input/genotypes".to_owned();
+            let virtual_output_file = format!("/output/results.{output_extension}");
+            let virtual_observations_file = "/work/observations.tsv".to_owned();
+            let script_virtual_path = virtual_pipeline_path(&script_path, "analysis.py");
+            let manifest_virtual_path = virtual_pipeline_path(manifest_path, "manifest.yaml");
+            let mut virtual_text_files = BTreeMap::new();
+            virtual_text_files.insert(
+                script_virtual_path.clone(),
+                self.text(&script_path)?.to_owned(),
+            );
+            virtual_text_files.insert(
+                manifest_virtual_path.clone(),
+                self.text(manifest_path)?.to_owned(),
+            );
             virtual_text_files.insert(
-                observations_file.clone(),
+                virtual_observations_file.clone(),
                 bioscript_reporting::render_analysis_observations_tsv(preloaded_observations),
             );
+            let mut asset_paths = BTreeMap::new();
+            for asset in &interpretation.assets {
+                let asset_path = self.resolve(manifest_path, &asset.path)?;
+                let virtual_asset_path = virtual_pipeline_path(&asset_path, &asset.path);
+                virtual_text_files.insert(
+                    virtual_asset_path.clone(),
+                    self.text(&asset_path)?.to_owned(),
+                );
+                asset_paths.insert(asset.id.clone(), virtual_asset_path);
+            }
             let mut virtual_binary_files = BTreeMap::new();
-            virtual_binary_files.insert(input_name.to_owned(), input_bytes.to_vec());
+            virtual_binary_files.insert(virtual_input_file.clone(), input_bytes.to_vec());
             let limits = ResourceLimits::new()
                 .max_duration(Duration::from_millis(options.analysis_max_duration_ms))
                 .max_memory(16 * 1024 * 1024)
@@ -96,24 +123,39 @@ impl PackageWorkspace {
                 RuntimeConfig {
                     limits,
                     loader: loader.clone(),
-                    context: BTreeMap::new(),
+                    context: analysis_context(
+                        participant_id,
+                        &virtual_input_file,
+                        &script_virtual_path,
+                        &manifest_virtual_path,
+                        &asset_paths,
+                        &virtual_observations_file,
+                        &virtual_output_file,
+                    ),
                     virtual_binary_files,
-                    virtual_text_files: std::mem::take(&mut virtual_text_files),
+                    virtual_text_files,
                     preloaded_observations: preloaded_observations.to_vec(),
                 },
             )
             .map_err(|err| JsError::new(&format!("create analysis runtime failed: {err:?}")))?;
             runtime
                 .run_file(
-                    &script_path,
+                    &script_virtual_path,
                     None,
                     vec![
-                        ("input_file", MontyObject::String(input_name.to_owned())),
-                        ("output_file", MontyObject::String(output_file.clone())),
+                        (
+                            "input_file",
+                            MontyObject::String(virtual_input_file.clone()),
+                        ),
+                        (
+                            "output_file",
+                            MontyObject::String(virtual_output_file.clone()),
+                        ),
                         (
                             "observations_file",
-                            MontyObject::String(observations_file.clone()),
+                            MontyObject::String(virtual_observations_file.clone()),
                         ),
+                        ("asset_paths", monty_string_dict(&asset_paths)),
                         (
                             "participant_id",
                             MontyObject::String(participant_id.to_owned()),
@@ -124,9 +166,9 @@ impl PackageWorkspace {
                     JsError::new(&format!("analysis {} failed: {err:?}", interpretation.id))
                 })?;
             let written = runtime.virtual_written_text_files();
-            let text = written.get(&output_file).ok_or_else(|| {
+            let text = written.get(&virtual_output_file).ok_or_else(|| {
                 JsError::new(&format!(
-                    "analysis {} did not write {output_file}",
+                    "analysis {} did not write {virtual_output_file}",
                     interpretation.id
                 ))
             })?;
@@ -140,7 +182,7 @@ impl PackageWorkspace {
                     manifest_path,
                     script_path: &script_path,
                     output_file: &output_file,
-                    observations_file: Some(&observations_file),
+                    observations_file: Some(&virtual_observations_file),
                     row_headers,
                     rows,
                 },
@@ -149,3 +191,65 @@ impl PackageWorkspace {
         Ok(outputs)
     }
 }
+
+fn virtual_pipeline_path(path: &str, fallback: &str) -> String {
+    let name = Path::new(path)
+        .file_name()
+        .and_then(|value| value.to_str())
+        .unwrap_or(fallback);
+    format!("/input/pipeline/{name}")
+}
+
+fn analysis_context(
+    participant_id: &str,
+    input_file: &str,
+    script_path: &str,
+    manifest_path: &str,
+    asset_paths: &BTreeMap<String, String>,
+    observations_file: &str,
+    output_file: &str,
+) -> BTreeMap<String, MontyObject> {
+    BTreeMap::from([
+        (
+            "participant_id".to_owned(),
+            MontyObject::String(participant_id.to_owned()),
+        ),
+        (
+            "input_files".to_owned(),
+            monty_string_dict(&BTreeMap::from([(
+                "genotypes".to_owned(),
+                input_file.to_owned(),
+            )])),
+        ),
+        (
+            "pipeline_files".to_owned(),
+            monty_string_dict(&BTreeMap::from([
+                ("manifest".to_owned(), manifest_path.to_owned()),
+                ("analysis".to_owned(), script_path.to_owned()),
+            ])),
+        ),
+        ("assets".to_owned(), monty_string_dict(asset_paths)),
+        (
+            "observations_file".to_owned(),
+            MontyObject::String(observations_file.to_owned()),
+        ),
+        (
+            "output_file".to_owned(),
+            MontyObject::String(output_file.to_owned()),
+        ),
+    ])
+}
+
+fn monty_string_dict(values: &BTreeMap<String, String>) -> MontyObject {
+    MontyObject::Dict(
+        values
+            .iter()
+            .map(|(key, value)| {
+                (
+                    MontyObject::String(key.clone()),
+                    MontyObject::String(value.clone()),
+                )
+            })
+            .collect(),
+    )
+}

From bcb2dbab0124e086f087ff6664f7489608bf2c14 Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Fri, 15 May 2026 13:16:16 +1000
Subject: [PATCH 3/5] adding variant catalogue

---
 rust/bioscript-core/src/observation.rs        |  3 +-
 rust/bioscript-formats/src/genotype.rs        | 34 +++++++++++++--
 .../src/genotype/backends.rs                  | 28 ++++++++++++-
 .../src/genotype/delimited/scan.rs            | 41 ++++++++++++++++++-
 .../bioscript-formats/src/genotype/loaders.rs | 17 +++++++-
 rust/bioscript-formats/src/genotype/types.rs  |  2 +
 rust/bioscript-formats/tests/file_formats.rs  |  2 +-
 .../tests/file_formats/delimited.rs           | 27 ++++++++++--
 .../tests/file_formats/zip_and_fixtures.rs    |  9 +++-
 rust/bioscript-reporting/src/html.rs          |  3 ++
 .../src/html/observations.rs                  | 11 ++++-
 .../src/manifest_catalogue.rs                 |  9 +++-
 rust/bioscript-reporting/src/observation.rs   | 38 +++++++++++++++++
 13 files changed, 207 insertions(+), 17 deletions(-)

diff --git a/rust/bioscript-core/src/observation.rs b/rust/bioscript-core/src/observation.rs
index 40e7da9..79e8f26 100644
--- a/rust/bioscript-core/src/observation.rs
+++ b/rust/bioscript-core/src/observation.rs
@@ -94,12 +94,13 @@ pub struct ObservationRecord {
     pub facets: Option<String>,
 }
 
-pub const OBSERVATION_TSV_HEADERS: [&str; 29] = [
+pub const OBSERVATION_TSV_HEADERS: [&str; 30] = [
     "participant_id",
     "assay_id",
     "assay_version",
     "variant_key",
     "rsid",
+    "gene",
     "assembly",
     "chrom",
     "pos_start",
diff --git a/rust/bioscript-formats/src/genotype.rs b/rust/bioscript-formats/src/genotype.rs
index ff58d03..6671709 100644
--- a/rust/bioscript-formats/src/genotype.rs
+++ b/rust/bioscript-formats/src/genotype.rs
@@ -91,6 +91,7 @@ impl GenotypeStore {
                 format: GenotypeSourceFormat::Text,
                 values: HashMap::new(),
                 locus_values: HashMap::new(),
+                assembly: None,
                 source_lines: HashMap::new(),
             }),
         }
@@ -105,8 +106,9 @@ impl GenotypeStore {
                 path,
                 GenotypeSourceFormat::Text,
                 None,
+                options,
             )),
-            GenotypeSourceFormat::Zip => Self::from_zip_file(path),
+            GenotypeSourceFormat::Zip => Self::from_zip_file(path, options),
             GenotypeSourceFormat::Vcf => Ok(Self::from_vcf_file(path, options)),
             GenotypeSourceFormat::Cram => Self::from_cram_file(path, options),
             GenotypeSourceFormat::Bam => Err(RuntimeError::Unsupported(format!(
@@ -183,7 +185,7 @@ impl GenotypeStore {
         }
     }
 
-    fn from_zip_file(path: &Path) -> Result<Self, RuntimeError> {
+    fn from_zip_file(path: &Path, options: &GenotypeLoadOptions) -> Result<Self, RuntimeError> {
         let selected = select_zip_entry(path)?;
         let lower = selected.to_ascii_lowercase();
         if lower.ends_with(".vcf") || lower.ends_with(".vcf.gz") {
@@ -212,6 +214,7 @@ impl GenotypeStore {
             path,
             GenotypeSourceFormat::Zip,
             Some(selected),
+            options,
         ))
     }
 
@@ -244,12 +247,14 @@ impl GenotypeStore {
         path: &Path,
         format: GenotypeSourceFormat,
         zip_entry_name: Option<String>,
+        options: &GenotypeLoadOptions,
     ) -> Self {
         Self {
             backend: QueryBackend::Delimited(DelimitedBackend {
                 format,
                 path: path.to_path_buf(),
                 zip_entry_name,
+                options: options.clone(),
             }),
         }
     }
@@ -1115,6 +1120,10 @@ mod tests {
             format: GenotypeSourceFormat::Text,
             path: text.clone(),
             zip_entry_name: None,
+            options: GenotypeLoadOptions {
+                assembly: Some(Assembly::Grch38),
+                ..GenotypeLoadOptions::default()
+            },
         };
         let variants = vec![
             VariantSpec {
@@ -1147,6 +1156,21 @@ mod tests {
             Some("CT")
         );
 
+        let unknown_assembly_backend = DelimitedBackend {
+            format: GenotypeSourceFormat::Text,
+            path: text.clone(),
+            zip_entry_name: None,
+            options: GenotypeLoadOptions::default(),
+        };
+        let unknown_assembly_err =
+            scan_delimited_variants(&unknown_assembly_backend, &variants[1..2]).unwrap_err();
+        assert!(
+            unknown_assembly_err
+                .to_string()
+                .contains("delimited genotype input assembly is unknown"),
+            "{unknown_assembly_err}"
+        );
+
         let zip_path = dir.join("sample.zip");
         let cursor = std::io::Cursor::new(Vec::new());
         let mut writer = zip::ZipWriter::new(cursor);
@@ -1191,8 +1215,9 @@ mod tests {
 
         let unsupported_backend = DelimitedBackend {
             format: GenotypeSourceFormat::Vcf,
-            path: text,
+            path: text.clone(),
             zip_entry_name: None,
+            options: GenotypeLoadOptions::default(),
         };
         let err = scan_delimited_variants(&unsupported_backend, &variants).unwrap_err();
         assert!(
@@ -1393,6 +1418,7 @@ mod tests {
             format: GenotypeSourceFormat::Zip,
             path: zip_path.clone(),
             zip_entry_name: None,
+            options: GenotypeLoadOptions::default(),
         };
         let err = scan_delimited_variants(&bad_zip_backend, &[VariantSpec::default()]).unwrap_err();
         assert!(
@@ -1405,6 +1431,7 @@ mod tests {
             format: GenotypeSourceFormat::Zip,
             path: zip_path,
             zip_entry_name: Some("missing.csv".to_owned()),
+            options: GenotypeLoadOptions::default(),
         };
         let err =
             scan_delimited_variants(&bad_entry_backend, &[VariantSpec::default()]).unwrap_err();
@@ -1417,6 +1444,7 @@ mod tests {
             format: GenotypeSourceFormat::Text,
             path: dir.join("missing.txt"),
             zip_entry_name: None,
+            options: GenotypeLoadOptions::default(),
         };
         let err =
             scan_delimited_variants(&missing_text_backend, &[VariantSpec::default()]).unwrap_err();
diff --git a/rust/bioscript-formats/src/genotype/backends.rs b/rust/bioscript-formats/src/genotype/backends.rs
index 659328d..fa99bd5 100644
--- a/rust/bioscript-formats/src/genotype/backends.rs
+++ b/rust/bioscript-formats/src/genotype/backends.rs
@@ -1,4 +1,4 @@
-use bioscript_core::{RuntimeError, VariantObservation, VariantSpec};
+use bioscript_core::{Assembly, GenomicLocus, RuntimeError, VariantObservation, VariantSpec};
 
 use super::{
     describe_query, lookup_indexed_vcf_variants, scan_delimited_variants, scan_vcf_variants,
@@ -39,7 +39,7 @@ impl RsidMapBackend {
             }
         }
 
-        if let Some(locus) = variant.grch38.as_ref().or(variant.grch37.as_ref())
+        if let Some(locus) = delimited_locus_for_assembly(variant, self.assembly)
             && let Some((value, matched_rsid, source)) = self.locus_values.get(&(
                 locus.chrom.trim_start_matches("chr").to_ascii_lowercase(),
                 locus.start,
@@ -57,6 +57,19 @@ impl RsidMapBackend {
             });
         }
 
+        if variant.has_coordinates()
+            && self.assembly.is_none()
+            && matches!(
+                self.format,
+                GenotypeSourceFormat::Text | GenotypeSourceFormat::Zip
+            )
+        {
+            return Err(RuntimeError::Unsupported(format!(
+                "delimited genotype input assembly is unknown; refusing coordinate lookup for {}",
+                describe_query(variant)
+            )));
+        }
+
         let evidence = if variant.has_coordinates() {
             format!(
                 "no matching rsid or locus found for {}",
@@ -73,6 +86,17 @@ impl RsidMapBackend {
     }
 }
 
+pub(crate) fn delimited_locus_for_assembly(
+    variant: &VariantSpec,
+    assembly: Option<Assembly>,
+) -> Option<&GenomicLocus> {
+    match assembly {
+        Some(Assembly::Grch37) => variant.grch37.as_ref(),
+        Some(Assembly::Grch38) => variant.grch38.as_ref(),
+        None => None,
+    }
+}
+
 impl DelimitedBackend {
     pub(super) fn backend_name(&self) -> &'static str {
         match self.format {
diff --git a/rust/bioscript-formats/src/genotype/delimited/scan.rs b/rust/bioscript-formats/src/genotype/delimited/scan.rs
index 1b41b82..1a440da 100644
--- a/rust/bioscript-formats/src/genotype/delimited/scan.rs
+++ b/rust/bioscript-formats/src/genotype/delimited/scan.rs
@@ -6,10 +6,14 @@ use std::{
 
 use zip::ZipArchive;
 
+use crate::inspect::detect_assembly;
 use bioscript_core::{RuntimeError, VariantObservation, VariantSpec};
 
 use super::{
-    super::{GenotypeSourceFormat, describe_query, types::DelimitedBackend, variant_sort_key},
+    super::{
+        GenotypeSourceFormat, backends::delimited_locus_for_assembly, describe_query,
+        types::DelimitedBackend, variant_sort_key,
+    },
     DelimitedColumnIndexes, detect_delimiter, parse_streaming_row,
 };
 
@@ -25,11 +29,14 @@ pub(crate) fn scan_delimited_variants(
     let mut results = vec![VariantObservation::default(); variants.len()];
     let mut unresolved = variants.len();
 
+    let has_coordinate_queries = variants.iter().any(VariantSpec::has_coordinates);
+    let mut detected_assembly = backend.options.assembly;
+
     for (idx, variant) in &indexed {
         for rsid in &variant.rsids {
             rsid_targets.entry(rsid.clone()).or_default().push(*idx);
         }
-        if let Some(locus) = variant.grch38.as_ref().or(variant.grch37.as_ref()) {
+        if let Some(locus) = delimited_locus_for_assembly(variant, detected_assembly) {
             coord_targets
                 .entry((
                     locus.chrom.trim_start_matches("chr").to_ascii_lowercase(),
@@ -58,6 +65,36 @@ pub(crate) fn scan_delimited_variants(
         }
 
         let delimiter = detect_delimiter(&probe_lines);
+        if detected_assembly.is_none() {
+            let mut label = backend
+                .path
+                .to_string_lossy()
+                .to_ascii_lowercase()
+                .to_string();
+            if let Some(entry_name) = backend.zip_entry_name.as_ref() {
+                label.push('\n');
+                label.push_str(&entry_name.to_ascii_lowercase());
+            }
+            detected_assembly = detect_assembly(&label, &probe_lines);
+            if let Some(assembly) = detected_assembly {
+                for (idx, variant) in &indexed {
+                    if let Some(locus) = delimited_locus_for_assembly(variant, Some(assembly)) {
+                        coord_targets
+                            .entry((
+                                locus.chrom.trim_start_matches("chr").to_ascii_lowercase(),
+                                locus.start,
+                            ))
+                            .or_default()
+                            .push(*idx);
+                    }
+                }
+            } else if has_coordinate_queries {
+                return Err(RuntimeError::Unsupported(format!(
+                    "delimited genotype input assembly is unknown for {}; refusing coordinate lookup",
+                    backend.path.display()
+                )));
+            }
+        }
         let mut column_indexes: Option<DelimitedColumnIndexes> = None;
         let mut comment_header: Option<Vec<String>> = None;
 
diff --git a/rust/bioscript-formats/src/genotype/loaders.rs b/rust/bioscript-formats/src/genotype/loaders.rs
index 079a069..feed1db 100644
--- a/rust/bioscript-formats/src/genotype/loaders.rs
+++ b/rust/bioscript-formats/src/genotype/loaders.rs
@@ -1,6 +1,8 @@
 use std::{collections::HashMap, io::BufRead};
 
-use bioscript_core::RuntimeError;
+use bioscript_core::{Assembly, RuntimeError};
+
+use crate::inspect::detect_assembly;
 
 use super::{
     COMMENT_PREFIXES, GenotypeSourceFormat, GenotypeStore, QueryBackend, RowParser, RsidMapBackend,
@@ -28,6 +30,7 @@ pub(crate) fn from_vcf_reader<R: BufRead>(
         GenotypeSourceFormat::Vcf,
         values,
         HashMap::new(),
+        None,
         HashMap::new(),
     ))
 }
@@ -64,6 +67,7 @@ pub(crate) fn from_delimited_reader<R: BufRead>(
     }
 
     let mut parser = RowParser::new(delimiter.unwrap_or(super::Delimiter::Tab));
+    let assembly = detect_assembly(&label.to_ascii_lowercase(), &prelude);
     let mut values = HashMap::new();
     let mut locus_values = HashMap::new();
     let mut source_lines = HashMap::new();
@@ -93,7 +97,13 @@ pub(crate) fn from_delimited_reader<R: BufRead>(
         )?;
     }
 
-    Ok(from_rsid_map(format, values, locus_values, source_lines))
+    Ok(from_rsid_map(
+        format,
+        values,
+        locus_values,
+        assembly,
+        source_lines,
+    ))
 }
 
 pub(crate) fn from_vcf_lines(lines: Vec<String>) -> Result<GenotypeStore, RuntimeError> {
@@ -105,6 +115,7 @@ pub(crate) fn from_vcf_lines(lines: Vec<String>) -> Result<GenotypeStore, Runtim
         GenotypeSourceFormat::Vcf,
         values,
         HashMap::new(),
+        None,
         HashMap::new(),
     ))
 }
@@ -171,6 +182,7 @@ fn from_rsid_map(
     format: GenotypeSourceFormat,
     values: HashMap<String, String>,
     locus_values: HashMap<(String, i64), (String, Option<String>, String)>,
+    assembly: Option<Assembly>,
     source_lines: HashMap<String, String>,
 ) -> GenotypeStore {
     GenotypeStore {
@@ -178,6 +190,7 @@ fn from_rsid_map(
             format,
             values,
             locus_values,
+            assembly,
             source_lines,
         }),
     }
diff --git a/rust/bioscript-formats/src/genotype/types.rs b/rust/bioscript-formats/src/genotype/types.rs
index 60b0cd7..67d5473 100644
--- a/rust/bioscript-formats/src/genotype/types.rs
+++ b/rust/bioscript-formats/src/genotype/types.rs
@@ -35,6 +35,7 @@ pub(crate) struct RsidMapBackend {
     pub(crate) format: GenotypeSourceFormat,
     pub(crate) values: HashMap<String, String>,
     pub(crate) locus_values: HashMap<(String, i64), (String, Option<String>, String)>,
+    pub(crate) assembly: Option<Assembly>,
     /// Original input line per rsid, retained so wasm-side `from_bytes` loads
     /// can emit the same `| source line: …` evidence that the CLI's
     /// path-backed `DelimitedBackend` does on every lookup. Empty for
@@ -47,6 +48,7 @@ pub(crate) struct DelimitedBackend {
     pub(crate) format: GenotypeSourceFormat,
     pub(crate) path: PathBuf,
     pub(crate) zip_entry_name: Option<String>,
+    pub(crate) options: GenotypeLoadOptions,
 }
 
 #[derive(Debug, Clone)]
diff --git a/rust/bioscript-formats/tests/file_formats.rs b/rust/bioscript-formats/tests/file_formats.rs
index 104ea84..a561b34 100644
--- a/rust/bioscript-formats/tests/file_formats.rs
+++ b/rust/bioscript-formats/tests/file_formats.rs
@@ -5,7 +5,7 @@ use std::{
     time::{SystemTime, UNIX_EPOCH},
 };
 
-use bioscript_core::{VariantKind, VariantSpec};
+use bioscript_core::{Assembly, VariantKind, VariantSpec};
 use bioscript_formats::{
     GenotypeLoadOptions, GenotypeSourceFormat, GenotypeStore, QueryKind, alignment,
 };
diff --git a/rust/bioscript-formats/tests/file_formats/delimited.rs b/rust/bioscript-formats/tests/file_formats/delimited.rs
index fd9a733..15b450b 100644
--- a/rust/bioscript-formats/tests/file_formats/delimited.rs
+++ b/rust/bioscript-formats/tests/file_formats/delimited.rs
@@ -14,7 +14,14 @@ fn delimited_parser_handles_comments_blank_lines_csv_and_split_alleles() {
     )
     .unwrap();
 
-    let store = GenotypeStore::from_file(&path).unwrap();
+    let store = GenotypeStore::from_file_with_options(
+        &path,
+        &GenotypeLoadOptions {
+            assembly: Some(Assembly::Grch38),
+            ..GenotypeLoadOptions::default()
+        },
+    )
+    .unwrap();
 
     assert_eq!(store.get("rs73885319").unwrap().as_deref(), Some("AG"));
     assert_eq!(store.get("rs60910145").unwrap().as_deref(), Some("--"));
@@ -55,7 +62,14 @@ fn delimited_parser_uses_comment_headers_aliases_quotes_and_extra_columns() {
     )
     .unwrap();
 
-    let store = GenotypeStore::from_file(&path).unwrap();
+    let store = GenotypeStore::from_file_with_options(
+        &path,
+        &GenotypeLoadOptions {
+            assembly: Some(Assembly::Grch38),
+            ..GenotypeLoadOptions::default()
+        },
+    )
+    .unwrap();
 
     assert_eq!(store.get("rsQuoted").unwrap().as_deref(), Some("AT"));
     assert_eq!(store.get("rsSlash").unwrap().as_deref(), Some("ID"));
@@ -87,7 +101,14 @@ fn delimited_parser_handles_space_delimited_rows_without_headers_and_inline_comm
     )
     .unwrap();
 
-    let store = GenotypeStore::from_file(&path).unwrap();
+    let store = GenotypeStore::from_file_with_options(
+        &path,
+        &GenotypeLoadOptions {
+            assembly: Some(Assembly::Grch38),
+            ..GenotypeLoadOptions::default()
+        },
+    )
+    .unwrap();
 
     assert_eq!(store.get("rsSpace").unwrap().as_deref(), Some("CT"));
     let observation = store
diff --git a/rust/bioscript-formats/tests/file_formats/zip_and_fixtures.rs b/rust/bioscript-formats/tests/file_formats/zip_and_fixtures.rs
index d814223..68b8af9 100644
--- a/rust/bioscript-formats/tests/file_formats/zip_and_fixtures.rs
+++ b/rust/bioscript-formats/tests/file_formats/zip_and_fixtures.rs
@@ -11,7 +11,14 @@ fn batch_lookup_preserves_input_order_after_coordinate_sorting() {
          rs1\t1\t10\tAG\n",
     )
     .unwrap();
-    let store = GenotypeStore::from_file(&path).unwrap();
+    let store = GenotypeStore::from_file_with_options(
+        &path,
+        &GenotypeLoadOptions {
+            assembly: Some(Assembly::Grch38),
+            ..GenotypeLoadOptions::default()
+        },
+    )
+    .unwrap();
 
     let results = store
         .lookup_variants(&[
diff --git a/rust/bioscript-reporting/src/html.rs b/rust/bioscript-reporting/src/html.rs
index 608f1b3..d536f26 100644
--- a/rust/bioscript-reporting/src/html.rs
+++ b/rust/bioscript-reporting/src/html.rs
@@ -25,6 +25,9 @@ pub fn render_app_html_document(
     let mut out = String::from(
         r##"<!doctype html><meta charset="utf-8"><title>BioScript report</title><style>body{font-family:system-ui,sans-serif;margin:0;background:#f7f8fa;color:#1f2933}.wrap{max-width:1440px;margin:0 auto;padding:24px}h1{margin:0 0 10px}h2{margin:32px 0 10px;scroll-margin-top:82px}.nav{position:sticky;top:0;z-index:20;display:flex;gap:8px;flex-wrap:wrap;align-items:center;margin:16px -24px 22px;padding:10px 24px;background:rgba(247,248,250,.96);border-block:1px solid #d8dee6;backdrop-filter:saturate(160%) blur(8px)}.nav a{border:1px solid #cbd5df;background:#fff;color:#1f2933;text-decoration:none;padding:7px 10px;border-radius:6px}.nav a:hover{background:#eef2f6}.table-tools,.level-filter{display:flex;justify-content:space-between;gap:12px;align-items:flex-start;margin:6px 0}.table-tools input{width:min(420px,100%);border:1px solid #cbd5df;border-radius:6px;padding:7px 9px;font:inherit;background:#fff}.table-tools label,.level-filter label{display:flex;gap:5px;align-items:center}.level-filter{justify-content:flex-start;flex-wrap:wrap}.filter-scale{display:flex;flex-direction:column;gap:6px}.filter-scale-title{font-weight:700;color:#344054;display:flex;gap:6px;align-items:center}.filter-options{display:flex;flex-wrap:wrap;gap:6px 10px}.filter-actions{display:flex;gap:8px;align-self:flex-end}.level-filter a{display:inline-grid;place-items:center;width:20px;height:20px;border:1px solid #cbd5df;border-radius:50%;text-decoration:none;color:#1f2933;background:#fff;font-weight:700}.filter-action,.pgx-tabs button{border:1px solid #cbd5df;background:#fff;color:#1f2933;border-radius:6px;padding:4px 7px;font:inherit;cursor:pointer}.filter-action:hover,.pgx-tabs button:hover{background:#eef2f6}.pgx-tabs{display:flex;gap:8px;margin:10px 0}.pgx-tabs button.active{background:#1f2933;border-color:#1f2933;color:#fff}.table-wrap{overflow:auto;border:1px solid #d8dee6;background:white;border-radius:8px}table{border-collapse:collapse;width:100%;font-size:13px}td,th{border-bottom:1px solid #e5e9ef;padding:6px 8px;text-align:left;vertical-align:top}th{position:sticky;top:0;background:#eef2f6;z-index:1;white-space:nowrap;cursor:pointer;user-select:none}.sort-mark{font-size:10px;color:#667085;margin-left:3px}.row-variant td{background:#fff7cc}.row-reference td{background:#eaf7ee}.debug-hidden .debug-col{display:none}.participant-filter{display:flex;gap:8px;align-items:center;margin:12px 0}.participant-filter select{border:1px solid #cbd5df;border-radius:6px;padding:6px 8px;background:#fff;font:inherit}.genotype-hit{font-weight:700}.allele-hit{color:#075985;background:#dff4ff;border-radius:3px;padding:0 2px}.level-badge,.pgx-badge{display:inline-block;min-width:2.2em;text-align:center;border-radius:999px;padding:2px 8px;color:#fff;font-weight:700}.level-1,.pgx-informative{background:#0abc72}.level-2,.pgx-actionable{background:#2a74df}.level-3,.pgx-recommended{background:#ffc107;color:#1f2933}.level-4,.pgx-required{background:#c53b3b}.pgx-no-clinical,.pgx-criteria,.pgx-unknown,.level-unknown{background:#667085}.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace}.muted{color:#667085}.effect{max-width:760px;min-width:360px}.analysis-kv{display:grid;grid-template-columns:max-content minmax(0,1fr);gap:6px 14px;background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 12px;margin:8px 0 10px}.analysis-kv dt{font-weight:700;color:#344054}.analysis-kv dd{margin:0;min-width:0}.analysis-badge{display:inline-block;border:1px solid #cbd5df;border-radius:999px;background:#eef2f6;padding:1px 8px;font-weight:700}.analysis-badge-normal{background:#eaf7ee;border-color:#9fd6ad;color:#14532d}.analysis-badge-variant{background:#fff7cc;border-color:#f0d66a;color:#713f12}.analysis-badge-unknown{background:#eef2f6;border-color:#cbd5df;color:#475467}.analysis-card{background:#fff;border:1px solid #cbd5df;border-radius:8px;margin:14px 0;overflow:hidden}.analysis-card summary{display:flex;align-items:center;gap:10px;padding:11px 13px;background:#eef2f6;cursor:pointer;font-weight:700}.analysis-card-body{padding:12px 14px}.analysis-card-title{font-size:16px}.logic-note,.analysis-notes{background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 12px;margin:8px 0 10px}.logic-note h4,.analysis-notes h4,h4{margin:0 0 6px}.logic-note p,.analysis-notes p{margin:0 0 6px}.analysis-notes p:last-child{margin-bottom:0}.provenance-list{background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 18px}.provenance-list li{margin:8px 0}pre{white-space:pre-wrap;background:#fff;padding:12px;border:1px solid #d8dee6;border-radius:8px;max-height:520px;overflow:auto}</style><script>const sortState={};function cellText(row,i){const cell=row.cells[i];return(cell?.dataset.sort||cell?.innerText||"").trim()}function cmp(a,b){const an=Number(a),bn=Number(b);if(a!==""&&b!==""&&!Number.isNaN(an)&&!Number.isNaN(bn))return an-bn;return a.localeCompare(b,undefined,{numeric:true,sensitivity:"base"})}function sortTable(id,col){const table=document.getElementById(id);const tbody=table.tBodies[0];const key=id+":"+col;const dir=sortState[key]==="asc"?"desc":"asc";sortState[key]=dir;table.querySelectorAll(".sort-mark").forEach(s=>s.textContent="");table.tHead.rows[0].cells[col].querySelector(".sort-mark").textContent=dir==="asc"?"^":"v";Array.from(tbody.rows).sort((a,b)=>{const v=cmp(cellText(a,col),cellText(b,col));return dir==="asc"?v:-v}).forEach(r=>tbody.appendChild(r))}let selectedParticipant="";function participantOk(row){return !selectedParticipant||!row.dataset.participant||row.dataset.participant===selectedParticipant}function pgxLevelOk(row){if(!row.dataset.pgxLevel&&!row.dataset.level)return true;const key=row.dataset.pgxLevel||row.dataset.level||"unknown";const input=document.querySelector('[data-pgx-any-level-filter="'+key+'"]')||document.querySelector('[data-pgx-any-level-filter="unknown"]');return !!input&&input.checked}function pgxOutcomeOk(row){const key=row.dataset.pgxOutcome;if(!key)return true;const input=document.querySelector('[data-pgx-outcome-filter="'+key+'"]');return !input||input.checked}function observationOk(row){const key=row.dataset.observation;if(!key)return true;const input=document.querySelector('[data-observation-filter="'+key+'"]');return !input||input.checked}function applyTableFilters(id){const q=(document.querySelector('[data-filter-for="'+id+'"]')?.value||"").toLowerCase();document.querySelectorAll("#"+id+" tbody tr").forEach(row=>{let ok=row.innerText.toLowerCase().includes(q)&&participantOk(row)&&pgxLevelOk(row)&&pgxOutcomeOk(row)&&observationOk(row);row.style.display=ok?"":"none"})}function applyPgxFilters(){document.querySelectorAll('#pgx table[id]').forEach(table=>applyTableFilters(table.id));document.querySelectorAll('#pgx .pgx-drug-group').forEach(group=>{const visible=Array.from(group.querySelectorAll('tbody tr')).some(row=>row.style.display!=='none');group.style.display=visible?'':'none'})}function setPgxFilterGroup(checked){document.querySelectorAll("[data-pgx-any-level-filter]").forEach(input=>input.checked=checked);applyPgxFilters()}function setObservationFilterGroup(checked){document.querySelectorAll("[data-observation-filter]").forEach(input=>input.checked=checked);applyTableFilters("observations-table")}function setPgxView(view){document.getElementById("pgx-view-variant").hidden=view!=="variant";document.getElementById("pgx-view-drug").hidden=view!=="drug";document.getElementById("pgx-tab-variant")?.classList.toggle("active",view==="variant");document.getElementById("pgx-tab-drug")?.classList.toggle("active",view==="drug")}function setParticipant(value){selectedParticipant=value;document.querySelectorAll("table[id]").forEach(table=>applyTableFilters(table.id))}function toggleDebug(show){document.getElementById("report-wrap").classList.toggle("debug-hidden",!show)}</script><div class="wrap debug-hidden" id="report-wrap">"##,
     );
+    out.push_str(
+        r#"<style>.ref-alt-cell{max-width:38rem}.ref-alt-cell .truncate-cell{display:inline-block;max-width:min(38rem,62vw);overflow:hidden;text-overflow:ellipsis;white-space:nowrap;vertical-align:bottom}</style>"#,
+    );
     let label_findings = collect_report_findings(reports, "bioscript:pgx-label:1.0");
     let summary_findings = collect_report_findings(reports, "bioscript:pgx-summary:1.0");
     let has_pgx_findings = !label_findings.is_empty() || !summary_findings.is_empty();
diff --git a/rust/bioscript-reporting/src/html/observations.rs b/rust/bioscript-reporting/src/html/observations.rs
index a5cab20..1c9ef86 100644
--- a/rust/bioscript-reporting/src/html/observations.rs
+++ b/rust/bioscript-reporting/src/html/observations.rs
@@ -170,7 +170,7 @@ pub(super) fn render_observation_cell(
         return;
     }
     if header == "ref_alt" {
-        class_cell(out, &observation_ref_alt(observation), "mono");
+        ref_alt_cell(out, observation);
         return;
     }
     if header == "allele_balance" {
@@ -230,6 +230,15 @@ pub(super) fn render_observation_cell(
     );
 }
 
+fn ref_alt_cell(out: &mut String, observation: &serde_json::Value) {
+    let value = observation_ref_alt(observation);
+    let escaped_value = html_escape(&value);
+    let _ = write!(
+        out,
+        "<td class=\"mono ref-alt-cell\" title=\"{escaped_value}\"><span class=\"truncate-cell\">{escaped_value}</span></td>"
+    );
+}
+
 pub(super) fn observation_is_imputed_vcf_reference(observation: &serde_json::Value) -> bool {
     observation
         .get("evidence_raw")
diff --git a/rust/bioscript-reporting/src/manifest_catalogue.rs b/rust/bioscript-reporting/src/manifest_catalogue.rs
index 20666bb..e7db82f 100644
--- a/rust/bioscript-reporting/src/manifest_catalogue.rs
+++ b/rust/bioscript-reporting/src/manifest_catalogue.rs
@@ -111,10 +111,17 @@ fn catalogue_row_task(
             idx + 2
         ));
     }
+    let mut tags = catalogue_tags.to_vec();
+    if let Some(gene) = columns.value(row, "gene").filter(|value| !value.is_empty()) {
+        let gene_tag = format!("gene:{gene}");
+        if !tags.iter().any(|tag| tag == &gene_tag) {
+            tags.push(gene_tag);
+        }
+    }
     let manifest = VariantManifest {
         path: PathBuf::from(format!("{catalogue_path}#{variant_id}")),
         name,
-        tags: catalogue_tags.to_vec(),
+        tags,
         spec,
     };
     Ok(VariantManifestTask {
diff --git a/rust/bioscript-reporting/src/observation.rs b/rust/bioscript-reporting/src/observation.rs
index 38e4ced..8fd79ba 100644
--- a/rust/bioscript-reporting/src/observation.rs
+++ b/rust/bioscript-reporting/src/observation.rs
@@ -229,6 +229,16 @@ fn render_app_observation_json(input: AppObservationJson) -> serde_json::Value {
         weak_indel_match,
         zygosity,
     } = input;
+    let gene = if gene.is_empty() {
+        manifest_gene_from_tags(&manifest).unwrap_or_default()
+    } else {
+        gene
+    };
+    let source = if source.is_null() {
+        manifest_default_source(&row, &manifest)
+    } else {
+        source
+    };
     serde_json::json!({
         "participant_id": row.get("participant_id").cloned().unwrap_or_default(),
         "assay_id": assay_id,
@@ -269,6 +279,34 @@ fn render_app_observation_json(input: AppObservationJson) -> serde_json::Value {
     })
 }
 
+fn manifest_gene_from_tags(manifest: &VariantManifest) -> Option<String> {
+    manifest.tags.iter().find_map(|tag| {
+        tag.strip_prefix("gene:")
+            .filter(|gene| !gene.is_empty())
+            .map(ToOwned::to_owned)
+    })
+}
+
+fn manifest_default_source(
+    row: &BTreeMap<String, String>,
+    manifest: &VariantManifest,
+) -> serde_json::Value {
+    let rsid = row
+        .get("matched_rsid")
+        .filter(|rsid| !rsid.is_empty())
+        .or_else(|| row.get("rsid").filter(|rsid| !rsid.is_empty()))
+        .or_else(|| manifest.spec.rsids.first().filter(|rsid| !rsid.is_empty()));
+    let Some(rsid) = rsid else {
+        return serde_json::Value::Null;
+    };
+    serde_json::json!({
+        "kind": "database",
+        "label": "dbSNP / NCBI SNP",
+        "url": format!("https://www.ncbi.nlm.nih.gov/snp/{rsid}"),
+        "fields": ["identifiers.rsids"],
+    })
+}
+
 fn assembly_row_value(assembly: Assembly) -> String {
     match assembly {
         Assembly::Grch37 => "grch37".to_owned(),

From d79a3f493d5b8a940ede975b667ea0b60760dda1 Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Fri, 15 May 2026 14:06:24 +1000
Subject: [PATCH 4/5] fixing linting

---
 .../src/genotype/delimited/scan.rs            |   6 +-
 rust/bioscript-reporting/src/html.rs          |   2 +-
 rust/bioscript-reporting/src/observation.rs   | 214 +-----------------
 .../src/observation/genotype_display.rs       | 207 +++++++++++++++++
 4 files changed, 218 insertions(+), 211 deletions(-)
 create mode 100644 rust/bioscript-reporting/src/observation/genotype_display.rs

diff --git a/rust/bioscript-formats/src/genotype/delimited/scan.rs b/rust/bioscript-formats/src/genotype/delimited/scan.rs
index 87d9a87..a9248dd 100644
--- a/rust/bioscript-formats/src/genotype/delimited/scan.rs
+++ b/rust/bioscript-formats/src/genotype/delimited/scan.rs
@@ -68,11 +68,7 @@ pub(crate) fn scan_delimited_variants(
 
         let delimiter = detect_delimiter(&probe_lines);
         if detected_assembly.is_none() {
-            let mut label = backend
-                .path
-                .to_string_lossy()
-                .to_ascii_lowercase()
-                .to_string();
+            let mut label = backend.path.to_string_lossy().to_ascii_lowercase();
             if let Some(entry_name) = backend.zip_entry_name.as_ref() {
                 label.push('\n');
                 label.push_str(&entry_name.to_ascii_lowercase());
diff --git a/rust/bioscript-reporting/src/html.rs b/rust/bioscript-reporting/src/html.rs
index 7bd0293..29f978c 100644
--- a/rust/bioscript-reporting/src/html.rs
+++ b/rust/bioscript-reporting/src/html.rs
@@ -26,7 +26,7 @@ pub fn render_app_html_document(
         r##"<!doctype html><meta charset="utf-8"><title>BioScript report</title><style>body{font-family:system-ui,sans-serif;margin:0;background:#f7f8fa;color:#1f2933}.wrap{max-width:1440px;margin:0 auto;padding:24px}h1{margin:0 0 10px}h2{margin:32px 0 10px;scroll-margin-top:82px}.nav{position:sticky;top:0;z-index:20;display:flex;gap:8px;flex-wrap:wrap;align-items:center;margin:16px -24px 22px;padding:10px 24px;background:rgba(247,248,250,.96);border-block:1px solid #d8dee6;backdrop-filter:saturate(160%) blur(8px)}.nav a{border:1px solid #cbd5df;background:#fff;color:#1f2933;text-decoration:none;padding:7px 10px;border-radius:6px}.nav a:hover{background:#eef2f6}.table-tools,.level-filter{display:flex;justify-content:space-between;gap:12px;align-items:flex-start;margin:6px 0}.table-tools input{width:min(420px,100%);border:1px solid #cbd5df;border-radius:6px;padding:7px 9px;font:inherit;background:#fff}.table-tools label,.level-filter label{display:flex;gap:5px;align-items:center}.level-filter{justify-content:flex-start;flex-wrap:wrap}.filter-scale{display:flex;flex-direction:column;gap:6px}.filter-scale-title{font-weight:700;color:#344054;display:flex;gap:6px;align-items:center}.filter-options{display:flex;flex-wrap:wrap;gap:6px 10px}.filter-actions{display:flex;gap:8px;align-self:flex-end}.level-filter a{display:inline-grid;place-items:center;width:20px;height:20px;border:1px solid #cbd5df;border-radius:50%;text-decoration:none;color:#1f2933;background:#fff;font-weight:700}.filter-action,.pgx-tabs button{border:1px solid #cbd5df;background:#fff;color:#1f2933;border-radius:6px;padding:4px 7px;font:inherit;cursor:pointer}.filter-action:hover,.pgx-tabs button:hover{background:#eef2f6}.pgx-tabs{display:flex;gap:8px;margin:10px 0}.pgx-tabs button.active{background:#1f2933;border-color:#1f2933;color:#fff}.table-wrap{overflow:auto;border:1px solid #d8dee6;background:white;border-radius:8px}table{border-collapse:collapse;width:100%;font-size:13px}td,th{border-bottom:1px solid #e5e9ef;padding:6px 8px;text-align:left;vertical-align:top}th{position:sticky;top:0;background:#eef2f6;z-index:1;white-space:nowrap;cursor:pointer;user-select:none}.sort-mark{font-size:10px;color:#667085;margin-left:3px}.row-variant td{background:#fff7cc}.row-reference td{background:#eaf7ee}.debug-hidden .debug-col{display:none}.participant-filter{display:flex;gap:8px;align-items:center;margin:12px 0}.participant-filter select{border:1px solid #cbd5df;border-radius:6px;padding:6px 8px;background:#fff;font:inherit}.genotype-hit{font-weight:700}.allele-hit{color:#075985;background:#dff4ff;border-radius:3px;padding:0 2px}.level-badge,.pgx-badge{display:inline-block;min-width:2.2em;text-align:center;border-radius:999px;padding:2px 8px;color:#fff;font-weight:700}.level-1,.pgx-informative{background:#0abc72}.level-2,.pgx-actionable{background:#2a74df}.level-3,.pgx-recommended{background:#ffc107;color:#1f2933}.level-4,.pgx-required{background:#c53b3b}.pgx-no-clinical,.pgx-criteria,.pgx-unknown,.level-unknown{background:#667085}.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace}.muted{color:#667085}.effect{max-width:760px;min-width:360px}.analysis-kv{display:grid;grid-template-columns:max-content minmax(0,1fr);gap:6px 14px;background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 12px;margin:8px 0 10px}.analysis-kv dt{font-weight:700;color:#344054}.analysis-kv dd{margin:0;min-width:0}.analysis-badge{display:inline-block;border:1px solid #cbd5df;border-radius:999px;background:#eef2f6;padding:1px 8px;font-weight:700}.analysis-badge-normal{background:#eaf7ee;border-color:#9fd6ad;color:#14532d}.analysis-badge-variant{background:#fff7cc;border-color:#f0d66a;color:#713f12}.analysis-badge-unknown{background:#eef2f6;border-color:#cbd5df;color:#475467}.analysis-card{background:#fff;border:1px solid #cbd5df;border-radius:8px;margin:14px 0;overflow:hidden}.analysis-card summary{display:flex;align-items:center;gap:10px;padding:11px 13px;background:#eef2f6;cursor:pointer;font-weight:700}.analysis-card-body{padding:12px 14px}.analysis-card-title{font-size:16px}.logic-note,.analysis-notes{background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 12px;margin:8px 0 10px}.logic-note h4,.analysis-notes h4,h4{margin:0 0 6px}.logic-note p,.analysis-notes p{margin:0 0 6px}.analysis-notes p:last-child{margin-bottom:0}.provenance-list{background:#fff;border:1px solid #d8dee6;border-radius:8px;padding:10px 18px}.provenance-list li{margin:8px 0}pre{white-space:pre-wrap;background:#fff;padding:12px;border:1px solid #d8dee6;border-radius:8px;max-height:520px;overflow:auto}</style><script>const sortState={};function cellText(row,i){const cell=row.cells[i];return(cell?.dataset.sort||cell?.innerText||"").trim()}function cmp(a,b){const an=Number(a),bn=Number(b);if(a!==""&&b!==""&&!Number.isNaN(an)&&!Number.isNaN(bn))return an-bn;return a.localeCompare(b,undefined,{numeric:true,sensitivity:"base"})}function sortTable(id,col){const table=document.getElementById(id);const tbody=table.tBodies[0];const key=id+":"+col;const dir=sortState[key]==="asc"?"desc":"asc";sortState[key]=dir;table.querySelectorAll(".sort-mark").forEach(s=>s.textContent="");table.tHead.rows[0].cells[col].querySelector(".sort-mark").textContent=dir==="asc"?"^":"v";Array.from(tbody.rows).sort((a,b)=>{const v=cmp(cellText(a,col),cellText(b,col));return dir==="asc"?v:-v}).forEach(r=>tbody.appendChild(r))}let selectedParticipant="";function participantOk(row){return !selectedParticipant||!row.dataset.participant||row.dataset.participant===selectedParticipant}function pgxLevelOk(row){if(!row.dataset.pgxLevel&&!row.dataset.level)return true;const key=row.dataset.pgxLevel||row.dataset.level||"unknown";const input=document.querySelector('[data-pgx-any-level-filter="'+key+'"]')||document.querySelector('[data-pgx-any-level-filter="unknown"]');return !!input&&input.checked}function pgxOutcomeOk(row){const key=row.dataset.pgxOutcome;if(!key)return true;const input=document.querySelector('[data-pgx-outcome-filter="'+key+'"]');return !input||input.checked}function observationOk(row){const key=row.dataset.observation;if(!key)return true;const input=document.querySelector('[data-observation-filter="'+key+'"]');return !input||input.checked}function applyTableFilters(id){const q=(document.querySelector('[data-filter-for="'+id+'"]')?.value||"").toLowerCase();document.querySelectorAll("#"+id+" tbody tr").forEach(row=>{let ok=row.innerText.toLowerCase().includes(q)&&participantOk(row)&&pgxLevelOk(row)&&pgxOutcomeOk(row)&&observationOk(row);row.style.display=ok?"":"none"})}function applyPgxFilters(){document.querySelectorAll('#pgx table[id]').forEach(table=>applyTableFilters(table.id));document.querySelectorAll('#pgx .pgx-drug-group').forEach(group=>{const visible=Array.from(group.querySelectorAll('tbody tr')).some(row=>row.style.display!=='none');group.style.display=visible?'':'none'})}function setPgxFilterGroup(checked){document.querySelectorAll("[data-pgx-any-level-filter]").forEach(input=>input.checked=checked);applyPgxFilters()}function setObservationFilterGroup(checked){document.querySelectorAll("[data-observation-filter]").forEach(input=>input.checked=checked);applyTableFilters("observations-table")}function setPgxView(view){document.getElementById("pgx-view-variant").hidden=view!=="variant";document.getElementById("pgx-view-drug").hidden=view!=="drug";document.getElementById("pgx-tab-variant")?.classList.toggle("active",view==="variant");document.getElementById("pgx-tab-drug")?.classList.toggle("active",view==="drug")}function setParticipant(value){selectedParticipant=value;document.querySelectorAll("table[id]").forEach(table=>applyTableFilters(table.id))}function toggleDebug(show){document.getElementById("report-wrap").classList.toggle("debug-hidden",!show)}</script><div class="wrap debug-hidden" id="report-wrap">"##,
     );
     out.push_str(
-        r#"<style>.ref-alt-cell{max-width:38rem}.ref-alt-cell .truncate-cell{display:inline-block;max-width:min(38rem,62vw);overflow:hidden;text-overflow:ellipsis;white-space:nowrap;vertical-align:bottom}</style>"#,
+        r"<style>.ref-alt-cell{max-width:38rem}.ref-alt-cell .truncate-cell{display:inline-block;max-width:min(38rem,62vw);overflow:hidden;text-overflow:ellipsis;white-space:nowrap;vertical-align:bottom}</style>",
     );
     let label_findings = collect_report_findings(reports, "bioscript:pgx-label:1.0");
     let summary_findings = collect_report_findings(reports, "bioscript:pgx-summary:1.0");
diff --git a/rust/bioscript-reporting/src/observation.rs b/rust/bioscript-reporting/src/observation.rs
index 337602c..1341143 100644
--- a/rust/bioscript-reporting/src/observation.rs
+++ b/rust/bioscript-reporting/src/observation.rs
@@ -1,15 +1,20 @@
 use std::collections::BTreeMap;
 
-use bioscript_core::{Assembly, GenomicLocus, VariantKind};
-use bioscript_formats::{InferredSex, SexDetectionConfidence, SexInference};
+use bioscript_core::{Assembly, GenomicLocus};
+use bioscript_formats::SexInference;
 use bioscript_schema::VariantManifest;
 
 mod facets;
+mod genotype_display;
 
 use facets::{
     classify_non_reportable_alleles, is_weak_delimited_indel_match, observation_facets,
     parse_optional_u32,
 };
+use genotype_display::{
+    assembly_row_value, deletion_copy_number_display, genotype_display_from_raw_counts,
+    hemizygous_display_genotype, normalize_app_genotype, observation_evidence_raw,
+};
 
 pub struct AppObservationInput<'a> {
     pub row: &'a BTreeMap<String, String>,
@@ -307,212 +312,11 @@ fn manifest_default_source(
     })
 }
 
-fn assembly_row_value(assembly: Assembly) -> String {
-    match assembly {
-        Assembly::Grch37 => "grch37".to_owned(),
-        Assembly::Grch38 => "grch38".to_owned(),
-    }
-}
-
-fn hemizygous_display_genotype(display: &str) -> String {
-    display
-        .chars()
-        .find(char::is_ascii_alphabetic)
-        .map_or_else(|| display.to_owned(), |allele| allele.to_string())
-}
-
-fn deletion_copy_number_display(
-    row: &BTreeMap<String, String>,
-    manifest: &VariantManifest,
-    depth: Option<u32>,
-    alt_count: Option<u32>,
-) -> Option<String> {
-    if !matches!(manifest.spec.kind, Some(VariantKind::Deletion)) {
-        return None;
-    }
-    if !matches!(row.get("backend").map(String::as_str), Some("cram" | "bam")) {
-        return None;
-    }
-    if manifest.spec.reference.as_deref().unwrap_or_default().len() <= 1 {
-        return None;
-    }
-    let depth = depth?;
-    if depth == 0 {
-        return None;
-    }
-    let alt_fraction = f64::from(alt_count.unwrap_or(0)) / f64::from(depth);
-    if alt_fraction >= 0.8 {
-        Some("DD".to_owned())
-    } else if alt_fraction <= 0.2 {
-        Some("II".to_owned())
-    } else {
-        Some("DI".to_owned())
-    }
-}
-
-fn normalize_app_genotype(
-    display: &str,
-    ref_allele: &str,
-    alt_allele: &str,
-    kind: Option<VariantKind>,
-    chrom: &str,
-    inferred_sex: Option<&SexInference>,
-) -> (String, String) {
-    if display.is_empty() {
-        return ("./.".to_owned(), "unknown".to_owned());
-    }
-    if matches!(kind, Some(VariantKind::Deletion))
-        && ref_allele.len() != 1
-        && display
-            .chars()
-            .filter(char::is_ascii_alphabetic)
-            .all(|allele| matches!(allele.to_ascii_uppercase(), 'I' | 'D'))
-    {
-        return normalize_app_genotype(display, "I", "D", None, chrom, inferred_sex);
-    }
-    let alleles: Vec<char> = display.chars().filter(char::is_ascii_alphabetic).collect();
-    if ref_allele.len() != 1 || alt_allele.len() != 1 {
-        return (display.to_owned(), "unknown".to_owned());
-    }
-    let ref_ch = ref_allele.chars().next().unwrap_or_default();
-    let alt_ch = alt_allele.chars().next().unwrap_or_default();
-    if alleles.len() == 1 && is_haploid_sex_chromosome(chrom) {
-        let allele = alleles[0];
-        if allele == ref_ch {
-            return ("0".to_owned(), "hem_ref".to_owned());
-        }
-        if allele == alt_ch {
-            return ("1".to_owned(), "hem_alt".to_owned());
-        }
-        return (display.to_owned(), "unknown".to_owned());
-    }
-    if alleles.len() != 2 {
-        return (display.to_owned(), "unknown".to_owned());
-    }
-    if is_confident_male_sex_chromosome(chrom, inferred_sex) && alleles[0] == alleles[1] {
-        let allele = alleles[0];
-        if allele == ref_ch {
-            return ("0".to_owned(), "hem_ref".to_owned());
-        }
-        if allele == alt_ch {
-            return ("1".to_owned(), "hem_alt".to_owned());
-        }
-        return (display.to_owned(), "unknown".to_owned());
-    }
-    let alt_count = alleles.iter().filter(|allele| **allele == alt_ch).count();
-    let ref_count = alleles.iter().filter(|allele| **allele == ref_ch).count();
-    match (ref_count, alt_count) {
-        (2, 0) => ("0/0".to_owned(), "hom_ref".to_owned()),
-        (1, 1) => ("0/1".to_owned(), "het".to_owned()),
-        (0, 2) => ("1/1".to_owned(), "hom_alt".to_owned()),
-        _ => (display.to_owned(), "unknown".to_owned()),
-    }
-}
-
-fn is_confident_male_sex_chromosome(chrom: &str, inferred_sex: Option<&SexInference>) -> bool {
-    is_haploid_sex_chromosome(chrom)
-        && inferred_sex.is_some_and(|sex| {
-            sex.sex == InferredSex::Male
-                && matches!(
-                    sex.confidence,
-                    SexDetectionConfidence::High | SexDetectionConfidence::Medium
-                )
-        })
-}
-
-fn is_haploid_sex_chromosome(chrom: &str) -> bool {
-    matches!(
-        chrom
-            .trim()
-            .trim_start_matches("chr")
-            .trim_start_matches("CHR")
-            .to_ascii_uppercase()
-            .as_str(),
-        "X" | "Y" | "23" | "24"
-    )
-}
-
-fn observation_evidence_raw(
-    row: &BTreeMap<String, String>,
-    chrom: &str,
-    inferred_sex: Option<&SexInference>,
-) -> String {
-    let mut evidence_raw = row.get("evidence").cloned().unwrap_or_default();
-    if !is_haploid_sex_chromosome(chrom) {
-        return evidence_raw;
-    }
-    let Some(inferred_sex) = inferred_sex else {
-        return evidence_raw;
-    };
-    let sex_evidence = sex_inference_evidence_raw(inferred_sex);
-    if sex_evidence.is_empty() {
-        return evidence_raw;
-    }
-    if evidence_raw.is_empty() {
-        evidence_raw = sex_evidence;
-    } else {
-        evidence_raw.push_str(" | ");
-        evidence_raw.push_str(&sex_evidence);
-    }
-    evidence_raw
-}
-
-fn sex_inference_evidence_raw(inferred_sex: &SexInference) -> String {
-    let sex = match inferred_sex.sex {
-        InferredSex::Male => "male",
-        InferredSex::Female => "female",
-        InferredSex::Unknown => "unknown",
-    };
-    let confidence = match inferred_sex.confidence {
-        SexDetectionConfidence::High => "high",
-        SexDetectionConfidence::Medium => "medium",
-        SexDetectionConfidence::Low => "low",
-    };
-    let mut fields = vec![
-        format!("detected_sex={sex}"),
-        format!("sex_confidence={confidence}"),
-        format!("sex_method={}", inferred_sex.method),
-    ];
-    fields.extend(
-        inferred_sex
-            .evidence
-            .iter()
-            .map(|item| format!("sex_{item}")),
-    );
-    fields.join(" ")
-}
-
-fn genotype_display_from_raw_counts(raw_counts: &str) -> Option<String> {
-    let counts: serde_json::Map<String, serde_json::Value> =
-        serde_json::from_str(raw_counts).ok()?;
-    let mut items = counts
-        .into_iter()
-        .filter_map(|(base, count)| {
-            let base = base.chars().next()?.to_ascii_uppercase();
-            let count = count.as_u64()?;
-            if matches!(base, 'A' | 'C' | 'G' | 'T') && count > 0 {
-                Some((base, count))
-            } else {
-                None
-            }
-        })
-        .collect::<Vec<_>>();
-    if items.is_empty() {
-        return None;
-    }
-    items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
-    let total = items.iter().map(|(_, count)| *count).sum::<u64>();
-    let (top_base, top_count) = items[0];
-    if total == 0 || items.len() == 1 || top_count.saturating_mul(10) >= total.saturating_mul(8) {
-        return Some(format!("{top_base}{top_base}"));
-    }
-    Some(format!("{}{}", top_base, items[1].0))
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
-    use bioscript_core::VariantSpec;
+    use bioscript_core::{VariantKind, VariantSpec};
+    use bioscript_formats::{InferredSex, SexDetectionConfidence};
     use std::path::PathBuf;
 
     #[test]
diff --git a/rust/bioscript-reporting/src/observation/genotype_display.rs b/rust/bioscript-reporting/src/observation/genotype_display.rs
new file mode 100644
index 0000000..c944d2e
--- /dev/null
+++ b/rust/bioscript-reporting/src/observation/genotype_display.rs
@@ -0,0 +1,207 @@
+use std::collections::BTreeMap;
+
+use bioscript_core::{Assembly, VariantKind};
+use bioscript_formats::{InferredSex, SexDetectionConfidence, SexInference};
+use bioscript_schema::VariantManifest;
+
+pub(super) fn assembly_row_value(assembly: Assembly) -> String {
+    match assembly {
+        Assembly::Grch37 => "grch37".to_owned(),
+        Assembly::Grch38 => "grch38".to_owned(),
+    }
+}
+
+pub(super) fn hemizygous_display_genotype(display: &str) -> String {
+    display
+        .chars()
+        .find(char::is_ascii_alphabetic)
+        .map_or_else(|| display.to_owned(), |allele| allele.to_string())
+}
+
+pub(super) fn deletion_copy_number_display(
+    row: &BTreeMap<String, String>,
+    manifest: &VariantManifest,
+    depth: Option<u32>,
+    alt_count: Option<u32>,
+) -> Option<String> {
+    if !matches!(manifest.spec.kind, Some(VariantKind::Deletion)) {
+        return None;
+    }
+    if !matches!(row.get("backend").map(String::as_str), Some("cram" | "bam")) {
+        return None;
+    }
+    if manifest.spec.reference.as_deref().unwrap_or_default().len() <= 1 {
+        return None;
+    }
+    let depth = depth?;
+    if depth == 0 {
+        return None;
+    }
+    let alt_fraction = f64::from(alt_count.unwrap_or(0)) / f64::from(depth);
+    if alt_fraction >= 0.8 {
+        Some("DD".to_owned())
+    } else if alt_fraction <= 0.2 {
+        Some("II".to_owned())
+    } else {
+        Some("DI".to_owned())
+    }
+}
+
+pub(super) fn normalize_app_genotype(
+    display: &str,
+    ref_allele: &str,
+    alt_allele: &str,
+    kind: Option<VariantKind>,
+    chrom: &str,
+    inferred_sex: Option<&SexInference>,
+) -> (String, String) {
+    if display.is_empty() {
+        return ("./.".to_owned(), "unknown".to_owned());
+    }
+    if matches!(kind, Some(VariantKind::Deletion))
+        && ref_allele.len() != 1
+        && display
+            .chars()
+            .filter(char::is_ascii_alphabetic)
+            .all(|allele| matches!(allele.to_ascii_uppercase(), 'I' | 'D'))
+    {
+        return normalize_app_genotype(display, "I", "D", None, chrom, inferred_sex);
+    }
+    let alleles: Vec<char> = display.chars().filter(char::is_ascii_alphabetic).collect();
+    if ref_allele.len() != 1 || alt_allele.len() != 1 {
+        return (display.to_owned(), "unknown".to_owned());
+    }
+    let ref_ch = ref_allele.chars().next().unwrap_or_default();
+    let alt_ch = alt_allele.chars().next().unwrap_or_default();
+    if alleles.len() == 1 && is_haploid_sex_chromosome(chrom) {
+        let allele = alleles[0];
+        if allele == ref_ch {
+            return ("0".to_owned(), "hem_ref".to_owned());
+        }
+        if allele == alt_ch {
+            return ("1".to_owned(), "hem_alt".to_owned());
+        }
+        return (display.to_owned(), "unknown".to_owned());
+    }
+    if alleles.len() != 2 {
+        return (display.to_owned(), "unknown".to_owned());
+    }
+    if is_confident_male_sex_chromosome(chrom, inferred_sex) && alleles[0] == alleles[1] {
+        let allele = alleles[0];
+        if allele == ref_ch {
+            return ("0".to_owned(), "hem_ref".to_owned());
+        }
+        if allele == alt_ch {
+            return ("1".to_owned(), "hem_alt".to_owned());
+        }
+        return (display.to_owned(), "unknown".to_owned());
+    }
+    let alt_count = alleles.iter().filter(|allele| **allele == alt_ch).count();
+    let ref_count = alleles.iter().filter(|allele| **allele == ref_ch).count();
+    match (ref_count, alt_count) {
+        (2, 0) => ("0/0".to_owned(), "hom_ref".to_owned()),
+        (1, 1) => ("0/1".to_owned(), "het".to_owned()),
+        (0, 2) => ("1/1".to_owned(), "hom_alt".to_owned()),
+        _ => (display.to_owned(), "unknown".to_owned()),
+    }
+}
+
+fn is_confident_male_sex_chromosome(chrom: &str, inferred_sex: Option<&SexInference>) -> bool {
+    is_haploid_sex_chromosome(chrom)
+        && inferred_sex.is_some_and(|sex| {
+            sex.sex == InferredSex::Male
+                && matches!(
+                    sex.confidence,
+                    SexDetectionConfidence::High | SexDetectionConfidence::Medium
+                )
+        })
+}
+
+fn is_haploid_sex_chromosome(chrom: &str) -> bool {
+    matches!(
+        chrom
+            .trim()
+            .trim_start_matches("chr")
+            .trim_start_matches("CHR")
+            .to_ascii_uppercase()
+            .as_str(),
+        "X" | "Y" | "23" | "24"
+    )
+}
+
+pub(super) fn observation_evidence_raw(
+    row: &BTreeMap<String, String>,
+    chrom: &str,
+    inferred_sex: Option<&SexInference>,
+) -> String {
+    let mut evidence_raw = row.get("evidence").cloned().unwrap_or_default();
+    if !is_haploid_sex_chromosome(chrom) {
+        return evidence_raw;
+    }
+    let Some(inferred_sex) = inferred_sex else {
+        return evidence_raw;
+    };
+    let sex_evidence = sex_inference_evidence_raw(inferred_sex);
+    if sex_evidence.is_empty() {
+        return evidence_raw;
+    }
+    if evidence_raw.is_empty() {
+        evidence_raw = sex_evidence;
+    } else {
+        evidence_raw.push_str(" | ");
+        evidence_raw.push_str(&sex_evidence);
+    }
+    evidence_raw
+}
+
+fn sex_inference_evidence_raw(inferred_sex: &SexInference) -> String {
+    let sex = match inferred_sex.sex {
+        InferredSex::Male => "male",
+        InferredSex::Female => "female",
+        InferredSex::Unknown => "unknown",
+    };
+    let confidence = match inferred_sex.confidence {
+        SexDetectionConfidence::High => "high",
+        SexDetectionConfidence::Medium => "medium",
+        SexDetectionConfidence::Low => "low",
+    };
+    let mut fields = vec![
+        format!("detected_sex={sex}"),
+        format!("sex_confidence={confidence}"),
+        format!("sex_method={}", inferred_sex.method),
+    ];
+    fields.extend(
+        inferred_sex
+            .evidence
+            .iter()
+            .map(|item| format!("sex_{item}")),
+    );
+    fields.join(" ")
+}
+
+pub(super) fn genotype_display_from_raw_counts(raw_counts: &str) -> Option<String> {
+    let counts: serde_json::Map<String, serde_json::Value> =
+        serde_json::from_str(raw_counts).ok()?;
+    let mut items = counts
+        .into_iter()
+        .filter_map(|(base, count)| {
+            let base = base.chars().next()?.to_ascii_uppercase();
+            let count = count.as_u64()?;
+            if matches!(base, 'A' | 'C' | 'G' | 'T') && count > 0 {
+                Some((base, count))
+            } else {
+                None
+            }
+        })
+        .collect::<Vec<_>>();
+    if items.is_empty() {
+        return None;
+    }
+    items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+    let total = items.iter().map(|(_, count)| *count).sum::<u64>();
+    let (top_base, top_count) = items[0];
+    if total == 0 || items.len() == 1 || top_count.saturating_mul(10) >= total.saturating_mul(8) {
+        return Some(format!("{top_base}{top_base}"));
+    }
+    Some(format!("{}{}", top_base, items[1].0))
+}

From d8868640fb2e7b5d457ee0a49f203c1de3afe5ca Mon Sep 17 00:00:00 2001
From: Madhava Jay <me@madhavajay.com>
Date: Fri, 15 May 2026 14:08:54 +1000
Subject: [PATCH 5/5] linting

---
 rust/bioscript-cli/src/report_execution.rs | 72 +++++++++++++++-------
 1 file changed, 51 insertions(+), 21 deletions(-)

diff --git a/rust/bioscript-cli/src/report_execution.rs b/rust/bioscript-cli/src/report_execution.rs
index 0c251f3..a9a5ff8 100644
--- a/rust/bioscript-cli/src/report_execution.rs
+++ b/rust/bioscript-cli/src/report_execution.rs
@@ -162,27 +162,17 @@ fn run_bioscript_analysis_script(input: &BioscriptAnalysisScriptInput<'_>) -> Re
     let virtual_output_file = format!("/output/results.{output_extension}");
     let script_virtual_path = virtual_pipeline_path(input.script_path, "analysis.py");
     let manifest_virtual_path = virtual_pipeline_path(input.manifest_path, "manifest.yaml");
-    let mut virtual_text_files = BTreeMap::new();
-    virtual_text_files.insert(script_virtual_path.clone(), script_text);
-    virtual_text_files.insert(
-        manifest_virtual_path.clone(),
-        fs::read_to_string(input.manifest_path).map_err(|err| {
-            format!(
-                "failed to read analysis manifest {}: {err}",
-                input.manifest_path.display()
-            )
-        })?,
-    );
-    virtual_text_files.insert(virtual_observations_file.clone(), observations_text);
-    let mut asset_paths = BTreeMap::new();
-    for asset in &input.interpretation.assets {
-        let asset_path = resolve_manifest_path(input.runtime_root, input.manifest_path, &asset.path)?;
-        let virtual_asset_path = virtual_pipeline_path(&asset_path, &asset.path);
-        let text = fs::read_to_string(&asset_path)
-            .map_err(|err| format!("failed to read analysis asset {}: {err}", asset_path.display()))?;
-        virtual_text_files.insert(virtual_asset_path.clone(), text);
-        asset_paths.insert(asset.id.clone(), virtual_asset_path);
-    }
+    let AnalysisVirtualTextFiles {
+        text_files: virtual_text_files,
+        asset_paths,
+    } = collect_analysis_virtual_text_files(
+        input,
+        &script_virtual_path,
+        script_text,
+        &manifest_virtual_path,
+        &virtual_observations_file,
+        observations_text,
+    )?;
     let mut virtual_binary_files = BTreeMap::new();
     virtual_binary_files.insert(virtual_input_file.clone(), input_bytes);
     let context = analysis_context(
@@ -239,6 +229,46 @@ fn run_bioscript_analysis_script(input: &BioscriptAnalysisScriptInput<'_>) -> Re
     persist_virtual_output_files(&written, &virtual_output_file, input.output_file)
 }
 
+struct AnalysisVirtualTextFiles {
+    text_files: BTreeMap<String, String>,
+    asset_paths: BTreeMap<String, String>,
+}
+
+fn collect_analysis_virtual_text_files(
+    input: &BioscriptAnalysisScriptInput<'_>,
+    script_virtual_path: &str,
+    script_text: String,
+    manifest_virtual_path: &str,
+    virtual_observations_file: &str,
+    observations_text: String,
+) -> Result<AnalysisVirtualTextFiles, String> {
+    let mut virtual_text_files = BTreeMap::new();
+    virtual_text_files.insert(script_virtual_path.to_owned(), script_text);
+    virtual_text_files.insert(
+        manifest_virtual_path.to_owned(),
+        fs::read_to_string(input.manifest_path).map_err(|err| {
+            format!(
+                "failed to read analysis manifest {}: {err}",
+                input.manifest_path.display()
+            )
+        })?,
+    );
+    virtual_text_files.insert(virtual_observations_file.to_owned(), observations_text);
+    let mut asset_paths = BTreeMap::new();
+    for asset in &input.interpretation.assets {
+        let asset_path = resolve_manifest_path(input.runtime_root, input.manifest_path, &asset.path)?;
+        let virtual_asset_path = virtual_pipeline_path(&asset_path, &asset.path);
+        let text = fs::read_to_string(&asset_path)
+            .map_err(|err| format!("failed to read analysis asset {}: {err}", asset_path.display()))?;
+        virtual_text_files.insert(virtual_asset_path.clone(), text);
+        asset_paths.insert(asset.id.clone(), virtual_asset_path);
+    }
+    Ok(AnalysisVirtualTextFiles {
+        text_files: virtual_text_files,
+        asset_paths,
+    })
+}
+
 fn persist_virtual_output_files(
     written: &BTreeMap<String, String>,
     primary_virtual_output_file: &str,