MapleEve · MapleEve · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/app/pipeline/contracts/__init__.py b/app/pipeline/contracts/__init__.py
@@ -2,13 +2,16 @@
 
 from .asr import ASRProvider, ASRRequest, ASRResult
 from .artifacts import (
+    ARTIFACT_MANIFEST_VERSION,
     AsyncUploadReader,
     AudioArtifactIndex,
+    ArtifactManifestEntry,
     PersistedTranscriptionArtifacts,
     SavedUploadArtifact,
     TranscriptionArtifactStore,
     TranscriptionArtifactWriteRequest,
     UploadPersistenceRequest,
+    build_artifact_manifest,
 )
 from .context import PipelineContext
 from .diarization import (
@@ -48,13 +51,15 @@
     "ASRProvider",
     "ASRRequest",
     "ASRResult",
+    "ARTIFACT_MANIFEST_VERSION",
     "AsyncUploadReader",
     "AudioArtifactIndex",
     "AudioEnhancementProvider",
     "AudioEnhancementRequest",
     "AudioEnhancementResult",
     "AudioNormalizationRequest",
     "AudioNormalizationResult",
+    "ArtifactManifestEntry",
     "DiarizationProvider",
     "DiarizationRequest",
     "DiarizationResult",
@@ -76,4 +81,5 @@
     "VoiceprintMatchProvider",
     "VoiceprintMatchRequest",
     "VoiceprintMatchResult",
+    "build_artifact_manifest",
 ]
diff --git a/app/pipeline/contracts/artifacts.py b/app/pipeline/contracts/artifacts.py
@@ -6,6 +6,8 @@
 from pathlib import Path
 from typing import Any, Protocol, runtime_checkable
 
+ARTIFACT_MANIFEST_VERSION = "artifact_manifest.v1"
+
 
 class AsyncUploadReader(Protocol):
     """Minimal async file interface used by UploadFile and test doubles."""
@@ -64,6 +66,49 @@ class PersistedTranscriptionArtifacts:
     embedding_paths: dict[str, Path]
 
 
+@dataclass(frozen=True, slots=True)
+class ArtifactManifestEntry:
+    """Public-safe description of one artifact attached to a completed result.
+
+    The descriptor names an artifact and its role; it is intended to avoid
+    exposing host-local paths. Clients may ignore the whole manifest.
+    """
+
+    name: str
+    filename: str
+    role: str
+    media_type: str
+    required_for_result: bool = False
+    speaker_label: str | None = None
+
+    def as_dict(self) -> dict[str, Any]:
+        """Return the entry as a plain dict.
+
+        ``speaker_label`` is emitted only when it is not ``None``.
+        """
+        # Tuple order fixes the key order of the emitted dict.
+        keys = ("name", "filename", "role", "media_type", "required_for_result")
+        serialized: dict[str, Any] = {key: getattr(self, key) for key in keys}
+        if self.speaker_label is not None:
+            serialized["speaker_label"] = self.speaker_label
+        return serialized
+
+
+def build_artifact_manifest(
+    stable: list[ArtifactManifestEntry],
+    optional: list[ArtifactManifestEntry] | None = None,
+    experimental: list[ArtifactManifestEntry] | None = None,
+) -> dict[str, Any]:
+    """Return the versioned manifest dict; omitted categories become empty lists."""
+
+    manifest: dict[str, Any] = {"manifest_version": ARTIFACT_MANIFEST_VERSION}
+    manifest["stable"] = [item.as_dict() for item in stable]
+    # ``None`` stands for "no entries" in the two non-stable categories.
+    for category, items in (("optional", optional), ("experimental", experimental)):
+        manifest[category] = [item.as_dict() for item in items or []]
+    return manifest
+
+
 @runtime_checkable
 class TranscriptionArtifactStore(Protocol):
     """Stable slot for persisting completed transcription artifacts."""
@@ -74,11 +119,14 @@ def persist_transcription(
 
 
 __all__ = [
+    "ARTIFACT_MANIFEST_VERSION",
     "AsyncUploadReader",
     "AudioArtifactIndex",
+    "ArtifactManifestEntry",
     "PersistedTranscriptionArtifacts",
     "SavedUploadArtifact",
     "TranscriptionArtifactStore",
     "TranscriptionArtifactWriteRequest",
     "UploadPersistenceRequest",
+    "build_artifact_manifest",
 ]
diff --git a/app/providers/artifacts/default.py b/app/providers/artifacts/default.py
@@ -6,8 +6,14 @@
 from pathlib import Path
 
 from config import DENOISE_MODEL, DENOISE_SNR_THRESHOLD
+from infra.audio.paths import safe_speaker_label
 from infra.transcription_artifacts import persist_transcription_artifacts
-from pipeline.contracts import PipelineContext, PipelineResult
+from pipeline.contracts import (
+    ArtifactManifestEntry,
+    PipelineContext,
+    PipelineResult,
+    build_artifact_manifest,
+)
 
 
 class InMemoryArtifactsProvider:
@@ -89,6 +95,7 @@ def _build_transcription(self, context: PipelineContext) -> dict | None:
             context.aligned_segments,
             context.voiceprint_matches,
         )
+        embedding_labels = sorted(context.speaker_embeddings)
         warning = None
         if not context.voiceprint_matches and not context.speaker_embeddings:
             warning = "no_speakers_detected"
@@ -111,6 +118,7 @@ def _build_transcription(self, context: PipelineContext) -> dict | None:
                 "max_speakers": context.request.max_speakers,
                 "no_repeat_ngram_size": context.request.no_repeat_ngram_size or 0,
             },
+            "artifacts": self._build_artifact_manifest(embedding_labels),
         }
         if context.transcription_result is not None:
             guard_report = context.transcription_result.get("hallucination_guard")
@@ -123,6 +131,29 @@ def _build_transcription(self, context: PipelineContext) -> dict | None:
             transcription["warning"] = warning
         return transcription
 
+    @staticmethod
+    def _build_artifact_manifest(speaker_labels: list[str]) -> dict:
+        """List ``result.json`` plus one embedding entry per label as stable."""
+        result_entry = ArtifactManifestEntry(
+            name="result",
+            filename="result.json",
+            role="primary_result",
+            media_type="application/json",
+            required_for_result=True,
+        )
+        embedding_entries = [
+            ArtifactManifestEntry(
+                name="speaker_embedding",
+                filename=f"emb_{safe_speaker_label(label)}.npy",
+                role="speaker_embedding",
+                media_type="application/octet-stream",
+                speaker_label=label,
+            )
+            for label in speaker_labels
+        ]
+        # The primary result always comes first in the stable list.
+        return build_artifact_manifest(stable=[result_entry, *embedding_entries])
+
     def build(self, context: PipelineContext) -> PipelineResult:
         transcription = self._build_transcription(context)
         artifact_paths = None

diff --git a/doc/api.en.md b/doc/api.en.md
@@ -180,6 +180,28 @@ practice, omit `denoise_model` to inherit `DENOISE_MODEL`, send
       "max_speakers": 0,
       "no_repeat_ngram_size": 0
     },
+    "artifacts": {
+      "manifest_version": "artifact_manifest.v1",
+      "stable": [
+        {
+          "name": "result",
+          "filename": "result.json",
+          "role": "primary_result",
+          "media_type": "application/json",
+          "required_for_result": true
+        },
+        {
+          "name": "speaker_embedding",
+          "filename": "emb_SPEAKER_00.npy",
+          "role": "speaker_embedding",
+          "media_type": "application/octet-stream",
+          "required_for_result": false,
+          "speaker_label": "SPEAKER_00"
+        }
+      ],
+      "optional": [],
+      "experimental": []
+    },
     "alignment": {
       "status": "succeeded",
       "language": "en",
@@ -198,7 +220,8 @@ enrollment or rename call.
 **Result contract anchors**: completed results report `status="completed"` in
 the persisted transcription object. `segments[].speaker_label` is always the
 raw diarization cluster label. `segments[].words` and top-level `alignment` are
-optional metadata; clients must tolerate either field being absent.
+optional metadata; top-level `artifacts` is optional as well. Clients must
+tolerate these fields being absent.
 
 `speaker_id` / `speaker_name`: matching uses an **adaptive threshold**, not a
 fixed `0.75` cutoff. Actual logic:
@@ -259,6 +282,14 @@ no need to cross-reference the original request. See
 [`configuration.en.md`](./configuration.en.md) for each setting's source and
 default.
 
+**`artifacts`** is an optional manifest describing stable, optional, and
+experimental artifacts that live alongside this result. Current stable entries
+include the primary `result.json` and one `emb_<speaker_label>.npy` speaker
+embedding per cluster. Entries are grouped by category and carry only `name`,
+`filename`, `role`, `media_type`, `required_for_result`, and an optional
+`speaker_label`; no local paths, hosts, tokens, real job runtime paths, or debug
+data are exposed. Default clients do not need this field, and older results without `artifacts` remain compatible.
+
 Completed `GET /api/jobs/{id}` results and `GET /api/transcriptions/{id}` share the
 same payload shape. That means `speaker_map` and `unique_speakers` are available in
 the completed job result as well:
@@ -289,6 +320,7 @@ aggregation fields for UI / downstream consumers:
 | --- | --- | --- |
 | `speaker_map` | object | `speaker_label → {matched_id, matched_name, similarity, embedding_key}` mapping; reflects the **diarization model's voiceprint match result** and does not change when segments are manually corrected |
 | `unique_speakers` | array[string] | Deduplicated list of speaker names, recalculated from the persisted `segments[].speaker_name` values to reflect the latest manual corrections |
+| `artifacts` | object | Optional artifact manifest for stable / optional / experimental artifacts; clients must tolerate it being absent |
 
 ### `GET /api/export/{tr_id}`
 

diff --git a/doc/api.zh.md b/doc/api.zh.md
@@ -175,6 +175,28 @@ curl -X POST http://localhost:8780/api/transcribe \
       "max_speakers": 0,
       "no_repeat_ngram_size": 0
     },
+    "artifacts": {
+      "manifest_version": "artifact_manifest.v1",
+      "stable": [
+        {
+          "name": "result",
+          "filename": "result.json",
+          "role": "primary_result",
+          "media_type": "application/json",
+          "required_for_result": true
+        },
+        {
+          "name": "speaker_embedding",
+          "filename": "emb_SPEAKER_00.npy",
+          "role": "speaker_embedding",
+          "media_type": "application/octet-stream",
+          "required_for_result": false,
+          "speaker_label": "SPEAKER_00"
+        }
+      ],
+      "optional": [],
+      "experimental": []
+    },
     "alignment": {
       "status": "succeeded",
       "language": "zh",
@@ -191,7 +213,8 @@ curl -X POST http://localhost:8780/api/transcribe \
 
 **结果契约锚点**：完成态持久化转写对象会带 `status="completed"`。
 `segments[].speaker_label` 永远是原始 diarization cluster 标签。
-`segments[].words` 和顶层 `alignment` 都是可选元数据，客户端必须能接受字段缺失。
+`segments[].words`、顶层 `alignment` 和顶层 `artifacts` 都是可选元数据，
+客户端必须能接受字段缺失。
 
 `speaker_id` 和 `speaker_name`：匹配采用**自适应阈值**，不是固定 0.75。实际逻辑：
 
@@ -231,6 +254,12 @@ alignment 模型会记录为 `jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-c
 都可独立解读，无需再查原始请求。各配置项来源和默认值见
 [`configuration.zh.md`](./configuration.zh.md)。
 
+**`artifacts`** 是可选 manifest，用于描述与该结果同目录的稳定、可选和实验性
+artifact。当前稳定项包括主结果 `result.json` 和每个说话人 cluster 的
+`emb_<speaker_label>.npy`。manifest 只暴露名称、文件名、角色、类别、媒体类型、
+`required_for_result` 和 `speaker_label`，不暴露本地路径、主机、token、真实 job 运行路径或调试信息。
+默认客户端不需要依赖该字段；老结果没有 `artifacts` 时仍应按兼容结果处理。
+
 `GET /api/jobs/{id}` 的完成态结果与 `GET /api/transcriptions/{id}` 使用同一份
 持久化结果结构，因此完成态里同样会带上 `speaker_map` 和 `unique_speakers`：
 
@@ -260,6 +289,7 @@ alignment 模型会记录为 `jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-c
 | --- | --- | --- |
 | `speaker_map` | object | `speaker_label → {matched_id, matched_name, similarity, embedding_key}` 的映射，反映 **diarization 模型的声纹匹配结果**，不随人工单段纠错变化；便于前端一次性渲染人名下拉 / 统计 |
 | `unique_speakers` | array[string] | 去重后的说话人名列表，从持久化结果里的 `segments[].speaker_name` 重算，反映最新的人工纠错结果 |
+| `artifacts` | object | 可选 artifact manifest；用于发现结果相关的稳定 / 可选 / 实验 artifact，缺失时必须兼容 |
 
 与 `GET /api/jobs/{id}` 不同，本端点始终从磁盘读取持久化结果，**进程重启后仍可访问**，
 也能反映最新的人工纠错；`/api/jobs/{id}` 优先读内存，内存未命中时才回落到磁盘（见上方注意事项）。

diff --git a/doc/changelog.en.md b/doc/changelog.en.md
@@ -4,6 +4,15 @@
 
 ## Unreleased
 
+### Features
+
+- Added an optional `artifacts` manifest to completed results. The manifest
+  lists artifact category, role, filename, media type, and `speaker_label`
+  where relevant; it does not expose local paths, job runtime paths, hosts,
+  tokens, or debug data. Default clients can continue to rely only on the
+  `result.json` primary view, and must treat unknown or missing `artifacts`
+  fields as compatible.
+
 ## 0.7.6 — Health, alignment, and embedding runtime fixes (2026-05-07)
 
 ### Security

diff --git a/doc/changelog.zh.md b/doc/changelog.zh.md
@@ -4,6 +4,13 @@
 
 ## Unreleased
 
+### 功能
+
+- 新增完成态结果的可选 `artifacts` manifest。该字段只列出当前结果相关 artifact
+  的类别、角色、文件名、媒体类型和 `speaker_label`，不暴露本地路径、job 运行路径、
+  host、token 或调试信息。默认客户端仍只需读取 `result.json` 主视图；未知或缺失
+  `artifacts` 字段必须被视为兼容。
+
 ## 0.7.6 — 健康检查、alignment 与 embedding 运行时修复 (2026-05-07)
 
 ### 安全

diff --git a/doc/configuration.en.md b/doc/configuration.en.md
@@ -193,12 +193,17 @@ Stable anchors in completed transcription results:
 - Top-level `alignment`: optional forced-alignment metadata, sanitized.
 - Top-level `params`: effective per-job processing settings, including request
   overrides and service defaults used for this result.
+- Top-level `artifacts`: optional artifact manifest listing stable / optional /
+  experimental artifact filenames, roles, categories, media types, and
+  `speaker_label` values; it never exposes local paths, hosts, tokens, or debug
+  data.
 - `speaker_map`: diarization cluster to voiceprint match map; manual segment
   corrections do not rewrite it.
 - `unique_speakers`: deduplicated current segment display names.
 
 New fields are added under the optional-field principle. Clients should ignore
-unknown fields and tolerate missing `words`, `alignment`, and `warning`.
+unknown fields and tolerate missing `words`, `alignment`, `artifacts`, and
+`warning`.
 
 ## v0.7.6 Validation Wording
 

diff --git a/doc/configuration.zh.md b/doc/configuration.zh.md
@@ -178,11 +178,14 @@ cohort 生命周期：
 - `segments[].words`：可选词级 alignment。
 - 顶层 `alignment`：可选 forced-alignment 元数据，字段内容会脱敏。
 - 顶层 `params`：记录本次任务实际使用的请求级与服务级处理参数，便于离线解释结果。
+- 顶层 `artifacts`：可选 artifact manifest，只列出稳定 / 可选 / 实验 artifact 的
+  文件名、角色、类别、媒体类型和 `speaker_label`；不暴露本地路径、主机、token 或
+  调试信息。
 - `speaker_map`：diarization cluster 到声纹匹配结果的映射；人工改单段说话人不会回写它。
 - `unique_speakers`：按当前 segment 展示名去重后的列表。
 
 新增字段按可选字段原则扩展；客户端应忽略不认识的字段，并容忍 `words` /
-`alignment` / `warning` 缺失。
+`alignment` / `artifacts` / `warning` 缺失。
 
 ## v0.7.6 验证口径