EtanHey · EtanHey · May 27, 2026 · May 27, 2026 · chatgpt-codex-connector · May 27, 2026
@@ -373,6 +373,10 @@ def _loads_tags(value: Any) -> set[str]:
     return set()
 
 
+def _tags_json_equivalent(left: Any, right: Any) -> bool:  # True when both values decode to the same tag set
+    return _loads_tags(left) == _loads_tags(right)  # _loads_tags yields set[str], so tag order is irrelevant
+
+
 def _max_optional_number(left: Any, right: Any) -> Any:
     values = []
     for value in (left, right):
@@ -640,11 +644,13 @@ def merge_existing_chunk_seen(conn: Any, *, chunk_id: str, incoming: dict[str, A
     last_seen_at = _latest_timestamp(
         existing_last, existing_created, incoming.get("last_seen_at"), incoming.get("created_at")
     )
+    merged_tags_json = json.dumps(merged_tags) if merged_tags else None
     updates: dict[str, Any] = {
-        "tags": json.dumps(merged_tags) if merged_tags else None,
         "seen_count": int(existing_seen or 1) + int(incoming.get("seen_count") or 1),
         "last_seen_at": last_seen_at,
     }
+    if not _tags_json_equivalent(existing_tags, merged_tags_json):
+        updates["tags"] = merged_tags_json
     if merged_importance is not None:
         updates["importance"] = merged_importance
     if merged_half_life is not None:

@@ -97,8 +97,21 @@ def _columns(conn: apsw.Connection, table: str) -> set[str]:
     return {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}
 
 
+def _content_hash(content: str) -> str:  # SHA-256 hex digest of the content, ignoring surrounding whitespace
+    return hashlib.sha256(content.strip().encode("utf-8")).hexdigest()  # strip, UTF-8 encode, then hash
+
+
+def _preview_text(values: dict[str, Any]) -> str:  # preview built from summary or content, at most 220 chars
+    summary = str(values.get("summary") or "").strip()  # falsy or missing values become ""
+    content = str(values.get("content") or "").strip()
+    source = summary or content  # prefer the summary; fall back to content when the summary is blank
+    return source.replace("\n", " ").replace("\r", " ").replace("\t", " ")[:220]  # flatten first, then truncate
+
+
 def _insert_chunk(conn: apsw.Connection, values: dict[str, Any]) -> None:
     cols = _columns(conn, "chunks")
+    if "preview_text" in cols and not str(values.get("preview_text") or "").strip():
+        values = {**values, "preview_text": _preview_text(values)}
     if "content" in values:
         fields = compute_dedupe_fields(str(values["content"]), values.get("created_at"))
         values = {
@@ -226,6 +239,7 @@ def _apply_store(conn: apsw.Connection, event: dict[str, Any]) -> ApplyResult:
             "summary": content[:200],
             "tags": json.dumps(tags) if tags else None,
             "importance": float(event["importance"]) if event.get("importance") is not None else None,
+            "content_hash": _content_hash(content),
             "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")),
         },
     )
@@ -296,6 +310,7 @@ def _apply_watcher(conn: apsw.Connection, event: dict[str, Any]) -> None:
             "conversation_id": event.get("conversation_id"),
             "sender": event.get("sender"),
             "tags": json.dumps(tags) if tags else None,
+            "content_hash": _content_hash(content),
             "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")),
         },
     )
@@ -344,6 +359,7 @@ def _apply_hook(conn: apsw.Connection, event: dict[str, Any]) -> None:
             "created_at": datetime.fromtimestamp(timestamp, timezone.utc).isoformat(),
             "conversation_id": session_id,
             "importance": 5,
+            "content_hash": _content_hash(content),
             "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")),
         },
     )
@@ -356,6 +372,14 @@ def _apply_enrichment(conn: apsw.Connection, event: dict[str, Any]) -> None:
         return
     enrichment = event.get("enrichment") or {}
     cols = _columns(conn, "chunks")
+    if "content_hash" in cols and event.get("content_hash"):
+        row = conn.execute("SELECT content_hash, content FROM chunks WHERE id = ?", (chunk_id,)).fetchone()
+        if not row:
+            return
+        current_hash = row[0] or _content_hash(str(row[1] or ""))
+        if current_hash and current_hash != event["content_hash"]:
+            logger.warning("Skipping stale enrichment for chunk_id=%s content_hash mismatch", chunk_id)
+            return
     updates: dict[str, Any] = {}
     mappings = {
         "summary": "summary",
@@ -507,7 +531,9 @@ def drain_once(
 
     lock_fd = _acquire_queue_lock(queue_dir)
     try:
-        files = sorted(queue_dir.glob("*.jsonl"))[:batch_size]
+        files = sorted(queue_dir.glob("*.jsonl"), key=lambda path: (path.name.startswith("enrichment-"), path.name))[
+            :batch_size
+        ]
         if not files:
             return 0
         _log(log_path, f"queue_depth={len(files)}")

@@ -801,6 +801,22 @@ def _init_db(self) -> None:
             )
         """)
         self._trigram_fts_available = True
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS chunk_fts_rowids (
+                chunk_id TEXT PRIMARY KEY,
+                fts_rowid INTEGER,
+                trigram_rowid INTEGER
+            )
+        """)
+        cursor.execute("""
+            INSERT OR IGNORE INTO chunk_fts_rowids(chunk_id, fts_rowid)
+            SELECT chunk_id, rowid FROM chunks_fts WHERE chunk_id IS NOT NULL
+        """)
+        cursor.execute("""
+            INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid)
+            SELECT chunk_id, rowid FROM chunks_fts_trigram WHERE chunk_id IS NOT NULL
+            ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid
+        """)
 
         # FTS5 sync triggers — keep summary/tags/resolved_query in sync
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_insert")
@@ -816,6 +832,9 @@ def _init_db(self) -> None:
                     new.resolved_queries,
                     new.id
                 );
+                INSERT INTO chunk_fts_rowids(chunk_id, fts_rowid)
+                VALUES (new.id, last_insert_rowid())
+                ON CONFLICT(chunk_id) DO UPDATE SET fts_rowid = excluded.fts_rowid;
             END
         """)
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_insert")
@@ -831,25 +850,37 @@ def _init_db(self) -> None:
                     new.resolved_queries,
                     new.id
                 );
+                INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid)
+                VALUES (new.id, last_insert_rowid())
+                ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid;
             END
         """)
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_delete")
         cursor.execute("""
             CREATE TRIGGER IF NOT EXISTS chunks_fts_delete AFTER DELETE ON chunks BEGIN
-                DELETE FROM chunks_fts WHERE chunk_id = old.id;
+                DELETE FROM chunks_fts
+                WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
+                DELETE FROM chunks_fts_trigram
+                WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
+                DELETE FROM chunk_fts_rowids WHERE chunk_id = old.id;
             END
         """)
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_delete")
         cursor.execute("""
             CREATE TRIGGER IF NOT EXISTS chunks_fts_trigram_delete AFTER DELETE ON chunks BEGIN
-                DELETE FROM chunks_fts_trigram WHERE chunk_id = old.id;
+                DELETE FROM chunks_fts
+                WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
+                DELETE FROM chunks_fts_trigram
+                WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
+                DELETE FROM chunk_fts_rowids WHERE chunk_id = old.id;
             END
         """)
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_update")
         cursor.execute("""
             CREATE TRIGGER IF NOT EXISTS chunks_fts_update
             AFTER UPDATE OF content, summary, tags, resolved_query, key_facts, resolved_queries ON chunks BEGIN
-                DELETE FROM chunks_fts WHERE chunk_id = old.id;
+                DELETE FROM chunks_fts
+                WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
                 INSERT INTO chunks_fts(content, summary, tags, resolved_query, key_facts, resolved_queries, chunk_id)
                 VALUES (
                     new.content,
@@ -860,13 +891,17 @@ def _init_db(self) -> None:
                     new.resolved_queries,
                     new.id
                 );
+                INSERT INTO chunk_fts_rowids(chunk_id, fts_rowid)
+                VALUES (new.id, last_insert_rowid())
+                ON CONFLICT(chunk_id) DO UPDATE SET fts_rowid = excluded.fts_rowid;
             END
         """)
         cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_update")
         cursor.execute("""
             CREATE TRIGGER IF NOT EXISTS chunks_fts_trigram_update
             AFTER UPDATE OF content, summary, tags, resolved_query, key_facts, resolved_queries ON chunks BEGIN
-                DELETE FROM chunks_fts_trigram WHERE chunk_id = old.id;
+                DELETE FROM chunks_fts_trigram
+                WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id);
                 INSERT INTO chunks_fts_trigram(content, summary, tags, resolved_query, key_facts, resolved_queries, chunk_id)
                 VALUES (
                     new.content,
@@ -877,6 +912,9 @@ def _init_db(self) -> None:
                     new.resolved_queries,
                     new.id
                 );
+                INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid)
+                VALUES (new.id, last_insert_rowid())
+                ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid;
             END
         """)