-
Notifications
You must be signed in to change notification settings - Fork 7
Prioritize watcher drain writes #338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,8 +97,21 @@ def _columns(conn: apsw.Connection, table: str) -> set[str]: | |
| return {row[1] for row in conn.execute(f"PRAGMA table_info({table})")} | ||
|
|
||
|
|
||
def _content_hash(content: str) -> str:
    """Return the SHA-256 hex digest of ``content`` ignoring outer whitespace.

    Leading and trailing whitespace is stripped before hashing, so two texts
    that differ only in surrounding whitespace share the same digest.

    Args:
        content: Text to fingerprint.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    normalized = content.strip()
    # Text decoded from JSON may carry lone surrogates (an unpaired "\udXXX"
    # escape); strict UTF-8 encoding would raise UnicodeEncodeError on them.
    # "surrogatepass" keeps the function total and yields byte-identical
    # output (hence identical digests) for all well-formed strings.
    payload = normalized.encode("utf-8", errors="surrogatepass")
    return hashlib.sha256(payload).hexdigest()
|
|
||
|
|
||
def _preview_text(values: dict[str, Any]) -> str:
    """Build a single-line preview of at most 220 characters for a chunk.

    The stripped ``summary`` is used when it is non-empty; otherwise the
    stripped ``content`` is used. Newlines, carriage returns and tabs are each
    replaced by a single space.

    Args:
        values: Mapping that may contain ``"summary"`` and ``"content"``.
            Missing or falsy entries are treated as empty text; other values
            are converted with ``str()``.

    Returns:
        The preview text, or ``""`` when neither field has usable text.
    """
    summary = str(values.get("summary") or "").strip()
    source = summary or str(values.get("content") or "").strip()
    # Every substitution is one character for one character, so cutting to
    # 220 characters first gives the same result while avoiding repeated
    # full-length copies of arbitrarily large content.
    single_line = str.maketrans({"\n": " ", "\r": " ", "\t": " "})
    return source[:220].translate(single_line)
|
|
||
|
|
||
| def _insert_chunk(conn: apsw.Connection, values: dict[str, Any]) -> None: | ||
| cols = _columns(conn, "chunks") | ||
| if "preview_text" in cols and not str(values.get("preview_text") or "").strip(): | ||
| values = {**values, "preview_text": _preview_text(values)} | ||
| if "content" in values: | ||
| fields = compute_dedupe_fields(str(values["content"]), values.get("created_at")) | ||
| values = { | ||
|
|
@@ -226,6 +239,7 @@ def _apply_store(conn: apsw.Connection, event: dict[str, Any]) -> ApplyResult: | |
| "summary": content[:200], | ||
| "tags": json.dumps(tags) if tags else None, | ||
| "importance": float(event["importance"]) if event.get("importance") is not None else None, | ||
| "content_hash": _content_hash(content), | ||
| "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")), | ||
| }, | ||
| ) | ||
|
|
@@ -296,6 +310,7 @@ def _apply_watcher(conn: apsw.Connection, event: dict[str, Any]) -> None: | |
| "conversation_id": event.get("conversation_id"), | ||
| "sender": event.get("sender"), | ||
| "tags": json.dumps(tags) if tags else None, | ||
| "content_hash": _content_hash(content), | ||
| "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")), | ||
| }, | ||
| ) | ||
|
|
@@ -344,6 +359,7 @@ def _apply_hook(conn: apsw.Connection, event: dict[str, Any]) -> None: | |
| "created_at": datetime.fromtimestamp(timestamp, timezone.utc).isoformat(), | ||
| "conversation_id": session_id, | ||
| "importance": 5, | ||
| "content_hash": _content_hash(content), | ||
| "chunk_origin": detect_chunk_origin(content, event.get("chunk_origin")), | ||
| }, | ||
| ) | ||
|
|
@@ -356,6 +372,14 @@ def _apply_enrichment(conn: apsw.Connection, event: dict[str, Any]) -> None: | |
| return | ||
| enrichment = event.get("enrichment") or {} | ||
| cols = _columns(conn, "chunks") | ||
| if "content_hash" in cols and event.get("content_hash"): | ||
| row = conn.execute("SELECT content_hash, content FROM chunks WHERE id = ?", (chunk_id,)).fetchone() | ||
| if not row: | ||
| return | ||
| current_hash = row[0] or _content_hash(str(row[1] or "")) | ||
| if current_hash and current_hash != event["content_hash"]: | ||
|
Comment on lines
+379
to
+380
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When a queued watcher/store update rewrites an existing chunk through […] Useful? React with 👍 / 👎. |
||
| logger.warning("Skipping stale enrichment for chunk_id=%s content_hash mismatch", chunk_id) | ||
| return | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Stale enrichment hash trusts column — High Severity
Reviewed by Cursor Bugbot for commit 2749cd5. Configure here. |
||
| updates: dict[str, Any] = {} | ||
| mappings = { | ||
| "summary": "summary", | ||
|
|
@@ -507,7 +531,9 @@ def drain_once( | |
|
|
||
| lock_fd = _acquire_queue_lock(queue_dir) | ||
| try: | ||
| files = sorted(queue_dir.glob("*.jsonl"))[:batch_size] | ||
| files = sorted(queue_dir.glob("*.jsonl"), key=lambda path: (path.name.startswith("enrichment-"), path.name))[ | ||
| :batch_size | ||
|
Comment on lines
+534
to
+535
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When an […] Useful? React with 👍 / 👎. |
||
| ] | ||
|
Comment on lines
+534
to
+536
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When the queue has at least […] Useful? React with 👍 / 👎.
coderabbitai[bot] marked this conversation as resolved.
|
||
| if not files: | ||
| return 0 | ||
| _log(log_path, f"queue_depth={len(files)}") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -801,6 +801,22 @@ def _init_db(self) -> None: | |
| ) | ||
| """) | ||
| self._trigram_fts_available = True | ||
| cursor.execute(""" | ||
| CREATE TABLE IF NOT EXISTS chunk_fts_rowids ( | ||
| chunk_id TEXT PRIMARY KEY, | ||
| fts_rowid INTEGER, | ||
| trigram_rowid INTEGER | ||
| ) | ||
| """) | ||
| cursor.execute(""" | ||
| INSERT OR IGNORE INTO chunk_fts_rowids(chunk_id, fts_rowid) | ||
| SELECT chunk_id, rowid FROM chunks_fts WHERE chunk_id IS NOT NULL | ||
| """) | ||
| cursor.execute(""" | ||
| INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid) | ||
| SELECT chunk_id, rowid FROM chunks_fts_trigram WHERE chunk_id IS NOT NULL | ||
| ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid | ||
| """) | ||
|
|
||
| # FTS5 sync triggers — keep summary/tags/resolved_query in sync | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_insert") | ||
|
|
@@ -816,6 +832,9 @@ def _init_db(self) -> None: | |
| new.resolved_queries, | ||
| new.id | ||
| ); | ||
| INSERT INTO chunk_fts_rowids(chunk_id, fts_rowid) | ||
| VALUES (new.id, last_insert_rowid()) | ||
| ON CONFLICT(chunk_id) DO UPDATE SET fts_rowid = excluded.fts_rowid; | ||
| END | ||
| """) | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_insert") | ||
|
|
@@ -831,25 +850,37 @@ def _init_db(self) -> None: | |
| new.resolved_queries, | ||
| new.id | ||
| ); | ||
| INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid) | ||
| VALUES (new.id, last_insert_rowid()) | ||
| ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid; | ||
| END | ||
| """) | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_delete") | ||
| cursor.execute(""" | ||
| CREATE TRIGGER IF NOT EXISTS chunks_fts_delete AFTER DELETE ON chunks BEGIN | ||
| DELETE FROM chunks_fts WHERE chunk_id = old.id; | ||
| DELETE FROM chunks_fts | ||
| WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| DELETE FROM chunks_fts_trigram | ||
| WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| DELETE FROM chunk_fts_rowids WHERE chunk_id = old.id; | ||
| END | ||
| """) | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_delete") | ||
| cursor.execute(""" | ||
| CREATE TRIGGER IF NOT EXISTS chunks_fts_trigram_delete AFTER DELETE ON chunks BEGIN | ||
| DELETE FROM chunks_fts_trigram WHERE chunk_id = old.id; | ||
| DELETE FROM chunks_fts | ||
| WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| DELETE FROM chunks_fts_trigram | ||
| WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| DELETE FROM chunk_fts_rowids WHERE chunk_id = old.id; | ||
| END | ||
| """) | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_update") | ||
|
Comment on lines
876
to
878
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When […] Useful? React with 👍 / 👎. |
||
| cursor.execute(""" | ||
| CREATE TRIGGER IF NOT EXISTS chunks_fts_update | ||
| AFTER UPDATE OF content, summary, tags, resolved_query, key_facts, resolved_queries ON chunks BEGIN | ||
| DELETE FROM chunks_fts WHERE chunk_id = old.id; | ||
| DELETE FROM chunks_fts | ||
| WHERE rowid = (SELECT fts_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| INSERT INTO chunks_fts(content, summary, tags, resolved_query, key_facts, resolved_queries, chunk_id) | ||
| VALUES ( | ||
| new.content, | ||
|
|
@@ -860,13 +891,17 @@ def _init_db(self) -> None: | |
| new.resolved_queries, | ||
| new.id | ||
| ); | ||
| INSERT INTO chunk_fts_rowids(chunk_id, fts_rowid) | ||
| VALUES (new.id, last_insert_rowid()) | ||
| ON CONFLICT(chunk_id) DO UPDATE SET fts_rowid = excluded.fts_rowid; | ||
| END | ||
| """) | ||
| cursor.execute("DROP TRIGGER IF EXISTS chunks_fts_trigram_update") | ||
| cursor.execute(""" | ||
| CREATE TRIGGER IF NOT EXISTS chunks_fts_trigram_update | ||
| AFTER UPDATE OF content, summary, tags, resolved_query, key_facts, resolved_queries ON chunks BEGIN | ||
| DELETE FROM chunks_fts_trigram WHERE chunk_id = old.id; | ||
| DELETE FROM chunks_fts_trigram | ||
| WHERE rowid = (SELECT trigram_rowid FROM chunk_fts_rowids WHERE chunk_id = old.id); | ||
| INSERT INTO chunks_fts_trigram(content, summary, tags, resolved_query, key_facts, resolved_queries, chunk_id) | ||
| VALUES ( | ||
| new.content, | ||
|
|
@@ -877,6 +912,9 @@ def _init_db(self) -> None: | |
| new.resolved_queries, | ||
| new.id | ||
| ); | ||
| INSERT INTO chunk_fts_rowids(chunk_id, trigram_rowid) | ||
| VALUES (new.id, last_insert_rowid()) | ||
| ON CONFLICT(chunk_id) DO UPDATE SET trigram_rowid = excluded.trigram_rowid; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Repair breaks trigram rowid map — Medium Severity
Additional Locations (1). Reviewed by Cursor Bugbot for commit 8e4639a. Configure here. |
||
| END | ||
| """) | ||
|
|
||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.