2 changes: 1 addition & 1 deletion .github/workflows/production.yml
@@ -9,6 +9,6 @@ on:
 
 jobs:
   ci:
-    uses: TogetherCrew/operations/.github/workflows/ci.yml@main
+    uses: TogetherCrew/operations/.github/workflows/ci2.yml@main
     secrets:
       CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
2 changes: 1 addition & 1 deletion .github/workflows/start.staging.yml
@@ -6,6 +6,6 @@ on: pull_request
 
 jobs:
   ci:
-    uses: TogetherCrew/operations/.github/workflows/ci.yml@main
+    uses: TogetherCrew/operations/.github/workflows/ci2.yml@main
     secrets:
       CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
2 changes: 1 addition & 1 deletion hivemind_etl/mediawiki/etl.py
@@ -99,7 +99,7 @@ def transform(self) -> list[Document]:
     def load(self, documents: list[Document]) -> None:
         logging.info(f"Loading {len(documents)} documents into Qdrant!")
         ingestion_pipeline = CustomIngestionPipeline(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False,
         )
 
         # Process batches in parallel using ThreadPoolExecutor
2 changes: 2 additions & 0 deletions hivemind_etl/simple_ingestion/pipeline.py
@@ -147,6 +147,7 @@ async def process_document(
     pipeline = CustomIngestionPipeline(
         community_id=ingestion_request.communityId,
         collection_name=collection_name,
+        use_cache=False,
     )
 
     document = Document(
@@ -188,6 +189,7 @@ async def process_documents_batch(
     pipeline = CustomIngestionPipeline(
         community_id=batch_chunk.communityId,
         collection_name=collection_name,
+        use_cache=False,
     )
 
     # Convert all documents in this chunk to Document objects
2 changes: 1 addition & 1 deletion hivemind_etl/website/website_etl.py
@@ -30,7 +30,7 @@ def __init__(
 
         # preparing the ingestion pipeline
         self.ingestion_pipeline = CustomIngestionPipeline(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False,
         )
 
     async def extract(
2 changes: 2 additions & 0 deletions hivemind_summarizer/activities.py
@@ -97,6 +97,7 @@ async def fetch_platform_summaries_by_date(
     pipeline = CustomIngestionPipeline(
         community_id=community_id,
         collection_name=f"{input.platform_id}_summary",
+        use_cache=False,
     )
     # get the latest date from the collection
     latest_date = pipeline.get_latest_document_date(
@@ -211,6 +212,7 @@ async def fetch_platform_summaries_by_date_range(
             extract_text_only=extract_text_only,
             platform_id=input.platform_id,
             community_id=community_id,
+            use_cache=False,
         )
         summaries = await fetch_platform_summaries_by_date(date_input)
         result[date] = summaries
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 python-dotenv>=1.0.0, <2.0.0
-tc-hivemind-backend==1.4.3
+tc-hivemind-backend==1.4.6
💡 Verification agent

🧩 Analysis chain

Bump to tc-hivemind-backend 1.4.6: verify API usage and transitive impacts.

If this bump is to consume the new use_cache plumbing, ensure every CustomIngestionPipeline instantiation passes use_cache= explicitly, so older call sites do not fail at runtime. Run the script below to audit call sites, and re-run the unit/integration tests for the ingestion and summarizer paths that construct the pipeline.


🏁 Script executed:

#!/usr/bin/env bash
# 1) Show every instantiation
rg -nP --type=py -C2 'CustomIngestionPipeline\s*\('

# 2) Flag calls likely missing the explicit kwarg (handles multi-line)
rg --json -nUP --type=py '(?s)CustomIngestionPipeline\s*\((?:(?!\)).)*\)' \
| jq -r 'select(.type=="match") | .data | "\(.path.text):\(.line_number):\(.submatches[0].match.text)"' \
| rg -v 'use_cache\s*='



Ensure all CustomIngestionPipeline instantiations include use_cache

Missing use_cache at:

  • hivemind_summarizer/activities.py:97
  • hivemind_etl/simple_ingestion/pipeline.py:147
  • hivemind_etl/simple_ingestion/pipeline.py:189

Add use_cache=<bool> to these calls to avoid runtime errors.
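
To illustrate the failure mode, here is a minimal sketch; the constructor body is assumed for illustration, since tc-hivemind-backend's actual signature is not shown in this diff:

# Hypothetical sketch of a use_cache-aware constructor. If the new
# parameter were required (no default), any call site omitting it
# would fail immediately with a TypeError.
class CustomIngestionPipeline:
    def __init__(self, community_id: str, collection_name: str, use_cache: bool) -> None:
        self.community_id = community_id
        self.collection_name = collection_name
        self.use_cache = use_cache

# Old call site -> TypeError: __init__() missing 1 required
# positional argument: 'use_cache'
# CustomIngestionPipeline("community-1", collection_name="platform-1")

# Updated call site, matching this PR:
pipeline = CustomIngestionPipeline(
    "community-1", collection_name="platform-1", use_cache=False
)

In practice the library may well ship a default for use_cache; passing it explicitly, as this PR does, keeps the behavior deterministic either way.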
🤖 Prompt for AI Agents
In requirements.txt line 2, hivemind_summarizer/activities.py around line 97, and hivemind_etl/simple_ingestion/pipeline.py around lines 147 and 189, each instantiation of CustomIngestionPipeline is missing the required use_cache parameter. Update those constructor calls to include use_cache=<bool> (True or False as appropriate for the use case) so the signature is satisfied and runtime errors are avoided. Keep the parameter name exactly use_cache, and update any callers or tests that relied on a default.
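
To keep this audit from regressing, one option is a small guard test. This is a sketch, assuming pytest runs from the repository root; the test name and exclusion list are hypothetical:

# Hypothetical guard: fail CI when a CustomIngestionPipeline call site
# omits an explicit use_cache argument. The regex handles one level of
# nested parentheses, which covers the call sites in this repository.
import pathlib
import re

CALL_RE = re.compile(r"CustomIngestionPipeline\s*\((?:[^()]|\([^()]*\))*\)")

def test_pipeline_call_sites_pass_use_cache():
    offenders = []
    for path in pathlib.Path(".").rglob("*.py"):
        # Skip installed dependencies; only audit first-party sources.
        if "site-packages" in path.parts:
            continue
        for match in CALL_RE.finditer(path.read_text(encoding="utf-8")):
            if "use_cache" not in match.group(0):
                offenders.append(f"{path}: {match.group(0)[:60]}")
    assert not offenders, f"call sites missing use_cache: {offenders}"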

 llama-index-storage-docstore-redis==0.1.2
 llama-index-storage-docstore-mongodb==0.1.3
 crawlee[playwright]==0.3.8
4 changes: 2 additions & 2 deletions tests/unit/test_mediawiki_etl.py
@@ -164,7 +164,7 @@ def test_load_with_dump_deletion(self, mock_ingestion_pipeline_class):
 
         # Verify that methods were called correctly
         mock_ingestion_pipeline_class.assert_called_once_with(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False
         )
         mock_pipeline.run_pipeline.assert_called_once_with(documents)
         self.assertFalse(os.path.exists(etl.dump_dir))
@@ -192,7 +192,7 @@ def test_load_without_dump_deletion(self, mock_ingestion_pipeline_class):
 
         # Verify that methods were called correctly
         mock_ingestion_pipeline_class.assert_called_once_with(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False
         )
         mock_pipeline.run_pipeline.assert_called_once_with(documents)
         self.assertTrue(os.path.exists(etl.dump_dir))