From 4e22190481e892aec009a46a2763c907e449bf9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Wed, 19 Nov 2025 11:46:21 +0100 Subject: [PATCH] fix: update document collection logic to use batch documents --- .../nodes_workflow/DocumentHubCollector/document_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/welearn_datastack/nodes_workflow/DocumentHubCollector/document_collector.py b/welearn_datastack/nodes_workflow/DocumentHubCollector/document_collector.py index 510944d..bfb1e04 100644 --- a/welearn_datastack/nodes_workflow/DocumentHubCollector/document_collector.py +++ b/welearn_datastack/nodes_workflow/DocumentHubCollector/document_collector.py @@ -153,7 +153,7 @@ def extract_data_from_urls( for corpus_name in batch_docs: # Get data corpus_collector = corpus_plugin[corpus_name] - documents = corpus_collector.run(documents=welearn_documents) # type: ignore + documents = corpus_collector.run(documents=batch_docs[corpus_name]) # type: ignore for wrapper_document in documents: state_title = (