From 053c971172893ca27ec9a89aaf20222b31e303f1 Mon Sep 17 00:00:00 2001 From: SamgeShao Date: Sat, 10 May 2025 18:06:09 +0100 Subject: [PATCH] =?UTF-8?q?add=EF=BC=9A=E5=A2=9E=E5=8A=A0=E2=80=9C?= =?UTF-8?q?=E4=BB=85=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E2=80=9D=E7=9A=84?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ragflows/configs.demo.py | 3 +++ ragflows/main.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ragflows/configs.demo.py b/ragflows/configs.demo.py index e284bda..dba6237 100644 --- a/ragflows/configs.demo.py +++ b/ragflows/configs.demo.py @@ -22,6 +22,9 @@ # 文档最少行数,低于该值的文档则被忽略,该参数仅作用于 txt,md,html 后缀文件 DOC_MIN_LINES = 1 +# 是否仅上传文件。True=仅上传文件, False=上传文件+自动解析 +ONLY_UPLOAD = False + def get_header(): return {'authorization': AUTHORIZATION} \ No newline at end of file diff --git a/ragflows/main.py b/ragflows/main.py index 15e50a7..ac40225 100644 --- a/ragflows/main.py +++ b/ragflows/main.py @@ -95,8 +95,10 @@ def get_file_lines(file_path) -> int: # 如果文件已存在,则判断是否已经对文件进行了切片解析 if ragflowdb.exist_name(filename): doc_item = ragflowdb.get_doc_item_by_name(filename) - if doc_item.get('progress') == 1: - timeutils.print_log(f"{file_path} 已上传,跳过") + if configs.ONLY_UPLOAD: + timeutils.print_log(f"{file_path} 已存在,跳过") + elif doc_item.get('progress') == 1: + timeutils.print_log(f"{file_path} 已完成切片,跳过") else: status = api.parse_chunks_with_check(filename) timeutils.print_log(f"{file_path} 切片状态:", status) @@ -115,6 +117,11 @@ def get_file_lines(file_path) -> int: timeutils.print_log(f'{file_path} 上传失败:{response.get("text")}') continue + # 仅上传,跳过切片解析 + if configs.ONLY_UPLOAD: + continue + + # 上传成功,开始切片 timeutils.print_log(f'{file_path},开始切片并等待解析完毕') status = api.parse_chunks_with_check(filename) timeutils.print_log(file_path, "切片状态:", status)