# Patch summary: introduces an ONLY_UPLOAD switch and makes the uploader honor it.
#
# ragflows/configs.demo.py (hunk @22):
#   - Adds the module-level constant ONLY_UPLOAD = False, placed after DOC_MIN_LINES
#     and before get_header().
#   - Gloss of the added comment: "Whether to only upload files.
#     True = upload only, False = upload + automatic parsing."
#   - Gloss of the context comment on DOC_MIN_LINES: "Minimum number of lines in a
#     document; documents below this value are ignored; only applies to files with
#     the txt, md, html suffixes."
#
# ragflows/main.py (hunk @95), branch taken when ragflowdb.exist_name(filename) is true:
#   - New first check: if configs.ONLY_UPLOAD, log "<file_path> already exists, skipping".
#   - The former `if doc_item.get('progress') == 1` becomes an `elif`, and its log text
#     changes from "already uploaded, skipping" to "chunking finished, skipping".
#   - The trailing `else` still calls api.parse_chunks_with_check(filename) and logs
#     the chunk status.
#
# ragflows/main.py (hunk @115), after the upload-failure log + `continue`:
#   - New guard: if configs.ONLY_UPLOAD, `continue` before the "start chunking and wait
#     for parsing to finish" log and the api.parse_chunks_with_check(filename) call.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into spaces
#   in this copy, so it presumably cannot be applied as-is -- confirm against the
#   original commit.
# NOTE(review): `def get_file_lines(file_path) -> int:` in the @@ headers looks like
#   git's function-context marker; the `continue` statements suggest the changed code
#   actually sits inside a loop defined elsewhere -- verify in ragflows/main.py.
# NOTE(review): main.py reads configs.ONLY_UPLOAD as a plain attribute; a local config
#   module without this constant would presumably raise AttributeError -- TODO confirm
#   which config module main.py imports.
diff --git a/ragflows/configs.demo.py b/ragflows/configs.demo.py index e284bda..dba6237 100644 --- a/ragflows/configs.demo.py +++ b/ragflows/configs.demo.py @@ -22,6 +22,9 @@ # 文档最少行数,低于该值的文档则被忽略,该参数仅作用于 txt,md,html 后缀文件 DOC_MIN_LINES = 1 +# 是否仅上传文件。True=仅上传文件, False=上传文件+自动解析 +ONLY_UPLOAD = False + def get_header(): return {'authorization': AUTHORIZATION} \ No newline at end of file diff --git a/ragflows/main.py b/ragflows/main.py index 15e50a7..ac40225 100644 --- a/ragflows/main.py +++ b/ragflows/main.py @@ -95,8 +95,10 @@ def get_file_lines(file_path) -> int: # 如果文件已存在,则判断是否已经对文件进行了切片解析 if ragflowdb.exist_name(filename): doc_item = ragflowdb.get_doc_item_by_name(filename) - if doc_item.get('progress') == 1: - timeutils.print_log(f"{file_path} 已上传,跳过") + if configs.ONLY_UPLOAD: + timeutils.print_log(f"{file_path} 已存在,跳过") + elif doc_item.get('progress') == 1: + timeutils.print_log(f"{file_path} 已完成切片,跳过") else: status = api.parse_chunks_with_check(filename) timeutils.print_log(f"{file_path} 切片状态:", status) @@ -115,6 +117,11 @@ def get_file_lines(file_path) -> int: timeutils.print_log(f'{file_path} 上传失败:{response.get("text")}') continue + # 仅上传,跳过切片解析 + if configs.ONLY_UPLOAD: + continue + + # 上传成功,开始切片 timeutils.print_log(f'{file_path},开始切片并等待解析完毕') status = api.parse_chunks_with_check(filename) timeutils.print_log(file_path, "切片状态:", status)