Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ragflows/configs.demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
# Minimum number of lines a document must have; documents below this value are
# ignored. This setting only applies to files with txt, md, or html suffixes.
DOC_MIN_LINES = 1

# Whether to only upload files. True = upload only, False = upload + automatic parsing.
ONLY_UPLOAD = False


def get_header():
    """Return a dict mapping 'authorization' to the module-level AUTHORIZATION value."""
    header = dict(authorization=AUTHORIZATION)
    return header
11 changes: 9 additions & 2 deletions ragflows/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ def get_file_lines(file_path) -> int:
# 如果文件已存在,则判断是否已经对文件进行了切片解析
if ragflowdb.exist_name(filename):
doc_item = ragflowdb.get_doc_item_by_name(filename)
if doc_item.get('progress') == 1:
timeutils.print_log(f"{file_path} 已上传,跳过")
if configs.ONLY_UPLOAD:
timeutils.print_log(f"{file_path} 已存在,跳过")
elif doc_item.get('progress') == 1:
timeutils.print_log(f"{file_path} 已完成切片,跳过")
else:
status = api.parse_chunks_with_check(filename)
timeutils.print_log(f"{file_path} 切片状态:", status)
Expand All @@ -115,6 +117,11 @@ def get_file_lines(file_path) -> int:
timeutils.print_log(f'{file_path} 上传失败:{response.get("text")}')
continue

# 仅上传,跳过切片解析
if configs.ONLY_UPLOAD:
continue

# 上传成功,开始切片
timeutils.print_log(f'{file_path},开始切片并等待解析完毕')
status = api.parse_chunks_with_check(filename)
timeutils.print_log(file_path, "切片状态:", status)
Expand Down