Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions ragflows/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def parse_chunks(doc_ids, run=1):
url = f"{configs.API_URL}/document/run" # 替换为实际的服务器地址
data = {"doc_ids":doc_ids,"run":run}
response = requests.post(url, json=data, headers=configs.get_header())
timeutils.print_log(response.text)
timeutils.print_log("parse_chunks response:", response.text)
if response.status_code == 200:
return response.json()
else:
Expand Down Expand Up @@ -157,10 +157,14 @@ def parse_chunks_with_check(filename):
fileutils.save(f"{fileutils.get_cache_dir()}/ragflow_fail.txt", f"{timeutils.get_now_str()} {msg}\n")
return False

timeutils.print_log(f"[{filename}]解析进度为:{progress}")
if configs.ENABLE_PROGRESS_LOG:
progress_percent = round(progress * 100, 2)
timeutils.print_log(f"[{filename}]解析进度为:{progress_percent}%")

if progress == 1:
return True
time.sleep(1)

time.sleep(configs.PROGRESS_CHECK_INTERVAL)


# 是否请求成功
Expand Down
6 changes: 6 additions & 0 deletions ragflows/configs.demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
# 是否仅上传文件。True=仅上传文件, False=上传文件+自动解析
ONLY_UPLOAD = False

# 是否打印切片进度查询日志。True=打印,False=不打印
ENABLE_PROGRESS_LOG = True

# 切片进度查询间隔时间(秒)
PROGRESS_CHECK_INTERVAL = 1


def get_header():
    """Return the HTTP request headers carrying the configured authorization value."""
    # AUTHORIZATION is a module-level setting of this config file.
    return dict(authorization=AUTHORIZATION)
11 changes: 6 additions & 5 deletions ragflows/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,13 @@ def main():
if ragflowdb.exist_name(filename):
doc_item = ragflowdb.get_doc_item_by_name(filename)
if configs.ONLY_UPLOAD:
timeutils.print_log(f"{file_path} 已存在,跳过")
timeutils.print_log(f"{file_path} 已存在,跳过\n")
elif doc_item.get('progress') == 1:
timeutils.print_log(f"{file_path} 已完成切片,跳过")
timeutils.print_log(f"{file_path} 已完成切片,跳过\n")
else:
timeutils.print_log(f'{file_path},开始切片并等待解析完毕')
status = api.parse_chunks_with_check(filename)
timeutils.print_log(f"{file_path} 切片状态:", status)
timeutils.print_log(f"{file_path} 切片状态:", status, "\n")
continue

# 文件不存在,上传文件=>切片=>解析并等待解析完毕
Expand All @@ -121,7 +122,7 @@ def main():
parser_id=configs.PARSER_ID,
run="1"
)
timeutils.print_log(response)
timeutils.print_log("upload_file_to_kb response:", response)
if api.is_succeed(response) is False:
timeutils.print_log(f'{file_path} 上传失败:{response.get("text")}')
continue
Expand All @@ -133,7 +134,7 @@ def main():
# 上传成功,开始切片
timeutils.print_log(f'{file_path},开始切片并等待解析完毕')
status = api.parse_chunks_with_check(filename)
timeutils.print_log(file_path, "切片状态:", status)
timeutils.print_log(file_path, "切片状态:", status, "\n")

timeutils.print_log('all done')

Expand Down
4 changes: 2 additions & 2 deletions ragflows/ragflowdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ def get_doc_list(kb_id):
doc_ids = db.query_list(sql)
return doc_ids

@timeutils.monitor
# @timeutils.monitor
def get_doc_item(doc_id):
    """Look up one row of the ``document`` table by its id.

    Selects the ``id``, ``name`` and ``progress`` columns and returns the
    first row of the query result, or ``None`` when the result is empty.
    """
    database = get_db()
    # NOTE(review): doc_id is interpolated directly into the SQL text --
    # confirm callers only pass trusted ids, or move to a parameterized
    # query if the db helper offers one.
    rows = database.query_list(
        f"select id,name,progress from document where id = '{doc_id}'"
    )
    if not rows:
        return None
    return rows[0]

@timeutils.monitor
# @timeutils.monitor
def get_doc_item_by_name(name):
db = get_db()
kb_id = configs.DIFY_DOC_KB_ID
Expand Down
27 changes: 24 additions & 3 deletions scripts/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def __init__(self):
self.log_handlers = [] # 添加日志处理器列表
self.original_print_log = None # 保存原始的日志打印函数
self.title("RagFlow Upload")
self.geometry("800x600")
self.geometry("800x660")

# 版本和仓库信息
self.version = "v1.0.1" # 版本号
self.version = "v1.0.2" # 版本号
self.github_repo = "https://github.com/Samge0/ragflow-upload" # GitHub仓库地址

# 自定义图标
Expand All @@ -98,13 +98,16 @@ def __init__(self):
"PARSER_ID": {"type": str, "label": "解析方式", "default": "naive"},
"DOC_DIR": {"type": str, "label": "文档目录", "default": "your doc dir"},
"DOC_SUFFIX": {"type": str, "label": "文档后缀", "default": "md,txt,pdf,docx"},
"PROGRESS_CHECK_INTERVAL": {"type": int, "label": "切片进度查询间隔", "default": "1"},

"MYSQL_HOST": {"type": str, "label": "MySQL主机", "default": "localhost"},
"MYSQL_PORT": {"type": int, "label": "MySQL端口", "default": "5455"},
"MYSQL_USER": {"type": str, "label": "MySQL用户名", "default": "root"},
"MYSQL_PASSWORD": {"type": str, "label": "MySQL密码", "default": "infini_rag_flow"},
"MYSQL_DATABASE": {"type": str, "label": "MySQL数据库", "default": "rag_flow"},
"DOC_MIN_LINES": {"type": int, "label": "最小行数", "default": "1"},
"ONLY_UPLOAD": {"type": bool, "label": "仅上传文件", "default": "False"}
"ONLY_UPLOAD": {"type": bool, "label": "仅上传文件", "default": "False"},
"ENABLE_PROGRESS_LOG": {"type": bool, "label": "打印切片进度日志", "default": "True"},
}

self.create_ui()
Expand Down Expand Up @@ -171,6 +174,17 @@ def create_ui(self):
)
self.run_button.pack(side="left", padx=5, pady=5)

# 添加清理日志按钮
self.clear_log_button = ctk.CTkButton(
button_frame,
text="清理日志",
command=self.clear_log,
fg_color=["#757575", "#616161"], # 灰色
hover_color=["#616161", "#424242"], # 深灰色
text_color="white" # 白色文字
)
self.clear_log_button.pack(side="left", padx=5, pady=5)

# 日志区域
log_frame = ctk.CTkFrame(self.main_frame)
log_frame.pack(fill="both", expand=True, padx=5, pady=5)
Expand Down Expand Up @@ -465,6 +479,13 @@ def save_config(self):
except Exception as e:
self.log(f"保存配置失败: {str(e)}")

def clear_log(self):
    """Clear the log text shown in the UI, then log a confirmation message.

    The log widget is switched to the ``"normal"`` state for the deletion
    and switched back to ``"disabled"`` afterwards, even when the deletion
    raises, so the widget is never left editable by accident.
    """
    self.log_text.configure(state="normal")
    try:
        # Tk text indices are "line.column" strings; "1.0" is the very first
        # character. Passing the string avoids relying on float-to-str
        # conversion of the index.
        self.log_text.delete("1.0", "end")
    finally:
        self.log_text.configure(state="disabled")
    self.log("日志已清理")

if __name__ == "__main__":
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")
Expand Down
Loading