-
-
Notifications
You must be signed in to change notification settings - Fork 367
Open
Labels
enhancement: New feature or request
Description
Is your feature request related to a problem? Please describe.
I have been using AutoRAG and performing the parse, chunk, and evaluate steps separately, and then reviewing the data stored in the benchmark one by one. However, it is time-consuming to click through each step and difficult to compare the results. Is there a feature that would allow me to view the optimal settings and detailed statistics considering all the configuration values collectively?
# --- Step 1: build one QA dataset per (parsed result, chunk result) pair ---
# Every QA/corpus parquet pair is written into <current_dir>/qa.
qa_dir = os.path.join(current_dir, "qa")

for parsed_raw in parsed_raw_files:
    # File stem of the parsed result, e.g. ".../3.parquet" -> "3".
    parsed_raw_index = os.path.basename(parsed_raw).split(".")[0]

    for chunk in chunked_file_list:
        # Only pair a chunk file with the parsed result whose stem matches
        # the name of the directory the chunk file lives in.
        if parsed_raw_index != chunk.parent.name:
            continue
        chunk_index = chunk.name.split(".")[0]

        # The raw data is re-read for every matching chunk file so that each
        # Corpus is linked to its own Raw instance.
        initial_raw = Raw(pd.read_parquet(parsed_raw, engine="pyarrow"))
        initial_corpus = Corpus(
            pd.read_parquet(chunk, engine="pyarrow"), initial_raw
        )

        qa = (
            initial_corpus.sample(
                random_single_hop,
                n=len(initial_corpus.data),
                # NOTE: the seed is drawn at random, so runs are not reproducible.
                random_state=random.randint(1, 100),
            )
            .map(
                lambda df: df.reset_index(drop=True),
            )
            .make_retrieval_gt_contents()
            .batch_apply(
                multiple_queries_gen,  # query generation
                llm=llm,
                lang="ko",
                n=10,
            )
            .batch_apply(
                make_basic_gen_gt,  # answer generation (basic)
                llm=llm,
                lang="ko",
            )
            .batch_apply(
                make_concise_gen_gt,  # answer generation (concise)
                llm=llm,
                lang="ko",
            )
            .filter(
                dontknow_filter_rule_based,  # filter unanswerable questions
                lang="ko",
            )
        )

        # Create the output directory on demand; exist_ok avoids the
        # check-then-create race of os.path.exists() + os.makedirs().
        os.makedirs(qa_dir, exist_ok=True)
        pair_prefix = f"parsed_{parsed_raw_index}_chunk_{chunk_index}"
        output_path = os.path.join(qa_dir, f"{pair_prefix}_qa.parquet")
        corpus_output_path = os.path.join(qa_dir, f"{pair_prefix}_corpus.parquet")
        qa.to_parquet(output_path, corpus_output_path)

# --- Step 2: run the evaluation once per generated QA/corpus pair ---
# These two options are the same for every run, so they are set once.
opt["config"] = os.path.join(current_dir, "config", "evaluate_config.yaml")
opt["project_dir"] = os.path.join(current_dir, "benchmark")

# NOTE(review): the ranges are hard-coded; this presumably assumes 10 parsed
# results with 4 chunk results each - confirm against the files in "qa".
for i in range(10):
    for j in range(4):
        opt["qa_data_path"] = os.path.join(
            current_dir, "qa", f"parsed_{i}_chunk_{j}_qa.parquet"
        )
        opt["corpus_data_path"] = os.path.join(
            current_dir, "qa", f"parsed_{i}_chunk_{j}_corpus.parquet"
        )
        evaluate(**opt)
vkehfdl1
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request
