In [3]:
import json

# Convert the raw ColQwen retrieval dump into a flat per-question format
# (question_id, question, ranked retrieved_results, evidence_pages) and write
# the result back to the same file.
#
# The file is read with an explicit UTF-8 encoding because it is written back
# below as UTF-8 with ensure_ascii=False; relying on the platform default
# encoding for the read can fail on non-ASCII text.
with open("./retrieved_results/colqwen.json", "r", encoding="utf-8") as f:
    result_data = json.load(f)

new_data = []
for example in result_data:
    # This cell overwrites its own input. Records that are already in the
    # converted format (ranked list + evidence_pages) are passed through
    # unchanged so that re-running the cell does not crash on them.
    if isinstance(example.get('retrieved_results'), list) and 'evidence_pages' in example:
        new_data.append(example)
        continue

    # Flatten the values of the evidence mapping into one list of pages.
    evidence_pages = [page for pages in example['evidence'].values() for page in pages]
    example['evidence_pages'] = evidence_pages

    # The page number is parsed from the key: take the text before the first
    # ".", then the last "_"-separated token, as an int.
    # NOTE(review): presumably keys look like "<doc>_<page>.<ext>" -- confirm.
    retrieved_results = [
        {"page": int(key.split(".")[0].split("_")[-1]), "score": value}
        for key, value in example['retrieved_results'].items()
    ]
    # Highest score first.
    retrieved_results.sort(key=lambda x: x['score'], reverse=True)
    example['retrieved_results'] = retrieved_results

    new_data.append({
        "question_id": example['question_id'],
        "question": example['question'],
        "retrieved_results": retrieved_results,
        "evidence_pages": evidence_pages,
    })

with open("./retrieved_results/colqwen.json", "w", encoding="utf-8") as f:
    json.dump(new_data, f, indent=4, ensure_ascii=False)



In [12]:
import json

# Copy the gold evidence pages from the ColQwen results into the BM25 results.
# The two files are aligned purely by position (i-th record to i-th record).
#
# Both reads use an explicit UTF-8 encoding to match how these result files
# are written (UTF-8 with ensure_ascii=False).
with open("./retrieved_results/colqwen.json", "r", encoding="utf-8") as f:
    qwen_data = json.load(f)

evidence_data = [example['evidence_pages'] for example in qwen_data]

with open('./retrieved_results/bm25.json', 'r', encoding="utf-8") as f:
    result_data = json.load(f)

# Positional alignment is only meaningful when both files hold the same number
# of records; fail loudly instead of silently attaching evidence to the wrong
# question (or hitting a bare IndexError part-way through).
if len(result_data) != len(evidence_data):
    raise ValueError(
        f"bm25.json has {len(result_data)} records but colqwen.json has "
        f"{len(evidence_data)}; cannot align evidence pages by position"
    )

for example, evidence_pages in zip(result_data, evidence_data):
    example['evidence_pages'] = evidence_pages

with open("./retrieved_results/bm25.json", "w", encoding="utf-8") as f:
    json.dump(result_data, f, indent=4, ensure_ascii=False)


In [None]:
def calculate_metrics(retrieved_results, k=10):
    """
    Calculate MRR@k and Recall@k, averaged over all queries.

    Args:
        retrieved_results: Sequence of query records. For each record this
            function reads "retrieved_results" (a list of items with a "page"
            key; only the first k items are used, in their stored order) and
            "evidence_pages" (the gold pages for the query).
        k: Cutoff; only the first k retrieved items of each query are scored.

    Returns:
        A dict with two keys, f"MRR@{k}" and f"Recall@{k}", each mapped to
        the mean of the per-query value. Both are 0.0 when there are no
        queries. A query with no evidence pages contributes 0 to both means.
    """
    total_queries = len(retrieved_results)
    if total_queries == 0:
        # Nothing to average over; avoid dividing by zero.
        return {f"MRR@{k}": 0.0, f"Recall@{k}": 0.0}

    mrr_sum = 0.0
    recall_sum = 0.0

    for query in retrieved_results:
        # Top-k retrieved pages and the set of gold pages for this query.
        retrieved_pages = [item["page"] for item in query["retrieved_results"][:k]]
        true_pages = set(query["evidence_pages"])

        # Reciprocal rank of the first relevant page (ranks are 1-based);
        # 0 when no relevant page appears in the top k.
        mrr_sum += next(
            (1.0 / rank
             for rank, page in enumerate(retrieved_pages, 1)
             if page in true_pages),
            0.0,
        )

        # Count each gold page at most once, so that a page retrieved several
        # times within the top k cannot push recall above 1.0.
        hits = len(true_pages.intersection(retrieved_pages))
        recall_sum += hits / len(true_pages) if true_pages else 0.0

    return {
        f"MRR@{k}": mrr_sum / total_queries,
        f"Recall@{k}": recall_sum / total_queries,
    }


In [None]:
import json

# Report MRR@k and Recall@k for every retriever at each cutoff.
model_names = ["bm25", "contriever-msmarco", "bge_m3", "colpali", "colqwen", "visrag"]

ks = [10, 20]

# Load each model's results once up front instead of re-reading and re-parsing
# the same JSON file for every value of k. Read as UTF-8 to match how the
# result files are written.
results_by_model = {}
for model_name in model_names:
    with open(f"./retrieved_results/{model_name}.json", "r", encoding="utf-8") as f:
        results_by_model[model_name] = json.load(f)

# Output is grouped by cutoff first, then by model (same order as before).
for k in ks:
    for model_name in model_names:
        retrieved_results = results_by_model[model_name]
        metrics = calculate_metrics(retrieved_results, k=k)
        print(model_name)
        for metric_name, value in metrics.items():
            print(f"{metric_name}: {value:.4f}")
        print("*"*100)

bm25
MRR@10: 0.1620
Recall@10: 0.3409
****************************************************************************************************
contriever-msmarco
MRR@10: 0.3050
Recall@10: 0.5158
****************************************************************************************************
bge_m3
MRR@10: 0.6151
Recall@10: 0.7709
****************************************************************************************************
colpali
MRR@10: 0.5807
Recall@10: 0.7378
****************************************************************************************************
colqwen
MRR@10: 0.7854
Recall@10: 0.8831
****************************************************************************************************
visrag
MRR@10: 0.7404
Recall@10: 0.8754
****************************************************************************************************
bm25
MRR@20: 0.1784
Recall@20: 0.6321
****************************************************************************************************
con

In [20]:
# Sanity check on the test set: print the question_id of every example whose
# evidence mapping contains a page that is not a plain int (printed once per
# offending page).
# NOTE(review): assumes test.json is UTF-8 encoded, as JSON files normally are
# -- confirm if this file was produced on a non-UTF-8 platform.
with open('./data/FinM4R/test.json', 'r', encoding="utf-8") as f:
    data = json.load(f)

for example in data:
    for pages in example['evidence'].values():
        # Empty page lists simply produce no iterations.
        for page in pages:
            # Exact type check (not isinstance) so that bool values, which are
            # a subclass of int, are still reported.
            if type(page) is not int:
                print(example['question_id'])
