In [1]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm
import ir_datasets

# Load the qrels (relevance judgments) of the BEIR Quora test split through ir_datasets
# (they are fetched by ir_datasets, not read from a local file).
dataset = ir_datasets.load("beir/quora/test")
qrels = defaultdict(set)  # query_id -> set of doc_ids judged relevant
for qrel in dataset.qrels_iter():
    # Only judgments with a positive relevance label count as relevant.
    if int(qrel.relevance) <= 0:
        continue
    qrels[qrel.query_id].add(qrel.doc_id)

# Load the saved matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\TfIdfMatching\tfidf_results_batch.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return precision at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff. The hit count is divided by ``k`` itself, even when
            fewer than ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        ``0.0`` when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return recall at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids found in ``retrieved[:k]`` divided by
        ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc_id in top_k:
        if doc_id in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k`` (AP@k) for one query.

    Precision is accumulated at every rank in ``retrieved[:k]`` that holds a
    relevant document, then normalised by ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The AP@k score, or ``0.0`` when ``relevant`` is empty or ``k`` is not
        positive.
    """
    if k <= 0 or not relevant:
        # Without the k guard, k == 0 makes the normaliser zero
        # (ZeroDivisionError) and a negative k yields a negative score.
        return 0.0
    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return the binary-gain discounted cumulative gain of the top ``k`` results.

    Each of the first ``min(len(retrieved), k)`` positions contributes
    ``1 / log2(position + 2)`` (0-based position) when its document id is in
    ``relevant`` and 0 otherwise.
    """
    depth = min(len(retrieved), k)
    gains = []
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        gains.append(gain / np.log2(position + 2))
    return sum(gains)

def idcg(relevant, k):
    """Return the ideal DCG: the DCG of a ranking whose first positions are all relevant.

    Sums ``1 / log2(position + 2)`` over the first ``min(len(relevant), k)``
    0-based positions; the result is ``0`` when that count is zero.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total += 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return NDCG at cutoff ``k``: ``dcg`` divided by ``idcg``.

    Returns ``0.0`` when the ideal DCG is not positive (for example when
    ``relevant`` is empty), which avoids dividing by zero.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# ➕ تابع MRR
def mean_reciprocal_rank(retrieved, relevant):
    """Return the reciprocal rank of the first relevant document for one query.

    Despite the name, this scores a single ranked list; averaging over
    queries is left to the caller. The whole of ``retrieved`` is scanned
    (no rank cutoff is applied).

    Returns:
        ``1 / rank`` (1-based) of the first id in ``retrieved`` that is in
        ``relevant``, or ``0.0`` when there is none.
    """
    first_hit = next(
        (rank for rank, doc_id in enumerate(retrieved, start=1) if doc_id in relevant),
        None,
    )
    return 0.0 if first_hit is None else 1 / first_hit

# ⚙️ التقييم لجميع الاستعلامات
k = 10  # rank cutoff shared by every "@k" metric below
precisions, recalls, maps, ndcgs, mrrs = [], [], [], [], []

# NOTE(review): the loop walks the queries present in `results`; queries that
# exist only in `qrels` are not counted, and queries without judgments score 0.
for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a (doc_id, other) pair; the second item is
    # presumably the match score -- TODO confirm against the results writer.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # .get() yields an empty set for unjudged queries without inserting a new
    # key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    mrrs.append(mean_reciprocal_rank(retrieved_doc_ids, relevant_doc_ids))

# Final averages. Labels are derived from `k` so they stay correct if the
# cutoff is changed; for k = 10 they are identical to the previous labels.
metric_samples = {
    f"Precision@{k}": precisions,
    f"Recall@{k}": recalls,
    f"MAP@{k}": maps,
    f"NDCG@{k}": ndcgs,
    "MRR": mrrs,
}
# np.mean([]) returns nan and emits a RuntimeWarning, so report 0.0 when no
# query was evaluated.
evaluation_results = {
    name: round(np.mean(values), 4) if values else 0.0
    for name, values in metric_samples.items()
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 10000/10000 [00:00<00:00, 33987.07it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.1062
Recall@10: 0.7979
MAP@10: 0.6396
NDCG@10: 0.6901
MRR: 0.6864





In [2]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm
import ir_datasets

# Load the qrels (relevance judgments) of the BEIR Quora test split through ir_datasets
# (they are fetched by ir_datasets, not read from a local file).
dataset = ir_datasets.load("beir/quora/test")
qrels = defaultdict(set)  # query_id -> set of doc_ids judged relevant
for qrel in dataset.qrels_iter():
    # Only judgments with a positive relevance label count as relevant.
    if int(qrel.relevance) <= 0:
        continue
    qrels[qrel.query_id].add(qrel.doc_id)

# Load the saved matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\BertMatching\bert_results.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return precision at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff. The hit count is divided by ``k`` itself, even when
            fewer than ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        ``0.0`` when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return recall at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids found in ``retrieved[:k]`` divided by
        ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc_id in top_k:
        if doc_id in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k`` (AP@k) for one query.

    Precision is accumulated at every rank in ``retrieved[:k]`` that holds a
    relevant document, then normalised by ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The AP@k score, or ``0.0`` when ``relevant`` is empty or ``k`` is not
        positive.
    """
    if k <= 0 or not relevant:
        # Without the k guard, k == 0 makes the normaliser zero
        # (ZeroDivisionError) and a negative k yields a negative score.
        return 0.0
    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return the binary-gain discounted cumulative gain of the top ``k`` results.

    Each of the first ``min(len(retrieved), k)`` positions contributes
    ``1 / log2(position + 2)`` (0-based position) when its document id is in
    ``relevant`` and 0 otherwise.
    """
    depth = min(len(retrieved), k)
    gains = []
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        gains.append(gain / np.log2(position + 2))
    return sum(gains)

def idcg(relevant, k):
    """Return the ideal DCG: the DCG of a ranking whose first positions are all relevant.

    Sums ``1 / log2(position + 2)`` over the first ``min(len(relevant), k)``
    0-based positions; the result is ``0`` when that count is zero.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total += 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return NDCG at cutoff ``k``: ``dcg`` divided by ``idcg``.

    Returns ``0.0`` when the ideal DCG is not positive (for example when
    ``relevant`` is empty), which avoids dividing by zero.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# ➕ تابع MRR
def mean_reciprocal_rank(retrieved, relevant):
    """Return the reciprocal rank of the first relevant document for one query.

    Despite the name, this scores a single ranked list; averaging over
    queries is left to the caller. The whole of ``retrieved`` is scanned
    (no rank cutoff is applied).

    Returns:
        ``1 / rank`` (1-based) of the first id in ``retrieved`` that is in
        ``relevant``, or ``0.0`` when there is none.
    """
    first_hit = next(
        (rank for rank, doc_id in enumerate(retrieved, start=1) if doc_id in relevant),
        None,
    )
    return 0.0 if first_hit is None else 1 / first_hit

# ⚙️ التقييم لجميع الاستعلامات
k = 10  # rank cutoff shared by every "@k" metric below
precisions, recalls, maps, ndcgs, mrrs = [], [], [], [], []

# NOTE(review): the loop walks the queries present in `results`; queries that
# exist only in `qrels` are not counted, and queries without judgments score 0.
for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a (doc_id, other) pair; the second item is
    # presumably the match score -- TODO confirm against the results writer.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # .get() yields an empty set for unjudged queries without inserting a new
    # key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    mrrs.append(mean_reciprocal_rank(retrieved_doc_ids, relevant_doc_ids))

# Final averages. Labels are derived from `k` so they stay correct if the
# cutoff is changed; for k = 10 they are identical to the previous labels.
metric_samples = {
    f"Precision@{k}": precisions,
    f"Recall@{k}": recalls,
    f"MAP@{k}": maps,
    f"NDCG@{k}": ndcgs,
    "MRR": mrrs,
}
# np.mean([]) returns nan and emits a RuntimeWarning, so report 0.0 when no
# query was evaluated.
evaluation_results = {
    name: round(np.mean(values), 4) if values else 0.0
    for name, values in metric_samples.items()
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 10000/10000 [00:00<00:00, 13159.58it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.1217
Recall@10: 0.8823
MAP@10: 0.7514
NDCG@10: 0.7948
MRR: 0.7908





In [3]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm
import ir_datasets

# Load the qrels (relevance judgments) of the BEIR Quora test split through ir_datasets
# (they are fetched by ir_datasets, not read from a local file).
dataset = ir_datasets.load("beir/quora/test")
qrels = defaultdict(set)  # query_id -> set of doc_ids judged relevant
for qrel in dataset.qrels_iter():
    # Only judgments with a positive relevance label count as relevant.
    if int(qrel.relevance) <= 0:
        continue
    qrels[qrel.query_id].add(qrel.doc_id)

# Load the saved matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\HybridMatching\hybrid_results.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return precision at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff. The hit count is divided by ``k`` itself, even when
            fewer than ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        ``0.0`` when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return recall at cutoff ``k`` for one ranked list.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids found in ``retrieved[:k]`` divided by
        ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc_id in top_k:
        if doc_id in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k`` (AP@k) for one query.

    Precision is accumulated at every rank in ``retrieved[:k]`` that holds a
    relevant document, then normalised by ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best match first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The AP@k score, or ``0.0`` when ``relevant`` is empty or ``k`` is not
        positive.
    """
    if k <= 0 or not relevant:
        # Without the k guard, k == 0 makes the normaliser zero
        # (ZeroDivisionError) and a negative k yields a negative score.
        return 0.0
    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return the binary-gain discounted cumulative gain of the top ``k`` results.

    Each of the first ``min(len(retrieved), k)`` positions contributes
    ``1 / log2(position + 2)`` (0-based position) when its document id is in
    ``relevant`` and 0 otherwise.
    """
    depth = min(len(retrieved), k)
    gains = []
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        gains.append(gain / np.log2(position + 2))
    return sum(gains)

def idcg(relevant, k):
    """Return the ideal DCG: the DCG of a ranking whose first positions are all relevant.

    Sums ``1 / log2(position + 2)`` over the first ``min(len(relevant), k)``
    0-based positions; the result is ``0`` when that count is zero.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total += 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return NDCG at cutoff ``k``: ``dcg`` divided by ``idcg``.

    Returns ``0.0`` when the ideal DCG is not positive (for example when
    ``relevant`` is empty), which avoids dividing by zero.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# ➕ تابع MRR
def mean_reciprocal_rank(retrieved, relevant):
    """Return the reciprocal rank of the first relevant document for one query.

    Despite the name, this scores a single ranked list; averaging over
    queries is left to the caller. The whole of ``retrieved`` is scanned
    (no rank cutoff is applied).

    Returns:
        ``1 / rank`` (1-based) of the first id in ``retrieved`` that is in
        ``relevant``, or ``0.0`` when there is none.
    """
    first_hit = next(
        (rank for rank, doc_id in enumerate(retrieved, start=1) if doc_id in relevant),
        None,
    )
    return 0.0 if first_hit is None else 1 / first_hit

# ⚙️ التقييم لجميع الاستعلامات
k = 10  # rank cutoff shared by every "@k" metric below
precisions, recalls, maps, ndcgs, mrrs = [], [], [], [], []

# NOTE(review): the loop walks the queries present in `results`; queries that
# exist only in `qrels` are not counted, and queries without judgments score 0.
for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a (doc_id, other) pair; the second item is
    # presumably the match score -- TODO confirm against the results writer.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # .get() yields an empty set for unjudged queries without inserting a new
    # key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    mrrs.append(mean_reciprocal_rank(retrieved_doc_ids, relevant_doc_ids))

# Final averages. Labels are derived from `k` so they stay correct if the
# cutoff is changed; for k = 10 they are identical to the previous labels.
metric_samples = {
    f"Precision@{k}": precisions,
    f"Recall@{k}": recalls,
    f"MAP@{k}": maps,
    f"NDCG@{k}": ndcgs,
    "MRR": mrrs,
}
# np.mean([]) returns nan and emits a RuntimeWarning, so report 0.0 when no
# query was evaluated.
evaluation_results = {
    name: round(np.mean(values), 4) if values else 0.0
    for name, values in metric_samples.items()
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 10000/10000 [00:00<00:00, 29284.33it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.1177
Recall@10: 0.8685
MAP@10: 0.7205
NDCG@10: 0.7686
MRR: 0.7641



