In [27]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the modified qrels (relevance judgments) from a local file.
qrels_path = r"C:\Users\Azzam\.ir_datasets\antique\test\qrels"
# Maps query id -> set of document ids judged relevant for that query.
qrels = defaultdict(set)

with open(qrels_path, "r", encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split()
        # Lines with fewer than four whitespace-separated fields are skipped.
        if len(parts) >= 4:
            # Unpack only the first four fields: the guard above admits longer
            # lines, and unpacking all of them into four names would raise
            # ValueError.
            qid, _, docid, rel = parts[:4]
            # Any positive label counts as relevant.
            # NOTE(review): confirm that this qrels file uses 0 (or lower) for
            # non-relevant judgments, otherwise every judged document is kept.
            if int(rel) > 0:
                qrels[qid].add(docid)

# Load the matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\BertMatching\bert_antique_results.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-k cutoff filled by relevant documents.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff. It is also the denominator, even when fewer than
            ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        0.0 when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc in top_k if doc in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents found within the top k.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by
        ``len(relevant)``, or 0.0 when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc in top_k:
        if doc in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k``.

    Precision is accumulated at every rank (1-based) where a relevant
    document appears in ``retrieved[:k]``, and the sum is normalised by
    ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The truncated average precision, or 0.0 when there are no relevant
        documents or the cutoff is not positive.
    """
    # Guard the denominator: with k == 0 and a non-empty `relevant` the
    # original expression divided by zero.
    max_hits = min(len(relevant), k)
    if max_hits <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            # Precision at this rank = relevant documents seen so far / rank.
            precision_sum += hits / rank
    return precision_sum / max_hits

def dcg(retrieved, relevant, k):
    """Return the discounted cumulative gain of a ranking with binary gains.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff; at most ``min(len(retrieved), k)`` positions count.

    Returns:
        Sum over the counted positions of ``gain / log2(position + 2)``
        with 0-based positions, where gain is 1 for a relevant document
        and 0 otherwise. The result is 0 when no position is counted.
    """
    depth = min(len(retrieved), k)
    gains = (1 if retrieved[pos] in relevant else 0 for pos in range(depth))
    return sum(gain / np.log2(pos + 2) for pos, gain in enumerate(gains))

def idcg(relevant, k):
    """Return the ideal DCG: every counted position holds a relevant document.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        Sum of ``1 / log2(position + 2)`` over the first
        ``min(len(relevant), k)`` 0-based positions; 0 when that count is
        not positive.
    """
    ideal_hits = min(len(relevant), k)
    total = 0
    for pos in range(ideal_hits):
        total += 1 / np.log2(pos + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return DCG at ``k`` normalised by the ideal DCG at ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff passed to both ``dcg`` and ``idcg``.

    Returns:
        ``dcg / idcg``, or 0.0 when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if not ideal > 0:
        return 0.0
    return actual / ideal

# Evaluate every query at a fixed rank cutoff.
k = 10
precisions, recalls, maps, ndcgs = [], [], [], []

# Pair each per-query score list with the metric that fills it.
metric_sinks = (
    (precisions, precision_at_k),
    (recalls, recall_at_k),
    (maps, average_precision),
    (ndcgs, ndcg_at_k),
)

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each result entry unpacks into two items; only the first (the document
    # id) is used for evaluation.
    retrieved_doc_ids = [doc_id for doc_id, _second in retrieved_docs]
    # .get() falls back to an empty set for queries without judgments and,
    # unlike indexing, does not insert a new key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    for sink, metric_fn in metric_sinks:
        sink.append(metric_fn(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages over all evaluated queries, rounded to four decimals.
evaluation_results = {
    label: round(np.mean(per_query), 4)
    for label, per_query in (
        ("Precision@10", precisions),
        ("Recall@10", recalls),
        ("MAP@10", maps),
        ("NDCG@10", ndcgs),
    )
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 10353.26it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.4256
Recall@10: 0.1233
MAP@10: 0.3184
NDCG@10: 0.4757





In [28]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the modified qrels (relevance judgments) from a local file.
qrels_path = r"C:\Users\Azzam\.ir_datasets\antique\test\qrels"
# Maps query id -> set of document ids judged relevant for that query.
qrels = defaultdict(set)

with open(qrels_path, "r", encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split()
        # Lines with fewer than four whitespace-separated fields are skipped.
        if len(parts) >= 4:
            # Unpack only the first four fields: the guard above admits longer
            # lines, and unpacking all of them into four names would raise
            # ValueError.
            qid, _, docid, rel = parts[:4]
            # Any positive label counts as relevant.
            # NOTE(review): confirm that this qrels file uses 0 (or lower) for
            # non-relevant judgments, otherwise every judged document is kept.
            if int(rel) > 0:
                qrels[qid].add(docid)

# Load the matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\TfIdfMatching\tfidf_results_batch_antique.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-k cutoff filled by relevant documents.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff. It is also the denominator, even when fewer than
            ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        0.0 when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc in top_k if doc in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents found within the top k.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by
        ``len(relevant)``, or 0.0 when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc in top_k:
        if doc in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k``.

    Precision is accumulated at every rank (1-based) where a relevant
    document appears in ``retrieved[:k]``, and the sum is normalised by
    ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The truncated average precision, or 0.0 when there are no relevant
        documents or the cutoff is not positive.
    """
    # Guard the denominator: with k == 0 and a non-empty `relevant` the
    # original expression divided by zero.
    max_hits = min(len(relevant), k)
    if max_hits <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            # Precision at this rank = relevant documents seen so far / rank.
            precision_sum += hits / rank
    return precision_sum / max_hits

def dcg(retrieved, relevant, k):
    """Return the discounted cumulative gain of a ranking with binary gains.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff; at most ``min(len(retrieved), k)`` positions count.

    Returns:
        Sum over the counted positions of ``gain / log2(position + 2)``
        with 0-based positions, where gain is 1 for a relevant document
        and 0 otherwise. The result is 0 when no position is counted.
    """
    depth = min(len(retrieved), k)
    gains = (1 if retrieved[pos] in relevant else 0 for pos in range(depth))
    return sum(gain / np.log2(pos + 2) for pos, gain in enumerate(gains))

def idcg(relevant, k):
    """Return the ideal DCG: every counted position holds a relevant document.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        Sum of ``1 / log2(position + 2)`` over the first
        ``min(len(relevant), k)`` 0-based positions; 0 when that count is
        not positive.
    """
    ideal_hits = min(len(relevant), k)
    total = 0
    for pos in range(ideal_hits):
        total += 1 / np.log2(pos + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return DCG at ``k`` normalised by the ideal DCG at ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff passed to both ``dcg`` and ``idcg``.

    Returns:
        ``dcg / idcg``, or 0.0 when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if not ideal > 0:
        return 0.0
    return actual / ideal

# Evaluate every query at a fixed rank cutoff.
k = 10
precisions, recalls, maps, ndcgs = [], [], [], []

# Pair each per-query score list with the metric that fills it.
metric_sinks = (
    (precisions, precision_at_k),
    (recalls, recall_at_k),
    (maps, average_precision),
    (ndcgs, ndcg_at_k),
)

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each result entry unpacks into two items; only the first (the document
    # id) is used for evaluation.
    retrieved_doc_ids = [doc_id for doc_id, _second in retrieved_docs]
    # .get() falls back to an empty set for queries without judgments and,
    # unlike indexing, does not insert a new key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    for sink, metric_fn in metric_sinks:
        sink.append(metric_fn(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages over all evaluated queries, rounded to four decimals.
evaluation_results = {
    label: round(np.mean(per_query), 4)
    for label, per_query in (
        ("Precision@10", precisions),
        ("Recall@10", recalls),
        ("MAP@10", maps),
        ("NDCG@10", ndcgs),
    )
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 8422.39it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.4188
Recall@10: 0.1247
MAP@10: 0.304
NDCG@10: 0.4577





In [29]:
import json
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the modified qrels (relevance judgments) from a local file.
qrels_path = r"C:\Users\Azzam\.ir_datasets\antique\test\qrels"
# Maps query id -> set of document ids judged relevant for that query.
qrels = defaultdict(set)

with open(qrels_path, "r", encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split()
        # Lines with fewer than four whitespace-separated fields are skipped.
        if len(parts) >= 4:
            # Unpack only the first four fields: the guard above admits longer
            # lines, and unpacking all of them into four names would raise
            # ValueError.
            qid, _, docid, rel = parts[:4]
            # Any positive label counts as relevant.
            # NOTE(review): confirm that this qrels file uses 0 (or lower) for
            # non-relevant judgments, otherwise every judged document is kept.
            if int(rel) > 0:
                qrels[qid].add(docid)

# Load the matching results from the JSON file.
results_path = r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\HybridMatching\hybrid_results_antique.json"
with open(results_path, "r", encoding="utf-8") as f:
    results = json.load(f)

# 🧮 دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-k cutoff filled by relevant documents.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff. It is also the denominator, even when fewer than
            ``k`` documents were retrieved.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by ``k``, or
        0.0 when that slice is empty.
    """
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    hit_count = sum(1 for doc in top_k if doc in relevant)
    return hit_count / k

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents found within the top k.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff applied to ``retrieved``.

    Returns:
        Number of relevant ids in ``retrieved[:k]`` divided by
        ``len(relevant)``, or 0.0 when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if not relevant:
        return 0.0
    found = 0
    for doc in top_k:
        if doc in relevant:
            found += 1
    return found / len(relevant)

def average_precision(retrieved, relevant, k):
    """Return average precision truncated at rank ``k``.

    Precision is accumulated at every rank (1-based) where a relevant
    document appears in ``retrieved[:k]``, and the sum is normalised by
    ``min(len(relevant), k)``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The truncated average precision, or 0.0 when there are no relevant
        documents or the cutoff is not positive.
    """
    # Guard the denominator: with k == 0 and a non-empty `relevant` the
    # original expression divided by zero.
    max_hits = min(len(relevant), k)
    if max_hits <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            # Precision at this rank = relevant documents seen so far / rank.
            precision_sum += hits / rank
    return precision_sum / max_hits

def dcg(retrieved, relevant, k):
    """Return the discounted cumulative gain of a ranking with binary gains.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (membership-tested).
        k: Rank cutoff; at most ``min(len(retrieved), k)`` positions count.

    Returns:
        Sum over the counted positions of ``gain / log2(position + 2)``
        with 0-based positions, where gain is 1 for a relevant document
        and 0 otherwise. The result is 0 when no position is counted.
    """
    depth = min(len(retrieved), k)
    gains = (1 if retrieved[pos] in relevant else 0 for pos in range(depth))
    return sum(gain / np.log2(pos + 2) for pos, gain in enumerate(gains))

def idcg(relevant, k):
    """Return the ideal DCG: every counted position holds a relevant document.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        Sum of ``1 / log2(position + 2)`` over the first
        ``min(len(relevant), k)`` 0-based positions; 0 when that count is
        not positive.
    """
    ideal_hits = min(len(relevant), k)
    total = 0
    for pos in range(ideal_hits):
        total += 1 / np.log2(pos + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return DCG at ``k`` normalised by the ideal DCG at ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Sized container of relevant document ids.
        k: Rank cutoff passed to both ``dcg`` and ``idcg``.

    Returns:
        ``dcg / idcg``, or 0.0 when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if not ideal > 0:
        return 0.0
    return actual / ideal

# Evaluate every query at a fixed rank cutoff.
k = 10
precisions, recalls, maps, ndcgs = [], [], [], []

# Pair each per-query score list with the metric that fills it.
metric_sinks = (
    (precisions, precision_at_k),
    (recalls, recall_at_k),
    (maps, average_precision),
    (ndcgs, ndcg_at_k),
)

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each result entry unpacks into two items; only the first (the document
    # id) is used for evaluation.
    retrieved_doc_ids = [doc_id for doc_id, _second in retrieved_docs]
    # .get() falls back to an empty set for queries without judgments and,
    # unlike indexing, does not insert a new key into the defaultdict.
    relevant_doc_ids = qrels.get(qid, set())

    for sink, metric_fn in metric_sinks:
        sink.append(metric_fn(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages over all evaluated queries, rounded to four decimals.
evaluation_results = {
    label: round(np.mean(per_query), 4)
    for label, per_query in (
        ("Precision@10", precisions),
        ("Recall@10", recalls),
        ("MAP@10", maps),
        ("NDCG@10", ndcgs),
    )
}

# Print the results.
print("📈 نتائج التقييم (باستخدام qrels المعدل):")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 11735.86it/s]

📈 نتائج التقييم (باستخدام qrels المعدل):
Precision@10: 0.4716
Recall@10: 0.1401
MAP@10: 0.3685
NDCG@10: 0.525



