In [1]:
import json
import ir_datasets
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the relevance judgments (qrels) for the ANTIQUE "test/non-offensive" split.
# NOTE(review): ANTIQUE qrels are graded 1-4 and, as far as I recall, the dataset
# authors treat only labels 3-4 as relevant for binary metrics -- confirm that
# "> 0" (every judged document counts as relevant) is the intended threshold.
dataset = ir_datasets.load("antique/test/non-offensive")
qrels = defaultdict(set)  # query_id -> set of doc_ids with a positive relevance label
for qrel in dataset.qrels_iter():
    if int(qrel.relevance) > 0:
        qrels[qrel.query_id].add(qrel.doc_id)

# Load the ranked matching results from a JSON file (presumably the TF-IDF run,
# judging by the file name).
# NOTE(review): absolute, machine-specific path -- the cell will not run elsewhere.
with open(r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\TfIdfMatching\tfidf_results_enhanced_antique.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-``k`` ranks that hold a relevant document.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``k`` itself (not by the number of ids actually retrieved), or
        ``0.0`` when ``retrieved[:k]`` is empty.
    """
    top_k = retrieved[:k]
    if top_k:
        hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
        return hit_count / k
    return 0.0

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents that appear in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if relevant:
        found = sum(1 for doc_id in top_k if doc_id in relevant)
        return found / len(relevant)
    return 0.0

def average_precision(retrieved, relevant, k):
    """Return average precision over the top ``k`` ranks for one query.

    Precision is accumulated at every rank (1-based) where a relevant document
    occurs, and the sum is normalised by ``min(len(relevant), k)``, i.e. the
    largest number of relevant documents that could fit in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The average precision as a float. ``0.0`` is returned when ``relevant``
        is empty or when ``k`` is not positive; the latter previously raised
        ``ZeroDivisionError`` (``k == 0``) or divided by a negative number.
    """
    # Guard both degenerate denominators up front so the division is always safe.
    if not relevant or k <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return binary-gain discounted cumulative gain over the top ``k`` ranks.

    Each of the first ``min(len(retrieved), k)`` positions contributes a gain
    of 1 if its document id is in ``relevant`` (else 0), divided by
    ``log2(position + 2)`` with a 0-based position.

    Args:
        retrieved: Ranked, indexable sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The accumulated gain; the integer ``0`` when no position is scored.
    """
    depth = min(len(retrieved), k)
    total = 0
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        total = total + gain / np.log2(position + 2)
    return total

def idcg(relevant, k):
    """Return the ideal binary-gain DCG for a query at cutoff ``k``.

    This is the DCG of a ranking whose first ``min(len(relevant), k)``
    positions are all relevant: the sum of ``1 / log2(position + 2)`` over
    those 0-based positions.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The ideal gain; the integer ``0`` when no position is scored.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total = total + 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return normalised DCG at cutoff ``k`` for one query.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids.
        k: Rank cutoff.

    Returns:
        ``dcg(retrieved, relevant, k)`` divided by ``idcg(relevant, k)``, or
        ``0.0`` when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# Evaluate every query in the results file.
k = 10  # rank cutoff passed to all four metrics
precisions, recalls, maps, ndcgs = [], [], [], []  # one per-query score is appended to each list

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a 2-item pair and only the first item is kept;
    # presumably the pair is (doc_id, score) -- TODO confirm against the matcher output.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # qrels is a defaultdict(set), so a qid with no judgments yields an empty set
    # (and is inserted into qrels as a side effect); such a query scores 0.0 on every metric.
    relevant_doc_ids = qrels[qid]

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages: unweighted mean over queries, rounded to 4 decimal places.
# The "@10" in the labels is a literal, so it will not follow k if k is changed.
evaluation_results = {
    "Precision@10": round(np.mean(precisions), 4),
    "Recall@10": round(np.mean(recalls), 4),
    "MAP@10": round(np.mean(maps), 4),
    "NDCG@10": round(np.mean(ndcgs), 4),
}

print("📈 نتائج التقييم:")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 15239.42it/s]

📈 نتائج التقييم:
Precision@10: 0.1097
Recall@10: 0.043
MAP@10: 0.0714
NDCG@10: 0.1224





In [2]:
import json
import ir_datasets
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the relevance judgments (qrels) for the ANTIQUE "test/non-offensive" split.
# NOTE(review): ANTIQUE qrels are graded 1-4 and, as far as I recall, the dataset
# authors treat only labels 3-4 as relevant for binary metrics -- confirm that
# "> 0" (every judged document counts as relevant) is the intended threshold.
dataset = ir_datasets.load("antique/test/non-offensive")
qrels = defaultdict(set)  # query_id -> set of doc_ids with a positive relevance label
for qrel in dataset.qrels_iter():
    if int(qrel.relevance) > 0:
        qrels[qrel.query_id].add(qrel.doc_id)

# Load the ranked matching results from a JSON file (presumably the BERT run,
# judging by the file name).
# NOTE(review): absolute, machine-specific path -- the cell will not run elsewhere.
with open(r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\BertMatching\bert_results_enhanced_antique.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-``k`` ranks that hold a relevant document.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``k`` itself (not by the number of ids actually retrieved), or
        ``0.0`` when ``retrieved[:k]`` is empty.
    """
    top_k = retrieved[:k]
    if top_k:
        hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
        return hit_count / k
    return 0.0

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents that appear in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if relevant:
        found = sum(1 for doc_id in top_k if doc_id in relevant)
        return found / len(relevant)
    return 0.0

def average_precision(retrieved, relevant, k):
    """Return average precision over the top ``k`` ranks for one query.

    Precision is accumulated at every rank (1-based) where a relevant document
    occurs, and the sum is normalised by ``min(len(relevant), k)``, i.e. the
    largest number of relevant documents that could fit in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The average precision as a float. ``0.0`` is returned when ``relevant``
        is empty or when ``k`` is not positive; the latter previously raised
        ``ZeroDivisionError`` (``k == 0``) or divided by a negative number.
    """
    # Guard both degenerate denominators up front so the division is always safe.
    if not relevant or k <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return binary-gain discounted cumulative gain over the top ``k`` ranks.

    Each of the first ``min(len(retrieved), k)`` positions contributes a gain
    of 1 if its document id is in ``relevant`` (else 0), divided by
    ``log2(position + 2)`` with a 0-based position.

    Args:
        retrieved: Ranked, indexable sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The accumulated gain; the integer ``0`` when no position is scored.
    """
    depth = min(len(retrieved), k)
    total = 0
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        total = total + gain / np.log2(position + 2)
    return total

def idcg(relevant, k):
    """Return the ideal binary-gain DCG for a query at cutoff ``k``.

    This is the DCG of a ranking whose first ``min(len(relevant), k)``
    positions are all relevant: the sum of ``1 / log2(position + 2)`` over
    those 0-based positions.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The ideal gain; the integer ``0`` when no position is scored.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total = total + 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return normalised DCG at cutoff ``k`` for one query.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids.
        k: Rank cutoff.

    Returns:
        ``dcg(retrieved, relevant, k)`` divided by ``idcg(relevant, k)``, or
        ``0.0`` when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# Evaluate every query in the results file.
k = 10  # rank cutoff passed to all four metrics
precisions, recalls, maps, ndcgs = [], [], [], []  # one per-query score is appended to each list

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a 2-item pair and only the first item is kept;
    # presumably the pair is (doc_id, score) -- TODO confirm against the matcher output.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # qrels is a defaultdict(set), so a qid with no judgments yields an empty set
    # (and is inserted into qrels as a side effect); such a query scores 0.0 on every metric.
    relevant_doc_ids = qrels[qid]

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages: unweighted mean over queries, rounded to 4 decimal places.
# The "@10" in the labels is a literal, so it will not follow k if k is changed.
evaluation_results = {
    "Precision@10": round(np.mean(precisions), 4),
    "Recall@10": round(np.mean(recalls), 4),
    "MAP@10": round(np.mean(maps), 4),
    "NDCG@10": round(np.mean(ndcgs), 4),
}

print("📈 نتائج التقييم:")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 11456.47it/s]

📈 نتائج التقييم:
Precision@10: 0.2227
Recall@10: 0.0756
MAP@10: 0.1637
NDCG@10: 0.256





In [4]:
import json
import ir_datasets
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the relevance judgments (qrels) for the ANTIQUE "test" split.
# NOTE(review): the TF-IDF and BERT cells load "antique/test/non-offensive" while
# this one loads "antique/test" -- confirm the three runs are scored against the
# same judgments before comparing their numbers.
# NOTE(review): ANTIQUE qrels are graded 1-4 and, as far as I recall, the dataset
# authors treat only labels 3-4 as relevant for binary metrics -- confirm that
# "> 0" (every judged document counts as relevant) is the intended threshold.
dataset = ir_datasets.load("antique/test")
qrels = defaultdict(set)  # query_id -> set of doc_ids with a positive relevance label
for qrel in dataset.qrels_iter():
    if int(qrel.relevance) > 0:
        qrels[qrel.query_id].add(qrel.doc_id)

# Load the ranked matching results from a JSON file (presumably the hybrid run,
# judging by the file name).
# NOTE(review): absolute, machine-specific path -- the cell will not run elsewhere.
with open(r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\HybridMatching\hybrid_results_enhanced_antique.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-``k`` ranks that hold a relevant document.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``k`` itself (not by the number of ids actually retrieved), or
        ``0.0`` when ``retrieved[:k]`` is empty.
    """
    top_k = retrieved[:k]
    if top_k:
        hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
        return hit_count / k
    return 0.0

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents that appear in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if relevant:
        found = sum(1 for doc_id in top_k if doc_id in relevant)
        return found / len(relevant)
    return 0.0

def average_precision(retrieved, relevant, k):
    """Return average precision over the top ``k`` ranks for one query.

    Precision is accumulated at every rank (1-based) where a relevant document
    occurs, and the sum is normalised by ``min(len(relevant), k)``, i.e. the
    largest number of relevant documents that could fit in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The average precision as a float. ``0.0`` is returned when ``relevant``
        is empty or when ``k`` is not positive; the latter previously raised
        ``ZeroDivisionError`` (``k == 0``) or divided by a negative number.
    """
    # Guard both degenerate denominators up front so the division is always safe.
    if not relevant or k <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return binary-gain discounted cumulative gain over the top ``k`` ranks.

    Each of the first ``min(len(retrieved), k)`` positions contributes a gain
    of 1 if its document id is in ``relevant`` (else 0), divided by
    ``log2(position + 2)`` with a 0-based position.

    Args:
        retrieved: Ranked, indexable sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The accumulated gain; the integer ``0`` when no position is scored.
    """
    depth = min(len(retrieved), k)
    total = 0
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        total = total + gain / np.log2(position + 2)
    return total

def idcg(relevant, k):
    """Return the ideal binary-gain DCG for a query at cutoff ``k``.

    This is the DCG of a ranking whose first ``min(len(relevant), k)``
    positions are all relevant: the sum of ``1 / log2(position + 2)`` over
    those 0-based positions.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The ideal gain; the integer ``0`` when no position is scored.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total = total + 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return normalised DCG at cutoff ``k`` for one query.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids.
        k: Rank cutoff.

    Returns:
        ``dcg(retrieved, relevant, k)`` divided by ``idcg(relevant, k)``, or
        ``0.0`` when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# Evaluate every query in the results file.
k = 10  # rank cutoff passed to all four metrics
precisions, recalls, maps, ndcgs = [], [], [], []  # one per-query score is appended to each list

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a 2-item pair and only the first item is kept;
    # presumably the pair is (doc_id, score) -- TODO confirm against the matcher output.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # qrels is a defaultdict(set), so a qid with no judgments yields an empty set
    # (and is inserted into qrels as a side effect); such a query scores 0.0 on every metric.
    relevant_doc_ids = qrels[qid]

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages: unweighted mean over queries, rounded to 4 decimal places.
# The "@10" in the labels is a literal, so it will not follow k if k is changed.
evaluation_results = {
    "Precision@10": round(np.mean(precisions), 4),
    "Recall@10": round(np.mean(recalls), 4),
    "MAP@10": round(np.mean(maps), 4),
    "NDCG@10": round(np.mean(ndcgs), 4),
}

print("📈 نتائج التقييم:")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


[INFO] Please confirm you agree to the authors' data usage agreement found at <https://ciir.cs.umass.edu/downloads/Antique/readme.txt>
[INFO] [starting] https://ciir.cs.umass.edu/downloads/Antique/antique-test.qrel
[INFO] [finished] https://ciir.cs.umass.edu/downloads/Antique/antique-test.qrel: [00:00] [150kB] [584kB/s]
📊 تقييم الاستعلامات: 100%|██████████| 176/176 [00:00<00:00, 9779.65it/s]               

📈 نتائج التقييم:
Precision@10: 0.2818
Recall@10: 0.0976
MAP@10: 0.2039
NDCG@10: 0.3133





In [4]:
import json
import ir_datasets
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# Load the relevance judgments (qrels) for the BEIR Quora test split.
dataset = ir_datasets.load("beir/quora/test")
qrels = defaultdict(set)  # query_id -> set of doc_ids with a positive relevance label
for qrel in dataset.qrels_iter():
    if int(qrel.relevance) > 0:
        qrels[qrel.query_id].add(qrel.doc_id)

# Load the ranked matching results from a JSON file (presumably the TF-IDF run,
# judging by the file name).
# NOTE(review): absolute, machine-specific path -- the cell will not run elsewhere.
with open(r"C:\Users\Azzam\PycharmProjects\PythonProject\Query Matching & Ranking\TfIdfMatching\tfidf_results_enhanced_qoura.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# دوال التقييم
def precision_at_k(retrieved, relevant, k):
    """Return the share of the top-``k`` ranks that hold a relevant document.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``k`` itself (not by the number of ids actually retrieved), or
        ``0.0`` when ``retrieved[:k]`` is empty.
    """
    top_k = retrieved[:k]
    if top_k:
        hit_count = sum(1 for doc_id in top_k if doc_id in relevant)
        return hit_count / k
    return 0.0

def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant documents that appear in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The number of ids in ``retrieved[:k]`` found in ``relevant``, divided
        by ``len(relevant)``, or ``0.0`` when ``relevant`` is empty.
    """
    top_k = retrieved[:k]
    if relevant:
        found = sum(1 for doc_id in top_k if doc_id in relevant)
        return found / len(relevant)
    return 0.0

def average_precision(retrieved, relevant, k):
    """Return average precision over the top ``k`` ranks for one query.

    Precision is accumulated at every rank (1-based) where a relevant document
    occurs, and the sum is normalised by ``min(len(relevant), k)``, i.e. the
    largest number of relevant documents that could fit in the top ``k``.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The average precision as a float. ``0.0`` is returned when ``relevant``
        is empty or when ``k`` is not positive; the latter previously raised
        ``ZeroDivisionError`` (``k == 0``) or divided by a negative number.
    """
    # Guard both degenerate denominators up front so the division is always safe.
    if not relevant or k <= 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc_id in enumerate(retrieved[:k], start=1):
        if doc_id in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def dcg(retrieved, relevant, k):
    """Return binary-gain discounted cumulative gain over the top ``k`` ranks.

    Each of the first ``min(len(retrieved), k)`` positions contributes a gain
    of 1 if its document id is in ``relevant`` (else 0), divided by
    ``log2(position + 2)`` with a 0-based position.

    Args:
        retrieved: Ranked, indexable sequence of document ids, best first.
        relevant: Container of relevant document ids (tested with ``in``).
        k: Rank cutoff.

    Returns:
        The accumulated gain; the integer ``0`` when no position is scored.
    """
    depth = min(len(retrieved), k)
    total = 0
    for position in range(depth):
        gain = 1 if retrieved[position] in relevant else 0
        total = total + gain / np.log2(position + 2)
    return total

def idcg(relevant, k):
    """Return the ideal binary-gain DCG for a query at cutoff ``k``.

    This is the DCG of a ranking whose first ``min(len(relevant), k)``
    positions are all relevant: the sum of ``1 / log2(position + 2)`` over
    those 0-based positions.

    Args:
        relevant: Sized container of relevant document ids.
        k: Rank cutoff.

    Returns:
        The ideal gain; the integer ``0`` when no position is scored.
    """
    ideal_depth = min(len(relevant), k)
    total = 0
    for position in range(ideal_depth):
        total = total + 1 / np.log2(position + 2)
    return total

def ndcg_at_k(retrieved, relevant, k):
    """Return normalised DCG at cutoff ``k`` for one query.

    Args:
        retrieved: Ranked sequence of document ids, best first.
        relevant: Container of relevant document ids.
        k: Rank cutoff.

    Returns:
        ``dcg(retrieved, relevant, k)`` divided by ``idcg(relevant, k)``, or
        ``0.0`` when the ideal DCG is not positive.
    """
    actual = dcg(retrieved, relevant, k)
    ideal = idcg(relevant, k)
    if ideal > 0:
        return actual / ideal
    return 0.0

# Evaluate every query in the results file.
k = 10  # rank cutoff passed to all four metrics
precisions, recalls, maps, ndcgs = [], [], [], []  # one per-query score is appended to each list

for qid, retrieved_docs in tqdm(results.items(), desc="📊 تقييم الاستعلامات"):
    # Each entry is unpacked as a 2-item pair and only the first item is kept;
    # presumably the pair is (doc_id, score) -- TODO confirm against the matcher output.
    retrieved_doc_ids = [doc_id for doc_id, _ in retrieved_docs]
    # qrels is a defaultdict(set), so a qid with no judgments yields an empty set
    # (and is inserted into qrels as a side effect); such a query scores 0.0 on every metric.
    relevant_doc_ids = qrels[qid]

    precisions.append(precision_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    recalls.append(recall_at_k(retrieved_doc_ids, relevant_doc_ids, k))
    maps.append(average_precision(retrieved_doc_ids, relevant_doc_ids, k))
    ndcgs.append(ndcg_at_k(retrieved_doc_ids, relevant_doc_ids, k))

# Final averages: unweighted mean over queries, rounded to 4 decimal places.
# The "@10" in the labels is a literal, so it will not follow k if k is changed.
evaluation_results = {
    "Precision@10": round(np.mean(precisions), 4),
    "Recall@10": round(np.mean(recalls), 4),
    "MAP@10": round(np.mean(maps), 4),
    "NDCG@10": round(np.mean(ndcgs), 4),
}

print("📈 نتائج التقييم:")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")


📊 تقييم الاستعلامات: 100%|██████████| 100/100 [00:00<00:00, 33341.05it/s]

📈 نتائج التقييم:
Precision@10: 0.027
Recall@10: 0.1826
MAP@10: 0.1186
NDCG@10: 0.1389



