In [1]:
import pandas as pd
import math

In [2]:
# Load the evaluation spreadsheet into a DataFrame. The path is relative, so it
# is resolved against the kernel's current working directory. Later cells read
# the columns "Query ID", "Rank", "Document ID" and "Is_Relevant" from it.
df = pd.read_excel("cranfield_q1_dataset.xlsx")

In [3]:
# Keep only the rows belonging to query "q1" and order them by the "Rank"
# column (ascending, the sort_values default), so row order matches the ranking.
# NOTE(review): presumably Rank 1 is the top result — confirm against the data.
is_q1 = df["Query ID"] == "q1"
df_q1 = df.loc[is_q1].sort_values("Rank")

In [4]:
# Ranked list of document IDs for q1, in the row order of df_q1.
retrieved = df_q1["Document ID"].tolist()

# Set of document IDs whose "Is_Relevant" value is exactly "Yes".
# NOTE(review): this set is built from the same rows as `retrieved`, so every
# relevant ID is also a retrieved ID and recall is 1.0 whenever the set is
# non-empty. Confirm the spreadsheet lists all judged documents for the query.
relevant_rows = df_q1[df_q1["Is_Relevant"] == "Yes"]
relevant = set(relevant_rows["Document ID"].tolist())

In [5]:
def precision(retrieved, relevant):
    """Return the fraction of retrieved documents that are relevant.

    Args:
        retrieved: Sequence of retrieved document IDs.
        relevant: Container of relevant document IDs (membership is tested
            with ``in``).

    Returns:
        ``hits / len(retrieved)``, or ``0`` when ``retrieved`` is empty/falsy.
    """
    if retrieved:
        hits = 0
        for doc_id in retrieved:
            if doc_id in relevant:
                hits += 1
        return hits / len(retrieved)
    # Nothing retrieved: precision is defined as 0 here to avoid dividing by zero.
    return 0

In [6]:
def recall(retrieved, relevant):
    """Return the fraction of relevant documents that were retrieved.

    Each relevant document is counted at most once, so a document ID that
    appears several times in ``retrieved`` cannot push recall above 1.0.

    Args:
        retrieved: Iterable of retrieved document IDs (hashable).
        relevant: Collection of relevant document IDs (hashable).

    Returns:
        ``|relevant ∩ retrieved| / |relevant|``, or ``0`` when ``relevant``
        is empty/falsy.
    """
    if not relevant:
        return 0
    # Work on distinct IDs: counting per retrieved slot would double-count
    # duplicated entries in the ranking.
    relevant_ids = set(relevant)
    found = relevant_ids.intersection(retrieved)
    return len(found) / len(relevant_ids)

In [7]:
def f_measure(p, r):
    """Return the balanced F-measure (harmonic mean) of precision and recall.

    Args:
        p: Precision.
        r: Recall.

    Returns:
        ``2 * p * r / (p + r)``, or ``0`` when ``p + r`` equals zero.
    """
    total = p + r
    if total == 0:
        # Both scores are zero: return 0 instead of dividing by zero.
        return 0
    return 2 * p * r / total

In [8]:
def e_measure(p, r, beta=1):
    """Return the E-measure, i.e. ``1 - F_beta`` for precision and recall.

    Args:
        p: Precision.
        r: Recall.
        beta: Weight of recall relative to precision (default 1).

    Returns:
        ``1 - (1 + beta**2) * p * r / (beta**2 * p + r)``. When the
        denominator is zero the F-score is treated as 0, so ``1`` is returned.
    """
    beta_sq = beta ** 2
    denominator = beta_sq * p + r
    # Guard on the denominator itself: it is zero not only for p == r == 0
    # but also for beta == 0 with r == 0, which previously raised
    # ZeroDivisionError.
    if denominator == 0:
        return 1
    return 1 - ((1 + beta_sq) * p * r) / denominator

In [9]:
def dcg(retrieved, relevant):
    """Return the binary-gain discounted cumulative gain of a ranked list.

    A relevant document at 0-based position ``i`` contributes
    ``1 / log2(i + 2)``. Only the first occurrence of a relevant document is
    credited, so a duplicated ID in the ranking cannot inflate the score
    beyond the ideal DCG.

    Args:
        retrieved: Ranked sequence of document IDs (hashable), best first.
        relevant: Container of relevant document IDs.

    Returns:
        The summed discounted gain, or ``0`` when no relevant document occurs.
    """
    credited = set()
    total = 0
    for position, doc in enumerate(retrieved):
        if doc in relevant and doc not in credited:
            credited.add(doc)
            # position is 0-based, hence +2 so the top rank has discount log2(2) = 1.
            total += 1 / math.log2(position + 2)
    return total

In [10]:
def idcg(relevant, k):
    """Return the ideal binary-gain DCG for a ranking cut off at ``k``.

    The ideal ranking places relevant documents in the first
    ``min(len(relevant), k)`` positions.

    Args:
        relevant: Sized collection of relevant document IDs.
        k: Length of the ranked list being evaluated.

    Returns:
        Sum of ``1 / log2(rank + 1)`` over the ideal 1-based ranks, or ``0``
        when there are no such positions.
    """
    ideal_hits = min(len(relevant), k)
    # rank is 1-based here, so rank + 1 equals the 0-based position + 2.
    discounts = [1 / math.log2(rank + 1) for rank in range(1, ideal_hits + 1)]
    return sum(discounts)

In [11]:
def ndcg(retrieved, relevant):
    """Return DCG normalised by the ideal DCG for the same list length.

    Args:
        retrieved: Ranked sequence of document IDs, best first.
        relevant: Collection of relevant document IDs.

    Returns:
        ``dcg(retrieved, relevant) / idcg(relevant, len(retrieved))``, or
        ``0`` when the ideal DCG is not positive.
    """
    gain = dcg(retrieved, relevant)
    ideal = idcg(relevant, len(retrieved))
    if ideal > 0:
        return gain / ideal
    # No achievable gain (no relevant documents or an empty ranking).
    return 0


In [12]:
# Score the ranked list for q1 with each metric defined above.
p = precision(retrieved, relevant)
r = recall(retrieved, relevant)
f = f_measure(p, r)
e = e_measure(p, r)
n = ndcg(retrieved, relevant)

# Report every score rounded to three decimals, one metric per line.
for label, score in (
    ("Precision:", p),
    ("Recall:", r),
    ("F-measure:", f),
    ("E-measure:", e),
    ("NDCG:", n),
):
    print(label, round(score, 3))

Precision: 0.5
Recall: 1.0
F-measure: 0.667
E-measure: 0.333
NDCG: 0.889
