### Page 27

In [7]:
def average_precision(relevance_list):
    """Return the average precision of one ranked list of judgments.

    Args:
        relevance_list: Iterable of judgments in rank order. Only items equal
            to ``'T'`` are counted as relevant.

    Returns:
        The mean of precision@rank taken at every rank holding a relevant
        item, or ``0`` when the list contains no relevant item.
    """
    # 1-based ranks at which a relevant item appears.
    hit_ranks = []
    for position, label in enumerate(relevance_list, start=1):
        if label == 'T':
            hit_ranks.append(position)

    if not hit_ranks:
        return 0

    # The n-th relevant item found at rank r yields precision n / r.
    precision_at_hits = [
        found / rank for found, rank in enumerate(hit_ranks, start=1)
    ]
    return sum(precision_at_hits) / len(hit_ranks)

In [8]:
def mean_average_precision(system_results):
    """Return the average precision of a single ranked result list.

    The argument is handed to ``average_precision`` unchanged, so no averaging
    across several queries takes place: the value returned is the AP of the
    one ranking supplied.

    Args:
        system_results: Ranked judgments in the form accepted by
            ``average_precision``.

    Returns:
        Whatever ``average_precision`` returns for ``system_results``.
    """
    single_query_ap = average_precision(system_results)
    return single_query_ap

#### System results

In [9]:
# One judgment character per returned result, in rank order. average_precision
# counts a position as relevant only when the character is 'T'.
system_A, system_B = "FTTTTTFFFF", "TTTFFFFFTT"

#### Compute mAP

In [10]:
# Score both systems with the same metric so the values are directly comparable.
map_A, map_B = (
    mean_average_precision(system_A),
    mean_average_precision(system_B),
)

In [11]:
# Report both scores to four decimal places, one system per line.
print(
    f"mAP for System A: {map_A:.4f}",
    f"mAP for System B: {map_B:.4f}",
    sep="\n",
)

mAP for System A: 0.7100
mAP for System B: 0.7889


#### Determine better system

In [12]:
# Name the system with the strictly higher mAP; if neither score is strictly
# greater than the other, report a tie.
print(
    "System A performs better." if map_A > map_B
    else "System B performs better." if map_A < map_B
    else "Both systems perform equally."
)

System B performs better.


### Page 38

In [13]:
import numpy as np

def dcg_at_k(relevance, k):
    """Compute the discounted cumulative gain of the top ``k`` results.

    The grade at 1-based rank ``i`` contributes ``grade / log2(i + 1)``.

    Args:
        relevance: Sequence of numeric relevance grades in rank order.
        k: Cut-off rank. Only the first ``k`` grades are used; when fewer
            than ``k`` grades are supplied, the ones present are used.

    Returns:
        The DCG as a NumPy float (``0.0`` when no grade falls inside the
        cut-off).

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    gains = np.asarray(relevance[:k], dtype=float)
    # Build one discount per grade actually present. Using ``k`` here would
    # raise a shape error for lists shorter than ``k`` and, for a one-element
    # list, silently broadcast that grade across all ``k`` discounts.
    discounts = np.log2(np.arange(2, len(gains) + 2))
    return np.sum(gains / discounts)

In [14]:
def ndcg_at_k(system_ranking, ideal_ranking, k):
    """Return the DCG of a ranking normalised by the DCG of an ideal ranking.

    Args:
        system_ranking: Relevance grades in the order the system returned them.
        ideal_ranking: Relevance grades used as the normalisation reference.
            It is passed to ``dcg_at_k`` as given; no sorting is done here.
        k: Cut-off rank forwarded to ``dcg_at_k`` for both rankings.

    Returns:
        ``DCG@k / IDCG@k``, or ``0`` when the ideal DCG is not positive.
    """
    achieved = dcg_at_k(system_ranking, k)
    best_possible = dcg_at_k(ideal_ranking, k)
    if best_possible > 0:
        return achieved / best_possible
    # Nothing to normalise against, so avoid dividing by zero.
    return 0

#### Define relevance scores

In [15]:
# Numeric grade assigned to each bug severity label; a higher grade means a
# more relevant result.
relevance_scores = {
    "Critical": 3,
    "Major": 2,
    "Minor": 1,
}

#### System rankings

In [16]:
# Severity label of the bug each system placed at ranks 1-10 (five per row).
system_A = [
    "Minor", "Critical", "Critical", "Critical", "Critical",
    "Minor", "Critical", "Minor", "Minor", "Minor",
]
system_B = [
    "Critical", "Critical", "Critical", "Minor", "Minor",
    "Minor", "Minor", "Critical", "Critical", "Minor",
]

#### Convert rankings to numerical relevance

In [17]:
# Look up the numeric grade of every label, keeping rank order. A label that
# is missing from relevance_scores raises KeyError.
system_A_relevance = list(map(relevance_scores.__getitem__, system_A))
system_B_relevance = list(map(relevance_scores.__getitem__, system_B))

#### Ideal ranking (all Critical bugs first)

In [18]:
# The ideal ranking is the best possible ordering of the bugs that were ranked.
# Pooling both systems' lists would count every grade twice (ten Criticals
# instead of five) and inflate the ideal DCG for any cut-off above 5.
# NOTE(review): this assumes both systems rank the same set of bugs; the
# check below fails loudly if their grades ever stop matching.
assert sorted(system_A_relevance) == sorted(system_B_relevance), (
    "systems carry different relevance grades; build one ideal ranking per system"
)
ideal_ranking = sorted(system_A_relevance, reverse=True)

#### Compute NDCG@5

In [19]:
# Evaluate both systems at the same cut-off (top 5 results) against the same
# ideal ranking.
ndcg_A, ndcg_B = (
    ndcg_at_k(system_A_relevance, ideal_ranking, k=5),
    ndcg_at_k(system_B_relevance, ideal_ranking, k=5),
)

In [20]:
# Report both scores to four decimal places, one system per line.
print(
    f"NDCG@5 for System A: {ndcg_A:.4f}",
    f"NDCG@5 for System B: {ndcg_B:.4f}",
    sep="\n",
)

NDCG@5 for System A: 0.7739
NDCG@5 for System B: 0.8152


#### Determine better system

In [21]:
# Name the system with the strictly higher NDCG@5; if neither score is
# strictly greater than the other, report a tie.
print(
    "System A performs better." if ndcg_A > ndcg_B
    else "System B performs better." if ndcg_A < ndcg_B
    else "Both systems perform equally."
)

System B performs better.
