In [1]:
from typing import List
import json

In [2]:
def precision_at_k(results, k=5):
    """Return the fraction of relevant labels among the first ``k`` results.

    Args:
        results: Ranked sequence of numeric relevance labels for one query
            (the examples in this notebook use 1 and 0).
        k: Cut-off rank; only ``results[:k]`` is scored.

    Returns:
        ``sum(results[:k]) / len(results[:k])``, or ``0.0`` when that slice
        is empty (no results, or ``k == 0``).

    Note:
        The denominator is the number of results actually present in the
        top ``k``, not ``k`` itself, so a query that returned fewer than
        ``k`` results is scored only over the results it has.
    """
    top_k = results[:k]
    # An empty slice has no precision to speak of; report 0.0 rather than
    # raising ZeroDivisionError.
    if len(top_k) == 0:
        return 0.0
    return sum(top_k) / len(top_k)

In [3]:
def mean_precision_at_k(results, k=5):
    """Average ``precision_at_k`` over a collection of queries.

    Args:
        results: Iterable with one ranked label list per query.
        k: Cut-off rank forwarded to ``precision_at_k`` for every query.

    Returns:
        The arithmetic mean of the per-query precision values, or ``0.0``
        when ``results`` contains no queries.
    """
    precisions_at_k = [precision_at_k(result, k) for result in results]
    # No queries means nothing to average; avoid dividing by zero.
    if not precisions_at_k:
        return 0.0
    return sum(precisions_at_k) / len(precisions_at_k)

In [4]:
def average_precision_at_k(results, k=5):
    """Compute average precision over the first ``k`` labels of one query.

    Walks the top ``k`` labels in rank order; every label equal to 1
    contributes the precision observed at its rank (hits so far divided by
    the 1-based rank). The contributions are averaged over the number of
    hits found inside the top ``k``.

    Args:
        results: Ranked sequence of relevance labels; a label counts as a
            hit only when it compares equal to 1.
        k: Cut-off rank; only ``results[:k]`` is inspected.

    Returns:
        The averaged precision, or ``0`` when the top ``k`` holds no hit.
    """
    hits = 0
    precision_total = 0
    for rank, label in enumerate(results[:k], start=1):
        if label != 1:
            continue
        hits += 1
        precision_total += hits / rank
    return precision_total / hits if hits else 0

In [5]:
def mean_average_precision_at_k(results: List[List[int]], k=5) -> float:
    """Average ``average_precision_at_k`` over a collection of queries (MAP@k).

    Args:
        results: One ranked label list per query.
        k: Cut-off rank forwarded to ``average_precision_at_k``.

    Returns:
        The arithmetic mean of the per-query average precision values, or
        ``0.0`` when ``results`` contains no queries.
    """
    ap_at_k = [average_precision_at_k(result, k) for result in results]
    # No queries means nothing to average; avoid dividing by zero.
    if not ap_at_k:
        return 0.0
    return sum(ap_at_k) / len(ap_at_k)

In [6]:
# Sanity check: 2 of the first 3 labels are 1, so this should print 2/3.
test = [1, 0, 1, 0, 1]
print(precision_at_k(test, 3))

0.6666666666666666


In [7]:
# Sanity check: the only hit sits at rank 2, so AP = (1/2) / 1 = 0.5.
test = [0,1]
print(average_precision_at_k(test, 6))

0.5


In [8]:
# Sanity check: the two queries have AP of about 0.7 and 0.5, so MAP should be about 0.6.
test = [[1,0,0,1,1,0], [0,1]]
print(mean_average_precision_at_k(test, 6))

0.6000000000000001


In [9]:
def get_results(file_name):
    """Load the per-query ``answer`` labels from an annotated JSON file.

    The file must contain a JSON array; each element needs a ``"results"``
    list whose entries each carry an ``"answer"`` key.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        A list with one inner list per element of the file, holding the
        ``"answer"`` values of that element's results in file order.
        (The metric helpers in this notebook treat these as relevance
        labels -- presumably 0/1; confirm against the annotation format.)

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an element lacks ``"results"`` or an entry lacks
            ``"answer"``.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(file_name, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [[hit["answer"] for hit in item["results"]] for item in data]


## Mean Precision and MAP Scoring Functions

In [10]:
def get_mean_precision_scores(results):
    """Return mean precision at cut-offs 1, 3, 5 and 10, in that order.

    Args:
        results: One ranked label list per query, as accepted by
            ``mean_precision_at_k``.

    Returns:
        A 4-tuple ``(mp@1, mp@3, mp@5, mp@10)``.
    """
    cutoffs = (1, 3, 5, 10)
    return tuple(mean_precision_at_k(results, cutoff) for cutoff in cutoffs)

In [11]:
def get_map_scores(results):
    """Return mean average precision at cut-offs 1, 3, 5 and 10, in that order.

    Args:
        results: One ranked label list per query, as accepted by
            ``mean_average_precision_at_k``.

    Returns:
        A 4-tuple ``(map@1, map@3, map@5, map@10)``.
    """
    cutoffs = (1, 3, 5, 10)
    return tuple(mean_average_precision_at_k(results, cutoff) for cutoff in cutoffs)


## Scores for Bi-Encoder

In [12]:
# Per-query answer labels read from the annotated bi-encoder results file.
biencoder_results = get_results("biencoder_annotated.json")

In [13]:
# Mean precision at k = 1, 3, 5, 10 (in that order) for the bi-encoder labels.
print(get_mean_precision_scores(biencoder_results))

(0.5278969957081545, 0.3733905579399141, 0.31044349070100197, 0.2938909326248384)


In [14]:
# MAP at k = 1, 3, 5, 10 (in that order) for the bi-encoder labels.
print(get_map_scores(biencoder_results))

(0.5278969957081545, 0.5901287553648066, 0.5889008106819265, 0.588250525532101)


## Scores for Cross-Encoder

In [15]:
# Per-query answer labels read from the annotated cross-encoder results file.
cross_encoder_results = get_results("cross_encoder_annotated.json")

In [16]:
# Mean precision at k = 1, 3, 5, 10 (in that order) for the cross-encoder labels.
print(get_mean_precision_scores(cross_encoder_results))

(0.5708154506437768, 0.3862660944206007, 0.3169527896995713, 0.29447850671026665)


In [17]:
# MAP at k = 1, 3, 5, 10 (in that order) for the cross-encoder labels.
print(get_map_scores(cross_encoder_results))

(0.5708154506437768, 0.6155221745350499, 0.6112124463519313, 0.6105593936430078)


## Scores for BM25 Keyword Search

In [18]:
# Per-query answer labels read from the annotated keyword-search results file.
keyword_results = get_results("keyword_results_annotated.json")

In [19]:
# Mean precision at k = 1, 3, 5, 10 (in that order) for the keyword-search labels.
print(get_mean_precision_scores(keyword_results))

(0.5536480686695279, 0.3447782546494992, 0.2786123032904153, 0.25883745486749793)


In [20]:
# MAP at k = 1, 3, 5, 10 (in that order) for the keyword-search labels.
print(get_map_scores(keyword_results))

(0.5536480686695279, 0.590486409155937, 0.5890915593705291, 0.5827940790049926)


## Scores for Hybrid Search

In [21]:
# Per-query answer labels read from the annotated hybrid-search results file.
hybrid_results = get_results("hybrid_search_results_annotated.json")

In [22]:
# Mean precision at k = 1, 3, 5, 10 (in that order) for the hybrid-search labels.
print(get_mean_precision_scores(hybrid_results))

(0.6952789699570815, 0.675250357653791, 0.6595851216022889, 0.6535969752707949)


In [23]:
# MAP at k = 1, 3, 5, 10 (in that order) for the hybrid-search labels.
print(get_map_scores(hybrid_results))

(0.6952789699570815, 0.7160228898426324, 0.7155460181211253, 0.7152829601082206)
