In [2]:
import numpy as np

In [1]:
def dcg_at_k(r, k, method=1):
    """Compute discounted cumulative gain (DCG) over the top ``k`` results.

    Relevance is positive real values.  Binary relevance works as well.

    "An Introduction to Neural Information Retrieval" writes method 1 for DCG,
    so that is the default here.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(dcg_at_k(r, 1))
    3.0
    >>> float(dcg_at_k(r, 1, method=0))
    3.0
    >>> round(float(dcg_at_k(r, 2)), 4)
    4.2619
    >>> round(float(dcg_at_k(r, 2, method=0)), 4)
    5.0
    >>> round(float(dcg_at_k(r, 10, method=0)), 4)
    9.6051
    >>> round(float(dcg_at_k(r, 11, method=0)), 4)
    9.6051

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Discounted cumulative gain; 0.0 when no results fall within ``k``.

    Raises:
        ValueError: If ``method`` is neither 0 nor 1 and there is at least
            one result to score.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float
    # dtype performs the same float64 coercion on both old and new NumPy.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            # Rank 1 is undiscounted; rank i >= 2 is divided by log2(i).
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            # Rank i is divided by log2(i + 1).
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=0):
    """Compute normalized discounted cumulative gain (NDCG) at cutoff ``k``.

    The DCG of ``r`` is divided by the DCG of the same scores sorted from
    most to least relevant, so the ideal ranking is built only from the
    scores present in ``r``.

    Relevance is positive real values.  Binary relevance works as well.

    Note that the default ``method`` here is 0, whereas ``dcg_at_k``
    defaults to 1; ``method`` is always forwarded explicitly.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(ndcg_at_k(r, 1))
    1.0
    >>> r = [2, 1, 2, 0]
    >>> round(float(ndcg_at_k(r, 4)), 4)
    0.9203
    >>> round(float(ndcg_at_k(r, 4, method=1)), 4)
    0.9652
    >>> float(ndcg_at_k([0], 1))
    0.0
    >>> float(ndcg_at_k([1], 2))
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Normalized discounted cumulative gain; 0.0 when the ideal DCG is zero.
    """
    ideal_ranking = sorted(r, reverse=True)
    ideal_dcg = dcg_at_k(ideal_ranking, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    # A zero ideal DCG means nothing relevant is within the cutoff.
    return 0.

In [3]:
import os
import pickle


def load_data(folder_path):
    """Load every ``.pkl`` file directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan (not recursive) for pickle files.

    Returns:
        Dict mapping each file name, minus its final ``.pkl`` extension, to
        the unpickled object. Entries are inserted in sorted file-name order.
    """
    results = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # iteration order of the returned dict reproducible between runs.
    for pickle_file in sorted(os.listdir(folder_path)):
        if not pickle_file.endswith(".pkl"):
            continue
        # splitext removes only the last extension, so "run.v2.pkl" maps to
        # "run.v2" instead of being truncated at the first dot.
        key = os.path.splitext(pickle_file)[0]
        with open(os.path.join(folder_path, pickle_file), 'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code while
            # deserializing; only use this on files from a trusted source.
            results[key] = pickle.load(f)
    return results

def get_labels(label_file_path):
    """Read a relevance-label file into a lookup dictionary.

    Every non-blank line must hold four whitespace-separated fields in the
    form ``topic_id 0 article_id label``; the second field is ignored.

    Args:
        label_file_path: Path of the label file to read.

    Returns:
        Dict mapping ``(topic_id, article_id)`` to ``label``. All three values
        are kept as the strings read from the file.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields.
    """
    topic_article_to_label_dict = {}
    with open(label_file_path, 'r') as file:
        # Iterate lazily instead of materializing every line with readlines().
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no label.
                continue
            if len(fields) != 4:
                raise ValueError(
                    '{}:{}: expected 4 fields "topic_id 0 article_id label", got {}'.format(
                        label_file_path, line_number, len(fields)))
            topic_id, _, article_id, label = fields
            topic_article_to_label_dict[(topic_id, article_id)] = label
    return topic_article_to_label_dict

In [45]:
# Directory of pickle files handed to load_data(); the evaluation cell treats
# each file's key as a topic id.
pickle_folder = r"/cs/labs/tomhope/taltatal/aspire/data/data_old_format/reranker_out"
# Label file handed to get_labels(); lines look like "topic_id 0 article_id label".
label_file = r"/cs/labs/tomhope/taltatal/aspire/data/data_old_format/qrels-treceval-2016.txt"
# NOTE: despite the name, this is passed as `reverse=` to sorted() in the
# evaluation cell, so a truthy value ranks the largest score first.
ascending = 1
# Cutoff used for precision@k.
k = 10
# NDCG cutoffs, expressed as a fraction of each topic's result-list length.
ndcg_percentage_1 = 0.2
ndcg_percentage_2 = 0.5

In [46]:
retrieved_documents = load_data(pickle_folder)
topic_article_to_label_dict = get_labels(label_file)
precisions = []
ndcgs_1 = []
ndcgs_2 = []
# Precision cutoff. Set once, before the loop, so the summary below still has
# it defined when no topics were loaded.
at_percent = k
# Iterate over all topics and compute precision@k plus NDCG at two cutoffs.
for topic_id, retrieved_document_distance_tuples_for_topic in retrieved_documents.items():
    # Rank by the second tuple element; `ascending` is used as `reverse=`,
    # so a truthy value puts the largest score first.
    retrieved_document_distance_tuples_for_topic = sorted(retrieved_document_distance_tuples_for_topic, key=lambda x: x[1], reverse=ascending)
    retrieved_documents_for_topic = [document for document, _ in retrieved_document_distance_tuples_for_topic]
    # Look up the label of every retrieved document; unlabeled ones count as 0.
    labels = [topic_article_to_label_dict.get((topic_id, article_id), 0) for article_id in retrieved_documents_for_topic]
    # Binarize (any label > 0 is relevant) and keep at most the top 1000 results.
    # Both precision and NDCG below are computed on these binary labels.
    labels = [int(int(label) > 0) for label in labels][:1000]
    ndcg_1_k = ndcg_at_k(labels, int(len(labels) * ndcg_percentage_1))
    ndcg_2_k = ndcg_at_k(labels, int(len(labels) * ndcg_percentage_2))
    ndcgs_1.append(ndcg_1_k)
    ndcgs_2.append(ndcg_2_k)
    precision_at_k = sum(labels[:at_percent]) / float(at_percent)
    precisions.append(precision_at_k)
    print('Topic {} has precision at {} of {}'.format(topic_id, at_percent, precision_at_k))
    print('Topic {} has NDCG at {}% of {}'.format(topic_id, ndcg_percentage_1*100, ndcg_1_k))
    print('Topic {} has NDCG at {}% of {}'.format(topic_id, ndcg_percentage_2*100, ndcg_2_k))
    print()
if precisions:
    print('Average precision at {} is {}'.format(at_percent, sum(precisions) / len(precisions)))
    print('Average NDCG at {}% is {}'.format(ndcg_percentage_1*100, sum(ndcgs_1) / len(ndcgs_1)))
    print('Average NDCG at {}% is {}'.format(ndcg_percentage_2*100, sum(ndcgs_2) / len(ndcgs_2)))
else:
    # Without this guard the averages would divide by zero.
    print('No topics were loaded from {}; nothing to average.'.format(pickle_folder))

Topic 11 has precision at 10 of 0.3
Topic 11 has NCDG at 20.0% of 0.42410519005048175
Topic 11 has NCDG at 50.0% of 0.5918633633619576

Topic 5 has precision at 10 of 0.5
Topic 5 has NCDG at 20.0% of 0.5715005697193803
Topic 5 has NCDG at 50.0% of 0.6916672766753194

Topic 9 has precision at 10 of 0.3
Topic 9 has NCDG at 20.0% of 0.4651560739634739
Topic 9 has NCDG at 50.0% of 0.6101087218752753

Topic 8 has precision at 10 of 1.0
Topic 8 has NCDG at 20.0% of 0.7106879424342534
Topic 8 has NCDG at 50.0% of 0.73788050023706

Topic 13 has precision at 10 of 0.3
Topic 13 has NCDG at 20.0% of 0.45123497196800433
Topic 13 has NCDG at 50.0% of 0.5987893854832925

Topic 4 has precision at 10 of 0.1
Topic 4 has NCDG at 20.0% of 0.3340050109956404
Topic 4 has NCDG at 50.0% of 0.3340050109956404

Topic 28 has precision at 10 of 0.7
Topic 28 has NCDG at 20.0% of 0.645379484115667
Topic 28 has NCDG at 50.0% of 0.8298998411213634

Topic 12 has precision at 10 of 0.5
Topic 12 has NCDG at 20.0% of 0.