In [1]:
import collections as cl
import math

In [2]:
def compute_avg_precision(ranking, denominator_R=10, denominator_HR=10):
    """Average the precisions observed at relevant and highly-relevant hits.

    The ranking is walked once from the top. Grade 1 marks a relevant
    document and grade 2 a highly-relevant one; any other grade is skipped.
    Each grade keeps its own running hit count, so the precision recorded at
    a grade-1 position counts only grade-1 documents seen so far (and
    likewise for grade 2).

    Args:
        ranking: Sequence of relevance grades, best-ranked document first
        denominator_R: Total relevant documents in collection
        denominator_HR: Total highly-relevant documents in collection
    Returns:
        Tuple (relevant, highly-relevant) holding the summed precisions
        divided by the respective denominator
    """
    rel_hits = 0
    rel_precision_total = 0

    hrel_hits = 0
    hrel_precision_total = 0

    # Positions are 1-based so that hits / position is precision at that rank.
    for position, grade in enumerate(ranking, start=1):
        if grade == 1:
            rel_hits += 1
            rel_precision_total += rel_hits / position
        elif grade == 2:
            hrel_hits += 1
            hrel_precision_total += hrel_hits / position

    avg_rel = rel_precision_total / denominator_R
    avg_hrel = hrel_precision_total / denominator_HR
    return (avg_rel, avg_hrel)

In [3]:
def compute_DCG(ranking, k=5):
    """Compute Discounted Cumulative Gain at rank k.

    Each document at 1-based rank r contributes (2 ** rel - 1) / log2(r + 1).
    When the ranking holds fewer than k documents, only the documents that
    exist are summed instead of raising IndexError.

    Args:
        ranking: Relevance ranking, best-ranked document first
        k: Position on which DCG is computed; by default, 5
    Returns:
        DCG value at rank k (0 when k <= 0 or the ranking is empty)
    """
    # zip stops at whichever ends first: rank k or the end of the ranking.
    return sum(
        (2 ** rel - 1) / math.log2(rank + 1)
        for rank, rel in zip(range(1, k + 1), ranking)
    )

def compute_nDCG(ranking, k=5, maxDCG=1):
    """Compute normalized Discounted Cumulative Gain at rank k.

    The DCG of the ranking is divided by maxDCG. With the default maxDCG of 1
    the result equals the plain DCG.

    Args:
        ranking: Relevance ranking
        k: Position on which DCG is computed; by default, 5
        maxDCG: DCG for best possible ranking, used as the normaliser
    Returns:
        DCG at rank k divided by maxDCG
    """
    raw_dcg = compute_DCG(ranking, k)
    normalized = raw_dcg / maxDCG
    return normalized

In [4]:
def compute_theta(rel, max_rel=2):
    """Compute probability of satisfaction.

    Maps a relevance grade to (2 ** rel - 1) / 2 ** max_rel.

    Args:
        rel: Relevance score of certain document
        max_rel: Maximum possible relevance
    Returns:
        Probability of satisfaction
    """
    return (2 ** rel - 1) / (2 ** max_rel)

def compute_ERR(ranking, max_rel=2):
    """Compute Expected Reciprocal Rank.

    ERR = sum over ranks r of (1 / r) * theta_r * prod_{j < r} (1 - theta_j),
    where theta_r is the satisfaction probability of the document at rank r.
    The user is modelled as scanning from the top and stopping at the first
    document that satisfies them.

    Args:
        ranking: Relevance ranking, best-ranked document first
        max_rel: Maximum possible relevance of documents
    Returns:
        Expected Reciprocal Rank measure (0.0 for an empty ranking)
    """
    err = 0.0
    # Probability that no document above the current rank satisfied the user.
    still_looking = 1.0

    for rank, rel in enumerate(ranking, start=1):
        theta = compute_theta(rel, max_rel)
        # theta enters exactly once per rank, including the first rank.
        err += still_looking * theta / rank
        still_looking *= 1 - theta

    return err

In [5]:
def compute_scores(rank_pairs):
    """Retrieve the 3 evaluation values for pairs of relevance ranking.

    For every pair, average precision, nDCG and ERR are computed for both
    the pair's P ranking and its E ranking. nDCG is normalised by the DCG of
    the ranking (2, 2, 2, 2, 2). Each pair is used as a dictionary key, so it
    must be hashable, and a repeated pair overwrites its earlier entry.

    Args:
        rank_pairs: Relevance ranking pairs exposing rankings as .P and .E
    Returns:
        Dictionary mapping each pair to a dictionary of its AP, nDCG and
        ERR scores
    """
    # Normaliser: DCG of five documents that all carry grade 2.
    ideal_dcg = compute_DCG((2, 2, 2, 2, 2))

    results = {}
    for pair in rank_pairs:
        ap_p_r, ap_p_hr = compute_avg_precision(pair.P)
        ap_e_r, ap_e_hr = compute_avg_precision(pair.E)

        results[pair] = {
            'AP_P_R': ap_p_r,
            'AP_P_HR': ap_p_hr,
            'AP_E_R': ap_e_r,
            'AP_E_HR': ap_e_hr,
            'nDCG_P': compute_nDCG(pair.P, maxDCG=ideal_dcg),
            'nDCG_E': compute_nDCG(pair.E, maxDCG=ideal_dcg),
            'ERR_P': compute_ERR(pair.P),
            'ERR_E': compute_ERR(pair.E),
        }

    return results