In [33]:
import itertools
from collections import Counter
from random import shuffle
import math

# Relevance labels a document can carry, and the integer grade of each label.
relevances = {'N', 'R', 'HR'}
relevanceScores = {'N': 0, 'R': 1, 'HR': 2}

# Every way to label the 5 positions of a ranking (3**5 = 243 rankings) ...
rankingsOf5 = list(itertools.product(relevances, repeat=5))
# ... and every ordered pair of such rankings, visited in random order.
pairsOfRankingsOf5 = list(itertools.product(rankingsOf5, repeat=2))
shuffle(pairsOfRankingsOf5)
print(len(pairsOfRankingsOf5), "pairs of rankings")

def getContingencies (items, k, relevantDocumentCount):
    """Return the binary contingency counts (TP, FP, TN, FN) at cut-off k.

    The first k entries of items are treated as retrieved, the rest as not
    retrieved. 'R' and 'HR' labels count as relevant, 'N' as non-relevant.

    items: sequence of relevance labels, in rank order.
    k: number of leading items considered retrieved.
    relevantDocumentCount: total number of relevant documents; false
        negatives are this total minus the true positives found in the top k.
    """
    labelsInTopK = Counter(items[:k])
    labelsBelowK = Counter(items[k:])

    truePositives = labelsInTopK['R'] + labelsInTopK['HR']
    return (
        truePositives,
        labelsInTopK['N'],
        labelsBelowK['N'],
        relevantDocumentCount - truePositives,
    )

def getPrecisionAtK (ranking, k):
    """Return the binary precision of the first k items of ranking.

    'R' and 'HR' labels count as true positives and 'N' labels as false
    positives; precision is TP / (TP + FP).

    The counts are taken directly from ranking[:k], so the function does not
    depend on any module-level state (precision needs neither FN nor the
    total number of relevant documents).

    ranking: sequence of relevance labels, in rank order.
    k: rank cut-off.
    Returns 0.0 when the cut-off contains no 'N', 'R' or 'HR' label at all
    (for example k == 0 or an empty ranking) instead of dividing by zero.
    """
    retrievedCounter = Counter(ranking[:k])
    TP = retrievedCounter['R'] + retrievedCounter['HR']
    FP = retrievedCounter['N']

    retrievedCount = TP + FP
    if retrievedCount == 0:
        # nothing judged was retrieved, so there is nothing to be precise about
        return 0.0
    return TP / retrievedCount

def getAveragePrecision (ranking, relevantDocumentCount):
    """Return the (binary) average precision of ranking.

    Precision at rank k is recorded for every rank k that holds a relevant
    label ('R' or 'HR'); the recorded precisions are summed and divided by
    relevantDocumentCount.

    Precision is TP / (TP + FP) with 'R'/'HR' as true positives and 'N' as
    false positives. Both counts are kept as running totals while walking the
    ranking once, so nothing is re-counted per rank and no module-level state
    is needed.

    ranking: sequence of relevance labels, in rank order.
    relevantDocumentCount: total number of relevant documents (the
        denominator of the average).
    Returns 0.0 when relevantDocumentCount is 0 instead of dividing by zero.
    """
    if relevantDocumentCount == 0:
        return 0.0

    precisionsForAp = []
    relevantSeen = 0
    nonRelevantSeen = 0
    for relevance in ranking:
        if relevance == 'R' or relevance == 'HR':
            relevantSeen += 1
            # precision at this rank, saved for calculating AP below
            precisionsForAp.append(relevantSeen / (relevantSeen + nonRelevantSeen))
        elif relevance == 'N':
            nonRelevantSeen += 1

    return sum(precisionsForAp) / relevantDocumentCount

def getDiscountedCumulativeGain (ranking):
    """Return the discounted cumulative gain of ranking.

    Each label at (1-based) rank r contributes (2**grade - 1) / log2(1 + r),
    where grade is the label's entry in relevanceScores.

    ranking: sequence of relevance labels, in rank order.
    """
    total = 0.0
    for rank, label in enumerate(ranking, start=1):
        grade = relevanceScores[label]
        total += ((2 ** grade) - 1) / math.log2(1 + rank)
    return total

def getProbabilityOfRelevance (relevance):
    """Map a relevance label to a probability.

    Computed as (2**grade - 1) / 2**maxGrade, where grade is the label's
    entry in relevanceScores and maxGrade is the largest grade in that table.
    The original author notes the formula is taken from a paper and resembles
    the DCG gain.
    """
    grade = relevanceScores[relevance]
    highestGrade = max(relevanceScores.values())
    return ((2 ** grade) - 1) / (2 ** highestGrade)

def getExpectedReciprocalRank (ranking):
    """Return the expected reciprocal rank (ERR) of ranking.

    For each (1-based) rank r the contribution is

        P(reaching rank r) * P(stopping at rank r) / r

    where P(stopping at rank r) is getProbabilityOfRelevance of the label at
    r, and P(reaching rank r) is the product of (1 - that probability) over
    all earlier ranks.

    The "reached" probability is carried along as a running product, so each
    label's probability is looked up exactly once. The multiplications happen
    in the same order as recomputing the product from scratch for every rank,
    so the result is unchanged.

    ranking: sequence of relevance labels, in rank order.
    """
    err = 0.0
    probabilityOfReachingRankR = 1.0
    for r, relevance in enumerate(ranking, start=1):
        probabilityOfStoppingAtRankR = getProbabilityOfRelevance(relevance)
        probabilityOfSatisfaction = probabilityOfReachingRankR * probabilityOfStoppingAtRankR
        err += probabilityOfSatisfaction / r
        # the next rank is only reached if the search did not stop here
        probabilityOfReachingRankR *= 1 - probabilityOfStoppingAtRankR
    return err

# Driver: score a handful of random (P, E) ranking pairs with average
# precision, nDCG@k and ERR, printing the per-pair results, then report the
# mean average precision over the pairs that were scored.
averagePrecisionsForMapP = []
averagePrecisionsForMapE = []

# rank cut-off used for nDCG and ERR
k = 3

# only try a few for now. The limit is applied to the iteration itself so
# that skipped pairs (no relevant documents) cannot bypass it.
maxPairsToEvaluate = 11

for P, E in itertools.islice(pairsOfRankingsOf5, maxPairsToEvaluate):
    # show the pair
    print ('\nP: ', P, '\nE: ', E, '\n')

    # implement 1 of (binary):
    #   precision at rank k
    #   recall at rank k
    #   average precision   <--
    # The relevant-document total is taken over both rankings together.
    totalCounter = Counter(P) + Counter(E)
    relevantDocumentCount = totalCounter['R'] + totalCounter['HR']

    if relevantDocumentCount == 0:
        # result is irrelevant
        continue

    averagePrecisionP = getAveragePrecision(P, relevantDocumentCount)
    averagePrecisionE = getAveragePrecision(E, relevantDocumentCount)

    # save for calculating MAP later
    averagePrecisionsForMapP.append(averagePrecisionP)
    averagePrecisionsForMapE.append(averagePrecisionE)

    # implement 2 of (multi-graded):
    #   nDCG at rank k
    #   ERR

    # Normalized Discounted Cumulative Gain
    # first we have to determine the perfect ranking. Assuming the P and E results are always
    # different, the perfect ranking would include the results from both rankings.
    mergedRanking = P + E
    perfectRanking = sorted(mergedRanking, key=lambda relevance: relevanceScores[relevance], reverse=True)
    # At least one merged label is relevant here (checked above) and sorting
    # puts it first, so the perfect score is never zero.
    perfectDcgScore = getDiscountedCumulativeGain(perfectRanking[:k])
    dcgAtKP = getDiscountedCumulativeGain(P[:k])
    dcgAtKE = getDiscountedCumulativeGain(E[:k])
    nDcgAtKP = dcgAtKP / perfectDcgScore
    nDcgAtKE = dcgAtKE / perfectDcgScore
    print('nDCG P:', nDcgAtKP, ' E:', nDcgAtKE)

    # Expected Reciprocal Rank
    errP = getExpectedReciprocalRank(P[:k])
    errE = getExpectedReciprocalRank(E[:k])
    print('ERR P:', errP, ' E:', errE)


# we accidentally implemented MAP instead of just AP, but we'll leave it in
if averagePrecisionsForMapP:
    meanAveragePrecisionP = sum(averagePrecisionsForMapP)/len(averagePrecisionsForMapP)
    meanAveragePrecisionE = sum(averagePrecisionsForMapE)/len(averagePrecisionsForMapE)
else:
    # every evaluated pair was skipped, so there is nothing to average
    meanAveragePrecisionP = meanAveragePrecisionE = float('nan')
print('MAP (P) after {} results: {}'.format(len(averagePrecisionsForMapP), meanAveragePrecisionP))
print('MAP (E) after {} results: {}'.format(len(averagePrecisionsForMapE), meanAveragePrecisionE))

59049 pairs of rankings

P:  ('HR', 'N', 'HR', 'HR', 'R') 
E:  ('R', 'HR', 'N', 'R', 'R') 

nDCG P: 0.7039180890341348  E: 0.45250815297345076
probability of HR is 0.75
probability of N is 0.0
probability of N is 0.0
probability of N is 0.0
probability of HR is 0.75
probability of HR is 0.75
probability of R is 0.25
probability of HR is 0.75
probability of HR is 0.75
probability of HR is 0.75
probability of N is 0.0
probability of N is 0.0
ERR P: 0.8125  E: 0.34375

P:  ('N', 'N', 'HR', 'HR', 'HR') 
E:  ('HR', 'N', 'R', 'R', 'HR') 

nDCG P: 0.23463936301137825  E: 0.5474918470265493
probability of N is 0.0
probability of N is 0.0
probability of N is 0.0
probability of N is 0.0
probability of HR is 0.75
probability of HR is 0.75
probability of HR is 0.75
probability of N is 0.0
probability of N is 0.0
probability of N is 0.0
probability of R is 0.25
probability of R is 0.25
ERR P: 0.0625  E: 0.8125

P:  ('R', 'N', 'N', 'HR', 'R') 
E:  ('R', 'R', 'N', 'HR', 'R') 

nDCG P: 0.1854327976961