In [1]:
###
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
%pylab inline
import itertools


Populating the interactive namespace from numpy and matplotlib


In [5]:
def pretty_print(sample,result,k,method='precision'):
    '''
        Function for pretty printing precision@k.
        args: 
            sample     : the retrieval sample
            result     : the precision@k/ndcg@k of the sample
            k          : the k of evaluation
        Not to be used (necessarily) 
        just for showing some results
    '''
    print('{0},{1},{2},{3},{4}'.format(sample[0],sample[1],sample[2],sample[3],sample[4]))
    if method=='precision':
        print('Precision@%d:\t%0.2f'%(k,result))
    elif method=='ndcg':
        print('ndcg@%d:\t%0.2f'%(k,result))

def precision_at_k(ret, k):
    '''
        Function for calculating the precision@k for a single sample
        args:
            rel    : the retrieval sample
            k      : the k of evaluation
        returns:
            precision@k
    '''
    assert k >= 1 , "Precision@1 not supported"
    ret = np.asarray(ret)[:k] != 0
    if ret.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(ret)

def average_precision(ret):
    '''
        Function for calculating the average precision for a set of samples
        args:
            ret  : the retrieval samples 
        returns:
            AP for all samples.
    '''
    ret = np.asarray(ret) != 0
    out = []
    #print(ret.shape)
    for k in range(1,ret.shape[0]+1):
        out.append(precision_at_k(ret,k))
    
    if not out:
        return 0.
    out = np.array([ '%.2f' % elem for elem in out ])
    out = [float(i) for i in out]
    return np.mean(out)
    


def dcg_at_k(ret,k):
    '''
        Function for calculating the dicsounted cumulative gain for a retrieval sample.
        args:
            ret : the retrieval sample
            k   : the k for evaluation
            
        returns:
            dcg : the discounted cumulative gain for a retrieval sample at rank k.
    '''
    assert k >= 1 , "dcg@1 not supported"
    gain = np.power(2,ret)  - 1
    r = np.arange(k) + 1
    discount = np.log2(r+1)
    dcg = gain/discount
    return np.sum(dcg)

def ndcg_at_k(ret,k):
    '''
        Function for calculating the normalized discounted cumulative gain for a retrieval sample.
        args:
            ret : the retrieval sample
            k   : the k for evaluation
            
        returns:
            dcg : the discounted cumulative gain for a retrieval sample at rank k.
    '''
    ndcg = dcg_at_k(sorted(ret, reverse=True), k)
    return dcg_at_k(ret,k)/ndcg


In [12]:
def calculate_delta_measure(E_val, P_val, metric = None):
    
    if metric == "PA":
        print("Calculating dcg delta measure")
        delta =  E_val - P_val
    elif metric == "dcg":
        print("Calculating dcg delta measure")
        delta = E_val - P_val
        print (delta)
    else:
        print("hi")

##### Create  all possible combinations. 



In [13]:
### 0 = N , 1 = R , 2 = HR 
x = [0,1,2]
combinations = itertools.product(x, repeat=5)
combinations = list(itertools.combinations(combinations, 2))


##### Create all possible P/E pairs.
##### --TODO create also the reverse of them (some are missing) e.g.:
P:{N,N,N,N,R} <br>
E:{N,N,N,N,N} 
##### reverse 
P:{N,N,N,N,N} <br>
E:{N,N,N,N,R}

In [14]:
pe_pairs  = []
for i in range(len(combinations)):
    pe_pairs.append([combinations[i][0],combinations[i][0]])
    pe_pairs.append([combinations[i][0],combinations[i][1]])
    
pe_pairs = np.asarray(pe_pairs)

##### Shuffle and sample a subset .

In [15]:
print(len(pe_pairs))
indices = np.arange(pe_pairs.shape[0])
### THIS HAS TO BE CHANGED TO A UNIFORM
np.random.shuffle(indices)
pe_pairs = pe_pairs[indices]
n = 1000
sample_indices = np.random.choice(len(indices),n,replace=False)
samples = pe_pairs[sample_indices]


58806


In [16]:
p_pairs = pe_pairs[:,0]
e_pairs = pe_pairs[:,1]

#if (E_dcg > P_dcg):
#    calculate_delta_measure(E_dcg,P_dcg,"dcg")

### Initialize all possible score-lists that can be used later ##
P_average_prec = []
E_average_prec = []

P_ndcg_at_k = []
E_ndcg_at_k = []

for i in range(5):
    
    #add all the average precisions
    P_average_prec.append(average_precision(p_pairs[i]))
    E_average_prec.append(average_precision(e_pairs[i]))
    
    #add all the dcg_at_k
    P_ndcg_at_k.append(ndcg_at_k(p_pairs[i],5))
    E_ndcg_at_k.append(ndcg_at_k(e_pairs[i],5))


    
### Prints for debug ###    
print(P_average_prec)
print(E_average_prec)

print(P_ndcg_at_k)
print(E_ndcg_at_k)

calculate_delta_measure(P_average_prec,E_average_prec,"PA")

#print (prec_list)
#print (dcg5_list)
#print (average_list)

[0.45400000000000001, 0.54399999999999993, 1.0, 0.38600000000000001, 0.45400000000000001]
[0.45600000000000007, 0.58600000000000008, 1.0, 0.70399999999999996, 0.45400000000000001]
[0.71226306651459614, 0.65421732582938796, 0.82992507694387108, 0.55915069314098009, 0.65890720193788777]
[1.0, 0.85136554718046331, 0.93550952614283955, 0.90602543553468229, 0.65890720193788777]
Calculating dcg delta measure


TypeError: unsupported operand type(s) for -: 'list' and 'list'