In [1]:
###
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
%pylab inline
import itertools


Populating the interactive namespace from numpy and matplotlib


In [72]:
def pretty_print(sample,result,k,method='precision'):
    '''
        Print a retrieval sample followed by its score at rank k.
        args:
            sample     : the retrieval sample; its first five entries are printed
            result     : the precision@k / ndcg@k value computed for the sample
            k          : the rank cut-off used for the evaluation
            method     : 'precision' or 'ndcg', selects the label of the score
                         line; for any other value only the sample is printed
        Display helper only, meant for showing some results.
    '''
    # First line: the first five entries of the sample, comma separated.
    print('{0},{1},{2},{3},{4}'.format(*sample[:5]))
    # Second line: the score, labelled according to the requested method.
    template = None
    if method=='ndcg':
        template = 'ndcg@%d:\t%0.2f'
    elif method=='precision':
        template = 'Precision@%d:\t%0.2f'
    if template is not None:
        print(template%(k,result))

def precision_at_k(ret, k):
    '''
        Function for calculating the precision@k for a single sample
        args:
            ret    : the retrieval sample, one relevance grade per rank;
                     every non-zero grade counts as relevant
            k      : the k of evaluation (must be >= 1)
        returns:
            precision@k, the fraction of the first k results that are relevant
        raises:
            AssertionError : if k < 1
            ValueError     : if the sample holds fewer than k results
    '''
    assert k >= 1 , "k must be >= 1"
    # Binarise the grades of the first k results: relevant (True) or not (False).
    ret = np.asarray(ret)[:k] != 0
    if ret.size != k:
        raise ValueError('Relevance score length < k')
    # Mean of a boolean array == share of True entries.
    return np.mean(ret)

def average_precision(ret):
    '''
        Mean of precision@k taken over every rank k of every retrieval sample.
        Each precision@k value is rounded to two decimals before averaging.
        args:
            ret  : the retrieval samples, a 2-D collection with one sample
                   per row; non-zero entries count as relevant
        returns:
            the mean of the rounded precision@k values, or 0. when there is
            nothing to average.
    '''
    relevant = np.asarray(ret) != 0
    n_samples = relevant.shape[0]
    # The round-trip through '%.2f' keeps each precision@k at two decimals.
    rounded = [
        float('%.2f' % precision_at_k(relevant[idx], cutoff))
        for idx in range(n_samples)
        for cutoff in range(1, relevant.shape[1] + 1)
    ]
    return np.mean(rounded) if rounded else 0.
    


def dcg_at_k(ret,k):
    '''
        Function for calculating the discounted cumulative gain for a retrieval sample.
        Gain of a result with grade g is 2**g - 1; the result at rank i
        (1-based) is discounted by log2(i + 1).
        args:
            ret : the retrieval sample, one relevance grade per rank
            k   : the k for evaluation (must be >= 1)

        returns:
            dcg : the discounted cumulative gain for a retrieval sample at rank k.
        raises:
            AssertionError : if k < 1
            ValueError     : if the sample holds fewer than k results
    '''
    assert k >= 1 , "k must be >= 1"
    # Only the first k results contribute to dcg@k; without the cut the gains
    # and the k discounts would have different lengths.
    ret = np.asarray(ret)[:k]
    if ret.size != k:
        raise ValueError('Relevance score length < k')
    gain = np.power(2,ret)  - 1
    r = np.arange(k) + 1
    discount = np.log2(r+1)
    dcg = gain/discount
    return np.sum(dcg)

def ndcg_at_k(ret,k):
    '''
        Function for calculating the normalized discounted cumulative gain for a retrieval sample.
        The dcg@k of the sample is divided by the dcg@k of the same grades
        sorted from highest to lowest (the ideal ordering).
        args:
            ret : the retrieval sample
            k   : the k for evaluation

        returns:
            ndcg : the normalized discounted cumulative gain for a retrieval
                   sample at rank k; 0. when the ideal dcg is 0 (no relevant
                   result), instead of the nan produced by 0/0.
    '''
    ideal_dcg = dcg_at_k(sorted(ret, reverse=True), k)
    if ideal_dcg == 0:
        # Nothing relevant to retrieve: avoid the 0/0 division.
        return 0.
    return dcg_at_k(ret,k)/ideal_dcg

##### Create all possible combinations.



In [65]:
### Relevance grades: 0 = N , 1 = R , 2 = HR
x = [0,1,2]
# Every length-5 ranking over the grades, then every unordered pair of
# two different rankings.
combinations = list(
    itertools.combinations(itertools.product(x, repeat=5), 2)
)


##### Create all possible P/E pairs.
##### TODO: also create the reversed pairs (some are currently missing), e.g.:
P:{N,N,N,N,R} <br>
E:{N,N,N,N,N} 
##### reverse 
P:{N,N,N,N,N} <br>
E:{N,N,N,N,R}

In [66]:
# For every pair of rankings emit two rows: the first ranking paired with
# itself, then the first ranking paired with the second one.
pe_pairs = np.asarray([
    [first, partner]
    for first, second in combinations
    for partner in (first, second)
])


##### Shuffle and sample a subset.

In [67]:
print(len(pe_pairs))
indices = np.arange(pe_pairs.shape[0])
### TODO (original note): THIS HAS TO BE CHANGED TO A UNIFORM
# NOTE(review): the draw below is uniform over the rows of pe_pairs, but the
# rows contain repeated identical pairs, so it is presumably not uniform
# over distinct pairs -- confirm what is intended.
n = 1000
# A dedicated, seeded generator makes the drawn subset identical on every
# run of the notebook without touching numpy's global random state.
seed = 0
rng = np.random.RandomState(seed)
sample_indices = rng.choice(len(indices),n,replace=False)
samples = pe_pairs[sample_indices]


58806


In [73]:

# Show precision@5 and ndcg@5 for the first element of the first five
# sampled pairs.
for i in range(5):
    ranking = samples[i][0]
    p5 = precision_at_k(ranking, k=5)
    pretty_print(ranking, p5, 5, method='precision')
    dcg5 = ndcg_at_k(ranking, k=5)
    pretty_print(ranking, dcg5, 5, method='ndcg')

1,0,1,0,0
Precision@5:	0.40
1,0,1,0,0
ndcg@5:	0.92
0,0,1,1,0
Precision@5:	0.40
0,0,1,1,0
ndcg@5:	0.57
0,0,2,1,2
Precision@5:	0.60
0,0,2,1,2
ndcg@5:	0.57
2,1,1,0,1
Precision@5:	0.80
2,1,1,0,1
ndcg@5:	0.99
2,0,2,1,2
Precision@5:	0.80
2,0,2,1,2
ndcg@5:	0.89
