In [4]:
# https://www.kaggle.com/dietcoke/score-predictions-using-ndcg
# Building on Wendy Kan's ndgc_at_k example
# https://www.kaggle.com/wendykan/airbnb-recruiting-new-user-bookings/ndcg-example
#
# you can use this script for cross-validation

import numpy as np
import pandas as pd

def dcg_at_k(r, k, method=1):
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k=5, method=1):
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max


def score_predictions(preds, truth, n_modes=5):
    """
    preds: pd.DataFrame
      one row for each observation, one column for each prediction.
      Columns are sorted from left to right descending in order of likelihood.
    truth: pd.Series
      one row for each obeservation.
    """
    assert(len(preds)==len(truth))
    r = pd.DataFrame(0, index=preds.index, columns=preds.columns, dtype=np.float64)
    for col in preds.columns:
        r[col] = (preds[col] == truth) * 1.0

    score = pd.Series(r.apply(ndcg_at_k, axis=1, reduce=True), name='score')
    return score
    
    
    
preds = pd.DataFrame([['US','FR'],['FR','US'],['FR','FR']])
truth = pd.Series(['US','US','FR'])
print "predictions: "
print preds
print ""
print ""
print "truth: "
print truth
print ""
print ""
print "scores:"
print score_predictions(preds, truth)

predictions: 
    0   1
0  US  FR
1  FR  US
2  FR  FR


truth: 
0    US
1    US
2    FR
dtype: object


scores:
0    1.00000
1    0.63093
2    1.00000
Name: score, dtype: float64




### Modified NDCG

In [51]:
# https://www.kaggle.com/dietcoke/score-predictions-using-ndcg
# Building on Wendy Kan's ndgc_at_k example
# https://www.kaggle.com/wendykan/airbnb-recruiting-new-user-bookings/ndcg-example
#
# you can use this script for cross-validation

import numpy as np
import pandas as pd

def dcg_at_k(r, k, method=1):
    print "\n[ 1S]"
    print r
    print "\n[ 1E]"
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 2)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k=5, method=1):
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    print "dcg_max: " + str(dcg_max)
    print "dcg_at_k: " + str(dcg_at_k(r, k, method) )
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max


def score_predictions(preds, truth, n_modes=5):
    """
    preds: pd.DataFrame
      one row for each observation, one column for each prediction.
      Columns are sorted from left to right descending in order of likelihood.
    truth: pd.Series
      one row for each obeservation.
    """
    assert(len(preds)==len(truth))
    r = pd.DataFrame(0, index=preds.index, columns=preds.columns, dtype=np.float64)
    
    for col in preds.columns:
        r[col] = (preds[col] == truth) * 1.0

    
    #r2 = {0: [1, 2], 'col2': [3, 4]}
   #df = pd.DataFrame(data=d)

    r2 = pd.DataFrame({0:[0],1:[2],2:[3],3:[1],4:[3]})
    print "\n[S]"
    print r2
    print "\n[E]"
    score = pd.Series(r2.apply(ndcg_at_k, axis=1, reduce=True), name='score')
    return score
    

    
preds = pd.DataFrame([['US','FR','AR','AA'],['FR','US','AR','AA'],['FR','FR','AR','BB']])
truth = pd.Series(['US','US','FR'])
print "predictions: "
print preds

print "\n\ntruth: "
print truth

print "\n\nscores:"
print score_predictions(preds, truth)

predictions: 
    0   1   2   3
0  US  FR  AR  AA
1  FR  US  AR  AA
2  FR  FR  AR  BB


truth: 
0    US
1    US
2    FR
dtype: object


scores:

[S]
   0  1  2  3  4
0  0  2  3  1  3

[E]

[ 1S]
[3, 3, 2, 1, 0]

[ 1E]
dcg_max: 6.323465818787765

[ 1S]
0    0
1    2
2    3
3    1
4    3
Name: 0, dtype: int64

[ 1E]
dcg_at_k: 4.353094486919932

[ 1S]
0    0
1    2
2    3
3    1
4    3
Name: 0, dtype: int64

[ 1E]
0    0.688403
Name: score, dtype: float64




In [69]:
print preds
r = pd.DataFrame(0, index=preds.index, columns=preds.columns, dtype=np.float64)
for col in preds.columns:
    for row in np.arange(preds[col].shape[0]):
        
        print row
    #for raw in preds.row:
        #print raw
    #r(col,1)=preds

#print r

    0   1   2   3
0  US  FR  AR  AA
1  FR  US  AR  AA
2  FR  FR  AR  BB
0
1
2
0
1
2
0
1
2
0
1
2


In [56]:
print r.shape

(3, 4)


In [49]:
4.353094486919932/6.323465818787765

0.6884032604377134