### Theoretical part
1. hoe werkt markdown en doe ik linebreaks
2.

### Experimental part

##### Step 1

In [20]:
import math, random, itertools, numpy as np

In [31]:
def pair_generator():
    """
    A generator that returns pairs of all possible combinations 
    of [N, R, HR] of length 5, with repeated elements.
    """
    for p in itertools.product(itertools.product(['N', 'R', 'HR'], repeat=5), repeat=2):
        yield p

print('Number of pairs:', len(list(pair_generator())))

print('first 10 pairs:')
pair_gen = pair_generator()
for _ in range(10000):
    next(pair_gen)

def random_sample(length):
    '''
    Returns a sample pair that
    consists of a production and 
    an experiment list, with as possible
    values {N, R, HR}.
    '''
    values = ['N', 'R', 'HR']
    
    p = [values[random.randint(0, 2)] for _ in range(length)]
    e = [values[random.randint(0, 2)] for _ in range(length)]
    
    return p, e


Number of pairs: 59049
first 10 pairs:


In [68]:
# Implement Evaluation Measures

import numpy as np

def binary_precision(p, f=None):
    return np.array([0 if x == 'N' else 1 for x in p]).sum() / len(p)

def ndcg(p, relevance_map):
    dcg = np.array([
        (2 ** relevance_map[x] - 1) / np.log2(r + 1) for r, x in enumerate(p, start=1) 
    ])

    return (dcg / (max(relevance_map.values()) * len(p))).sum() if dcg.max() != 0 else 0

def err(p, relevance_map):
    P = 1
    E = 0
    for r, v in enumerate(p, start=1):
        R = (2 ** relevance_map[v] - 1) / (2 ** max(relevance_map.values()))
        E += P * R / r
        P *= (1-R)
    return E
    
p, e = random_sample(5)

relevance_map = {
    'N': 0,
    'R': 1,
    'HR': 2
}

print('Precision of production algorithm:\t', binary_precision(p))
print('Precision of experimental algorithm:\t', binary_precision(e))

print('EER of production algorithm:\t\t', err(p, relevance_map))
print('EER of experimental algorithm:\t\t', err(e, relevance_map))

print('NDCG of production algorithm:\t\t', ndcg(p, relevance_map))
print('NDCG of experimental algorithm:\t\t', ndcg(e, relevance_map))

Precision of production algorithm:	 0.6
Precision of experimental algorithm:	 0.8
EER of production algorithm:		 0.2333333333333333
EER of experimental algorithm:		 0.83515625
NDCG of production algorithm:		 0.217888248145
NDCG of experimental algorithm:		 0.629148817528


In [71]:
def delta(data, eval_function, relevance_map):
    return [(x, y, eval_function(x, relevance_map) - eval_function(y, relevance_map))
            for x, y in data]

data = [random_sample(5) for _ in range(5000)]

filtered_data = [x for x in delta(data, ndcg, relevance_map) if x[2] > 0]
print('Average of ndcg delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

filtered_data = [x for x in delta(data, err, relevance_map) if x[2] > 0]
print('Average of err delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

filtered_data = [x for x in delta(data, binary_precision, relevance_map) if x[2] > 0]
print('Average of binary_precision delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

Average of ndcg delta for each pair where E outperforms P 0.200319926403
Average of err delta for each pair where E outperforms P 0.2683316783196332
Average of binary_precision delta for each pair where E outperforms P 0.316516197557


In [26]:
def teamdraft_interleaving(docs1, docs2):
    team_a = set()
    team_b = set()
    i = []
    
    while len(set(docs1) - set(i)) > 0 and len(set(docs2) - set(i)):
        if len(team_a) < len(team_b) or (len(team_a) == len(team_b) and random.random() > 0.5):
            k = [x for x in docs1 if not x in i][0]
            i.append(k)
            team_a.add(k)
        else:
            k = [x for x in docs2 if not x in i][0]
            i.append(k)
            team_b.add(k)
    return i, team_a, team_b

docs1, docs2 = ['a', 'b', 'c', 'd'], ['b', 'c', 'd', 'a' ]
p, e = random_sample(5)
p_n = [x for x in range(len(p))]
e_n = [x + len(p) for x in range(len(p))]

i, a, b = teamdraft_interleaving(p_n, e_n)
labels_i = [(p+e)[vis] for vis in i]

print(a, b)
print(p_n, e_n)
print(i)

{0, 1, 2, 3} {5, 6, 7, 8, 9}
[0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
[5, 0, 1, 6, 7, 2, 8, 3, 9]


In [35]:
random.random()

0.576893042361473

In [44]:
def load_yandex(filename):
    sessions = []
    
    with open(filename, 'r') as f:
        data = [line.strip().split('\t') for  line in f.readlines()]
    for i, query_line in enumerate(data):
        # Q indicates start of a session
        if query_line[2] != "Q":
            continue

        url_ids = query_line[5:]

        # Get url_ids of all subsequent lines that are clicks
        clicks = np.zeros(len(url_ids))
        for click_line in data[i+1:]:
            if click_line[2] == "C":
                click_url = click_line[3]
                if click_url not in url_ids:
                    continue
                clicks[url_ids.index(click_url)] = 1
            else:
                break

            sessions.append(clicks)
    return np.vstack(sessions)

print('Loading sessions from Yandex file...')
sessions = load_yandex('YandexRelPredChallenge.txt')
print('Number of sessions:', len(sessions))

Loading sessions from Yandex file...
Number of sessions: 42652


In [47]:
from collections import defaultdict


def calc_rel_prob(label, relevance_map):
    return 2 ** relevance_map[label] / 2 ** max(relevance_map.values())


class PBM(object):
    def __init__(self, ranking_size):
        self.alpha = defaultdict(lambda: 1)
        self.gamma = [random.random() for _ in range(ranking_size)]
        
    def estimate(self, S):
        self.gamma = S.sum(axis=0) / len(S)
    
    def predict(self, ranking, query):
        return [g * self.alpha[(u, query)] for g, u in zip(self.gamma, ranking)]
           
    def simulate():
        return something

class RCM:
    def _init_(self):
        self.rho = 0
        
    def estimate(self, S):
        self.rho = sum(sum(s) for s in S) / sum(len(s) for s in S)
        
    def predict(self, ranking):
        probs = [self.rho] * len(ranking)
        return probs
            
    def simulate(self, ranking):
        probs = self.predict(ranking)
        for i, (page, prob) in enumerate(zip(ranking, probs)):
            if np.random.random() < prob:
                return i
        return None

S = np.array(
    [[1, 0, 0, 0, 0], 
     [0, 1, 0, 0, 0], 
     [0, 0, 1, 0, 0], 
     [0, 0, 0, 1, 0], 
     [0, 0, 0, 0, 1], 
     [1, 0, 0, 0, 0]]
)

pbm_model = PBM(sessions.shape[1])
pbm_model.estimate(sessions)
print(pbm_model.gamma)

rcm_model = RCM()
rcm_model.estimate(sessions)
print(rcm_model.rho)


[ 0.45006096  0.1950905   0.13931351  0.10569258  0.08184845  0.06712464
  0.05936416  0.05296352  0.04808684  0.05071275]
0.125025790115
