### Theoretical part
1. hoe werkt markdown en doe ik linebreaks
2.

### Experimental part

##### Step 1

In [2]:
import math, random, itertools, numpy as np

In [3]:
def pair_generator():
    """
    A generator that returns pairs of all possible combinations 
    of [N, R, HR] of length 5, with repeated elements.
    """
    for p in itertools.product(itertools.product(['N', 'R', 'HR'], repeat=5), repeat=2):
        yield p

print('Number of pairs:', len(list(pair_generator())))

print('first 10 pairs:')
pair_gen = pair_generator()
for _ in range(10000):
    next(pair_gen)

def random_sample(length):
    '''
    Returns a sample pair that
    consists of a production and 
    an experiment list, with as possible
    values {N, R, HR}.
    '''
    values = ['N', 'R', 'HR']
    
    p = [values[random.randint(0, 2)] for _ in range(length)]
    e = [values[random.randint(0, 2)] for _ in range(length)]
    
    return p, e


Number of pairs: 59049
first 10 pairs:


In [4]:
# Implement Evaluation Measures

import numpy as np

def binary_precision(p, f=None):
    return np.array([0 if x == 'N' else 1 for x in p]).sum() / len(p)

def ndcg(p, relevance_map):
    dcg = np.array([
        (2 ** relevance_map[x] - 1) / np.log2(r + 1) for r, x in enumerate(p, start=1) 
    ])

    return (dcg / (max(relevance_map.values()) * len(p))).sum() if dcg.max() != 0 else 0

def err(p, relevance_map):
    P = 1
    E = 0
    for r, v in enumerate(p, start=1):
        R = (2 ** relevance_map[v] - 1) / (2 ** max(relevance_map.values()))
        E += P * R / r
        P *= (1-R)
    return E
    
p, e = random_sample(5)

relevance_map = {
    'N': 0,
    'R': 1,
    'HR': 2
}

print('Precision of production algorithm:\t', binary_precision(p))
print('Precision of experimental algorithm:\t', binary_precision(e))

print('EER of production algorithm:\t\t', err(p, relevance_map))
print('EER of experimental algorithm:\t\t', err(e, relevance_map))

print('NDCG of production algorithm:\t\t', ndcg(p, relevance_map))
print('NDCG of experimental algorithm:\t\t', ndcg(e, relevance_map))

Precision of production algorithm:	 0.8
Precision of experimental algorithm:	 0.8
EER of production algorithm:		 0.5734375
EER of experimental algorithm:		 0.58515625
NDCG of production algorithm:		 0.534537735664
NDCG of experimental algorithm:		 0.555334768242


In [5]:
def delta(data, eval_function, relevance_map):
    return [(x, y, eval_function(x, relevance_map) - eval_function(y, relevance_map))
            for x, y in data]

data = [random_sample(5) for _ in range(5000)]

filtered_data = [x for x in delta(data, ndcg, relevance_map) if x[2] > 0]
print('Average of ndcg delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

filtered_data = [x for x in delta(data, err, relevance_map) if x[2] > 0]
print('Average of err delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

filtered_data = [x for x in delta(data, binary_precision, relevance_map) if x[2] > 0]
print('Average of binary_precision delta for each pair where E outperforms P', 
      sum([x[2] for x in filtered_data]) / len(filtered_data))

Average of ndcg delta for each pair where E outperforms P 0.197621474183
Average of err delta for each pair where E outperforms P 0.26262055371067866
Average of binary_precision delta for each pair where E outperforms P 0.306401291017


In [6]:
def teamdraft_interleaving(docs1, docs2):
    team_a = set()
    team_b = set()
    i = []
    
    while len(set(docs1) - set(i)) > 0 and len(set(docs2) - set(i)):
        if len(team_a) < len(team_b) or (len(team_a) == len(team_b) and random.random() > 0.5):
            k = [x for x in docs1 if not x in i][0]
            i.append(k)
            team_a.add(k)
        else:
            k = [x for x in docs2 if not x in i][0]
            i.append(k)
            team_b.add(k)
    return i, team_a, team_b

docs1, docs2 = ['a', 'b', 'c', 'd'], ['b', 'c', 'd', 'a' ]
p, e = random_sample(5)
p_n = [x for x in range(len(p))]
e_n = [x + len(p) for x in range(len(p))]

i, a, b = teamdraft_interleaving(p_n, e_n)
labels_i = [(p+e)[vis] for vis in i]

print(a, b)
print(p_n, e_n)
print(i)

{0, 1, 2, 3, 4} {8, 5, 6, 7}
[0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
[0, 5, 6, 1, 7, 2, 3, 8, 4]


In [7]:
def balanced_interleaving(A, B):
    I = []
    pointers = []
    k_a = 0
    k_b = 0
    first = random.randint(0, 1) # 0 or 1
    
    
    while((k_a < len(A)) and (k_b < len(B))): # while A or B still have unseen items
        if k_a < k_b or k_a == k_b and first:
            if A[k_a] not in I:
                I.append(A[k_a])
                pointers.append('A')
            k_a += 1 # always increase index if trying to add
        else:
            if B[k_b] not in I:
                I.append(B[k_b])
                pointers.append('B')
            k_b += 1 # always increase index if trying to add
            
    return np.array(I), pointers
        
def determine_winner(A, B, I, sessions):
    score_A = 0
    score_B = 0
    for session in sessions:
        print("S:", session)
        clicks = np.argwhere(session == 1)
        if clicks.any():
            last_item = I[clicks[-1][0]]
            playfield = priority_index(last_item, A, B) 
            print("No items will contribute to the score after index", playfield)
        else:
            playfield = -1 # no clicks mean A and B are reduced to empty lists
        A_reduced = A[:playfield+1]
        B_reduced = B[:playfield+1]
        print("A_reduced:", A_reduced)
        print("B_reduced:", B_reduced)
        print("I:", I)

        for index, clicked in enumerate(session):
            if clicked:
                if index in A_reduced:
                    score_A += 1
                if index in B_reduced:
                    score_B += 1
            
    return score_A, score_B

# Determine the line above which the items 
# of each set count towards the to be calculated score    
def priority_index(item, A, B):
    for index, _ in enumerate(A):
        if item == A[index] or item == B[index]:
            return index


A = np.array(random.sample(range(10), 10))
B = np.array(random.sample(range(10), 10))
I, _ = balanced_interleaving(A, B)
sessions = np.random.randint(2, size=(2, 10))
print("A:", A)
print("B:", B)
print("I:", I)
score_A, score_B = determine_winner(A, B, I, sessions)
print("Score of A:", score_A)
print("Score of B:", score_B)

A: [5 9 1 4 7 0 3 6 8 2]
B: [5 1 4 8 0 6 3 9 2 7]
I: [5 1 9 4 8 0 7 6 3 2]
S: [1 1 1 1 0 1 0 1 1 1]
No items will contribute to the score after index 8
A_reduced: [5 9 1 4 7 0 3 6 8]
B_reduced: [5 1 4 8 0 6 3 9 2]
I: [5 1 9 4 8 0 7 6 3 2]
S: [1 1 1 0 0 1 0 0 1 1]
No items will contribute to the score after index 8
A_reduced: [5 9 1 4 7 0 3 6 8]
B_reduced: [5 1 4 8 0 6 3 9 2]
I: [5 1 9 4 8 0 7 6 3 2]
Score of A: 12
Score of B: 13


In [35]:
random.random()

0.576893042361473

In [8]:
def load_yandex(filename):
    sessions = []
    
    with open(filename, 'r') as f:
        data = [line.strip().split('\t') for  line in f.readlines()]
    for i, query_line in enumerate(data):
        # Q indicates start of a session
        if query_line[2] != "Q":
            continue

        url_ids = query_line[5:]

        # Get url_ids of all subsequent lines that are clicks
        clicks = np.zeros(len(url_ids))
        for click_line in data[i+1:]:
            if click_line[2] == "C":
                click_url = click_line[3]
                if click_url not in url_ids:
                    continue
                clicks[url_ids.index(click_url)] = 1
            else:
                break

            sessions.append(clicks)
    return np.vstack(sessions)

print('Loading sessions from Yandex file...')
sessions = load_yandex('YandexRelPredChallenge.txt')
print('Number of sessions:', len(sessions))

Loading sessions from Yandex file...
Number of sessions: 56638


In [10]:
from collections import defaultdict


def calc_rel_prob(label):
    return 2 ** relevance_map[label] / 2 ** max(relevance_map.values())


class PBM(object):
    def __init__(self, ranking_size):
        self.alpha = defaultdict(lambda: 1)
        self.gamma = [random.random() for _ in range(ranking_size)]
        
    def estimate(self, S):
        self.gamma = S.sum(axis=0) / len(S)
    
    def predict(self, ranking):
        return [g * calc_rel_prob(u) for g, u in zip(self.gamma, ranking)]
           
    def simulate(self, ranking):
        return np.array([1 if np.random.random() < prob else 0 for prob in self.predict(ranking)])

class RCM:
    def _init_(self):
        self.rho = 0
        
    def estimate(self, S):
        self.rho = sum(sum(s) for s in S) / sum(len(s) for s in S)
        
    def predict(self, ranking):
        probs = [self.rho] * len(ranking)
        return probs
            
    def simulate(self, ranking):
        return np.array([1 if np.random.random() < prob else 0 for prob in self.predict(ranking)])

pbm_model = PBM(sessions.shape[1])
pbm_model.estimate(sessions)
print(pbm_model.gamma)

rcm_model = RCM()
rcm_model.estimate(sessions)
print(rcm_model.rho)

[ 0.67452947  0.44344786  0.37036972  0.30758501  0.25986087  0.22327766
  0.20406794  0.18409901  0.170345    0.17112186]
0.30087044034


In [11]:
def experiment(model):
    A_winners = 0
    B_winners = 0

    def one(pair):
        I, pointers = balanced_interleaving(pair[0], pair[1])
        winner = model.simulate(I)
        return [pointers[i] for i in winner]
    
    for pair in data:
        winners = one(pair)
        for winner in winners:
            A_winners += 1 if winner == 'A' else 0
            B_winners += 1 if winner == 'B' else 0
        
    return B_winners / (B_winners + A_winners)


experiment(rcm_model)

0.4975713210645139