In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import pickle
from collections import defaultdict

from torch.utils.data import Dataset, DataLoader
from Mmetrics import *

import LTR
import datautil
import permutationgraph
import DTR
import EEL
import PPG
import PL

ds2020, _ = datautil.load_data(2020, verbose=True)
ds2019, _ = datautil.load_data(2019, verbose=True)

ltrmodel = LTR.MSE_model(layers=[ds2020.trfm.shape[1], 256, 256, 1], lr=0.001, optimizer=torch.optim.Adam, dropout=0.1)
ltrmodel.fit(ds2020, epochs=10, batch_size=100, verbose=False)
y_pred2020 = ltrmodel.predict(ds2020.tefm, ds2020.tedlr)
print('LTR performance ndcg@10 for 2020:', LTRMetrics(ds2020.telv,np.diff(ds2020.tedlr),y_pred2020).NDCG(10))

ltrmodel = LTR.MSE_model(layers=[ds2019.trfm.shape[1], 256, 256, 1], lr=0.001, optimizer=torch.optim.Adam, dropout=0.1)
ltrmodel.fit(ds2019, epochs=10, batch_size=100, verbose=False)
y_pred2019 = ltrmodel.predict(ds2019.tefm, ds2019.tedlr)
print('LTR performance ndcg@10 for 2019:', LTRMetrics(ds2019.telv,np.diff(ds2019.tedlr),y_pred2019).NDCG(10))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
train: 4694 docs, 200 queries.
test: 4677 docs, 200 queries.
25 features
un-normalized train: 70.080336425764 <101.65279842486342>
un-normalized test: 75.07174581020573 <156.60887376760795>
normalized train: 5.180490685311132e-07 <0.7804636401064724>
normalized test: 1.3643793263153821e-06 <0.7887188357739844>
train: 2672 docs, 632 queries.
test: 4298 docs, 635 queries.
25 features
un-normalized train: 100.59542148518399 <112.00393006016057>
un-normalized test: 107.18769941395159 <118.13138269196044>
normalized train: 7.412363746142352e-08 <0.7544854415648509>
normalized test: -2.8577670556600825e-08 <0.7641385980060638>
LTR performance ndcg@10 for 2020: 0.41140919370554885
LTR performance ndcg@10 for 2019: 0.8086527690933426


In [13]:
from tqdm.notebook import trange

def learn_one_PPG(qid, verbose, y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt):
    s, e = dlr[qid:qid+2]
    y_pred_s, g_s, sorted_docs_s, dlr_s = \
        EEL.copy_sessions(y=y_pred[s:e], g=g[s:e], sorted_docs=y_pred[s:e].argsort()[::-1], sessions=sessions_cnt)

    objective_ins = DTR.DTR(y_pred = y_pred_s, g = g_s, dlr = dlr_s, exposure=exposure)
    learner = PPG.Learner(  PPG_mat=None, samples_cnt=samples_cnt, 
                                objective_ins=objective_ins, 
                                sorted_docs = sorted_docs_s, 
                                dlr = dlr_s,
                                intra = np.arange(g_s.shape[0]),
#                                 intra = g_s,
                                inter = np.repeat(dlr_s[:-1], np.diff(dlr_s)))
    vals = learner.fit(epochs, lr, verbose=verbose)
    return vals


def learn_all_PPG(y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt):
    sorted_docs = []
    
#     for qid in trange(dlr.shape[0] - 1, leave=False):
    for qid in range(dlr.shape[0] - 1):
        min_b = learn_one_PPG(qid, 0, y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt)
        sorted_docs.append(min_b)
        

    # print(ndcg_dtr(exposure, lv, np.concatenate(y_rerank), dlr, g, query_counts))
    return sorted_docs

In [8]:

exposure2020 = np.array([1./np.log2(2+i) for i in range(1,np.diff(ds2020.tedlr).max()+2)])
exposure2019 = np.array([1./np.log2(2+i) for i in range(1,np.diff(ds2019.tedlr).max()+2)])

learn_one_PPG(91,2, y_pred2020, ds2020.teg, ds2020.tedlr, 10, 0.1, exposure=exposure2020,
        samples_cnt=4, sessions_cnt=4)

[16 14  3  0  2 12  6 10  9  5  8 15 11  7 13  1 17  4 34 32 21 18 20 30
 24 28 27 23 26 33 29 25 31 19 35 22 52 50 39 36 38 48 42 46 45 41 44 51
 47 43 49 37 53 40 70 68 57 54 56 66 60 64 63 59 62 69 65 61 67 55 71 58] inter: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 18 18 18 18 18 18
 18 18 18 18 18 18 18 18 18 18 18 18 36 36 36 36 36 36 36 36 36 36 36 36
 36 36 36 36 36 36 54 54 54 54 54 54 54 54 54 54 54 54 54 54 54 54 54 54] intra: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71]
[ 5  0  9 11  4 14  2 10  8 17  1 13  6 15  3 12  7 16 19 28 23 30 20 34
 22 32 24 33 18 27 25 31 21 29 26 35 36 50 39 46 44 51 40 48 37 53 42 45
 41 49 38 47 43 52 54 64 61 70 58 66 57 67 60 69 55 63 59 68 56 65 62 71] -> [12 16  5 15  2 13  3  8  9  4 14  7  6  1  0 11 10 17 32 26 30 29 21 35
 20 31 24 19 34 23 28 25 18 33

[ 0  2  1  3  4  5  6  7  8  9 10 11 13 12 14 15 16 17 18 19 20 23 21 22
 24 28 25 29 26 27 30 31 32 33 35 34 37 36 39 38 40 41 42 43 44 45 46 47
 50 48 49 51 52 53 54 55 56 57 58 61 63 64 66 67 59 69 60 68 70 65 62 71] -> [ 4 17 14  1 13  7 11 15  8 10  6  5  9 12  2  0  3 16 35 25 31 22 33 28
 26 19 23 29 30 32 27 24 20 18 21 34 40 49 37 43 41 51 48 52 47 44 45 42
 53 39 50 46 38 36 59 67 69 55 62 58 71 61 65 63 66 64 60 56 54 68 70 57]
[ 0  2  1  3  4  5  6  7  8  9 10 11 13 12 14 15 16 17 18 19 20 23 21 22
 24 28 25 29 26 27 30 31 32 33 35 34 37 36 38 39 41 40 42 43 44 45 46 50
 47 51 52 48 49 53 54 55 57 56 58 59 61 63 64 66 67 60 69 68 70 62 65 71] -> [ 4 17 14  1 13  7 11 15  8 10  6  5  9 12  2  0  3 16 35 25 31 22 33 28
 26 19 23 29 30 32 27 24 20 18 21 34 40 49 43 37 51 41 48 52 47 44 45 53
 42 46 38 39 50 36 59 67 55 69 62 66 58 71 61 65 63 60 64 56 54 70 68 57]
[ 0  2  1  3  4  5  6  7  8  9 11 10 12 13 14 15 16 17 18 19 20 23 21 22
 24 25 26 27 28 29 30 31 32 33 35 34 37 3

array([ 4, 14, 17,  1, 13,  7, 11, 15,  8, 10,  6,  5, 12,  9,  2,  0,  3,
       16, 35, 25, 31, 33, 28, 22, 26, 23, 30, 32, 19, 29, 27, 24, 20, 18,
       34, 21, 49, 40, 43, 37, 41, 51, 48, 52, 47, 44, 45, 42, 39, 50, 53,
       46, 38, 36, 59, 67, 69, 55, 62, 66, 60, 58, 70, 71, 61, 68, 65, 63,
       56, 64, 54, 57])

In [14]:
learn_all_PPG(y_pred2020, ds2020.teg, ds2020.tedlr, 10, 0.1, exposure=exposure2020,
        samples_cnt=4, sessions_cnt=4)

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))



[array([ 7,  0,  4,  3, 10,  5,  9,  6,  1,  8,  2, 17, 12, 20, 21, 18, 15,
        19, 14, 11, 16, 13, 22, 31, 29, 26, 30, 28, 32, 27, 23, 25, 24, 37,
        39, 34, 36, 42, 43, 40, 41, 33, 38, 35]),
 array([ 2,  8,  3,  9,  7,  6,  4,  0,  5,  1, 16, 11, 10, 18, 13, 15, 12,
        17, 14, 19, 23, 24, 29, 28, 26, 27, 21, 22, 20, 25, 37, 35, 34, 39,
        38, 33, 36, 32, 30, 31]),
 array([15, 18,  0,  8, 14,  2,  4,  1,  6,  7,  9,  3, 12,  5, 13, 10, 16,
        17, 11, 19, 22, 24, 20, 34, 26, 23, 27, 21, 32, 35, 37, 31, 33, 28,
        30, 36, 29, 25, 50, 56, 40, 38, 53, 46, 42, 54, 41, 52, 39, 43, 44,
        51, 45, 47, 55, 48, 49, 60, 59, 70, 58, 73, 71, 66, 67, 57, 61, 69,
        72, 65, 75, 62, 63, 64, 74, 68]),
 array([ 32,  20,   9,  23,   2,  33,  21,   4,   7,  11,   6,  18,  29,
         14,  17,  16,  10,  25,  39,  13,  27,  12,  34,  38,  30,  19,
          8,   3,  24,   1,  31,  37,  36,  26,  22,  35,   5,  28,   0,
         15,  76,  60,  49,  68,  46,  65,  62,