In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import pickle
from collections import defaultdict

from torch.utils.data import Dataset, DataLoader
from Mmetrics import *

import LTR
import datautil
import permutationgraph
import DTR
import EEL
import PPG
import PL

def _fit_and_score_ltr(ds, year):
    """Train an MSE LTR model on `ds`, predict its test split and print NDCG@10.

    The 2019 and 2020 pipelines are identical except for the dataset and the
    label in the printed message, so both go through this one helper.

    Parameters
    ----------
    ds : dataset object as returned by `datautil.load_data`. The attributes read
        here are `trfm` (its second dimension sets the network input width),
        `tefm`, `tedlr` and `telv`.
    year : value interpolated into the printed message only.

    Returns
    -------
    (model, y_pred) : the fitted `LTR.MSE_model` and the output of
        `model.predict(ds.tefm, ds.tedlr)`.
    """
    model = LTR.MSE_model(layers=[ds.trfm.shape[1], 256, 256, 1], lr=0.001,
                          optimizer=torch.optim.Adam, dropout=0.1)
    model.fit(ds, epochs=10, batch_size=100, verbose=False)
    y_pred = model.predict(ds.tefm, ds.tedlr)
    # np.diff(ds.tedlr) turns the offsets array into per-list lengths for LTRMetrics.
    ndcg_at_10 = LTRMetrics(ds.telv, np.diff(ds.tedlr), y_pred).NDCG(10)
    print(f'LTR performance ndcg@10 for {year}:', ndcg_at_10)
    return model, y_pred


ds2020, _ = datautil.load_data(2020, verbose=True)
ds2019, _ = datautil.load_data(2019, verbose=True)

# `ltrmodel` is rebound by the second call, so it ends up holding the 2019 model
# (same as before the refactor); the predictions of both years are kept.
ltrmodel, y_pred2020 = _fit_and_score_ltr(ds2020, 2020)
ltrmodel, y_pred2019 = _fit_and_score_ltr(ds2019, 2019)

In [None]:
from tqdm.notebook import trange

def learn_one_PPG(qid, verbose, y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt):
    """Fit a `PPG.Learner` for one query and return whatever its `fit` returns.

    Parameters
    ----------
    qid : index into `dlr`; the query's documents are `dlr[qid]:dlr[qid + 1]`.
    verbose : forwarded to `learner.fit(..., verbose=verbose)`.
    y_pred : per-document predicted scores (indexed with the `dlr` offsets).
    g : per-document array sliced the same way as `y_pred` and handed to
        `EEL.copy_sessions` / `DTR.DTR` (presumably group labels -- confirm).
    dlr : offsets array delimiting each query's documents.
    epochs, lr : forwarded positionally to `learner.fit`.
    exposure : forwarded to `DTR.DTR`.
    samples_cnt : forwarded to `PPG.Learner`.
    sessions_cnt : forwarded to `EEL.copy_sessions` as `sessions`.
    """
    start, stop = dlr[qid:qid+2]
    query_scores = y_pred[start:stop]
    # Initial ranking: document indices ordered by descending predicted score.
    initial_ranking = query_scores.argsort()[::-1]

    y_rep, g_rep, ranking_rep, dlr_rep = EEL.copy_sessions(
        y=query_scores,
        g=g[start:stop],
        sorted_docs=initial_ranking,
        sessions=sessions_cnt,
    )

    objective = DTR.DTR(y_pred=y_rep, g=g_rep, dlr=dlr_rep, exposure=exposure)

    ppg_learner = PPG.Learner(
        PPG_mat=None,
        samples_cnt=samples_cnt,
        objective_ins=objective,
        sorted_docs=ranking_rep,
        dlr=dlr_rep,
        # One distinct id per copied document. An earlier variant passed `g_rep`
        # (the copied `g` array) as `intra` instead.
        intra=np.arange(g_rep.shape[0]),
        # Each document is tagged with the start offset of the list it belongs to.
        inter=np.repeat(dlr_rep[:-1], np.diff(dlr_rep)),
    )
    return ppg_learner.fit(epochs, lr, verbose=verbose)


def learn_all_PPG(y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt):
    """Run `learn_one_PPG` for every query delimited by `dlr`.

    All arguments are passed through unchanged to `learn_one_PPG`, which is
    always called with `verbose=0`.

    Returns
    -------
    list with one entry per query (`dlr.shape[0] - 1` entries, in query order),
    each being the value returned by `learn_one_PPG` for that query.
    """
    query_count = dlr.shape[0] - 1
    # `range` can be swapped for tqdm's `trange(query_count, leave=False)` to get a progress bar.
    return [
        learn_one_PPG(qid, 0, y_pred, g, dlr, epochs, lr, exposure, samples_cnt, sessions_cnt)
        for qid in range(query_count)
    ]

In [None]:

def _rank_exposure(dlr):
    """Build the exposure vector 1/log2(i + 2) for i = 1 .. (longest list length + 1).

    `dlr` is an offsets array, so `np.diff(dlr).max()` is the length of the
    longest list it delimits.
    NOTE(review): because i starts at 1 the first weight is 1/log2(3), not 1 --
    confirm this offset is intended.
    """
    positions = range(1, np.diff(dlr).max() + 2)
    return np.array([1. / np.log2(2 + i) for i in positions])


exposure2020 = _rank_exposure(ds2020.tedlr)
exposure2019 = _rank_exposure(ds2019.tedlr)

# Verbose single-query run (query index 91 of the 2020 data); the value returned
# by `learn_one_PPG` is this cell's displayed output.
learn_one_PPG(qid=91, verbose=2, y_pred=y_pred2020, g=ds2020.teg, dlr=ds2020.tedlr,
              epochs=10, lr=0.1, exposure=exposure2020,
              samples_cnt=4, sessions_cnt=4)

In [None]:
# Run the per-query PPG fit over every query delimited by `ds2020.tedlr`;
# the resulting list (one entry per query) is this cell's displayed output.
learn_all_PPG(
    y_pred=y_pred2020,
    g=ds2020.teg,
    dlr=ds2020.tedlr,
    epochs=10,
    lr=0.1,
    exposure=exposure2020,
    samples_cnt=4,
    sessions_cnt=4,
)