In [1]:
import pandas as pd
import numpy as np
import os
os.environ['MKL_NUM_THREADS'] = '1'
from collections import Counter
import itertools
import json
import scipy.sparse as sparse
import pickle
import torch
import misc.util as util
from torch import nn

import importlib
from torch.utils.data import DataLoader
from misc.loader import AEDataset
from misc.util import *
from tqdm.auto import tqdm
from eval.rec_eval import *
import neuralsort.pl as pl
from models.loss import neuPrecLoss
from misc.loader import RecDataset
import models
from models.loss import *

In [2]:
with open(os.path.join("data", "parsed", "ml-20m"), 'rb') as f:
    a,b,c, tr_tr, tr_val, te = pickle.load(f)

In [3]:
import pickle
best_model = torch.load("saved_models/ml-20m/dae_prec5")

In [4]:
best_model

{'model': CDAE(
   (out_activation): Identity()
   (V_u): Embedding(136674, 512)
   (H_in): Linear(in_features=13681, out_features=512, bias=True)
   (H_out): Linear(in_features=512, out_features=13681, bias=True)
   (drop): Dropout(p=0.8, inplace=False)
 ),
 'epoch': 15,
 'bs': 256,
 'lr': 0.0005,
 'dim': 512,
 'lamb': 0.01,
 'dropout': 0.8,
 'metric_5': {'precision': 0.06229999999999983,
  'recall': 0.06141761486958444,
  'map': 0.12361833333333398,
  'ndcg': 0.0759649217662484},
 'metric_50': {'precision': 0.026364000000000887,
  'recall': 0.20101820138163762,
  'map': 0.11070810614615256,
  'ndcg': 0.12249443617562299}}

In [5]:
ae_dataset = AEDataset(tr_tr)

In [6]:
model = best_model['model'].eval()

In [7]:
wrapper = models.ae.implicitWrapper(model.eval(), tr_tr, naive_sparse2tensor, vae=False)

In [8]:
print(ranking_metrics_at_k(wrapper, tr_tr, te, K=5, num_threads=4))
print(ranking_metrics_at_k(wrapper, tr_tr, te, K=10, num_threads=4))

bm_state = best_model['model'].state_dict()
del best_model['model']
del model
del wrapper

{'precision': 0.061119999999999584, 'recall': 0.05912560350441865, 'map': 0.12103180555555634, 'ndcg': 0.07396703926062353}
{'precision': 0.04919000000000105, 'recall': 0.08909050447601738, 'map': 0.1237501081821617, 'ndcg': 0.08254372926942283}


In [9]:
from models.lrloss import approxNDCGLoss, rankNet, lambdaLoss

In [10]:
n_users, n_items = tr_tr.shape

In [11]:
models.ae.CDAE(
    n_users=tr_tr.shape[0], 
    n_items=tr_tr.shape[1], 
    n_hidden=best_model['dim'], 
    dropout=best_model['dropout'])


CDAE(
  (out_activation): Identity()
  (V_u): Embedding(136674, 512)
  (H_in): Linear(in_features=13681, out_features=512, bias=True)
  (H_out): Linear(in_features=512, out_features=13681, bias=True)
  (drop): Dropout(p=0.8, inplace=False)
)

In [12]:
import models.mf
import neuralsort.neuralobjs
importlib.reload(models.ae)
loader = DataLoader(ae_dataset, batch_size=128, shuffle=True, num_workers=6)
model = models.ae.CDAE(
    n_users=tr_tr.shape[0], 
    n_items=tr_tr.shape[1], 
    n_hidden=best_model['dim'], 
    dropout=best_model['dropout'])

model.load_state_dict(bm_state)
model.cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=3 * 1e-8, weight_decay=0)
sc = neuralsort.neuralobjs.SC()
lm = -1 
for epoch in (range(1, 10 + 1)):
    tot_loss = 0
    tot_loss = 0
    model = model.train()
    for uid, rowl in tqdm(loader):
        row = rowl.float().cuda()
        uid = uid.cuda() 
        scores = model.forward(uid, row)
        loss  = neuPrecLoss(sc, scores, row, topk=500, k=5, tau=15.0, use_top=True).mean()
        loss.backward()
        tot_loss += loss.detach().cpu().numpy()
        optimizer.step()
    model = model.eval()
    if (epoch % 1 == 0):
        wrapper = models.ae.implicitWrapper(model, tr_tr, naive_sparse2tensor, vae=False)
        mm = (ranking_metrics_at_k(wrapper, tr_tr, tr_val, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=10, num_threads=4))
        if mm['precision'] < lm:
            break
        lm =  mm['precision']
        

HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.06119999999999959, 'recall': 0.05895558172570692, 'map': 0.12136569444444528, 'ndcg': 0.07397170860203582}
{'precision': 0.049230000000001044, 'recall': 0.08918713555073533, 'map': 0.12395267734315946, 'ndcg': 0.0825906562233758}


HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.061279999999999564, 'recall': 0.05899457516643508, 'map': 0.1219204166666675, 'ndcg': 0.07418762778214852}
{'precision': 0.04932000000000102, 'recall': 0.08930279287960656, 'map': 0.12443748488284191, 'ndcg': 0.08278865053611699}


HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.06131999999999956, 'recall': 0.05910062375182673, 'map': 0.12211833333333416, 'ndcg': 0.07437914616828817}
{'precision': 0.04928000000000101, 'recall': 0.08925146193663436, 'map': 0.12477083371126205, 'ndcg': 0.08290299791059658}


HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.061539999999999574, 'recall': 0.05936846846801317, 'map': 0.12281347222222296, 'ndcg': 0.07470890933579091}
{'precision': 0.04926000000000098, 'recall': 0.0891806272191957, 'map': 0.1254176643990927, 'ndcg': 0.08303553791724745}


HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.061619999999999585, 'recall': 0.05940470750675221, 'map': 0.12375125000000065, 'ndcg': 0.07495674845985403}
{'precision': 0.04934000000000099, 'recall': 0.08932744535354674, 'map': 0.12592447623771685, 'ndcg': 0.08328645728850252}


HBox(children=(FloatProgress(value=0.0, max=1068.0), HTML(value='')))


{'precision': 0.0618199999999996, 'recall': 0.05946012212228833, 'map': 0.12416986111111185, 'ndcg': 0.07515516166420033}
{'precision': 0.049430000000001015, 'recall': 0.08957197363285324, 'map': 0.12622264687263762, 'ndcg': 0.08345982130972997}


In [13]:
import models.mf
import neuralsort.neuralobjs
importlib.reload(models.ae)
loader = DataLoader(ae_dataset, batch_size=128, shuffle=True, num_workers=6)
model = models.ae.MultiVAE(best_model['dim'] + [n_items], dropout=best_model['dropout'])
model.load_state_dict(bm_state)
model.cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=1 * 1e-6, weight_decay=0)
sc = neuralsort.neuralobjs.SC()
lm = -1 
for epoch in (range(1, 10 + 1)):
    tot_loss = 0
    tot_loss = 0
    model = model.train()
    for uid, rowl in tqdm(loader):
        row = rowl.float().cuda()
        uid = uid.cuda() 
        scores, _, _ = model.forward(row)
        y_hat, indices = torch.topk(scores, 300)
        y_true = row.gather(1, indices)
        loss  = approxNDCGLoss(y_hat, y_true)
        loss.backward()
        tot_loss += loss.detach().cpu().numpy()
        optimizer.step()
    model = model.eval()
    if (epoch % 1 == 0):
        wrapper = models.ae.implicitWrapper(model, tr_tr, naive_sparse2tensor, vae=True)
        mm = (ranking_metrics_at_k(wrapper, tr_tr, tr_val, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=10, num_threads=4))
        if mm['precision'] < lm:
            break
        lm =  mm['precision']
        

TypeError: unsupported operand type(s) for +: 'int' and 'list'

In [None]:
import models.mf
import neuralsort.neuralobjs
importlib.reload(models.ae)
loader = DataLoader(ae_dataset, batch_size=128, shuffle=True, num_workers=6)
model = models.ae.MultiVAE(best_model['dim'] + [n_items], dropout=best_model['dropout'])
model.load_state_dict(bm_state)
model.cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=1 * 1e-6, weight_decay=0)
sc = neuralsort.neuralobjs.SC()
lm = -1 
for epoch in (range(1, 10 + 1)):
    tot_loss = 0
    tot_loss = 0
    model = model.train()
    for uid, rowl in tqdm(loader):
        row = rowl.float().cuda()
        uid = uid.cuda() 
        scores, _, _ = model.forward(row)
        y_hat, indices = torch.topk(scores, 300)
        y_true = row.gather(1, indices)
        loss  = rankNet(y_hat, y_true)
        loss.backward()
        tot_loss += loss.detach().cpu().numpy()
        optimizer.step()
    model = model.eval()
    if (epoch % 1 == 0):
        wrapper = models.ae.implicitWrapper(model, tr_tr, naive_sparse2tensor, vae=True)
        mm = (ranking_metrics_at_k(wrapper, tr_tr, tr_val, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=10, num_threads=4))
        if mm['precision'] < lm:
            break
        lm =  mm['precision']
        

In [None]:
import models.mf
import neuralsort.neuralobjs
importlib.reload(models.ae)
loader = DataLoader(ae_dataset, batch_size=128, shuffle=True, num_workers=6)
model = models.ae.MultiVAE(best_model['dim'] + [n_items], dropout=best_model['dropout'])
model.load_state_dict(bm_state)
model.cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=1 * 1e-6, weight_decay=0)
sc = neuralsort.neuralobjs.SC()
lm = -1 
for epoch in (range(1, 10 + 1)):
    tot_loss = 0
    tot_loss = 0
    model = model.train()
    for uid, rowl in tqdm(loader):
        row = rowl.float().cuda()
        uid = uid.cuda() 
        scores, _, _ = model.forward(row)
        y_hat, indices = torch.topk(scores, 300)
        y_true = row.gather(1, indices)
        loss  = lambdaLoss(y_hat, y_true)
        loss.backward()
        tot_loss += loss.detach().cpu().numpy()
        optimizer.step()
    model = model.eval()
    if (epoch % 1 == 0):
        wrapper = models.ae.implicitWrapper(model, tr_tr, naive_sparse2tensor, vae=True)
        mm = (ranking_metrics_at_k(wrapper, tr_tr, tr_val, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=5, num_threads=4))
        print(ranking_metrics_at_k(wrapper, tr_tr, te, K=10, num_threads=4))
        if mm['precision'] < lm:
            break
        lm =  mm['precision']
        