In [1]:
import functools
import itertools
import logging
import math
import os
import pickle
import sys
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import seaborn as sns
import yaml

%load_ext autoreload
%autoreload 2

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

sns.set_context("poster")
sns.set(rc={"figure.figsize": (16, 12.0)})
sns.set_style("whitegrid")

import numpy as np
import pandas as pd
import torch.nn.functional as F

# Let pandas display up to 120 rows and 120 columns before truncating.
pd.options.display.max_rows = 120
pd.options.display.max_columns = 120

# Route log records of level INFO and above to stdout so they appear in the
# cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [2]:
from lda4rec.datasets import Interactions, DataLoader, random_train_test_split
from lda4rec.evaluations import mrr_score, precision_recall_score, summary
from lda4rec.estimators import MFEst, PopEst, LDA4RecEst, SNMFEst
from lda4rec.utils import process_ids, cmp_ranks

In [3]:
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
from pyro.distributions import constraints
from pyro.infer import SVI, Predictive, Trace_ELBO, TraceEnum_ELBO, config_enumerate

In [4]:
import neptune.new as neptune
# Initialise Neptune with mode="offline". The trailing ";" keeps the returned
# object from being echoed as the cell result.
# NOTE(review): the returned object is not stored in a variable — confirm that
# no later cell needs a handle to it.
neptune.init(mode="offline");

offline/fc8f2e78-a0b4-412e-8ce2-b5641e73cc13


## Experimenting with Matrix Factorization as an adjoint LDA formulation

In [5]:
# Load the MovieLens data through the project's DataLoader.
# "100k" presumably selects the MovieLens-100k variant — confirm in lda4rec.datasets.
loader = DataLoader()
data = loader.load_movielens("100k")

In [6]:
# Preprocess the interactions, then split them into a train and a test set.
# NOTE(review): the trailing underscores suggest these methods modify `data`
# in place (PyTorch-style naming) — confirm in lda4rec.datasets. If so,
# re-running this cell operates on already-processed data.
data.max_user_interactions_(200)  # presumably limits interactions per user to 200 — verify
data.implicit_(0.)  # presumably converts ratings to implicit feedback with threshold 0 — verify
# NOTE(review): no seed/random state is passed, so the split (and every metric
# computed below) may differ between runs — check whether
# random_train_test_split accepts one.
train, test = random_train_test_split(data)

In [7]:
# Build the matrix-factorization estimator and fit it on the training split.
# The log output shows one loss value per epoch; n_iter=20 presumably sets the
# number of epochs — verify against MFEst.
mf_est = MFEst(embedding_dim=8, n_iter=20)
# The value echoed after the log is whatever fit() returns
# (it looks like the final loss — confirm).
mf_est.fit(train)

INFO:lda4rec.estimators:Epoch     0: loss 0.38208416180836186
INFO:lda4rec.estimators:Epoch     1: loss 0.15811484210394525
INFO:lda4rec.estimators:Epoch     2: loss 0.11218590064830071
INFO:lda4rec.estimators:Epoch     3: loss 0.0929936156683677
INFO:lda4rec.estimators:Epoch     4: loss 0.08028081010120947
INFO:lda4rec.estimators:Epoch     5: loss 0.07376726289858689
INFO:lda4rec.estimators:Epoch     6: loss 0.06829813554178218
INFO:lda4rec.estimators:Epoch     7: loss 0.06338081282840387
INFO:lda4rec.estimators:Epoch     8: loss 0.06250412320667828
INFO:lda4rec.estimators:Epoch     9: loss 0.06092509962114934
INFO:lda4rec.estimators:Epoch    10: loss 0.05818225801293109
INFO:lda4rec.estimators:Epoch    11: loss 0.056461403536534796
INFO:lda4rec.estimators:Epoch    12: loss 0.05631985012441874
INFO:lda4rec.estimators:Epoch    13: loss 0.05467698960439176
INFO:lda4rec.estimators:Epoch    14: loss 0.05318752631444383
INFO:lda4rec.estimators:Epoch    15: loss 0.052155900376571994
INFO:ld

0.051019589129734685

### Overall summaries showing equivalence of MF and adjoint LDA formulation

In [8]:
# Evaluate with the LDA transformation flag switched off; presumably this
# scores with the plain MF formulation — confirm in MFEst.
mf_est.lda_trafo = False
summary(mf_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.344754,0.121475
recall,0.064034,0.091073
mrr,0.030658,0.040107


In [9]:
# Evaluate the same fitted estimator with the LDA transformation flag switched
# on; the resulting table can be compared with the one produced with the flag off.
mf_est.lda_trafo = True
summary(mf_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.344754,0.121475
recall,0.064034,0.091073
mrr,0.030658,0.040107


### Compare the rankings from the MF and adjoint LDA formulations for a single user

In [10]:
# Query both formulations for a single user so their outputs can be compared.
user_id = 140
# np.int was merely an alias for the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
# dtype=int produces exactly the same array dtype.
orig_scores = mf_est.predict(np.array([user_id], dtype=int))
item_probs = mf_est.get_item_probs(torch.tensor([user_id]))

In [11]:
# Compare the two outputs via the project's cmp_ranks helper; presumably it
# checks that both induce the same item ranking, with eps as a tolerance —
# confirm in lda4rec.utils.
cmp_ranks(orig_scores, item_probs, eps=1e-5)

True