In [1]:
import functools
import itertools
import logging
import math
import os
import pickle
import sys
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import seaborn as sns
import yaml

# Automatically reload all imported modules before each cell execution so
# that edits to source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook, at high ("retina") resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Configure the seaborn theme in a single call. `sns.set()` resets the plotting
# context to its default ("notebook"), so calling `sns.set_context("poster")`
# *before* `sns.set(...)` had no lasting effect. Passing context, style and the
# rc overrides together makes all three settings apply.
sns.set(
    context="poster",
    style="whitegrid",
    rc={"figure.figsize": (16, 12.0)},
)

import numpy as np
import pandas as pd
import torch.nn.functional as F

# Let pandas show up to 120 rows and 120 columns before truncating output.
pd.options.display.max_rows = 120
pd.options.display.max_columns = 120

# Send log records of level INFO and above to stdout so they appear in cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [2]:
from lda4rec.datasets import Interactions, DataLoader, random_train_test_split
from lda4rec.evaluations import summary
from lda4rec.estimators import MFEst, PopEst, LDA4RecEst, SNMFEst
from lda4rec.utils import process_ids, cmp_ranks

In [3]:
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
from pyro.distributions import constraints
from pyro.infer import SVI, Predictive, Trace_ELBO, TraceEnum_ELBO, config_enumerate

In [4]:
import neptune.new as neptune
# Start a Neptune run in offline mode and keep the returned handle, so the run
# can be stopped explicitly with `neptune_run.stop()` once logging is finished
# instead of lingering until the kernel terminates.
neptune_run = neptune.init(mode="offline")

offline/29fb3826-b4be-4857-8a4b-c48642837488
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


## Experimenting with Matrix Factorization as an adjoint LDA formulation

In [5]:
# Load the "100k" MovieLens variant through the project's DataLoader.
movielens_variant = "100k"
loader = DataLoader()
data = loader.load_movielens(movielens_variant)

In [6]:
# Preprocess `data`, then split it into train and test sets.
# The return values of the first two calls are discarded, so they presumably
# modify `data` in place (trailing-underscore naming) — TODO confirm against
# lda4rec.datasets.
# NOTE(review): if they do mutate in place, re-running this cell applies them to
# the already-processed `data` again.
data.max_user_interactions_(200)  # presumably limits each user to at most 200 interactions — verify
data.implicit_(0.)  # presumably converts ratings to implicit feedback with 0. as cutoff — verify
# NOTE(review): no seed is passed, so the split may differ between runs — confirm
# whether random_train_test_split accepts one and pin it for reproducibility.
train, test = random_train_test_split(data)

In [7]:
# Build the matrix-factorization estimator from its hyperparameters and fit it
# on the training split. The value returned by `fit` is the cell's last
# expression and is therefore displayed as the cell output.
mf_hyperparams = dict(embedding_dim=4, n_iter=20)
mf_est = MFEst(**mf_hyperparams)
mf_est.fit(train)

INFO:lda4rec.estimators:Epoch     0: loss 0.40992174502965567
INFO:lda4rec.estimators:Epoch     1: loss 0.23019891880654
INFO:lda4rec.estimators:Epoch     2: loss 0.144037419517298
INFO:lda4rec.estimators:Epoch     3: loss 0.11529515619213516
INFO:lda4rec.estimators:Epoch     4: loss 0.09874400752218994
INFO:lda4rec.estimators:Epoch     5: loss 0.09072691570665385
INFO:lda4rec.estimators:Epoch     6: loss 0.08595180626253823
INFO:lda4rec.estimators:Epoch     7: loss 0.07748952490856519
INFO:lda4rec.estimators:Epoch     8: loss 0.07877413183450699
INFO:lda4rec.estimators:Epoch     9: loss 0.07074307178121966
INFO:lda4rec.estimators:Epoch    10: loss 0.07179524769654146
INFO:lda4rec.estimators:Epoch    11: loss 0.07104916256424543
INFO:lda4rec.estimators:Epoch    12: loss 0.06855491740075317
INFO:lda4rec.estimators:Epoch    13: loss 0.06840592015131905
INFO:lda4rec.estimators:Epoch    14: loss 0.06948257626814616
INFO:lda4rec.estimators:Epoch    15: loss 0.06656670535134303
INFO:lda4rec.

0.06474344418258281

### Overall summaries showing equivalence of MF and adjoint LDA formulation

In [8]:
# Baseline evaluation: summarize train/test metrics with `lda_trafo` disabled.
mf_est.lda_trafo = False
summary(mf_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.29459,0.107705
recall,0.055056,0.071533
mrr,0.505092,0.249342


In [12]:
# Enable the LDA transformation on the fitted estimator and recompute the same
# train/test summary; the metrics should match those obtained with
# `lda_trafo = False` if the two formulations are equivalent.
mf_est.lda_trafo = True
summary(mf_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.29459,0.107705
recall,0.055056,0.071533
mrr,0.505092,0.249342


### Compare equivalence of the ranking from MF and adjoint LDA formulation for a single user

In [10]:
# Pick one user and collect both quantities to compare: the scores returned by
# `predict` (with `lda_trafo` disabled) and the output of `get_item_probs`.
user_id = 220
mf_est.lda_trafo = False

user_ids_np = np.array([user_id], dtype=int)
user_ids_pt = torch.tensor([user_id])

orig_scores = mf_est.predict(user_ids_np)
item_probs = mf_est.get_item_probs(user_ids_pt)

In [11]:
# Compare the two results with `cmp_ranks` (eps=1e-5) and display the outcome
# as the cell output.
ranks_match = cmp_ranks(orig_scores, item_probs, eps=1e-5)
ranks_match

True