In [1]:
import functools
import itertools
import logging
import math
import os
import pickle
import sys
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import seaborn as sns
import yaml

%load_ext autoreload
%autoreload 2

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Apply the base seaborn theme first: sns.set() resets the plotting context and
# style to its defaults, so it must run BEFORE set_context()/set_style().
# Previously set_context("poster") ran first and was silently overridden.
sns.set(rc={"figure.figsize": (16, 12.0)})
sns.set_context("poster")
sns.set_style("whitegrid")

import numpy as np
import pandas as pd
import torch.nn.functional as F

# Let pandas display up to 120 rows and 120 columns.
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

# Send INFO-level (and above) log records to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [88]:
from lda4rec.datasets import Interactions, DataLoader, random_train_test_split
from lda4rec.evaluations import mrr_score, precision_recall_score, auc_score, summary
from lda4rec.estimators import BilinearBPREst, PopEst, LDA4RecEst, SPosBilinearBPREst, LDA4RecEst,NNBilinearBPREst

In [5]:
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
from pyro.distributions import constraints
from pyro.infer import SVI, Predictive, Trace_ELBO, TraceEnum_ELBO, config_enumerate

In [6]:
import neptune
# Initialise a dummy neptune session (placeholder project name 'a/b', offline
# backend) to avoid problems with logging.
neptune.init('a/b', backend=neptune.OfflineBackend()) 



<neptune.internal.backends.offline_backend.NoopObject at 0x7fbc6a37e100>

In [9]:
from icecream import ic, install

install()
# configure icecream
def ic_str(obj):
    """Return the text icecream should print for ``obj``.

    Anything exposing a ``shape`` attribute is summarised by that shape alone;
    every other object is rendered with ``str``.
    """
    # Guard clause: objects without a shape get the plain string form.
    if not hasattr(obj, "shape"):
        return str(obj)
    return "Shape {} ".format(obj.shape)

In [10]:
# Route icecream's argument formatting through ic_str.
ic.configureOutput(argToStringFunction=ic_str)

In [103]:
# Load the MovieLens "1m" variant through the project's DataLoader.
loader = DataLoader()
data = loader.load_movielens("1m")

In [104]:
# Limit passed to max_user_interactions_.
# NOTE(review): the trailing underscore suggests `data` is modified in place,
# which would make this cell non-idempotent on re-run — TODO confirm.
max_interactions = 200
data.max_user_interactions_(max_interactions)

In [105]:
# Apply implicit_ with argument 0. and split the result into train/test.
# NOTE(review): implicit_ presumably mutates `data` in place — TODO confirm.
# NOTE(review): no seed/rng is passed to random_train_test_split here, so the
# split (and every metric below) may differ between runs — verify whether the
# function accepts one.
data.implicit_(0.)
train, test = random_train_test_split(data)

In [14]:
# Baseline: fit PopEst with default arguments on the training split.
pop_est = PopEst()
pop_est.fit(train)

In [15]:
# Evaluation summary of the PopEst baseline on the train and test splits.
summary(pop_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.28459,0.107377
recall,0.053188,0.074951
mrr,0.025804,0.037255
auc,0.910356,0.876643


In [41]:
# LDA4Rec estimator with the default model/guide: 4-dimensional embedding,
# 1000 iterations, batch size 128, learning rate 0.01, JIT enabled, alpha=1.
lda_est = LDA4RecEst(embedding_dim=4, n_iter=1000, batch_size=128, learning_rate=0.01, use_jit=True, alpha=1.)

In [42]:
# Fit on the training split.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt
# after epoch 0, so lda_est was not trained to completion there.
lda_est.fit(train)

INFO:lda4rec.estimators:Epoch     0: loss 522180.90625


KeyboardInterrupt: 

In [None]:
# Evaluation summary for lda_est.
# NOTE(review): this cell has no execution count; it needs a completed fit of
# lda_est to produce meaningful numbers.
summary(lda_est, train=train, test=test)

In [79]:
from lda4rec.lda import model_idea, guide_idea

# Same hyperparameters as lda_est, but with the experimental model_idea /
# guide_idea pair from lda4rec.lda passed in as model and guide.
lda_est_new = LDA4RecEst(embedding_dim=4, n_iter=1000, batch_size=128, learning_rate=0.01, use_jit=True, alpha=1., model=model_idea, guide=guide_idea)

In [80]:
# Quick check of the sigmoid value at 2.5.
torch.sigmoid(2.5*torch.ones(1))

tensor([0.9241])

In [81]:
# Fit the model_idea/guide_idea variant on the training split.
lda_est_new.fit(train)

INFO:lda4rec.estimators:Epoch     0: loss 548075.125
INFO:lda4rec.estimators:Epoch   100: loss 438822.8125
INFO:lda4rec.estimators:Epoch   200: loss 424236.8125
INFO:lda4rec.estimators:Epoch   300: loss 476291.5625
INFO:lda4rec.estimators:Epoch   400: loss 460567.46875
INFO:lda4rec.estimators:Epoch   500: loss 425163.96875
INFO:lda4rec.estimators:Epoch   600: loss 438090.84375
INFO:lda4rec.estimators:Epoch   700: loss 421772.5625
INFO:lda4rec.estimators:Epoch   800: loss 387596.15625
INFO:lda4rec.estimators:Epoch   900: loss 396930.3125


406669.5

In [82]:


# Evaluation summary for the model_idea/guide_idea variant.
summary(lda_est_new, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.261148,0.097541
recall,0.04575,0.064545
mrr,0.022431,0.030173
auc,0.917662,0.898437


In [106]:
# Fit SPosBilinearBPREst (8-dimensional embedding, 30 iterations) on the
# training split.
sbi_est = SPosBilinearBPREst(embedding_dim=8, n_iter=30)
sbi_est.fit(train)

INFO:lda4rec.estimators:Epoch     0: loss 0.22934836784101814
INFO:lda4rec.estimators:Epoch     1: loss 0.16148075947875207
INFO:lda4rec.estimators:Epoch     2: loss 0.14513968917769304
INFO:lda4rec.estimators:Epoch     3: loss 0.13503888278788484
INFO:lda4rec.estimators:Epoch     4: loss 0.12911717690859217
INFO:lda4rec.estimators:Epoch     5: loss 0.12470578980097495
INFO:lda4rec.estimators:Epoch     6: loss 0.12148398384163364
INFO:lda4rec.estimators:Epoch     7: loss 0.11977702422682394
INFO:lda4rec.estimators:Epoch     8: loss 0.11753440275427679
INFO:lda4rec.estimators:Epoch     9: loss 0.11672416986268291
INFO:lda4rec.estimators:Epoch    10: loss 0.11525963822024489
INFO:lda4rec.estimators:Epoch    11: loss 0.11473550112144472
INFO:lda4rec.estimators:Epoch    12: loss 0.11319394919855406
INFO:lda4rec.estimators:Epoch    13: loss 0.11241429999997228
INFO:lda4rec.estimators:Epoch    14: loss 0.11233881023682704
INFO:lda4rec.estimators:Epoch    15: loss 0.11168711408630143
INFO:lda

0.10692596986947539

In [107]:
# Evaluation summary for SPosBilinearBPREst.
summary(sbi_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.415414,0.179811
recall,0.067634,0.10294
mrr,0.033373,0.04958
auc,0.921493,0.900206


In [108]:
# Fit NNBilinearBPREst with the same embedding size and iteration count as
# sbi_est, on the same training split.
bi_est = NNBilinearBPREst(embedding_dim=8, n_iter=30)
bi_est.fit(train)


INFO:lda4rec.estimators:Epoch     0: loss 0.22390741210102688
INFO:lda4rec.estimators:Epoch     1: loss 0.16694864440287419
INFO:lda4rec.estimators:Epoch     2: loss 0.1519624969136738
INFO:lda4rec.estimators:Epoch     3: loss 0.14206443593493823
INFO:lda4rec.estimators:Epoch     4: loss 0.13556810206889616
INFO:lda4rec.estimators:Epoch     5: loss 0.13088969506200723
INFO:lda4rec.estimators:Epoch     6: loss 0.1284240857379769
INFO:lda4rec.estimators:Epoch     7: loss 0.12562290326550618
INFO:lda4rec.estimators:Epoch     8: loss 0.12470110534849899
INFO:lda4rec.estimators:Epoch     9: loss 0.1224355610278647
INFO:lda4rec.estimators:Epoch    10: loss 0.12155920772705973
INFO:lda4rec.estimators:Epoch    11: loss 0.12004995593009561
INFO:lda4rec.estimators:Epoch    12: loss 0.11892486659184257
INFO:lda4rec.estimators:Epoch    13: loss 0.11792168664202339
INFO:lda4rec.estimators:Epoch    14: loss 0.11765150825870796
INFO:lda4rec.estimators:Epoch    15: loss 0.11584677993944717
INFO:lda4re

0.1119747965408188

In [109]:
# Evaluation summary for NNBilinearBPREst.
summary(bi_est, train=train, test=test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.273758,0.105018
recall,0.040842,0.056062
mrr,0.022718,0.029292
auc,0.915951,0.892039


In [29]:
# NOTE(review): neither SimplePosBilinearEst nor ml100k_train is imported or
# defined in the cells shown, and the execution count is out of sequence —
# this looks like a leftover from an earlier session and will fail on
# Restart & Run All. TODO: update to the current estimator/data names or remove.
slra_est = SimplePosBilinearEst(embedding_dim=8, n_iter=25)
slra_est.fit(ml100k_train)

INFO:lda4rec.estimators:Epoch 0: loss 0.39755058497995943
INFO:lda4rec.estimators:Epoch 1: loss 0.1931836449012563
INFO:lda4rec.estimators:Epoch 2: loss 0.1379744282244025
INFO:lda4rec.estimators:Epoch 3: loss 0.12481177019307742
INFO:lda4rec.estimators:Epoch 4: loss 0.11623588691692094
INFO:lda4rec.estimators:Epoch 5: loss 0.1061518636708324
INFO:lda4rec.estimators:Epoch 6: loss 0.09778834418267818
INFO:lda4rec.estimators:Epoch 7: loss 0.09306149701210292
INFO:lda4rec.estimators:Epoch 8: loss 0.08847690053485535
INFO:lda4rec.estimators:Epoch 9: loss 0.08227667711876534
INFO:lda4rec.estimators:Epoch 10: loss 0.0780268491522686
INFO:lda4rec.estimators:Epoch 11: loss 0.0744348161828679
INFO:lda4rec.estimators:Epoch 12: loss 0.07145940351325113
INFO:lda4rec.estimators:Epoch 13: loss 0.06839842264716689
INFO:lda4rec.estimators:Epoch 14: loss 0.06768081353826297
INFO:lda4rec.estimators:Epoch 15: loss 0.06565732044243329
INFO:lda4rec.estimators:Epoch 16: loss 0.06112832646514919
INFO:lda4rec

0.05322646721190698

In [30]:
# Evaluation summary for slra_est.
# NOTE(review): ml100k_train / ml100k_test are not defined in the cells shown.
summary(slra_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.331967,0.129508
recall,0.061089,0.092829
mrr,0.029249,0.041842
auc,0.964879,0.903955


In [33]:
# NOTE(review): PosBilinearEst and ml100k_train are not imported or defined in
# the cells shown; this cell depends on stale kernel state. TODO: update or
# remove.
lra_est = PosBilinearEst(embedding_dim=8, n_iter=25)
lra_est.fit(ml100k_train)

INFO:lda4rec.estimators:Epoch 0: loss 0.47544069531801586
INFO:lda4rec.estimators:Epoch 1: loss 0.22211888790936082
INFO:lda4rec.estimators:Epoch 2: loss 0.14464610694227992
INFO:lda4rec.estimators:Epoch 3: loss 0.126111490219026
INFO:lda4rec.estimators:Epoch 4: loss 0.11476287563910355
INFO:lda4rec.estimators:Epoch 5: loss 0.11025619404138745
INFO:lda4rec.estimators:Epoch 6: loss 0.10320340022444725
INFO:lda4rec.estimators:Epoch 7: loss 0.09600494869456098
INFO:lda4rec.estimators:Epoch 8: loss 0.09058575598976097
INFO:lda4rec.estimators:Epoch 9: loss 0.08669685620713878
INFO:lda4rec.estimators:Epoch 10: loss 0.08124634907455058
INFO:lda4rec.estimators:Epoch 11: loss 0.07881147558423313
INFO:lda4rec.estimators:Epoch 12: loss 0.07593562307994108
INFO:lda4rec.estimators:Epoch 13: loss 0.07219354410127209
INFO:lda4rec.estimators:Epoch 14: loss 0.07037132109037122
INFO:lda4rec.estimators:Epoch 15: loss 0.06985558191112973
INFO:lda4rec.estimators:Epoch 16: loss 0.06477439438773168
INFO:lda4

0.054229231533669944

In [38]:
# Inspect the embedding for index 1 (torch.ones(1, dtype=torch.int) is the
# single index [1]). Reaches into the estimator's private _model attribute.
lra_est._model.user_embeddings(torch.ones(1, dtype=torch.int))

tensor([[-0.5403, -4.8071, -6.7378, -3.8542,  1.3193,  2.0219,  0.3636, -2.2575]],
       grad_fn=<EmbeddingBackward>)

In [34]:
# Evaluation summary for lra_est.
# NOTE(review): ml100k_train / ml100k_test are not defined in the cells shown.
summary(lra_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.33082,0.121311
recall,0.062423,0.083869
mrr,0.029859,0.039169
auc,0.961974,0.899659


In [12]:
# NOTE(review): repeated evaluation of lra_est with an out-of-sequence
# execution count; the recorded numbers likely stem from a different fit.
summary(lra_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.309016,0.11954
recall,0.056212,0.085363
mrr,0.029178,0.040881
auc,0.961975,0.914103


In [21]:
# NOTE(review): repeated evaluation of lra_est with an out-of-sequence
# execution count; consider keeping a single evaluation cell per estimator.
summary(lra_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.349836,0.126885
recall,0.066229,0.090332
mrr,0.031398,0.042569
auc,0.962789,0.905485


In [19]:
# Build an LDA4RecEst from whatever is currently in Pyro's global param store.
# NOTE(review): `ml100k` is not defined in the cells shown, and this call uses
# different keyword arguments (params, n_items) than the other LDA4RecEst
# constructions in this notebook — verify against the current constructor.
lda_est = LDA4RecEst(params=pyro.get_param_store(), n_items=ml100k.n_items)

In [13]:
# NOTE(review): repeated evaluation of lra_est with an out-of-sequence
# execution count; ml100k_train / ml100k_test are not defined in the cells shown.
summary(lra_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.327541,0.109016
recall,0.062915,0.077158
mrr,0.030026,0.036842
auc,0.96575,0.900079


In [21]:
# Evaluation summary for lda_est on the ml100k splits.
# NOTE(review): ml100k_train / ml100k_test are not defined in the cells shown.
summary(lda_est, train=ml100k_train, test=ml100k_test)

Unnamed: 0_level_0,train,test
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
prec,0.349344,0.141148
recall,0.068502,0.10099
mrr,0.032233,0.045185
auc,0.938877,0.908419


In [123]:
# Empirical mean over 10,000 draws from Dirichlet([0.001, 0.01]); the analytic
# mean is alpha / alpha.sum() = [1/11, 10/11].
dist.Dirichlet(torch.tensor([0.001,0.01])).sample((10_000,)).mean(axis=0)

tensor([0.0884, 0.9116])

In [60]:
# Draw n_samples samples with Pyro's Predictive and convert every returned
# tensor to a NumPy array, keeping the keys returned by predictive(...).
# NOTE(review): model, guide and model_params are not defined in the cells
# shown, and rng is created but not used in this cell.
# NOTE(review): the recorded run of this cell raised
# "IndexError: index 1 is out of bounds for dimension 0 with size 1".
n_samples = 200
rng = np.random.default_rng(42)
model_params["interactions"] = data.interactions

predictive = Predictive(model, guide=guide, num_samples=n_samples)
samples = {
    # detach from autograd and move to host memory before converting
    k: v.detach().cpu().numpy()
    for k, v in predictive(**model_params).items()
}

IndexError: index 1 is out of bounds for dimension 0 with size 1