In [1]:
import sys
import logging

import numpy as np
import scipy as sp
import sklearn
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# sns.set() re-applies seaborn's default context ("notebook") and style, so it
# has to run first; calling it after set_context() would silently discard the
# "poster" context requested for this notebook.
sns.set(rc={'figure.figsize': (16, 9.)})
sns.set_context("poster")
sns.set_style("whitegrid")

import pandas as pd
# Let pandas display up to 120 rows and 120 columns before truncating output.
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

# Emit log records of level INFO and above to stdout so they appear in cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [2]:
from lrann.datasets import DataLoader, random_train_test_split, Interactions
from lrann.estimators import ImplicitEst, ExplicitEst
from lrann.models import BilinearNet, DeepNet
from lrann.evaluations import mrr_score, precision_recall_score, rmse_score
from lrann.utils import is_cuda_available

In [3]:
def get_latent(n_users, n_items, seed=None):
    """
    Draw one scalar latent factor per user and per item, uniformly from [-1, 1).

    Parameters
    ----------
    n_users : int
        Number of user latents to draw.
    n_items : int
        Number of item latents to draw.
    seed : None, int or numpy.random.RandomState, optional
        Source of randomness. ``None`` (the default) keeps the previous
        behaviour of drawing from NumPy's global random state; an int creates
        a dedicated ``RandomState`` so the draw is reproducible; an existing
        ``RandomState`` instance is used as-is.

    Returns
    -------
    (users, items) : tuple of numpy.ndarray
        1-D float arrays of length ``n_users`` and ``n_items``.
    """
    if seed is None:
        rng = np.random  # legacy behaviour: module-level global state
    elif isinstance(seed, np.random.RandomState):
        rng = seed
    else:
        rng = np.random.RandomState(seed)

    # Users are drawn before items; keep this order so seeded runs stay stable.
    users = rng.uniform(-1, 1, size=n_users)
    items = rng.uniform(-1, 1, size=n_items)
    return users, items

def get_interactions(users, items, size, seed=None):
    """
    Lazily sample ``size`` signed user-item interactions from scalar latents.

    On every attempt a user and an item are picked uniformly at random and
    their latents are multiplied. The pair is accepted only if two coin flips
    with success probability ``|user * item|`` both succeed, i.e. with
    probability ``(user * item) ** 2``. Accepted pairs are labelled with the
    sign of the product.

    Parameters
    ----------
    users, items : numpy.ndarray
        1-D arrays of latent values. The product of any user and item latent
        is used as a probability, so its magnitude must not exceed 1.
    size : int
        Number of accepted interactions to yield.
    seed : None, int or numpy.random.RandomState, optional
        Source of randomness. ``None`` (the default) keeps the previous
        behaviour of drawing from NumPy's global random state; an int creates
        a dedicated ``RandomState``; a ``RandomState`` instance is used as-is.

    Yields
    ------
    (user_idx, item_idx, sign) : tuple of int
        Indices into ``users`` / ``items`` and the sign (+1 or -1) of the
        product of the two latents.

    Raises
    ------
    ValueError
        If ``size > 0`` but ``users`` or ``items`` is empty or all zeros, in
        which case no pair could ever be accepted.

    Notes
    -----
    The total number of attempts is printed once the generator is exhausted.
    """
    if seed is None:
        rng = np.random  # legacy behaviour: module-level global state
    elif isinstance(seed, np.random.RandomState):
        rng = seed
    else:
        rng = np.random.RandomState(seed)

    # With only zero (or no) latents every product is 0, the acceptance
    # probability is 0 and the rejection loop below would never terminate.
    if size > 0 and not (np.any(users) and np.any(items)):
        raise ValueError(
            "users and items must each contain at least one non-zero latent "
            "to sample interactions"
        )

    runs = 0
    while size > 0:
        user_idx = rng.randint(users.shape[0])
        item_idx = rng.randint(items.shape[0])
        prod = users[user_idx] * items[item_idx]
        # flip coin two times and only accept if two times head
        if rng.binomial(2, abs(prod)) == 2:
            size -= 1
            yield (user_idx, item_idx, int(np.sign(prod)))
        runs += 1
    print(f"Number of runs {runs}")

In [4]:
# Seed NumPy's global RNG so the synthetic latents and the interactions sampled
# from them are the same on every Restart & Run All.
np.random.seed(42)

# generate random latent features for 100 users and 2000 items
users, items = get_latent(100, 2000)

In [5]:
# Sample 20000 accepted interactions from the user and item latents and stack
# them into a 2-D array whose rows are (user index, item index, sign).
# The generator prints the total number of sampling attempts when it finishes.
raw = np.array(list(get_interactions(users, items, 20000)))

Number of runs 172584


In [6]:
# Each row of `raw` is (user index, item index, sign); transposing lets the
# three columns be unpacked in one step. The sign column serves as the rating.
user_ids, item_ids, ratings = raw.T
interactions = Interactions(
    user_ids,
    item_ids,
    ratings,
    n_users=len(users),
    n_items=len(items),
)

In [7]:
# Split the interactions into a train and a test set using the function's defaults.
# NOTE(review): no seed is passed here, so the split may differ between runs —
# TODO confirm whether random_train_test_split accepts a random state.
train, test = random_train_test_split(interactions)

In [21]:
# Build both models so they can be trained and compared side by side:
# a deep neural network (DeepNet) and a bilinear model without bias terms
# (BilinearNet — presumably the "MF model" of the original note).
# embedding_dim=1 matches the single scalar latent per user/item used to
# generate the synthetic data.
nn_model = DeepNet(interactions.n_users, interactions.n_items, embedding_dim=1, sparse=False)
lra_model = BilinearNet(interactions.n_users, interactions.n_items, embedding_dim=1, sparse=False, biases=False)

In [22]:
# Wrap each model in an explicit-feedback estimator. Both use identical
# hyperparameters (50 iterations, batch size 128, learning rate 1e-2) so the
# comparison differs only in the model.
lra_est = ExplicitEst(model=lra_model, n_iter=50, batch_size=128, learning_rate=1e-2)
nn_est = ExplicitEst(model=nn_model, n_iter=50, batch_size=128, learning_rate=1e-2)

In [28]:
# Train the DeepNet-based estimator on the training split; verbose=True prints
# the loss after every epoch.
nn_est.fit(train, verbose=True)

Epoch 0: loss 0.6937191733449859
Epoch 1: loss 0.6931403354347341
Epoch 2: loss 0.6903143634630522
Epoch 3: loss 0.6796162547015587
Epoch 4: loss 0.6583952123242752
Epoch 5: loss 0.5979939207796298
Epoch 6: loss 0.49372415027000094
Epoch 7: loss 0.44022152910980394
Epoch 8: loss 0.423910367478029
Epoch 9: loss 0.4173782830680882
Epoch 10: loss 0.41522980651351515
Epoch 11: loss 0.4139682404366124
Epoch 12: loss 0.4123608971808707
Epoch 13: loss 0.411168341129938
Epoch 14: loss 0.41101972697632855
Epoch 15: loss 0.4106381721436383
Epoch 16: loss 0.4106466336155904
Epoch 17: loss 0.4101009009285152
Epoch 18: loss 0.4103242458441001
Epoch 19: loss 0.40998622948822416
Epoch 20: loss 0.40994272844374774
Epoch 21: loss 0.4096847226539813
Epoch 22: loss 0.409785906338879
Epoch 23: loss 0.4094543050542525
Epoch 24: loss 0.40936850486811543
Epoch 25: loss 0.4093947166107063
Epoch 26: loss 0.4096454429835382
Epoch 27: loss 0.40941295644888537
Epoch 28: loss 0.4093477903159934
Epoch 29: loss 0.40

In [31]:
# Train the BilinearNet-based estimator on the same training split; verbose=True
# prints the loss after every epoch.
lra_est.fit(train, verbose=True)

Epoch 0: loss 0.7481511261183831
Epoch 1: loss 0.7025895122811333
Epoch 2: loss 0.6752965076987377
Epoch 3: loss 0.6100735373255861
Epoch 4: loss 0.4498917243804452
Epoch 5: loss 0.2549127092810166
Epoch 6: loss 0.13190072713813847
Epoch 7: loss 0.07302602569501235
Epoch 8: loss 0.04449255903981173
Epoch 9: loss 0.029377573165956686
Epoch 10: loss 0.020615203848145663
Epoch 11: loss 0.015100890913736916
Epoch 12: loss 0.011464038147112002
Epoch 13: loss 0.00892713605927714
Epoch 14: loss 0.007100602586858955
Epoch 15: loss 0.00574458083025163
Epoch 16: loss 0.004711666581048681
Epoch 17: loss 0.003913603296113405
Epoch 18: loss 0.003285804166448936
Epoch 19: loss 0.002783036358305369
Epoch 20: loss 0.0023795926318499966
Epoch 21: loss 0.002049676205309553
Epoch 22: loss 0.0017781552498140612
Epoch 23: loss 0.0015528977902437046
Epoch 24: loss 0.0013621367829789862
Epoch 25: loss 0.0012009408844244657
Epoch 26: loss 0.0010643546886146289
Epoch 27: loss 0.0009472047355020078
Epoch 28: lo

In [36]:
# Score the DeepNet-based estimator with rmse_score; displayed as (train, test).
rmse_score(nn_est, train), rmse_score(nn_est, test)

(0.3605006926248949, 0.40157441027992025)

In [35]:
# Score the BilinearNet-based estimator with rmse_score; displayed as (train, test).
rmse_score(lra_est, train), rmse_score(lra_est, test)

(0.00037134401082285673, 0.06408689542038146)