In [1]:
import os
import sys
from collections import defaultdict
from pathlib import Path

sys.path.append(str(Path(__name__).resolve().parents[1]))

import pandas as pd
import numpy as np


from src.loader.movielens import MovieLensLoader
from src.utils.metrics import RecSysMetrics

import warnings

warnings.filterwarnings("ignore")



In [2]:
movielens_loader = MovieLensLoader(
    num_users=1000,
    num_test_items=5,
)

In [3]:
moivelens_dataset = movielens_loader.load()

In [4]:
train = moivelens_dataset.train
test = moivelens_dataset.test
rank_test = moivelens_dataset.test_user2item

In [6]:
train_ratings = train.rating
test_ratings = test.rating

In [9]:
n_users = train.user_id.nunique()
n_items = train.movie_id.nunique()
print(n_users, n_items)

943 1671


In [10]:
K = 30
user_vector = np.random.normal(scale=1/K, size=(n_users, K))
item_vector = np.random.normal(scale=1/K, size=(n_items, K))

user_vector.shape, item_vector.shape

((943, 30), (1671, 30))

In [14]:
mean_bias = train_ratings.mean()
user_bias = np.zeros(n_users)
item_bias = np.zeros(n_items)

In [15]:
user_movie_matrix = train.pivot(index="user_id", columns="movie_id", values="rating")
user_movie_matrix.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1672,1673,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


In [40]:
rating_matrix = user_movie_matrix.values

In [28]:
non_zero_row_index, non_zero_col_index = user_movie_matrix.values.nonzero()

In [29]:
def predict(u, i):
    bias = mean_bias+user_bias[u]+item_bias[i]
    vectors = user_vector[u] @ item_vector[i]
    return bias + vectors


In [52]:
u, i = non_zero_row_index[0], non_zero_col_index[0]
pred_rating = predict(u, i)
err = (rating_matrix[u, i] - pred_rating) ** 2

In [53]:
user_vector[0], item_vector[0]

(array([-0.00586545,  0.0762359 ,  0.0446221 , -0.0549261 , -0.01641619,
         0.04252651, -0.01255309,  0.007091  ,  0.01061814, -0.021493  ,
         0.03333826,  0.09085998,  0.02854962, -0.02869032,  0.00923656,
         0.01549121, -0.03373245,  0.04064788,  0.00980354,  0.02826588,
         0.0296832 ,  0.00517428,  0.08152218,  0.0220407 ,  0.02060653,
        -0.03935292, -0.01115211,  0.0596852 , -0.01537076, -0.03199872]),
 array([-0.01453708,  0.01398739,  0.01326487, -0.08564942,  0.05530404,
        -0.00576203, -0.01106125, -0.00011792,  0.0108628 , -0.01094816,
        -0.04953939, -0.01171978, -0.04321392, -0.00795963,  0.00265586,
         0.00397538, -0.04142475,  0.00557228,  0.01867337,  0.06289115,
         0.01258971, -0.01777558, -0.0492952 , -0.01719054,  0.03655806,
         0.04352424,  0.01045703, -0.03260327, -0.03179537, -0.02352731]))

In [54]:
lr = 0.01
alpha = 0.01
user_bias += lr * (err - alpha * user_bias[u])
item_bias += lr * (err - alpha * item_bias[i])

user_vector += lr * (err * item_vector[i, :] - alpha * user_vector[u, :])
item_vector += lr * (err * user_vector[u, :] - alpha * item_vector[i, :])


In [55]:
user_vector[0], item_vector[0]

(array([-0.00615912,  0.0765114 ,  0.04488614, -0.05665429, -0.0152951 ,
         0.04240562, -0.01277573,  0.00708791,  0.01083696, -0.02171246,
         0.03233217,  0.09061367,  0.02767205, -0.02884856,  0.0092894 ,
         0.01557013, -0.03456759,  0.04075661,  0.01018054,  0.02953608,
         0.02993507,  0.00481396,  0.08051621,  0.02169053,  0.02134446,
        -0.03846798, -0.01093933,  0.05901929, -0.01601282, -0.03247176]),
 array([-1.46602954e-02,  1.55347136e-02,  1.41721122e-02, -8.67876375e-02,
         5.49889157e-02, -4.90309485e-03, -1.13187508e-02,  2.55624792e-05,
         1.10810766e-02, -1.13865578e-02, -4.88799805e-02, -9.88443346e-03,
        -4.26494736e-02, -8.54277431e-03,  2.84362663e-03,  4.29015044e-03,
        -4.21203129e-02,  6.39670643e-03,  1.88775696e-02,  6.34827160e-02,
         1.31943879e-02, -1.76763575e-02, -4.76604837e-02, -1.67497637e-02,
         3.69864536e-02,  4.27412312e-02,  1.02345550e-02, -3.14053596e-02,
        -3.21163212e-02, -2.

In [56]:
u, i = non_zero_row_index[0], non_zero_col_index[0]
pred_rating = predict(u, i)
err = (rating_matrix[u, i] - pred_rating) ** 2
err

1.9064400669313208