In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [10]:
# Read the long-format score records from disk.
df = pd.read_csv("data/final_data.csv")

# Reshape to one row per u_id and one column per a_id; (u_id, a_id) pairs
# without a score are filled with 0.
data_matrix = (
    df.pivot(index="u_id", columns="a_id", values="score")
    .fillna(0)
)

# NumPy array form of the same table, for the numeric code below.
data_matrix_values = data_matrix.to_numpy()

In [11]:
# Dimensions of the pivoted table: (rows indexed by u_id, columns indexed by a_id).
data_matrix.shape

(7162, 1590)

In [12]:

def _als_update_rows(target, fixed, ratings, observed, ridge):
    """Re-solve every row of ``target`` in place while ``fixed`` is held constant.

    For row ``r`` the regularised normal equations
    ``(F.T @ F + ridge) x = F.T @ ratings[r, seen]`` are solved, where ``F``
    holds the rows of ``fixed`` selected by ``observed[r]``. Rows with no
    observed entry are left untouched.
    """
    for row in range(target.shape[0]):
        seen = observed[row]
        if not seen.any():
            # Nothing to fit against; keep the current factors for this row.
            continue
        basis = fixed[seen]
        gram = basis.T @ basis + ridge
        rhs = basis.T @ ratings[row, seen]
        # lstsq (rather than solve) keeps working when regularization is 0
        # and the Gram matrix is singular.
        target[row] = np.linalg.lstsq(gram, rhs, rcond=None)[0]


def als(matrix, rank, iterations, regularization=0.1, seed=None):
    """Factorise a ratings matrix with alternating least squares (ALS).

    Only entries strictly greater than zero are treated as observed; every
    other entry (zero, negative, NaN) is ignored when fitting.

    Parameters
    ----------
    matrix : array-like of shape (num_users, num_items)
        Ratings table. Anything ``np.asarray`` can turn into a 2-D float
        array is accepted (ndarray, nested lists, DataFrame).
    rank : int
        Number of latent factors; must be at least 1.
    iterations : int
        Number of full sweeps (one user update followed by one item update).
    regularization : float, default 0.1
        Ridge term added to the diagonal of each least-squares system.
    seed : int or None, default None
        Seed for the random initialisation. With ``None`` the global
        ``np.random`` state is used, exactly as before, so a preceding
        ``np.random.seed(...)`` call still controls the result.

    Returns
    -------
    U : ndarray of shape (num_users, rank)
        User factors.
    VT : ndarray of shape (rank, num_items)
        Transposed item factors, so ``U @ VT`` is the predicted matrix.

    Raises
    ------
    ValueError
        If ``matrix`` is not 2-D or ``rank`` is smaller than 1.

    Notes
    -----
    Users or items without any observed entry keep their random initial
    factors, so predictions for them are not meaningful.
    """
    matrix = np.asarray(matrix, dtype=float)
    if matrix.ndim != 2:
        raise ValueError(f"matrix must be 2-D, got {matrix.ndim} dimension(s)")
    if rank < 1:
        raise ValueError(f"rank must be at least 1, got {rank}")

    num_users, num_items = matrix.shape

    if seed is None:
        # Preserve the historical behaviour: draw from the global RNG.
        U = np.random.rand(num_users, rank)
        V = np.random.rand(num_items, rank)
    else:
        rng = np.random.default_rng(seed)
        U = rng.random((num_users, rank))
        V = rng.random((num_items, rank))

    mask = matrix > 0
    # Loop-invariant ridge term, built once instead of once per row.
    ridge = regularization * np.eye(rank)

    for _ in range(iterations):
        # Update user factors with the item factors fixed.
        _als_update_rows(U, V, matrix, mask, ridge)
        # Update item factors with the user factors fixed; transposing the
        # ratings and the mask makes each item a row.
        _als_update_rows(V, U, matrix.T, mask.T, ridge)

    return U, V.T

In [13]:
# Seed NumPy's global RNG so the random initialisation inside als() -- and
# therefore the fitted factors -- is identical on every Restart & Run All.
np.random.seed(42)
U, VT = als(data_matrix_values, rank=10, iterations=10)

# Product of the user factors and the transposed item factors: one predicted
# score for every (row, column) of the input matrix.
predicted_ratings = U @ VT
predicted_ratings

array([[ 7.95780885,  8.27966288,  6.99430988, ...,  6.93846807,
         4.22523424,  8.12143534],
       [10.5056495 , 10.53898098,  9.40149534, ...,  7.76994262,
         3.60622706,  6.90738827],
       [ 8.26209133,  8.7237284 ,  5.79160005, ...,  5.00476928,
         5.205381  ,  3.59648459],
       ...,
       [ 3.42747088,  4.49291   ,  2.88241243, ...,  1.76816348,
         4.42274837,  2.0371612 ],
       [ 7.64796062,  7.34079168,  7.79308483, ...,  7.36533652,
         3.0969885 ,  8.09951639],
       [ 3.67480919,  5.21162094,  4.45865445, ...,  4.59912391,
         3.87477896,  4.29869492]])

In [14]:
# Example given in the notebook: a rank-1 factorisation of
#
#     Original matrix = 0.5  ?  4
#                        1   3  5
#
# ("?" is presumably the missing entry to be predicted.)
# NOTE(review): this rebinds `U`, replacing the user factors returned by
# als() in the previous cell.
U = np.array([[0.7461], [1.7966]])
P = np.array([[0.758, 2.5431, 4.7999]])

# Reconstructed 2 x 3 matrix.
np.matmul(U, P)

array([[0.5655438 , 1.89740691, 3.58120539],
       [1.3618228 , 4.56893346, 8.62350034]])

In [15]:
# Boolean matrix, True wherever the stored score is strictly positive
# (the same "observed entry" test that als() applies to its input).
mask = np.greater(data_matrix_values, 0)
mask

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [16]:
# Display the dimensions of the pivoted table again (same check as earlier).
data_matrix.shape

(7162, 1590)

In [17]:
# Show the pivoted u_id x a_id table; 0.0 marks pairs filled in by fillna(0).
data_matrix

a_id,1,5,6,7,15,16,19,20,21,22,...,39417,39456,39468,39491,39523,39533,39539,39597,39799,40004
u_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42845,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42901,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
