# ALS factorization

In [None]:
## GD (gradient descent)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    '''
    Factorize the rating matrix R into the product P x Q by gradient descent,
    taking one update per observed rating.

    Only entries with R[i][j] > 0 count as observed ratings; all other entries
    are skipped both when updating the factors and when measuring the error.

    R: rating matrix, |U| * |I| (any array-like; converted with np.asarray)
    P: |U| * K (User features matrix) -- numpy array, updated IN PLACE
    Q: K * |I| (Item features matrix) -- numpy array, updated IN PLACE
    K: latent features (the first K factors of P and Q are the ones updated)
    steps: maximum number of passes over the observed ratings
    alpha: learning rate
    beta: regularization parameter

    Returns (P, Q): the very same array objects that were passed in.
    '''
    R = np.asarray(R)
    # Index pairs of the observed ratings, in row-major order (the same order a
    # nested "for i / for j" scan would visit them). Computed once, since R
    # never changes during training.
    observed = np.argwhere(R > 0)

    for step in range(steps):
        for i, j in observed:
            # error between Rij and R_hat_ij
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])

            # Snapshot both factor vectors first so that P and Q are updated
            # simultaneously from the values that produced eij. Updating Q
            # from an already-updated P would not be the gradient at the
            # point where the error was measured.
            p_old = P[i, :K].copy()
            q_old = Q[:K, j].copy()
            P[i, :K] = p_old + alpha * (2 * eij * q_old - beta * p_old)
            Q[:K, j] = q_old + alpha * (2 * eij * p_old - beta * q_old)

        # Regularized squared error over the observed ratings.
        e = 0.0
        for i, j in observed:
            e += (R[i, j] - np.dot(P[i, :], Q[:, j])) ** 2
            e += (beta / 2) * (np.dot(P[i, :K], P[i, :K]) + np.dot(Q[:K, j], Q[:K, j]))

        # stop early once the total error falls below 0.001
        if e < 0.001:
            break

    return P, Q

In [3]:
# Toy ratings: one row per user, one column per movie.
# matrix_factorization only trains on entries > 0, so 0 acts as "no rating".
R = np.array([
    [0, 4.5, 2.0, 0],
    [4, 0, 3.5, 0],
    [0, 5, 0, 2],
    [0, 3.5, 4.0, 1.0],
])

# N users (rows) and M movies (columns)
N, M = R.shape
# number of latent features
K = 2

# random starting factors: P is N x K, Q is K x M
P = np.random.rand(N, K)
Q = np.random.rand(K, M)

nP, nQ = matrix_factorization(R, P, Q, K, steps=10000)

# reconstructed rating matrix from the learned factors
nR = nP.dot(nQ)

nR

array([[3.43170473, 4.47961846, 2.0072065 , 1.85613828],
       [3.98880305, 4.884435  , 3.49082882, 1.78689104],
       [3.90864445, 4.98881825, 2.6933289 , 1.98378502],
       [3.13268627, 3.49279457, 3.97440568, 1.00878281]])

In [4]:
# user-feature matrix returned by matrix_factorization (one row per user, K columns)
nP

array([[1.75762048, 0.40737575],
       [1.59057038, 1.2207838 ],
       [1.84280482, 0.72679066],
       [0.76748654, 1.754475  ]])

In [5]:
# item-feature matrix returned by matrix_factorization (K rows, one column per movie)
nQ

array([[1.71222712, 2.32276883, 0.68657009, 1.02690275],
       [1.03653515, 0.97470797, 1.96495954, 0.12576342]])

In [6]:
# Second toy example: 5 users x 4 movies.
# matrix_factorization only trains on entries > 0, so 0 acts as "no rating".
R = np.array([
    [2, 5, 1, 3],
    [4, 0, 0, 1],
    [0, 4, 2, 0],
    [2, 4, 3, 1],
    [1, 3, 2, 0],
])

# N users (rows) and M movies (columns)
N, M = R.shape
# number of latent features
K = 2

# random starting factors: P is N x K, Q is K x M
P = np.random.rand(N, K)
Q = np.random.rand(K, M)

nP, nQ = matrix_factorization(R, P, Q, K, steps=10000)

# reconstructed rating matrix from the learned factors
nR = nP.dot(nQ)

nR

array([[1.89577339, 5.00693605, 1.04370594, 2.9852719 ],
       [3.85663695, 7.14615882, 5.94906312, 1.05272356],
       [1.81516337, 3.96334912, 2.04486983, 1.4863513 ],
       [2.06287214, 4.02899064, 2.92206358, 0.90429915],
       [1.41575831, 2.87134095, 1.87172306, 0.79607133]])

In [7]:
# user-feature matrix returned by matrix_factorization (one row per user, K columns)
nP

array([[2.1110812 , 0.53127409],
       [0.59442564, 2.74764472],
       [1.01009678, 0.96421265],
       [0.56943939, 1.35639566],
       [0.52012165, 0.87264817]])

In [8]:
# item-feature matrix returned by matrix_factorization (K rows, one column per movie)
nQ

array([[ 0.57614482,  1.81609126, -0.053394  ,  1.39354648],
       [ 1.27897238,  2.20793743,  2.17670132,  0.08165678]])

## SGD (stochastic gradient descent)
- Reference implementation: https://github.com/albertauyeung/matrix-factorization-in-python

In [9]:
import numpy as np
from mf import MF

In [16]:
# Train the MF model (imported from the local `mf` module) on the rating matrix R.
# R is whatever the kernel currently holds -- presumably the 5x4 matrix from the
# last GD example cell; verify after a fresh "Restart & Run All".
# NOTE(review): this cell's execution count is higher than that of the inspection
# cells that follow it, so their displayed outputs may come from an earlier run.
mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=1000)
# the return value of train() is kept but not used in the visible cells
training_process = mf.train()

Iteration: 10 ; error = 1.2474
Iteration: 20 ; error = 0.6504
Iteration: 30 ; error = 0.3856
Iteration: 40 ; error = 0.2755
Iteration: 50 ; error = 0.2191
Iteration: 60 ; error = 0.1838
Iteration: 70 ; error = 0.1598
Iteration: 80 ; error = 0.1412
Iteration: 90 ; error = 0.1312
Iteration: 100 ; error = 0.1197
Iteration: 110 ; error = 0.1128
Iteration: 120 ; error = 0.1070
Iteration: 130 ; error = 0.1019
Iteration: 140 ; error = 0.0997
Iteration: 150 ; error = 0.0970
Iteration: 160 ; error = 0.0931
Iteration: 170 ; error = 0.0905
Iteration: 180 ; error = 0.0890
Iteration: 190 ; error = 0.0875
Iteration: 200 ; error = 0.0863
Iteration: 210 ; error = 0.0864
Iteration: 220 ; error = 0.0827
Iteration: 230 ; error = 0.0842
Iteration: 240 ; error = 0.0825
Iteration: 250 ; error = 0.0832
Iteration: 260 ; error = 0.0811
Iteration: 270 ; error = 0.0808
Iteration: 280 ; error = 0.0800
Iteration: 290 ; error = 0.0790
Iteration: 300 ; error = 0.0770
Iteration: 310 ; error = 0.0785
Iteration: 320 ; 

In [13]:
# presumably the reconstructed rating matrix of the trained MF model (the SGD
# counterpart of nR from the GD section) -- confirm against the mf module
mf.full_matrix()

array([[2.00086709, 4.97812452, 1.0209364 , 2.99645186],
       [3.98275957, 3.28553054, 3.78952139, 1.0111819 ],
       [2.20092685, 3.99633193, 2.00315468, 1.65198914],
       [2.00501115, 3.97274427, 2.99185871, 1.022205  ],
       [1.0213927 , 3.00906131, 1.99217613, 0.06609643]])

In [14]:
# P attribute of the trained MF model -- presumably the user factors
# (the displayed result has one row per user and K=2 columns); confirm in mf module
mf.P

array([[-1.20886133,  0.56993655],
       [ 0.71483615, -1.13803609],
       [-0.20410656,  0.13383063],
       [ 0.59682098,  0.47535563],
       [ 0.5777785 ,  0.48209749]])

In [17]:
# Q attribute of the trained MF model, transposed -- presumably the item factors;
# the displayed result is K x (number of movies), the same layout as nQ in the GD section
mf.Q.T

array([[-0.92998677,  0.52378772, -0.4001607 ,  0.6060199 ],
       [ 0.33258867,  0.40165206, -0.99406305,  0.92585185]])