## Meta -

This file shows matrix factorization, with the math behind it explained.

## Author - Rahul Suresh

In [10]:
"""
@INPUT:
    R     : a matrix to be factorized, dimension N x M
    P     : an initial matrix of dimension N x K
    Q     : an initial matrix of dimension M x K
    K     : the number of latent features
    steps : the maximum number of steps to perform the optimisation
    alpha : the learning rate
    beta  : the regularization parameter
@OUTPUT:
    the final matrices P and Q
"""
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorize R into two factors so that R ~ P . Q^T on its observed entries.

    The factors are fitted by stochastic gradient descent with L2
    regularization. Only entries of R that are strictly greater than zero
    are fitted; every other entry is skipped, both in the updates and in
    the error used for early stopping.

    @INPUT:
        R     : a matrix to be factorized, dimension N x M
        P     : an initial matrix of dimension N x K
        Q     : an initial matrix of dimension M x K
        K     : the number of latent features (the first K columns of P
                and Q are the ones that get updated)
        steps : the maximum number of passes over the observed entries
        alpha : the learning rate
        beta  : the regularization parameter
    @OUTPUT:
        the final matrices P (N x K) and Q (M x K), returned as new float
        arrays; the P and Q passed in are left unmodified
    """
    R = numpy.asarray(R)
    # Work on float copies: the caller's initial matrices stay intact and
    # integer-typed inputs do not truncate the fractional gradient steps.
    P = numpy.array(P, dtype=float)
    Q = numpy.array(Q, dtype=float).T  # K x M from here on

    observed = R > 0
    if not observed.any():
        # Nothing to fit: the error is 0, so there is no update to make.
        return P, Q.T

    # Observed (i, j) pairs in row-major order, computed once since R is
    # never modified inside the loop.
    cells = numpy.argwhere(observed)
    # How many observed entries each row / column has; used to weight the
    # regularization term, which is summed once per observed entry.
    row_counts = observed.sum(axis=1)
    col_counts = observed.sum(axis=0)

    for _ in range(steps):
        for i, j in cells:
            eij = R[i, j] - numpy.dot(P[i, :], Q[:, j])
            # Keep the pre-update row of P so that both updates are taken
            # from the same point (Q must not see the already-updated P).
            p_old = P[i, :K].copy()
            P[i, :K] += alpha * (2 * eij * Q[:K, j] - beta * p_old)
            Q[:K, j] += alpha * (2 * eij * p_old - beta * Q[:K, j])

        # Regularized squared error over the observed entries.
        residual = (R - numpy.dot(P, Q))[observed]
        e = numpy.sum(residual ** 2)
        e += (beta / 2) * (
            numpy.dot(row_counts, numpy.sum(P[:, :K] ** 2, axis=1))
            + numpy.dot(col_counts, numpy.sum(Q[:K, :] ** 2, axis=0))
        )
        if e < 0.001:
            break
    return P, Q.T

In [11]:
import numpy 

# Small example matrix to factorize. matrix_factorization only fits the
# entries that are greater than zero, so the 0 cells are left for the
# model to fill in.
R = numpy.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])

# Row and column counts of R.
N, M = R.shape

# normally the value of K is chosen through cross validation
K = 2

# Random starting factors: P is N x K, Q is M x K.
P = numpy.random.rand(N, K)
Q = numpy.random.rand(M, K)

nP, nQ = matrix_factorization(R, P, Q, K)

In [12]:
nP

array([[0.13555879, 2.31270787],
       [0.19206309, 1.83416381],
       [1.98728321, 0.49254363],
       [1.58614049, 0.43794287],
       [1.56104293, 0.86962604]])

In [13]:
nQ

array([[0.00894313, 2.1597205 ],
       [0.0925249 , 1.261396  ],
       [2.09907642, 1.86902622],
       [2.42530469, 0.28929357]])

In [14]:
# show matrix multiplication first

In [26]:
import numpy as np

# Two all-ones operands with matching inner dimension: (3 x 2) and (2 x 4).
a = np.ones(shape=(3, 2))
b = np.ones(shape=(2, 4))

In [28]:
# Matrix product of the two 2-D operands: (3 x 2) . (2 x 4) -> (3 x 4).
R = a @ b

In [29]:
# Take a fresh array copy of the product and read off its dimensions.
R = numpy.array(R)
N, M = R.shape

K = 2  # number of latent features

# Random starting factors: P is N x K, Q is M x K.
P = numpy.random.rand(N, K)
Q = numpy.random.rand(M, K)

nP, nQ = matrix_factorization(R, P, Q, K)

In [31]:
nP

array([[1.30273731, 0.81319602],
       [1.34072187, 0.75903288],
       [1.4119938 , 0.65740393]])

In [32]:
nQ

array([[1.06712483, 0.7365186 ],
       [1.07775943, 0.71713857],
       [0.96036209, 0.9310962 ],
       [1.14910008, 0.58712314]])

### Anticipated problems when deploying on the MIK dataset
#### How do you predict for new customers?
#### The model can predict unknown values for existing customers, but what about entirely new customers?
#### Also, the model may be computationally very expensive.