<a href="https://colab.research.google.com/github/MangoHaha/MLFromScratch/blob/master/MatrixFactorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sklearn
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
import math
import sys



[A Simple Tutorial and Implementation in Python](http://www.albertauyeung.com/post/python-matrix-factorization/)

        prediction(i, j) = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)


In [0]:
class MatrixFactorization():
  def __init__(self,X, itr=20, lr = 0.1, lamb=0.01, dim_k = 2):
    self.data = X # dataset m x n
    self.lr = lr
    self.itr = itr
    self.lamb = lamb
    self.dim_k = dim_k
    self.num_movie, self.num_user = np.shape(X)
    self.P = np.random.random_sample((self.num_movie, dim_k)) #matrix of movie x features m x k
    self.Q = np.random.random_sample((self.num_user, dim_k)) # matrix of user x features n x k
    self.b = 0
    self.bu = np.zeros(self.num_user)
    self.bm = np.zeros(self.num_movie)
    
  def predict(self, i, j):
    return self.b + self.bm[i] + self.bu[j] + self.P[i].dot(self.Q[j].T)
  
  def fit(self):
    learning_curvey = []
    for n in range(self.itr):
      for i in range(self.num_movie):
        for j in range(self.num_user):
          self._gradient_descent(i, j)
      learning_curvey.append(self._mse_())
    return learning_curvey
          
  def _gradient_descent(self, i, j):
    predict = self.predict(i, j)
    err = self.data[i][j] - predict
    
    #updating the bias
    self.b -= self.lr*(err + self.lamb*self.b)
    self.bm[i] += self.lr*(err + self.lamb*self.bm[i])
    self.bu[j] += self.lr*(err + self.lamb*self.bu[j])
    
    #updating parameter
    self.P[i, :] += self.lr*(err + self.lamb*self.P[i, :])
    self.Q[j, :] += self.lr*(err + self.lamb*self.Q[j, :])
    
  def _mse_(self):
    err = 0
    for i in range(self.num_movie):
      for j in range(self.num_user):
        err += (self.predict(i, j) - self.data[i][j])**2
        
    return np.sqrt(err)    

In [28]:
R = np.array([[5,3,0,1],[4,0,0,1],[1,1,0,5],[1,0,0,4],[0,1,5,4],[0,0,1,0]])
mf = MatrixFactorization(R)
print(mf.fit())


[8.307743208453065, 8.2861241366868, 8.468520591381633, 8.417512159820193, 8.359075296542557, 8.307703952140697, 8.25878805112167, 8.211474632256547, 8.165795526144, 8.1218216522318, 8.07960389981914, 8.039183507580086, 8.000596979970926, 7.963878164709413, 7.929059918915982, 7.896175987338906, 7.865263284976943, 7.836364737055561, 7.809532894999047, 7.784834695061843]
