In [30]:
import pandas as pd 
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse 

class MF(object):
    """Matrix-factorization recommender trained with batch gradient descent.

    The rating data ``Y`` is an array with one row per rating, laid out as
    ``[user_id, item_id, rating]`` with zero-based integer-valued ids.
    Ratings are mean-centred (per user or per item, see ``user_based``) and
    approximated by ``X[i, :].dot(W[:, u])`` where ``X`` has shape
    ``(items_count, k)`` and ``W`` has shape ``(k, users_count)``.

    Parameters
    ----------
    Y : array-like, shape (n_ratings, 3)
        Rows of ``[user_id, item_id, rating]``.
    k : int
        Number of latent factors.
    X : array-like of shape (items_count, k), optional
        Initial item factors; drawn from a standard normal when omitted.
    W : array-like of shape (k, users_count), optional
        Initial user factors; drawn from a standard normal when omitted.
    lamda : float
        L2 regularisation weight.
    dist_func : callable, optional
        Stored on the instance but not used by any method of this class.
        Defaults to ``cosine_similarity``.
    learning_rate : float
        Gradient-descent step size.
    user_based : int or bool
        Truthy: centre ratings by each user's mean; falsy: by each item's mean.
    """

    def __init__(self, Y, k, X = None, W = None, lamda = 0.1,
                dist_func = None, learning_rate = 0.5, user_based = 1):
        self.Y = np.asarray(Y)
        self.lamda = lamda
        self.k = k
        # Resolve the default lazily so the class definition itself does not
        # require the similarity function to be bound yet.
        if dist_func is None:
            dist_func = cosine_similarity
        self.dist_func = dist_func
        self.learning_rate = learning_rate
        self.user_based = user_based
        self.users_count = int(np.max(self.Y[:, 0])) + 1
        self.items_count = int(np.max(self.Y[:, 1])) + 1
        self.ratings_count = self.Y.shape[0]
        # `is None` rather than `== None`: comparing an ndarray with `==`
        # is elementwise and cannot be used as an `if` condition.
        # Supplied factors are copied (as float) so the in-place updates
        # during training never modify the caller's arrays.
        if X is None:
            self.X = np.random.randn(self.items_count, k)
        else:
            self.X = np.array(X, dtype = float)
        if W is None:
            self.W = np.random.randn(k, self.users_count)
        else:
            self.W = np.array(W, dtype = float)
        # Float copy: an integer-typed Y would otherwise truncate the
        # mean-centred ratings written by normalizeY.
        self.Ybar = np.array(self.Y, dtype = float)

    def normalizeY(self):
        """Mean-centre the ratings into ``self.Ybar`` and store means in ``self.mu``.

        Centres per user when ``user_based`` is truthy, otherwise per item, so
        that ``self.mu`` is indexed the same way ``pred`` reads it. Ids with no
        ratings keep a mean of 0.
        """
        if self.user_based:
            group_col, group_count = 0, self.users_count
        else:
            group_col, group_count = 1, self.items_count
        keys = self.Y[:, group_col]
        self.mu = np.zeros((group_count,))
        for g in range(group_count):
            ids = np.where(keys == g)[0].astype(int)
            if ids.size == 0:
                # No ratings for this id: leave the mean at 0 instead of
                # taking the mean of an empty slice.
                continue
            ratings = self.Y[ids, 2]
            self.mu[g] = np.mean(ratings)
            self.Ybar[ids, 2] = ratings - self.mu[g]
        print(self.Ybar)

    def get_user_rated_item(self, i):
        """Return ``(users, ratings)``: who rated item ``i`` and their centred ratings."""
        ids = np.where(i == self.Ybar[:, 1])[0].astype(int)
        users = self.Ybar[ids, 0].astype(int)
        ratings = self.Ybar[ids, 2]

        return (users, ratings)

    def get_item_rated_by_user(self, u):
        """Return ``(items, ratings)``: items rated by user ``u`` and the centred ratings."""
        ids = np.where(u == self.Ybar[:, 0])[0].astype(int)
        items = self.Ybar[ids, 1].astype(int)
        ratings = self.Ybar[ids, 2]

        return (items, ratings)

    def updateX(self):
        """Take one gradient step on every row of the item-factor matrix ``X``."""
        for i in range(self.items_count):
            users, ratings = self.get_user_rated_item(i)
            Wi = self.W[:, users]
            # Gradient of the squared error (scaled by the total number of
            # ratings) plus the L2 penalty on this item's factors.
            residual = ratings - self.X[i, :].dot(Wi)
            grad = -residual.dot(Wi.T)/self.ratings_count + self.lamda*self.X[i, :]
            self.X[i, :] -= self.learning_rate*grad.reshape((self.k,))

    def updateW(self):
        """Take one gradient step on every column of the user-factor matrix ``W``."""
        for u in range(self.users_count):
            items, ratings = self.get_item_rated_by_user(u)
            Xn = self.X[items, :]
            residual = ratings - Xn.dot(self.W[:, u])
            grad = -Xn.T.dot(residual)/self.ratings_count + self.lamda*self.W[:, u]
            self.W[:, u] -= self.learning_rate*grad.reshape((self.k,))

    def fit(self, max_iter = 1000):
        """Normalise the ratings, then alternate ``updateX``/``updateW``.

        Parameters
        ----------
        max_iter : int
            Number of alternating update rounds (default 1000).
        """
        self.normalizeY()
        for _ in range(max_iter):
            self.updateX()
            self.updateW()

    def pred(self, u, i):
        """Predict the rating of user ``u`` for item ``i``.

        The prediction is the factor dot product plus the mean that was
        subtracted in ``normalizeY`` (the user's mean when ``user_based`` is
        truthy, otherwise the item's). The result is clamped to ``[1, 5]``.
        Requires ``normalizeY`` (or ``fit``) to have been called first.
        """
        u = int(u)
        i = int(i)

        if self.user_based:
            bias = self.mu[u]
        else:
            bias = self.mu[i]
        pred = self.X[i, :].dot(self.W[:, u]) + bias
        if pred < 1:
            return 1
        if pred > 5:
            return 5
        return pred

In [31]:
# Load the space-separated ratings file: one `user_id item_id rating` per line.
r_cols = ['user_id', 'item_id', 'rating']
ratings = pd.read_csv('ex.dat', sep = ' ', names = r_cols, encoding='latin-1')
# `DataFrame.as_matrix()` was removed in pandas 1.0; `.values` yields the same
# ndarray and is available on both old and new pandas versions.
Y_data = ratings.values

# Factorise with two latent factors.
rs = MF(Y_data, k = 2)

rs.fit()
# Predicted rating of user 6 for item 1.
rs.pred(6, 1)

[[ 0.          0.          1.75      ]
 [ 0.          1.          0.75      ]
 [ 0.          3.         -1.25      ]
 [ 0.          4.         -1.25      ]
 [ 1.          0.          2.25      ]
 [ 1.          2.          1.25      ]
 [ 1.          3.         -0.75      ]
 [ 1.          4.         -2.75      ]
 [ 2.          0.         -0.5       ]
 [ 2.          2.         -1.5       ]
 [ 2.          3.          0.5       ]
 [ 2.          4.          1.5       ]
 [ 3.          0.         -1.33333333]
 [ 3.          1.         -1.33333333]
 [ 3.          3.          2.66666667]
 [ 4.          0.         -1.5       ]
 [ 4.          3.          1.5       ]
 [ 5.          1.          0.5       ]
 [ 5.          2.         -0.5       ]
 [ 6.          2.         -2.33333333]
 [ 6.          3.          0.66666667]
 [ 6.          4.          1.66666667]]


2.9373719058635412