# In [2]:
import numpy as np
import scipy.sparse as sparse
class CF():
    """Implicit-feedback matrix factorisation trained by alternating least squares.

    Each observed rating ``r`` of user ``u`` for product ``p`` is treated as a
    binary preference (1 when ``r > 0``) with confidence ``1 + alpha * r``.
    Users and products are embedded in ``K`` dimensions (``self.U`` and
    ``self.V``); :meth:`fit` alternates between solving for all user vectors
    with the product vectors fixed and vice versa.

    Args:
        rating_tuples: iterable of ``(user_id, product_id, rating)`` triples
            with non-negative integer ids.
        K: number of latent factors.
        lambda_u: L2 regularisation strength for the user factors.
        lambda_v: L2 regularisation strength for the product factors.
        n_iter: number of alternating sweeps performed by :meth:`fit`.
        alpha: confidence scaling applied to positive ratings.
    """

    def __init__(self, rating_tuples, K, lambda_u, lambda_v, n_iter, alpha):
        self.K = K
        self.n_iter = n_iter
        self.lambda_u = lambda_u
        self.lambda_v = lambda_v
        self.alpha = alpha

        # Sparse (num_users x num_products) matrix; ids index rows/columns,
        # so each dimension is "largest id + 1".
        self.ratings = self.load_matrix(rating_tuples)
        self.num_users, self.num_products = self.ratings.shape

        # Gaussian initialisation whose variance is the inverse of the
        # corresponding regularisation weight.
        self.U = np.sqrt(1.0 / lambda_u) * np.random.randn(self.num_users, K)
        self.V = np.sqrt(1.0 / lambda_v) * np.random.randn(self.num_products, K)

    def fit(self):
        """Run ``n_iter`` alternating sweeps, updating ``U`` and then ``V``."""
        for _ in range(self.n_iter):
            # Update user vectors with product vectors held fixed.
            self.U = self.update("user")
            # Update product vectors with the fresh user vectors held fixed.
            self.V = self.update("product")

    def update(self, target_type):
        """Recompute one side of the factorisation with the other side fixed.

        Args:
            target_type: ``"user"`` to recompute the user factors; any other
                value recomputes the product factors.

        Returns:
            Dense array of shape ``(num_rows, K)`` with the new factors, where
            ``num_rows`` is the number of users or products respectively.
        """
        if target_type == "user":
            num = self.num_users
            fixed_factors = np.asarray(self.V, dtype=float)
            lambda_t = self.lambda_u
            ratings = self.ratings.tocsr()
        else:
            num = self.num_products
            fixed_factors = np.asarray(self.U, dtype=float)
            lambda_t = self.lambda_v
            # Products are the rows being solved for, so iterate the transpose.
            ratings = self.ratings.T.tocsr()

        # Part of the normal equations shared by every row:
        # YtY + regularization * I, a K x K matrix.
        A = fixed_factors.T.dot(fixed_factors) + lambda_t * np.eye(self.K)
        b = np.zeros(self.K)

        X = np.zeros((num, self.K))
        indptr, indices, data = ratings.indptr, ratings.indices, ratings.data
        for i in range(num):
            start, stop = indptr[i], indptr[i + 1]
            # (column index, rating) pairs of the stored entries of row i.
            Ri = zip(indices[start:stop], data[start:stop])
            X[i] = self.solve_equation(A, b, fixed_factors, Ri)
        return X

    def solve_equation(self, A, b, Y, Ri):
        """Solve the regularised weighted least-squares system for one row.

        Computes ``x = (YtCY + regularization * I)^-1 (YtCp)`` using
        ``YtCY + regularization * I = (YtY + regularization * I) + Yt(C - I)Y``,
        so only entries with a positive rating contribute beyond the shared
        part passed in as ``A``.

        Args:
            A: ``K x K`` matrix ``YtY + regularization * I`` (dense or sparse).
            b: length-``K`` starting right-hand side (normally zeros).
            Y: ``(n, K)`` matrix of fixed factors (dense or sparse).
            Ri: iterable of ``(j, r)`` pairs, ``j`` a row index into ``Y`` and
                ``r`` the rating for that entry.

        Returns:
            Length-``K`` solution vector.

        Raises:
            numpy.linalg.LinAlgError: if the assembled system is singular.
        """
        if sparse.issparse(A):
            A = A.toarray()
        if sparse.issparse(Y):
            Y = Y.toarray()
        # Work on private copies: the caller reuses A and b for every row, so
        # accumulating into them in place would leak one row's terms into the
        # next row's system.
        A = np.array(A, dtype=float)
        b = np.array(b, dtype=float).ravel()
        Y = np.asarray(Y, dtype=float)

        for j, r in Ri:
            if r > 0:
                factor = Y[j]
                confidence = 1 + self.alpha * r
                # Preference is 1 for positive ratings: YtCp gains c * y_j.
                b += confidence * factor
                # Yt(C - I)Y gains (c - 1) * y_j y_j^T.
                A += (confidence - 1) * np.outer(factor, factor)
        return np.linalg.solve(A, b)

    # TODO:
    # 1. score: MAP or NDCG
    # 2. load_data: load rating tuples from file
    def load_matrix(self, rating_tuples):
        """Convert ``(user_id, product_id, rating)`` triples to a sparse matrix.

        The result is stored on ``self.ratings`` and also returned. Its shape
        is ``(max user id + 1, max product id + 1)``. Ratings of duplicate
        ``(user, product)`` pairs are summed by the COO -> CSR conversion.

        Args:
            rating_tuples: non-empty iterable of 3-element rows.

        Returns:
            ``scipy.sparse`` CSR matrix of ratings.

        Raises:
            ValueError: if the input is empty, is not a sequence of triples,
                or contains a negative id.
        """
        triples = np.asarray(list(rating_tuples), dtype=float)
        if triples.size == 0:
            raise ValueError("rating_tuples must contain at least one rating")
        if triples.ndim != 2 or triples.shape[1] != 3:
            raise ValueError(
                "rating_tuples must be (user_id, product_id, rating) triples")

        users = triples[:, 0].astype(int)
        products = triples[:, 1].astype(int)
        if users.min() < 0 or products.min() < 0:
            raise ValueError("user and product ids must be non-negative")

        shape = (int(users.max()) + 1, int(products.max()) + 1)
        result_matrix = sparse.coo_matrix(
            (triples[:, 2], (users, products)), shape=shape).tocsr()
        self.ratings = result_matrix
        return result_matrix

    def predict(self, user_id, product_id):
        """Return the predicted preference: dot product of the two factor rows."""
        rating = self.U[user_id].dot(self.V[product_id])
        return rating

    def score(self, test, score_type):
        """Evaluate the model on ``test`` (not implemented yet).

        Raises:
            NotImplementedError: always; the ranking metrics (MAP / NDCG)
                listed in the TODO above have not been written.
        """
        raise NotImplementedError(
            "CF.score is not implemented yet (planned: MAP or NDCG)")

def load_data(file_name):
    """Load rating tuples from ``file_name`` and split them (not implemented).

    Intended to return a ``(train, test, validation)`` triple. The file format
    and the split policy have not been decided, so the function fails
    explicitly instead of returning undefined names.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError(
        "load_data is not implemented yet; cannot load %r" % (file_name,))

# In [None]:
# Reference: loader for a tab-separated (user, item, count) file.
def load_matrix(filename, num_users, num_items):
    """Load tab-separated ``user<TAB>item<TAB>count`` lines into a CSR matrix.

    Lines whose user or item id is outside ``[0, num_users)`` /
    ``[0, num_items)`` on the upper side are skipped, as are zero counts.
    Every stored count is scaled by ``alpha = num_zeros / total``, where
    ``num_zeros`` starts at ``num_users * num_items`` and is decremented for
    each non-zero line, and ``total`` is the sum of the non-zero counts.
    Progress and timing are printed to stdout.

    Args:
        filename: path of the text file to read.
        num_users: number of rows of the resulting matrix.
        num_items: number of columns of the resulting matrix.

    Returns:
        ``scipy.sparse`` CSR matrix of shape ``(num_users, num_items)``
        holding the scaled counts.

    Raises:
        ZeroDivisionError: if no non-zero count was loaded (``total`` is 0).
        ValueError: if a line does not have exactly three tab-separated
            fields or a field cannot be parsed as a number.
    """
    # Local import: `time` is not imported at file level.
    import time

    t0 = time.time()
    counts = sparse.dok_matrix((num_users, num_items), dtype=float)
    total = 0.0
    num_zeros = num_users * num_items
    # `with` guarantees the file handle is closed, even on a parse error.
    with open(filename, 'r') as handle:
        for i, line in enumerate(handle):
            user, item, count = line.strip().split('\t')
            user = int(user)
            item = int(item)
            count = float(count)
            if user >= num_users:
                continue
            if item >= num_items:
                continue
            if count != 0:
                counts[user, item] = count
                total += count
                num_zeros -= 1
            if i % 100000 == 0:
                # Single-argument print(...) is valid on Python 2 and 3.
                print('loaded %i counts...' % i)
    alpha = num_zeros / total
    print('alpha %.2f' % alpha)
    # Scale after converting to CSR; the resulting values are the same as
    # scaling the DOK matrix first.
    counts = counts.tocsr() * alpha
    t1 = time.time()
    print('Finished loading matrix in %f seconds' % (t1 - t0))
    return counts