In [65]:
import pandas as pd
import numpy as np
# Load the ratings table from disk.
data = pd.read_csv("../data/ml-latest-small/ratings.csv")

# Matrix dimensions used by later cells: distinct users and distinct movies.
n_users, n_movies = (data[col].nunique() for col in ("userId", "movieId"))

In [66]:
#split the data into different bins for the temporal function
def get_bin(num):
    """Map a timestamp in seconds since the epoch to a yearly bin number.

    The timestamp is divided by the length of a 365-day year, truncated to a
    whole number, and shifted down by 25 (per the original note, collection
    started 25 years after January 1970).
    """
    seconds_per_year = 60 * 60 * 24 * 365
    whole_years = int(num / seconds_per_year)
    return whole_years - 25
# Tag every rating with its yearly bin, then group the table by that bin.
data["bin"] = data["timestamp"].map(get_bin)
data_groups = data.groupby(by="bin")

In [136]:
#create R matrix
# Sorted unique ids fix the row (user) and column (movie) order of both matrices.
# Looking ids up in these lists (instead of using `userId - 1`) keeps the layout
# correct even when user ids are not exactly 1..n.
userIds = sorted(data.userId.unique())
movieIds = sorted(data.movieId.unique())
n_users = len(userIds)
n_items = len(movieIds)

R_init = np.zeros((n_users, n_items))
bin_match = np.zeros((n_users, n_items))

# Position of every rating's user / movie in the sorted id lists. This replaces
# a per-row `movieIds.index(...)` linear scan with a single binary-search pass.
user_pos = np.searchsorted(userIds, data.userId.values)
movie_pos = np.searchsorted(movieIds, data.movieId.values)

#store the rating and the bin number for each user movie pair
# (unrated pairs stay 0 in both matrices; for a repeated pair the last row wins)
R_init[user_pos, movie_pos] = data.rating.values
bin_match[user_pos, movie_pos] = data.bin.values

In [68]:
#get initial B_i_bin_t dimensions: n_bins by n_movies
# Row k holds bin number k+1 (the same `bin - 1` convention used when the
# matrix is indexed for predictions), so the row count follows the largest bin.
n_bins = max(int(data['bin'].max()), 0)
B_i_bint = np.zeros((n_bins, len(movieIds)))

# Matrix coordinates of every rating: (bin - 1, position of the movie id).
bin_rows = data['bin'].values.astype(int) - 1
movie_cols = np.searchsorted(movieIds, data['movieId'].values)
ratings = data['rating'].values.astype(float)
in_range = bin_rows >= 0  # bins below 1 have no row, so they are skipped

# Accumulate the per-(bin, movie) rating sum and the count of non-zero ratings.
rating_sums = np.zeros_like(B_i_bint)
rating_counts = np.zeros_like(B_i_bint)
cells = (bin_rows[in_range], movie_cols[in_range])
np.add.at(rating_sums, cells, ratings[in_range])
np.add.at(rating_counts, cells, (ratings[in_range] != 0).astype(float))

# Global mean over the rated entries of the rating matrix.
mu = R_init.sum()/(R_init!=0).sum().astype(float)

# Offset of each movie's mean rating within a bin from the global mean.
# (bin, movie) cells without any rating stay at 0 instead of becoming NaN.
rated_cells = rating_counts > 0
B_i_bint[rated_cells] = rating_sums[rated_cells]/rating_counts[rated_cells] - mu
    

  # This is added back by InteractiveShellApp.init_path()


In [100]:
def get_error(R, R_hat):
    """Return the RMSE between R and R_hat over the rated entries of R.

    Parameters
    ----------
    R : array-like
        Rating matrix; entries greater than 0 are treated as rated.
    R_hat : array-like
        Predicted ratings, same shape as R. It is not modified.

    Returns
    -------
    float
        Root-mean-square error over the rated entries, or NaN when R has no
        rated entry at all.
    """
    R = np.asarray(R, dtype=float)
    R_hat = np.asarray(R_hat, dtype=float)

    #only score the entries that had ratings in the training matrix
    mask = (R>0)
    if not mask.any():
        return float('nan')

    # Selecting with the mask (rather than overwriting R_hat) leaves the
    # caller's predictions intact and keeps unrated cells out of the mean.
    residual = R[mask] - R_hat[mask]
    return np.sqrt((residual**2).mean()) #return the RMSE

def predict(U, M, mu,b_i, b_u, bin_match ,B_i_bint):
    """Build the full matrix of predicted ratings.

    For user u and movie i the prediction is
        mu + b_u[u] + b_i[i] + B_i_bint[bin_match[u, i] - 1, i] + U[:, u] . M[:, i]

    Parameters
    ----------
    U : array, one column of latent factors per user.
    M : array, one column of latent factors per movie.
    mu : scalar offset added to every prediction.
    b_i : sequence of per-movie biases.
    b_u : sequence of per-user biases.
    bin_match : array indexed [user, movie] holding the bin number of the pair.
    B_i_bint : array indexed [bin - 1, movie] holding the per-bin movie bias.

    Returns
    -------
    numpy.ndarray with one row per user and one column per movie.
    """
    U = np.asarray(U, dtype=float)
    M = np.asarray(M, dtype=float)
    b_u = np.asarray(b_u, dtype=float)
    b_i = np.asarray(b_i, dtype=float)
    B_i_bint = np.asarray(B_i_bint, dtype=float)
    n_users, n_movies = U.shape[1], M.shape[1]

    # Row of B_i_bint for every (user, movie) pair. A bin number of 0 becomes
    # index -1 and therefore reads the last row, exactly as the per-element
    # `int(bin_match[u,i]-1)` lookup did.
    bin_rows = (np.asarray(bin_match)[:n_users, :n_movies] - 1).astype(int)
    temporal_bias = B_i_bint[bin_rows, np.arange(n_movies)]

    # One broadcasted expression instead of a Python loop over every pair.
    return (mu + b_u[:n_users, None] + b_i[None, :n_movies]
            + temporal_bias + np.dot(U.T, M))

In [141]:
#basic ALS so far
np.random.seed(1)
def train(Mat, bin_match, f, lambda_, n_iter, B_i_bint):
    """Factorise a rating matrix with regularised alternating least squares.

    Every cell of the matrix, including the zeros of unrated pairs, enters
    the least-squares fit.

    Parameters
    ----------
    Mat : array-like, rating matrix with one row per user and one column per movie.
    bin_match : unused here; kept so the call signature stays stable.
    f : int, number of latent factors.
    lambda_ : float, ridge penalty added to the diagonal of each normal-equation system.
    n_iter : int, number of alternating sweeps.
    B_i_bint : unused here; kept so the call signature stays stable.

    Returns
    -------
    (U, M) : user factors (n_users x f) and movie factors (n_movies x f), so
    that np.dot(U, M.T) approximates Mat.
    """
    R = np.asarray(Mat, dtype=float)
    # Take the dimensions from the input itself rather than from notebook globals.
    n_users, n_movies = R.shape

    # Step 1 Initialize matrix M with random numbers and put each movie's average
    # (non-zero) rating in its first factor. Movies with no rating get 0, not NaN.
    M = 5*np.random.rand(n_movies,f)
    rating_counts = (R!=0).sum(0)
    M[:,0] = np.divide(R.sum(0), rating_counts,
                       out=np.zeros(n_movies), where=rating_counts > 0)

    #Randomly initialize U matrix
    U = 5*np.random.rand(n_users,f)

    ridge = lambda_*np.identity(f)

    for epoch in range(n_iter):
        #Step 2 Fix M, solve U. Every user shares the same system matrix
        #(M^T M + lambda I), so all users are solved in one call with one
        #right-hand-side column per user.
        U = np.linalg.solve(np.dot(M.T, M) + ridge, np.dot(M.T, R.T)).T

        #Step 3 Fix U, solve M the same way, one right-hand side per movie.
        M = np.linalg.solve(np.dot(U.T, U) + ridge, np.dot(U.T, R)).T

    return U, M

In [None]:
#basic ALS so far
np.random.seed(1)
def train(Mat, bin_match, f, lambda_, n_iter, B_i_bint):
    """ALS factorisation whose user step also fits a per-user bias.

    In the user step each movie's factor row is prefixed with a constant 1, so
    the solved vector for a user is [b_u, p_u]: a bias followed by f latent
    factors. The movie step is the plain ridge solve against the rating matrix.
    Every cell of the matrix, including the zeros of unrated pairs, enters the
    fit.

    NOTE(review): this redefines `train` from the earlier cell with the same
    signature. The biases are fitted but not returned, so the (U, M) return
    shape existing callers rely on is unchanged -- TODO decide whether b_u /
    b_i should be exposed and used in the movie step.

    Parameters
    ----------
    Mat : array-like, rating matrix with one row per user and one column per movie.
    bin_match : unused here; kept so the call signature stays stable.
    f : int, number of latent factors.
    lambda_ : float, ridge penalty added to the diagonal of each system.
    n_iter : int, number of alternating sweeps.
    B_i_bint : unused here; kept so the call signature stays stable.

    Returns
    -------
    (U, M) : user factors (n_users x f) and movie factors (n_movies x f).
    """
    R = np.asarray(Mat, dtype=float)
    n_users, n_movies = R.shape

    # Average over the non-zero ratings (this name was previously undefined).
    n_rated = (R!=0).sum()
    mu = R.sum()/float(n_rated) if n_rated else 0.0

    # Step 1 Initialize matrix M with random numbers and put each movie's average
    # (non-zero) rating in its first factor. Movies with no rating get 0, not NaN.
    M = 5*np.random.rand(n_movies,f)
    rating_counts = (R!=0).sum(0)
    M[:,0] = np.divide(R.sum(0), rating_counts,
                       out=np.zeros(n_movies), where=rating_counts > 0)

    #Randomly initialize U matrix
    U = 5*np.random.rand(n_users,f)

    b_i = np.random.rand(n_movies) - mu #item bias len(movies); placeholder, not updated yet
    b_u = np.random.rand(n_users) - mu #user bias len(users); replaced by the user step

    E = np.identity(f+1)
    ones_column = np.ones(n_movies)

    for epoch in range(n_iter):
        #Step 2 Fix M, solve [b_u, U]. Prefix a column of ones to M so the first
        #solved component is the user's bias; all users share the same system
        #matrix, so they are solved together (one right-hand side per user).
        M_p = np.column_stack((ones_column, M))
        MTM_prime = np.dot(M_p.T, M_p)
        u_prime = np.linalg.solve((MTM_prime+(lambda_*E)), np.dot(M_p.T, R.T))
        b_u = u_prime[0]
        U = u_prime[1:].T

        #Step 3 Fix U, solve M by minimizing the objective function similarly;
        UTU = np.dot(U.T, U)
        M = np.linalg.solve((UTU+(lambda_* np.identity(f))), np.dot(U.T, R)).T

    return U, M

In [146]:
# Fit the factor model on the full rating matrix.
U_hat, M_hat = train(
    R_init,     # rating matrix
    bin_match,  # bin number per user/movie pair
    50,         # f: number of latent factors
    2,          # regularisation strength
    100,        # n_iter: number of ALS sweeps
    B_i_bint,   # per-bin movie bias
)

In [145]:
# Reconstructed rating matrix: user factors times transposed movie factors.
U_hat.dot(M_hat.T)

array([[ 2.83686571e+00,  9.31239696e-01,  9.65874528e-01, ...,
        -1.24833940e-02, -1.24833940e-02, -1.92673657e-02],
       [ 1.89397148e-01, -8.12387215e-03, -2.81088171e-02, ...,
         5.87107758e-03,  5.87107758e-03,  1.30214295e-02],
       [ 3.13652933e-02,  1.64029966e-02,  1.93400977e-02, ...,
         2.37311269e-04,  2.37311269e-04, -1.87412699e-03],
       ...,
       [ 2.80129145e+00,  1.97075513e+00,  1.79338763e+00, ...,
        -4.80179725e-02, -4.80179725e-02, -3.06000681e-03],
       [ 8.27536799e-01,  6.53153855e-01,  3.06052742e-01, ...,
         3.00966947e-04,  3.00966947e-04,  8.10575070e-04],
       [ 8.01479163e-01,  2.70642000e+00, -2.77335493e-01, ...,
         4.53353765e-02,  4.53353765e-02,  6.38959520e-02]])

In [128]:
"""#alternate implementation from online resource

np.random.seed(1)
def train(Mat, bin_match, f, Lambda, n_iter, B_i_bint):
    R = Mat[:]
    # Step 1 Initialize matrix M by assigning the average rating for that movie as the first row, and small random numbers for the remaining entries.
    M = 1*np.random.rand(f, R.shape[1])
    M[0,:] = R.sum(0)/(R!=0).sum(0).astype(float)
    
    U = 1*np.random.rand(f, R.shape[0])#initialize U matrix
    
    mu = R.sum()/(R!=0).sum().astype(float)#average rating
    b_i = (R.sum(0)/(R!=0).sum(0).astype(float)) - mu #initialize b_i = len(movies)
    b_u = (R.sum(1)/(R!=0).sum(1).astype(float)) - mu #initialize b_u = len(users)
    
    #function to minimize: sum(r_ui - ^r_ui) + lambda(b_i^2 + b_u^2+ |q|^2 + |p|^2 )
    for epoch in range(n_iter):
        #Step 2 Fix M, Solve U by minimizing the objective function
        for i in range(n_users):
            I_i = np.nonzero(R[i,:])[0] #set of movies that user i rated
            n_ui = len(I_i) #number of ratings user i has given matrix 
            M_Ii = M[:,I_i] #denotes the sub-matrix of M where rows j in I_i are selected
            E = np.identity(f)
            R_i_I = R[i,I_i]
            U[:,i] = np.dot(np.linalg.inv(np.dot(M_Ii,M_Ii.T) + (Lambda*n_ui*E)),np.dot(M_Ii, R_i_I.T))

        #Step 3 Fix U, solve M by minimizing the objective function similarly; 
        for j in range(n_movies):
            I_j = np.nonzero(R[:,j])[0] #set of users that have rated movie j
            n_mj = len(I_j) #number of ratings movie j has received 
            U_Ij = U[:,I_j] #denotes the sub-matrix of U where rows i in I_j are selected
            R_j_I = R[I_j, j]
            #print(U_Ij)
            M[:,j] = np.dot(np.linalg.inv(np.dot(U_Ij,U_Ij.T) + (Lambda*n_mj*E)),np.dot(U_Ij,R_j_I.T))
        
        #get predictions
        #R_hat = predict(U, M, mu,b_i, b_u, bin_match ,B_i_bint)
    
        #get the error
        #error = get_error(R_hat, R)
        #print(error)
        
        #b_i = b_i +  
        #update all the biases
        
    #Step 4 Repeat Steps 2 and 3 until a stopping criterion is satisfied.
    return R, U, M, b_u, b_i, B_i_bint"""
#attempted implementation according to the paper. Runs into singularity problems

(10, 232)

(10, 232)

(10, 232, 0)