In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [100]:
# Load the training and test ratings as integer arrays. Downstream code reads
# column 0 as a 1-based user index, column 1 as a 1-based movie index and
# column 2 as the rating.
Y_train, Y_test = (
    np.loadtxt(ratings_path).astype(int)
    for ratings_path in ('./data/train.txt', './data/test.txt')
)

In [None]:


def get_err_bias(U, V, Y, A, B, reg=0.0):
    """
    Mean (optionally regularized) squared error of the biased factorization.

    Each row of Y is (i, j, Y_ij) with 1-based indices i and j. The rating is
    predicted as dot(U[i-1], V[j-1]) + A[i-1] + B[j-1].

    Parameters
    ----------
    U, V : 2-D arrays holding one factor row per user / per movie.
    Y    : iterable of (i, j, Y_ij) rows; the index columns may be int or float.
    A, B : per-user / per-movie bias terms, either flat or column-shaped.
    reg  : weight of the penalty reg/2 * (||U||_F^2 + ||V||_F^2). The bias
           terms are not penalized here.

    Returns
    -------
    float : (penalty + sum of squared residuals) / len(Y).
    """
    # Flatten the biases so that A[i] is a scalar whether the caller passes an
    # (M,) or an (M, 1) array; otherwise the result silently becomes an array.
    A = np.ravel(A)
    B = np.ravel(B)

    sum_err = 0.0
    for element in Y:
        # Built-in int(): the np.int alias no longer exists in current NumPy.
        i = int(element[0]) - 1
        j = int(element[1]) - 1
        residual = element[2] - np.dot(U[i], V[j]) - A[i] - B[j]
        sum_err += residual ** 2

    # The penalty uses the squared Frobenius norms.
    penalty = reg / 2 * (np.linalg.norm(U, 'fro') ** 2 + np.linalg.norm(V, 'fro') ** 2)
    return float((penalty + sum_err) / len(Y))


def train_model_bias(M, N, K, eta, reg, Y, eps=0.0001, max_epochs=100):
    """
    Given a training data matrix Y containing rows (i, j, Y_ij)
    where Y_ij is user i's rating on movie j (i and j are 1-based), learns an
    M x K matrix U, an N x K matrix V, an M x 1 bias vector A and an N x 1 bias
    vector B such that rating Y_ij is approximated by (UV^T)_ij + A_i + B_j.

    Uses stochastic gradient descent with a learning rate of <eta> and
    regularization of <reg>. Stops after <max_epochs> epochs, or once the
    magnitude of the change in unregularized error between epochs is at most
    a fraction <eps> of the change observed during the first epoch. Training
    also stops if the first epoch does not change the error at all.

    Returns a tuple (U, V, A, B, err) where err is the unregularized error of
    the final model as computed by get_err_bias.
    """
    # Factor matrices and bias vectors start uniformly in [-0.5, 0.5)
    U = np.random.rand(M, K)-0.5
    V = np.random.rand(N, K)-0.5
    A = np.random.rand(M, 1)-0.5
    B = np.random.rand(N, 1)-0.5

    # The parameters do not change between the end of one epoch and the start
    # of the next, so the error is evaluated once per epoch and carried over.
    err = get_err_bias(U, V, Y, A, B, 0)
    first_delta = 0
    for epoch in range(max_epochs):
        prev_err = err

        # Visit the ratings in a fresh random order every epoch
        order = np.random.permutation(len(Y))
        for element in Y[order]:
            # Built-in int(): the np.int alias no longer exists in current NumPy.
            i = int(element[0])-1
            j = int(element[1])-1
            rating = element[2]
            # Evaluate all four steps at the same point before applying any.
            # The grad_*_bias helpers already include the eta factor.
            step_u = grad_U_bias(U[i], rating, V[j], A[i], B[j], reg, eta)
            step_v = grad_V_bias(V[j], rating, U[i], A[i], B[j], reg, eta)
            step_a = grad_A_bias(U[i], rating, V[j], A[i], B[j], reg, eta)
            step_b = grad_B_bias(V[j], rating, U[i], A[i], B[j], reg, eta)
            U[i] = U[i]-step_u
            V[j] = V[j]-step_v
            A[i] = A[i]-step_a
            B[j] = B[j]-step_b

        err = get_err_bias(U, V, Y, A, B, 0)
        change = np.abs(err-prev_err)

        if epoch == 0:
            # Reference change that later epochs are compared against
            first_delta = change
            if first_delta == 0:
                # No progress at all; the ratio below would divide by zero
                break
        elif change/first_delta <= eps:
            break

    return U, V, A, B, err

### Method 2 (Adding Bias Terms a and b)

In [132]:
def grad_U_bias(Ui, Yij, Vj, Ai, Bj, reg, eta):
    """
    Learning-rate-scaled gradient step for one user factor row.

    The rating is modelled as dot(Ui, Vj) + Ai + Bj. The returned value is
    eta * (reg * Ui - residual * Vj), where
    residual = Yij - dot(Ui, Vj) - Ai - Bj, and is meant to be subtracted
    from Ui by the caller.

    Parameters
    ----------
    Ui, Vj : factor rows of the user and the movie.
    Yij    : observed rating.
    Ai, Bj : bias terms of the user and the movie.
    reg    : regularization weight applied to Ui.
    eta    : learning rate, already multiplied into the result.
    """
    # Both biases are part of the prediction, so both are subtracted from the
    # observed rating, exactly as grad_V_bias does.
    residual = Yij - np.dot(Ui, Vj) - (Ai + Bj)
    return eta * (reg * Ui.T - residual * Vj.T)

def grad_V_bias(Vj, Yij, Ui, Ai, Bj, reg, eta):
    """
    Learning-rate-scaled gradient step for one movie factor row.

    Returns eta * (reg * Vj - residual * Ui), where
    residual = Yij - dot(Vj, Ui) - (Ai + Bj). The eta factor is already
    included in the result.
    """
    residual = Yij - np.dot(Vj, Ui) - (Ai + Bj)
    shrink_term = reg * Vj.T
    fit_term = residual * Ui.T
    return eta * (shrink_term - fit_term)

def grad_A_bias(Ui, Yij, Vj, Ai, Bj, reg, eta):
    """
    Learning-rate-scaled gradient step for one user bias term.

    Returns eta * (-residual), where
    residual = Yij - dot(Ui, Vj) - (Ai + Bj). <reg> is accepted so the
    signature matches the other gradient helpers, but it is not used.
    """
    residual = Yij - np.dot(Ui, Vj) - (Ai + Bj)
    return eta * (-residual)

def grad_B_bias(Vj, Yij, Ui, Ai, Bj, reg, eta):
    """
    Learning-rate-scaled gradient step for one movie bias term.

    Returns eta * (-residual), where
    residual = Yij - dot(Ui, Vj) - (Ai + Bj). <reg> is accepted so the
    signature matches the other gradient helpers, but it is not used.
    """
    residual = Yij - np.dot(Ui, Vj) - (Ai + Bj)
    return eta * (-residual)

def bias_err(U, V, Y, a, b, reg=0.0):
    """
    Halved, averaged squared error of the biased factorization on Y.

    Each row of Y is read as (user, movie, rating) with 1-based indices. The
    prediction is dot(U[user-1], V[movie-1]) + a[user-1] + b[movie-1].

    Returns (reg * (||U||^2 + ||V||^2 + ||a||^2 + ||b||^2) + squared error)
    divided by 2 and by the number of rows in Y.
    """
    n_ratings = Y.shape[0]
    squared_residuals = 0
    for row in Y:
        user = row[0] - 1
        movie = row[1] - 1
        predicted = np.dot(U[user, :], V[movie, :]) + a[user] + b[movie]
        squared_residuals += (row[2] - predicted) ** 2

    penalty = reg * (np.linalg.norm(U)**2 + np.linalg.norm(V)**2
                     + np.linalg.norm(a)**2 + np.linalg.norm(b)**2)
    return 1 / n_ratings * ((penalty + squared_residuals) / 2)


def train_bias_model(M, N, K, eta, reg, Y, eps=0.0001, max_epochs=300):
    """
    Fit the biased factorization Y_ij ~ dot(U_i, V_j) + a_i + b_j with
    stochastic gradient descent.

    Parameters
    ----------
    M, N       : number of users / movies (rows of U / V).
    K          : number of latent factors.
    eta        : learning rate.
    reg        : regularization weight.
    Y          : integer array whose rows are (user, movie, rating) with
                 1-based user and movie indices.
    eps        : training stops once the change in regularized error between
                 two epochs falls below this fraction of the change made by
                 the first epoch.
    max_epochs : upper bound on the number of passes over Y.

    Returns
    -------
    (U, V, err, a, b) where err is the unregularized bias_err on Y.
    """
    U = np.random.uniform(-0.5, 0.5, size=(M, K))
    V = np.random.uniform(-0.5, 0.5, size=(N, K))
    a = np.random.uniform(-0.5, 0.5, size=(M, ))
    b = np.random.uniform(-0.5, 0.5, size=(N, ))

    # bias_err takes (U, V, Y, a, b, reg); loss[0] is the error before training
    loss = [bias_err(U, V, Y, a, b, reg)]

    for epoch in range(max_epochs):
        for idx in np.random.permutation(Y.shape[0]):
            i = Y[idx][0] - 1
            j = Y[idx][1] - 1
            rating = Y[idx][2]
            # The grad_*_bias helpers return steps that already include eta,
            # so they are subtracted as-is. All four are evaluated at the
            # same point before any parameter is changed.
            step_u = grad_U_bias(U[i, :], rating, V[j, :], a[i], b[j], reg, eta)
            step_v = grad_V_bias(V[j, :], rating, U[i, :], a[i], b[j], reg, eta)
            step_a = grad_A_bias(U[i, :], rating, V[j, :], a[i], b[j], reg, eta)
            step_b = grad_B_bias(V[j, :], rating, U[i, :], a[i], b[j], reg, eta)
            U[i, :] -= step_u
            V[j, :] -= step_v
            a[i] -= step_a
            b[j] -= step_b

        loss.append(bias_err(U, V, Y, a, b, reg))
        first_change = abs(loss[1] - loss[0])
        # A first epoch that changes nothing would make the ratio undefined
        if first_change == 0 or abs(loss[-1] - loss[-2]) / first_change < eps:
            print(epoch)
            break

    # Report the unregularized error of the final model
    err = bias_err(U, V, Y, a, b)
    return (U, V, err, a, b)

In [None]:
# Train the biased model on the training ratings and score it on the test
# ratings. M, N, k, eta and reg are not assigned in this cell and must already
# be defined in the kernel.
# Mean test rating, kept as a notebook-level variable. It is not passed to
# bias_err, whose sixth positional parameter is the regularization weight.
mu = np.average(Y_test[:,2])
U_bias, V_bias, E_in_bias, a, b = train_bias_model(M, N, k, eta, reg, Y_train)
# Evaluate the factors that were just trained, without regularization
E_out_bias = bias_err(U_bias, V_bias, Y_test, a, b)

print("Training error is: " + str(E_in_bias))
print("Test error is: " + str(E_out_bias))

In [None]:
# Pass the transposed factor matrices of the biased model to visualize
visualize(np.transpose(U_bias), np.transpose(V_bias))

### Method 3 (Using scikit-surprise)

In [None]:
# Reader, Dataset, SVD and accuracy are used below, so they are imported here
# alongside train_test_split.
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

# Pool the provided training and test rows into a single surprise dataset
# with ratings on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(pd.DataFrame(np.concatenate((Y_train, Y_test))), reader)

algo = SVD(n_factors = 20, biased=True)

# Draw a new random split with 10% of the pooled rows held out for testing
trainset, testset = train_test_split(data, test_size=0.1)
algo.fit(trainset)
# User factors (u)
u = algo.pu
# Item factors (v)
v = algo.qi
predictions = algo.test(testset)
accuracy.rmse(predictions)