In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from surprise import Reader, Dataset
from surprise import SVD, accuracy

### Method 1 (Modifying Code from Homework 5)

In [100]:
# Create m x n matrix for Y
Y_train = np.loadtxt('./data/train.txt').astype(int)
Y_test = np.loadtxt('./data/test.txt').astype(int)

In [9]:
def grad_U(Ui, Yij, Vj, reg, eta):
    """
    Takes an input Ui (the ith row of U), a training point Yij, the column
    vector Vj (jth column of V^T), reg (the regularization parameter lambda),
    and eta (the learning rate).

    Returns the gradient of the regularized loss function with
    respect to Ui multiplied by eta.
    """
    return reg * Ui - Vj * (Yij - np.dot(Ui, Vj))

def grad_V(Vj, Yij, Ui, reg, eta):
    """
    Takes an input the column vector Vj (jth column of V^T), a training point Yij,
    Ui (the ith row of U), reg (the regularization parameter lambda),
    and eta (the learning rate).

    Returns the gradient of the regularized loss function with
    respect to Vj multiplied by eta.
    """
    return reg * Vj - Ui * (Yij - np.dot(Ui, Vj))

def get_err(U, V, Y, reg=0.0):
    """
    Takes an input of a matrix Y of triples (i, j, Y_ij) where i is the index of a user,
    j is the index of a movie, and Y_ij is user i's rating of movie j and
    user/movie matrices U and V.

    Returns the mean regularized squared-error of predictions made by
    estimating Y_{ij} as the dot product of the ith row of U and the jth column of V^T.
    """
    err = 0
    for x in range(Y.shape[0]):
        i = Y[x][0] - 1
        j = Y[x][1] - 1
        
        err += pow(Y[x][2] - np.dot(U[i,:], V[j,:]), 2)    
    return 1 / Y.shape[0] * ((reg * (np.linalg.norm(U)**2 + np.linalg.norm(V)**2) + err) / 2) 

def train_model(M, N, K, eta, reg, Y, eps=0.0001, max_epochs=300):
    """
    Given a training data matrix Y containing rows (i, j, Y_ij)
    where Y_ij is user i's rating on movie j, learns an
    M x K matrix U and N x K matrix V such that rating Y_ij is approximated
    by (UV^T)_ij.

    Uses a learning rate of <eta> and regularization of <reg>. Stops after
    <max_epochs> epochs, or once the magnitude of the decrease in regularized
    MSE between epochs is smaller than a fraction <eps> of the decrease in
    MSE after the first epoch.

    Returns a tuple (U, V, err) consisting of U, V, and the unregularized MSE
    of the model.
    """
    U = np.random.uniform(-0.5, 0.5, size=(M, K))
    V = np.random.uniform(-0.5, 0.5, size=(N, K))
    loss = []
    loss.append(get_err(U, V, Y, reg))
    
    for epoch in range(max_epochs): 
        index = np.random.permutation(Y.shape[0])
        for idx in index:
            i = Y[idx][0] - 1
            j = Y[idx][1] - 1
            U[i,:] -= eta * grad_U(U[i,:], Y[idx][2], V[j,:], reg, eta)
            V[j,:] -= eta * grad_V(V[j,:], Y[idx][2], U[i,:], reg, eta)
        
        err = get_err(U, V, Y, reg)
        loss.append(err)
        if (abs(loss[-1] - loss[-2]) / abs(loss[1] - loss[0]) < eps):
            break
    
    err = get_err(U, V, Y)
    return (U, V, err)

In [94]:
# Create 943 users x 1682 movies 
M = max(max(Y_train[:,0]), max(Y_test[:,0])).astype(int) # users
N = max(max(Y_train[:,1]), max(Y_test[:,1])).astype(int) # movies
print("Factorizing with ", M, " users, ", N, " movies.")

k = 20

reg = 0.0
eta = 0.03 # learning rate

# Use to compute Ein and Eout using k=20
U, V, E_in = train_model(M, N, k, eta, reg, Y_train)
E_out = get_err(U, V, Y_test)

print("Training error is: " + str(E_in))
print("Test error is: " + str(E_out))

Factorizing with  943  users,  1682  movies.
Training error is: 0.2494398706178021
Test error is: 0.6733794601112535


In [97]:
# Visualize and interpret results

def visualize(U, V):
    # The variable V is currently V^T
    A, sigma, B = np.linalg.svd(V)

    U_tilde = np.matmul(np.transpose(A[:,:2]), U)
    V_tilde = np.matmul(np.transpose(A[:,:2]), V)

    print(U_tilde.shape)
    print(V_tilde.shape)
    
visualize(np.transpose(U), np.transpose(V))

(2, 943)
(2, 1682)


### Method 2 (Adding Bias Term a and b)

In [127]:
def grad_bias_U(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    return reg * Ui - Vj * (Yij - np.dot(Ui, Vj) - ai - bj)

def grad_bias_V(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    return reg * Vj - Ui * (Yij - np.dot(Ui, Vj) - ai - bj)

def grad_bias_a(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    return reg * ai - (Yij - np.dot(Ui, Vj) - ai - bj)

def grad_bias_b(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    return reg * bj - (Yij - np.dot(Ui, Vj) - ai - bj)

def bias_err(U, V, Y, a, b, reg=0.0):
    mu = np.average(Y)
    
    err = 0
    for x in range(Y.shape[0]):
        i = Y[x][0] - 1
        j = Y[x][1] - 1
        
        err += pow(Y[x][2] - (np.dot(U[i,:], V[j,:]) + a[i] + b[j]), 2)    
    return 1 / Y.shape[0] * ((reg * (np.linalg.norm(U)**2 + np.linalg.norm(V)**2 
                    + np.linalg.norm(a)**2 + np.linalg.norm(b)**2) + err) / 2) 

def train_bias_model(M, N, K, eta, reg, Y, eps=0.0001, max_epochs=300):
    U = np.random.uniform(-0.5, 0.5, size=(M, K))
    V = np.random.uniform(-0.5, 0.5, size=(N, K))
    a = np.random.uniform(-0.5, 0.5, size=(M, ))
    b = np.random.uniform(-0.5, 0.5, size=(N, ))
    
    loss = []
    loss.append(bias_err(U, V, Y, a, b, reg))
    
    mu = np.average(Y)
    
    for epoch in range(max_epochs): 
        index = np.random.permutation(Y.shape[0])
        for idx in index:
            i = Y[idx][0] - 1
            j = Y[idx][1] - 1
            U[i,:] -= eta * grad_bias_U(U[i,:], Y[idx][2], V[j,:], reg, eta, a[i], b[j], mu)
            V[j,:] -= eta * grad_bias_V(U[i,:], Y[idx][2], V[j,:], reg, eta, a[i], b[j], mu)
            a[i] -= eta * grad_bias_a(U[i,:], Y[idx][2], V[j,:], reg, eta, a[i], b[j], mu)
            b[j] -= eta * grad_bias_b(U[i,:], Y[idx][2], V[j,:], reg, eta, a[i], b[j], mu)
        
        err = bias_err(U, V, Y, a, b, reg)
        loss.append(err)
        if (abs(loss[-1] - loss[-2]) / abs(loss[1] - loss[0]) < eps):
            break
    
    err = bias_err(U, V, Y, a, b)
    return (U, V, err, a, b)

In [128]:
# Create 943 users x 1682 movies 

# Use to compute Ein and Eout using k=20
U_bias, V_bias, E_in_bias, a, b = train_bias_model(M, N, k, eta, reg, Y_train)
E_out_bias = bias_err(U, V, Y_test, a, b)

print("Training error is: " + str(E_in_bias))
print("Test error is: " + str(E_out_bias))

Training error is: 0.18380830212340524
Test error is: 6.636491628376683


In [None]:
visualize(np.transpose(U_bias), np.tranpose(V_bias))

### Method 3 (Using scikit-surprise)

In [106]:
reader = Reader()
data = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

algo = SVD()
algo.fit(pd.DataFrame(Y_train))
predictions = algo.test(pd.DataFrame(Y_test))

NameError: name 'data' is not defined