In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [100]:
# Read the training and test rating files and truncate every entry to an integer.
# Downstream code treats columns 0 and 1 of each row as 1-based row indices
# into U and V, and column 2 as the observed rating.
Y_train, Y_test = (
    np.loadtxt(ratings_path).astype(int)
    for ratings_path in ('./data/train.txt', './data/test.txt')
)

### Method 2 (Adding Bias Terms a and b)

In [132]:
def grad_bias_U(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    """Return the gradient of the regularized squared error w.r.t. ``Ui``.

    The per-rating objective is
    ``reg/2 * |Ui|^2 + 1/2 * (Yij - mu - Ui.Vj - ai - bj)^2``.

    Parameters
    ----------
    Ui, Vj : the two latent factor vectors whose dot product is the interaction term.
    Yij : observed rating.
    reg : regularization strength.
    eta : learning rate; accepted for a uniform signature but not used here.
    ai, bj : the two scalar bias terms.
    mu : global offset subtracted from the rating.
    """
    # Prediction error for this single rating.
    residual = Yij - mu - np.dot(Ui, Vj) - ai - bj
    shrinkage = reg * Ui
    return shrinkage - Vj * residual

def grad_bias_V(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    """Return the gradient of the regularized squared error w.r.t. ``Vj``.

    The per-rating objective is
    ``reg/2 * |Vj|^2 + 1/2 * (Yij - mu - Ui.Vj - ai - bj)^2``.

    Parameters
    ----------
    Ui, Vj : the two latent factor vectors whose dot product is the interaction term.
    Yij : observed rating.
    reg : regularization strength.
    eta : learning rate; accepted for a uniform signature but not used here.
    ai, bj : the two scalar bias terms.
    mu : global offset subtracted from the rating.
    """
    # Prediction error for this single rating.
    residual = Yij - mu - np.dot(Ui, Vj) - ai - bj
    shrinkage = reg * Vj
    return shrinkage - Ui * residual

def grad_bias_a(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    """Return the gradient of the regularized squared error w.r.t. ``ai``.

    The per-rating objective is
    ``reg/2 * ai^2 + 1/2 * (Yij - mu - Ui.Vj - ai - bj)^2``.

    Parameters
    ----------
    Ui, Vj : the two latent factor vectors whose dot product is the interaction term.
    Yij : observed rating.
    reg : regularization strength.
    eta : learning rate; accepted for a uniform signature but not used here.
    ai, bj : the two scalar bias terms.
    mu : global offset subtracted from the rating.
    """
    # Prediction error for this single rating.
    residual = Yij - mu - np.dot(Ui, Vj) - ai - bj
    penalty = reg * ai
    return penalty - residual

def grad_bias_b(Ui, Yij, Vj, reg, eta, ai, bj, mu):
    """Return the gradient of the regularized squared error w.r.t. ``bj``.

    The per-rating objective is
    ``reg/2 * bj^2 + 1/2 * (Yij - mu - Ui.Vj - ai - bj)^2``.

    Parameters
    ----------
    Ui, Vj : the two latent factor vectors whose dot product is the interaction term.
    Yij : observed rating.
    reg : regularization strength.
    eta : learning rate; accepted for a uniform signature but not used here.
    ai, bj : the two scalar bias terms.
    mu : global offset subtracted from the rating.
    """
    # Prediction error for this single rating.
    residual = Yij - mu - np.dot(Ui, Vj) - ai - bj
    penalty = reg * bj
    return penalty - residual

def bias_err(U, V, Y, a, b, mu, reg=0.0):
    """Return the mean regularized squared error of the biased factor model.

    The prediction for a row ``(i, j, y)`` of ``Y`` is
    ``mu + U[i-1] . V[j-1] + a[i-1] + b[j-1]``, which matches the residual
    used by the ``grad_bias_*`` functions. The returned value is

        (reg * (|U|^2 + |V|^2 + |a|^2 + |b|^2) + sum of squared errors)
        / (2 * number of rows in Y)

    Parameters
    ----------
    U : 2-D array of latent factors, indexed by column 0 of ``Y`` (1-based).
    V : 2-D array of latent factors, indexed by column 1 of ``Y`` (1-based).
    Y : 2-D integer array whose rows are ``(i, j, rating)``.
    a : 1-D array of bias terms, indexed like the rows of ``U``.
    b : 1-D array of bias terms, indexed like the rows of ``V``.
    mu : global offset added to every prediction.
    reg : regularization strength; ``0.0`` gives the unregularized error.

    Returns
    -------
    float
        The averaged, halved loss described above.
    """
    # Ids in Y are 1-based; convert them to 0-based array indices.
    rows = Y[:, 0] - 1
    cols = Y[:, 1] - 1

    # Row-wise dot products U[i] . V[j] for every rating at once.
    interactions = (U[rows] * V[cols]).sum(axis=1)
    # mu must be part of the prediction: the gradients are derived from
    # Yij - mu - Ui.Vj - ai - bj, so leaving it out scores a different model.
    predictions = mu + interactions + a[rows] + b[cols]
    err = np.sum((Y[:, 2] - predictions) ** 2)

    penalty = reg * (np.linalg.norm(U)**2 + np.linalg.norm(V)**2
                     + np.linalg.norm(a)**2 + np.linalg.norm(b)**2)
    return 1 / Y.shape[0] * ((penalty + err) / 2)

def train_bias_model(M, N, K, eta, reg, Y, eps=0.0001, max_epochs=300, seed=None):
    """Fit the biased latent factor model with stochastic gradient descent.

    The model predicts ``mu + U[i] . V[j] + a[i] + b[j]`` where ``mu`` is the
    mean of column 2 of ``Y``. ``mu`` is not returned; callers that need it
    for scoring should recompute ``np.average(Y[:, 2])`` on the same ``Y``.

    Parameters
    ----------
    M, N : number of rows of ``U``/``a`` and of ``V``/``b`` respectively.
    K : number of latent factors.
    eta : learning rate.
    reg : regularization strength.
    Y : 2-D integer array whose rows are ``(i, j, rating)`` with 1-based ids.
    eps : training stops once the loss change of an epoch, relative to the
        loss change of the first epoch, drops below this value.
    max_epochs : upper bound on the number of passes over ``Y``.
    seed : optional seed for a private ``np.random.RandomState`` that drives
        initialization and shuffling. ``None`` (the default) keeps using the
        global ``np.random`` state.

    Returns
    -------
    tuple
        ``(U, V, err, a, b)`` where ``err`` is the unregularized
        ``bias_err`` on ``Y`` after training.
    """
    # The np.random module and RandomState expose the same uniform/permutation
    # API, so either can serve as the random source.
    rng = np.random if seed is None else np.random.RandomState(seed)

    U = rng.uniform(-0.5, 0.5, size=(M, K))
    V = rng.uniform(-0.5, 0.5, size=(N, K))
    a = rng.uniform(-0.5, 0.5, size=(M, ))
    b = rng.uniform(-0.5, 0.5, size=(N, ))

    mu = np.average(Y[:, 2])
    loss = [bias_err(U, V, Y, a, b, mu, reg)]

    for epoch in range(max_epochs):
        # Visit every rating once per epoch, in a fresh random order.
        for idx in rng.permutation(Y.shape[0]):
            row = Y[idx]
            i = row[0] - 1
            j = row[1] - 1
            Yij = row[2]
            # Updates are applied one after another, so each later gradient
            # is evaluated with the parameters already updated above it.
            U[i, :] -= eta * grad_bias_U(U[i, :], Yij, V[j, :], reg, eta, a[i], b[j], mu)
            V[j, :] -= eta * grad_bias_V(U[i, :], Yij, V[j, :], reg, eta, a[i], b[j], mu)
            a[i] -= eta * grad_bias_a(U[i, :], Yij, V[j, :], reg, eta, a[i], b[j], mu)
            b[j] -= eta * grad_bias_b(U[i, :], Yij, V[j, :], reg, eta, a[i], b[j], mu)

        loss.append(bias_err(U, V, Y, a, b, mu, reg))

        first_delta = abs(loss[1] - loss[0])
        delta = abs(loss[-1] - loss[-2])
        # A first epoch that did not move the loss at all leaves the relative
        # criterion undefined (division by zero); treat it as converged.
        if first_delta == 0 or delta / first_delta < eps:
            print(epoch)
            break

    # Report the plain (unregularized) training error.
    err = bias_err(U, V, Y, a, b, mu)
    return (U, V, err, a, b)

In [None]:
# Data set size noted by the author: 943 users x 1682 movies.
# M, N, k, eta and reg are expected to be defined by an earlier cell.

# train_bias_model fits the model around the mean of the *training* ratings,
# so the same offset has to be used when scoring the held-out ratings.
mu = np.average(Y_train[:,2])
# Use to compute Ein and Eout using k=20
U_bias, V_bias, E_in_bias, a, b = train_bias_model(M, N, k, eta, reg, Y_train)
# Score the factors that were just trained, not leftovers from another cell.
E_out_bias = bias_err(U_bias, V_bias, Y_test, a, b, mu)

print("Training error is: " + str(E_in_bias))
print("Test error is: " + str(E_out_bias))

In [None]:
# visualize is defined outside this section; it is handed the transposed
# factor matrices, exactly as the original call intended.
visualize(np.transpose(U_bias), np.transpose(V_bias))

### Method 3 (Using scikit-surprise)

In [None]:
# Every surprise name used in this cell is imported here so the cell also
# runs on a fresh kernel.
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

# Ratings are on a 1-5 scale; the DataFrame columns are read positionally
# as (user id, item id, rating).
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(pd.DataFrame(np.concatenate((Y_train, Y_test))), reader)

algo = SVD(n_factors = 20, biased=True)

# NOTE: the pooled data is re-split at random here (10% held out), so this
# is not the same train/test partition that the files on disk define.
trainset, testset = train_test_split(data, test_size=0.1)
algo.fit(trainset)
# User factors (u)
u = algo.pu
# Item factors (v)
v = algo.qi
predictions = algo.test(testset)
accuracy.rmse(predictions)