In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error as mae
import random
random.seed(42)
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Colab-only: mount Google Drive at /content/drive so the dataset files
# read by makeDataset (under /content/drive/MyDrive/...) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [77]:
# Column names assigned to the header-less, tab-separated rating files.
columns = ['user_id', 'movie_id', 'rating', 'timestamp']

def makeDataset(fold, dataDir = "/content/drive/MyDrive/CFAssignmentsDataset"):
    """Load the train/test rating tables for one cross-validation fold.

    Reads ``<dataDir>/u<fold>.base`` and ``<dataDir>/u<fold>.test`` as
    tab-separated, latin-1 encoded files without a header row and labels the
    columns with the module-level ``columns`` list.

    Parameters
    ----------
    fold : int or str
        Fold identifier inserted into the file name (``u1.base``, ``u1.test``, ...).
    dataDir : str, optional
        Directory containing the fold files. Defaults to the Google Drive
        location this notebook was originally run against.

    Returns
    -------
    (base, test) : tuple of pandas.DataFrame
        The ``.base`` table followed by the ``.test`` table.
    """
    def readSplit(split):
        # Both splits share the same format; only the file extension differs.
        path = "{}/u{}.{}".format(dataDir, fold, split)
        return pd.read_csv(path, sep = "\t", names = columns, encoding = "latin-1")

    return readSplit("base"), readSplit("test")

# Make Y and R matrix
def makeYandRMatrix(data, totalUsers = None, totalMovies = 1682):
    """Build the dense rating matrix Y and the observation mask R.

    Row ``user_id - 1`` / column ``movie_id - 1`` of ``Y`` receives the rating
    and the same cell of ``R`` is set to 1; every other cell stays 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide ``user_id``, ``movie_id`` and ``rating`` columns with
        1-based integer ids. The DataFrame index is not used, so filtered or
        re-indexed frames are fine.
    totalUsers : int, optional
        Number of rows. When omitted it is the largest ``user_id`` present, so
        that ids with gaps still fit (counting unique users would under-size
        the matrix and make ``user_id - 1`` index out of bounds).
    totalMovies : int, optional
        Number of columns (default 1682).

    Returns
    -------
    (Y, R) : tuple of numpy.ndarray
        Two float arrays of shape ``(totalUsers, totalMovies)``.
    """
    # Positional, zero-based coordinates of every rating.
    userIdx = data['user_id'].to_numpy(dtype=np.intp) - 1
    movieIdx = data['movie_id'].to_numpy(dtype=np.intp) - 1
    ratings = data['rating'].to_numpy(dtype=float)

    if totalUsers is None:
        totalUsers = int(userIdx.max()) + 1 if userIdx.size else 0

    Y = np.zeros([totalUsers, totalMovies])
    R = np.zeros([totalUsers, totalMovies])

    # Vectorized scatter replaces the per-row Python loop.
    Y[userIdx, movieIdx] = ratings
    R[userIdx, movieIdx] = 1

    return Y, R

def train(Y, R, t, max_iters = 10, lambda_ = 0.2, p = 1, seed = None):
    """Complete the rating matrix by iterative singular-value shrinkage.

    Each iteration forms ``B = X + Y - R*X`` (equal to ``Y`` where ``R`` is 1
    and to ``X + Y`` elsewhere), takes the thin SVD of ``B``, shrinks every
    singular value ``s`` to ``sign(s) * max(0, |s| - (lambda_/2) * p * |s|**(p-1))``
    and rebuilds ``X`` from the shrunken spectrum. After each iteration the
    current ``X`` is scored on ``t`` via ``predictRating`` and ``nmae``.

    Parameters
    ----------
    Y : numpy.ndarray
        Rating matrix (users x movies).
    R : numpy.ndarray
        Same shape as ``Y``; 1 where a rating is observed, 0 elsewhere.
    t : pandas.DataFrame
        Ratings used for scoring; passed unchanged to ``predictRating`` and ``nmae``.
    max_iters : int
        Number of shrinkage iterations.
    lambda_ : float
        Regularisation weight in the threshold.
    p : float
        Exponent in the threshold; ``p = 1`` gives a constant shift of ``lambda_/2``.
    seed : int, optional
        Seeds the random initialisation of ``X``. When omitted, NumPy's global
        random state is used (note that ``random.seed`` does not affect it).

    Returns
    -------
    (X, nmaes) : tuple
        The final completed matrix and the list of per-iteration NMAE values.
    """
    nmaes = []
    # Initialise X randomly in the rating range [1, 5), matching Y's shape
    # instead of a hard-coded 943 x 1682.
    rng = np.random if seed is None else np.random.RandomState(seed)
    X = rng.uniform(low=1, high=5, size=Y.shape)

    for _ in range(max_iters):
        B = X + Y - np.multiply(R, X)

        # Thin SVD; the third return value is already V transposed.
        U, s, Vt = np.linalg.svd(B, full_matrices=False)

        # Shrink the singular values directly on the 1-D spectrum.
        # For p < 1 a zero singular value yields an infinite threshold, which
        # the max(0, .) clamps to 0; silence the resulting divide warning.
        with np.errstate(divide="ignore", invalid="ignore"):
            threshold = (lambda_ / 2) * p * np.abs(s) ** (p - 1)
        s = np.sign(s) * np.maximum(0, np.abs(s) - threshold)

        # U * s scales column j of U by s[j], i.e. U @ diag(s).
        X = np.dot(U * s, Vt)
        rPred = predictRating(X, t)
        nmaes.append(nmae(t, rPred))

    return X, nmaes

# Predict for test data
def predictRating(X, test):
    """Look up and round the predicted rating for every row of ``test``.

    Parameters
    ----------
    X : numpy.ndarray
        Completed rating matrix indexed as ``X[user_id - 1, movie_id - 1]``.
    test : pandas.DataFrame
        Must provide 1-based integer ``user_id`` and ``movie_id`` columns.
        Rows are read by position, so the DataFrame index may be anything.

    Returns
    -------
    list
        One rounded prediction per row of ``test``, in row order. Rounding
        uses the built-in ``round`` (half-to-even); values are not clipped.
    """
    userIdx = test['user_id'].to_numpy(dtype=np.intp) - 1
    movieIdx = test['movie_id'].to_numpy(dtype=np.intp) - 1

    # One fancy-indexing gather instead of a label-based lookup per row.
    return [round(value) for value in X[userIdx, movieIdx]]

# Calculate NMAE
def nmae(test, ratingsPred):
    """Normalised mean absolute error over the non-zero predictions.

    Predictions equal to 0 are dropped together with their matching true
    ratings; the mean absolute error of the remaining pairs is divided by 4.

    Parameters
    ----------
    test : pandas.DataFrame
        Provides the true ratings in its ``rating`` column.
    ratingsPred : sequence of numbers
        One prediction per row of ``test``, in the same order.

    Returns
    -------
    float
        ``mean(|true - pred|) / 4`` over the kept pairs.

    Raises
    ------
    ValueError
        If the two inputs differ in length or no prediction is non-zero.
    """
    ratingsTrue = np.asarray(test['rating'], dtype=float)
    ratingsPred = np.asarray(ratingsPred, dtype=float)
    if ratingsTrue.shape != ratingsPred.shape:
        raise ValueError("ratingsPred must have one entry per row of test")

    # Filter BOTH arrays with the same mask; previously only the predictions
    # were filtered, so any zero prediction caused a length mismatch.
    keep = ratingsPred != 0
    if not keep.any():
        raise ValueError("no non-zero predictions to score")

    # Plain NumPy MAE (same value as sklearn's mean_absolute_error on 1-D input).
    error = np.mean(np.abs(ratingsTrue[keep] - ratingsPred[keep])) / 4
    return float(error)



In [62]:
f = 1 # fold number: selects the u1.base / u1.test file pair
# Load the fold: b is the .base table, t is the .test table
# (t is what train() scores each iteration against).
b, t = makeDataset(f)
# Y holds the ratings from b, R marks which (user, movie) cells are observed.
Y, R = makeYandRMatrix(b)

In [81]:
# train() draws its starting matrix from NumPy's global RNG, which
# random.seed(42) does not touch; seed it here so this run is reproducible
# and starts from the same initial matrix as the other p settings.
np.random.seed(42)
X, nmaes = train(Y, R, t, max_iters = 30, lambda_ = 0.2, p = 0.1) # p = 0.1
# Best per-iteration NMAE on t across the 30 iterations.
print(min(nmaes))

0.3529


In [79]:
# train() draws its starting matrix from NumPy's global RNG, which
# random.seed(42) does not touch; seed it here so this run is reproducible
# and starts from the same initial matrix as the other p settings.
np.random.seed(42)
X, nmaes = train(Y, R, t, 30, p = 0.5) # p = 0.5
# Best per-iteration NMAE on t across the 30 iterations.
print(min(nmaes))

0.355025


In [80]:
# train() draws its starting matrix from NumPy's global RNG, which
# random.seed(42) does not touch; seed it here so this run is reproducible
# and starts from the same initial matrix as the other p settings.
np.random.seed(42)
X, nmaes = train(Y, R, t, 30, p = 1) # p = 1
# Best per-iteration NMAE on t across the 30 iterations.
print(min(nmaes))

0.3407
