In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from surprise import Reader, Dataset, SVD, accuracy
from surprise.model_selection import train_test_split

In [3]:
# Create m x n matrix for Y
Y_train = np.loadtxt('./data/train_cleaned.txt').astype(int)
Y_test = np.loadtxt('./data/test_cleaned.txt').astype(int)

### Method 3 (Using scikit-surprise)

In [5]:
reader = Reader(rating_scale=(1, 5))
np.concatenate((Y_train, Y_test))
data = Dataset.load_from_df(pd.DataFrame(np.concatenate((Y_train, Y_test))), reader)

for reg in [10**-4, 10**-3, 0.01, 0.1, 1, 0]:
    print("reg: {}".format(reg))
    algo = SVD(n_factors = 20, biased=True, reg_all = reg, n_epochs = 40)
    trainset, testset = train_test_split(data, test_size=0.1)
    algo.fit(trainset)
    # User factors (u)
    u = algo.pu
    # Item factors (v)
    v = algo.qi
    predictions = algo.test(testset)
    print(accuracy.rmse(predictions))
print("\n\n")

for lr in [0.3, 0.03, 0.003,0.0003]:
    print("lr: {}".format(lr))
    algo = SVD(n_factors = 20, biased=True, reg_all = 0.1, lr_all = lr, n_epochs = 40)
    trainset, testset = train_test_split(data, test_size=0.1)
    algo.fit(trainset)
    # User factors (u)
    u = algo.pu
    # Item factors (v)
    v = algo.qi
    predictions = algo.test(testset)
    print(accuracy.rmse(predictions))

reg: 0.0001
RMSE: 0.9697
0.9696542703316504
reg: 0.001
RMSE: 0.9605
0.9604780502705889
reg: 0.01
RMSE: 0.9569
0.9569094664724928
reg: 0.1
RMSE: 0.9202
0.9201546304509504
reg: 1
RMSE: 0.9826
0.9825518901805712
reg: 0
RMSE: 0.9606
0.9605585878179745


lr: 0.3
RMSE: 1.8585
1.8584940139801367
lr: 0.03
RMSE: 0.9170
0.9170476306133247
lr: 0.003
RMSE: 0.9242
0.9242311446483035
lr: 0.0003
RMSE: 0.9747
0.9747412966532608


In [18]:
# Best setup
algo = SVD(n_factors = 20, biased=True, reg_all = 0.1, lr_all = 0.03, n_epochs = 40)
trainset, testset = train_test_split(data, test_size=0.1)
algo.fit(trainset)
# User factors (u)
u = algo.pu
# Item factors (v)
v = algo.qi
predictions = algo.test(testset)
print(accuracy.rmse(predictions))

RMSE: 0.9246
0.9245541427481175
