# DSA4212 Group 2 Assignment 2

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read in the data

anime_data = pd.read_csv('assignment_2_anime.csv')
ratings_test = pd.read_csv('assignment_2_ratings_test.csv')
ratings_train = pd.read_csv('assignment_2_ratings_train.csv')

In [3]:
from surprise import Dataset, Reader, accuracy

In [4]:
# Find the minimum and maximum rating for the Reader object

min_rating = ratings_train.rating.min()
max_rating = ratings_train.rating.max()

In [5]:
# Train data
reader = Reader(rating_scale = (min_rating, max_rating))

train_data = Dataset.load_from_df(ratings_train[['user_id', 'anime_id', 'rating']], reader)

trainset = train_data.build_full_trainset()

In [6]:
# Test data
reader = Reader(rating_scale = (min_rating, max_rating))

test_data = Dataset.load_from_df(ratings_test[['user_id', 'anime_id', 'rating']], reader)

testset = test_data.build_full_trainset().build_testset()

## 1. Model based Collaborative Filtering
### NMF

In [9]:
from surprise import NMF
from surprise.model_selection import cross_validate

In [10]:
nmf = NMF()
results = cross_validate(nmf, train_data, measures=['MSE'], cv=10, verbose=True)

Evaluating MSE of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
MSE (testset)     4.9351  4.9537  4.9308  4.9697  5.0328  4.9969  4.9609  5.0152  4.8969  4.9875  4.9680  0.0393  
Fit time          97.15   99.12   93.00   91.70   90.96   89.53   93.03   98.70   93.48   92.80   93.95   3.11    
Test time         6.81    7.21    7.23    7.61    7.28    7.29    7.16    7.34    7.36    6.00    7.13    0.42    


In [11]:
predictions = nmf.test(testset)
accuracy.mse(predictions)

MSE: 4.9878


4.987815612078931

### SVD (run this)

In [8]:
from surprise import SVD

In [10]:
svd = SVD()
results = cross_validate(svd, train_data, measures=['MSE'], cv=10, verbose=True)

Evaluating MSE of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
MSE (testset)     1.3434  1.3439  1.3382  1.3433  1.3426  1.3489  1.3417  1.3402  1.3487  1.3506  1.3442  0.0038  
Fit time          52.98   53.01   57.81   59.37   58.78   58.93   58.75   58.47   58.55   64.95   58.16   3.20    
Test time         2.82    4.29    4.48    2.68    2.66    4.45    4.46    4.44    4.45    4.44    3.92    0.79    


In [11]:
predictions = svd.test(testset)
accuracy.mse(predictions2)

MSE: 1.3441


1.344138026055986

#### Hyperparameter Tuning

In [None]:
from surprise.model_selection import GridSearchCV
param_grid = {
    'n_factors': [10, 100, 500],
    'n_epochs': [5, 20, 50], 
    'lr_all': [0.001, 0.005, 0.02],
    'reg_all': [0.005, 0.02, 0.1]}

tuning = GridSearchCV(SVD, param_grid, measures=['mse'], cv=10)
tuning.fit(train_data)

In [17]:
print(tuning.best_params['mse']['n_factors'])
print(tuning.best_params['mse']['n_epochs'])
print(tuning.best_params['mse']['lr_all'])
print(tuning.best_params['mse']['reg_all'])

500
50
0.005
0.1


In [18]:
params = tuning.best_params['mse']
svd = SVD(n_factors=params['n_factors'], n_epochs=params['n_epochs'],lr_all=params['lr_all'], reg_all=params['reg_all'])
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1197249ac10>

In [19]:
predictions = svd.test(testset)
accuracy.mse(predictions)

MSE: 1.2660


1.2660169206778658

### SVD++ (run this)

In [9]:
from surprise import SVDpp

svdpp = SVDpp()
results = cross_validate(svdpp, train_data, measures=['MSE'], cv=10, verbose=True)

predictions2 = svdpp.test(testset)
accuracy.mse(predictions2)

Evaluating MSE of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
MSE (testset)     1.4323  1.4465  1.4498  1.4308  1.4373  1.4375  1.4416  1.4502  1.4486  1.4504  1.4425  0.0072  
Fit time          958.72  954.14  975.88  1006.00 1014.02 974.10  966.35  973.68  975.92  969.92  976.87  18.04   
Test time         68.25   67.82   90.14   75.87   68.79   68.59   67.88   68.27   67.57   68.88   71.21   6.72    
MSE: 1.4446


1.4445884154098154