In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from Modules.dataEngineering import dataEngineering
dataEngineering = dataEngineering()

In [2]:
df_ratings        = dataEngineering.loadRatings()['data']

In [3]:
df_ratings.head(5)

Unnamed: 0,UserID,MovieID,userRating,Title,Genres,meanRating
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92
1,5,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92
2,7,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92
3,15,1,2.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92
4,17,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92


In [4]:
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD, KNNWithMeans, KNNBasic, KNNWithZScore

options_KNNwithMeans = {
    'k': 40,
    'sim_options': {'name': 'pearson', 'user_based': True},
    'bsl_options': {'method': 'sgd', 'learning_rate': 0.0005}
}

option_SVD = {
    'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.4
}

option_KNNBasic = {
    'k': 100,
    'sim_options': {'name': 'pearson', 'user_based': False},
    'bsl_options': {'method': 'sgd', 'learning_rate': 0.0005}
}

option_KNNWithZScore = {
    'k': 40,
    'sim_options': {'name': 'pearson', 'user_based': True},
    'bsl_options': {'method': 'sgd', 'learning_rate': 0.0005}
}

# Define the metrics you want to use for evaluation
metrics = ['rmse', 'mae']
fold = 5

In [5]:
# Create a Reader object
reader = Reader(rating_scale=(1, 5))

# Load the dataset using the Reader
data = Dataset.load_from_df(df_ratings[['UserID', 'MovieID', 'userRating']], reader)

In [6]:
# Define the models
model_SVD = SVD(
    n_epochs=option_SVD['n_epochs'], lr_all=option_SVD['lr_all'], reg_all=option_SVD['reg_all'],
)

model_KNNwithMeans = KNNWithMeans( k=options_KNNwithMeans['k'], 
    sim_options=options_KNNwithMeans['sim_options'], bsl_options=options_KNNwithMeans['bsl_options']
)

model_KNNBasic = KNNBasic(k=option_KNNBasic['k'], 
    sim_options=option_KNNBasic['sim_options'], bsl_options=option_KNNBasic['bsl_options'],
)

model_KNNWithZScore = KNNWithZScore(k=option_KNNWithZScore['k'], 
    sim_options=option_KNNWithZScore['sim_options'], bsl_options=option_KNNWithZScore['bsl_options'],
)



In [7]:
# Perform cross-validation for SVD
results_SVD = cross_validate(model_SVD, data, measures=metrics, cv=fold, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8828  0.8835  0.8762  0.8789  0.8796  0.8802  0.0027  
MAE (testset)     0.6799  0.6813  0.6789  0.6800  0.6776  0.6795  0.0012  
Fit time          1.69    1.69    1.67    1.67    1.66    1.68    0.01    
Test time         0.19    0.13    0.18    0.19    0.12    0.16    0.03    


In [8]:
# Perform cross-validation for KNN with Means
results_KNNwithMeans = cross_validate(model_KNNwithMeans, data, measures=metrics, cv=fold, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8994  0.8960  0.8976  0.9038  0.8922  0.8978  0.0038  
MAE (testset)     0.6834  0.6841  0.6837  0.6867  0.6794  0.6835  0.0023  
Fit time          0.68    0.67    0.66    0.66    0.66    0.67    0.01    
Test time         1.26    1.40    1.34    1.25    1.44    1.34    0.08    


In [9]:
results_KNNBasic = cross_validate(model_KNNBasic, data, measures=metrics, cv=fold, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9594  0.9539  0.9546  0.9621  0.9531  0.9566  0.0035  
MAE (testset)     0.7441  0.7392  0.7398  0.7451  0.7372  0.7411  0.0030  
Fit time          10.17   10.29   9.78    10.15   10.05   10.09   0.17    
Test time         8.66    8.47    9.19    9.12    8.64    8.82    0.29    


In [10]:
results_KNNWithZScore = cross_validate(model_KNNWithZScore, data, measures=metrics, cv=fold, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8880  0.8920  0.8896  0.8930  0.8992  0.8923  0.0039  
MAE (testset)     0.6701  0.6761  0.6730  0.6734  0.6791  0.6743  0.0030  
Fit time          0.72    0.89    0.80    0.88    0.76    0.81    0.06    
Test time         1.63    1.48    1.49    1.53    1.46    1.52    0.06    


In [106]:
import numpy as np

def Avg(lis):
    return np.average(lis)

def Std(lis):
    return np.std(lis)

In [None]:
def displatFold(results, name):
    print(name+':')
    test_rmse = results['test_rmse']
    test_mae = results['test_mae']
    fit_time = list(results['fit_time'])
    test_time = list(results['test_time'])

    lis_fit, lis_test = [], []

    print("\tRMSE (testset)\tMAE (testset)\tFit time\tTest time")
    for fold_idx, (rmse, mae, fit_time, test_time) in enumerate(zip(test_rmse, test_mae, fit_time, test_time), start=1):
        print("Fold %d: %.4f\t\t%.4f\t\t%.4f\t\t%.4f"%(fold_idx, rmse, mae, fit_time, test_time))
        lis_fit.append(round(fit_time, 6)), lis_test.append(round(test_time, 6))
        

    print("Mean  : %.4f\t\t%.4f\t\t%.4f\t\t%.4f"%(test_rmse.mean(), test_mae.mean(), Avg(lis_fit), Avg(lis_test)))
    print("Std   : %.4f\t\t%.4f\t\t%.4f\t\t%.4f"%(test_rmse.std(), test_mae.std(), Std(lis_fit), Std(lis_test)))


In [126]:
displatFold(results_KNNwithMeans, 'KNNwithMeans Results')

KNNwithMeans Results:
	RMSE (testset)	MAE (testset)	Fit time	Test time
Fold 1: 0.8994		0.6834		0.6840		1.2601
Fold 2: 0.8960		0.6841		0.6720		1.4031
Fold 3: 0.8976		0.6837		0.6620		1.3351
Fold 4: 0.9038		0.6867		0.6621		1.2491
Fold 5: 0.8922		0.6794		0.6590		1.4391
Mean  : 0.8978		0.6835		0.6678		1.3373
Std   : 0.0038		0.0023		0.0092		0.0754


In [127]:
displatFold(results_SVD, 'SVD Results')

SVD Results:
	RMSE (testset)	MAE (testset)	Fit time	Test time
Fold 1: 0.8828		0.6799		1.6921		0.1870
Fold 2: 0.8835		0.6813		1.6931		0.1320
Fold 3: 0.8762		0.6789		1.6681		0.1820
Fold 4: 0.8789		0.6800		1.6651		0.1880
Fold 5: 0.8796		0.6776		1.6621		0.1240
Mean  : 0.8802		0.6795		1.6761		0.1626
Std   : 0.0027		0.0012		0.0136		0.0284


In [128]:
displatFold(results_KNNBasic, 'KNNBasic Results')

KNNBasic Results:
	RMSE (testset)	MAE (testset)	Fit time	Test time
Fold 1: 0.9594		0.7441		10.1668		8.6642
Fold 2: 0.9539		0.7392		10.2892		8.4665
Fold 3: 0.9546		0.7398		9.7833		9.1857
Fold 4: 0.9621		0.7451		10.1548		9.1247
Fold 5: 0.9531		0.7372		10.0458		8.6417
Mean  : 0.9566		0.7411		10.0880		8.8165
Std   : 0.0035		0.0030		0.1707		0.2855


In [129]:
displatFold(results_KNNWithZScore, 'KNNWithZScore Results')

KNNWithZScore Results:
	RMSE (testset)	MAE (testset)	Fit time	Test time
Fold 1: 0.8880		0.6701		0.7231		1.6341
Fold 2: 0.8920		0.6761		0.8881		1.4791
Fold 3: 0.8896		0.6730		0.7961		1.4851
Fold 4: 0.8930		0.6734		0.8761		1.5341
Fold 5: 0.8992		0.6791		0.7561		1.4631
Mean  : 0.8923		0.6743		0.8079		1.5191
Std   : 0.0039		0.0030		0.0650		0.0622
