In [2]:
import os

import pandas as pd

# Both CSV files are expected in the 'movie-dataset' folder under the current working directory.
dataset_dir = os.path.join(os.getcwd(), 'movie-dataset')
movie_dataset_path = os.path.join(dataset_dir, 'movies.csv')
rating_dataset_path = os.path.join(dataset_dir, 'ratings.csv')

# Read each file with the pure-Python parser, then discard every row that contains a missing value.
movie_dataset = pd.read_csv(movie_dataset_path, sep=',', engine='python').dropna()
rating_dataset = pd.read_csv(rating_dataset_path, sep=',', engine='python').dropna()

In [3]:
from surprise import Dataset, Reader

# Select the three columns in user, item, rating order and declare the 0.5-5.0 rating scale
# before handing the frame to Surprise.
rating_triples = rating_dataset[['userId', 'movieId', 'rating']]
rating_reader = Reader(rating_scale=(0.5, 5.0))
elaborated_data = Dataset.load_from_df(rating_triples, rating_reader)

In [4]:
import numpy as np
from surprise import AlgoBase, accuracy
from surprise.model_selection import KFold, train_test_split


def _score_predictions(predictions):
    """Return the RMSE, MAE and MSE of ``predictions`` as a dict keyed 'rmse', 'mae', 'mse'."""
    return {
        'rmse': accuracy.rmse(predictions, verbose=False),
        'mae': accuracy.mae(predictions, verbose=False),
        'mse': accuracy.mse(predictions, verbose=False),
    }


def test_algorithm(algorithm: AlgoBase, data=None, test_size=0.2, n_splits=5, random_state=None):
    """Evaluate ``algorithm`` on one hold-out split and with K-fold cross-validation.

    The same ``algorithm`` object is re-fitted for the hold-out split and then
    once more for every fold.

    Parameters
    ----------
    algorithm : AlgoBase
        The Surprise algorithm to fit and test.
    data : optional
        Dataset to evaluate on. When ``None`` (the default) the notebook-level
        ``elaborated_data`` is used.
    test_size : default 0.2
        Passed to ``train_test_split`` for the hold-out evaluation.
    n_splits : default 5
        Number of folds passed to ``KFold``.
    random_state : optional
        Forwarded to both ``train_test_split`` and ``KFold`` so a run can be
        repeated with the same partitions. With the default of ``None`` no seed
        is passed on.

    Returns
    -------
    tuple
        ``(split_measures, cross_measures)``. ``split_measures`` maps
        'rmse', 'mae' and 'mse' to the hold-out score; ``cross_measures`` maps
        the same keys to a NumPy array holding one score per fold.
    """
    if data is None:
        data = elaborated_data

    # Single hold-out evaluation.
    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
    algorithm.fit(train)
    split_predictions_measure = _score_predictions(algorithm.test(test))

    # K-fold evaluation: gather per-fold scores in a list and convert once,
    # instead of re-allocating an array with np.append on every fold.
    kf = KFold(n_splits=n_splits, random_state=random_state)
    fold_scores = []
    for k_train, k_test in kf.split(data):
        algorithm.fit(k_train)
        fold_scores.append(_score_predictions(algorithm.test(k_test)))

    cross_predictions_measure = {
        metric: np.array([scores[metric] for scores in fold_scores], dtype=float)
        for metric in ('rmse', 'mae', 'mse')
    }

    return (split_predictions_measure, cross_predictions_measure)

In [5]:
from surprise import SVD, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore

# Candidate recommenders to compare; every KNN variant is built with k=40 and verbose=False.
algorithms = [
    SVD(),
    KNNBaseline(k=40, verbose=False),
    KNNBasic(k=40, verbose=False),
    KNNWithMeans(k=40, verbose=False),
    KNNWithZScore(k=40, verbose=False),
]

# Collect one record per algorithm and build the frame once at the end.
# Growing a DataFrame with pd.concat inside the loop copies it on every
# iteration, and concatenating onto an empty frame is deprecated in recent pandas.
result_records = []
for algo in algorithms:
    split_result, cross_result = test_algorithm(algo)
    result_records.append({
        'Algorithm': algo.__class__.__name__,
        'Split RMSE': split_result['rmse'],
        'Split MAE': split_result['mae'],
        'Split MSE': split_result['mse'],
        # Cross-validation scores are reported as the mean over the folds.
        'Cross RMSE': cross_result['rmse'].mean(),
        'Cross MAE': cross_result['mae'].mean(),
        'Cross MSE': cross_result['mse'].mean(),
    })

# Passing `columns` fixes the column order and keeps the header even if no records were collected.
algorithms_results = pd.DataFrame(
    result_records,
    columns=['Algorithm', 'Split RMSE', 'Split MAE', 'Split MSE', 'Cross RMSE', 'Cross MAE', 'Cross MSE'],
)

algorithms_results

Unnamed: 0,Algorithm,Split RMSE,Split MAE,Split MSE,Cross RMSE,Cross MAE,Cross MSE
0,SVD,0.877467,0.676449,0.769949,0.874364,0.67174,0.764536
1,KNNBaseline,0.872716,0.668307,0.761633,0.873952,0.668342,0.763842
2,KNNBasic,0.946533,0.724821,0.895925,0.948047,0.726245,0.89881
3,KNNWithMeans,0.897829,0.684921,0.806096,0.896864,0.685127,0.804392
4,KNNWithZScore,0.905859,0.690986,0.82058,0.896554,0.679943,0.803829
