In [1]:
import os

import pandas as pd

# Both CSV files are expected in a 'movie-dataset' folder inside the current
# working directory.
movie_dataset_path, rating_dataset_path = (
    os.path.join(os.getcwd(), 'movie-dataset', file_name)
    for file_name in ('movies.csv', 'ratings.csv')
)

# Fields are delimited by '::' and parsed with pandas' python engine; any row
# containing a missing value is discarded right after loading.
movie_dataset = pd.read_csv(movie_dataset_path, sep='::', engine='python').dropna()
rating_dataset = pd.read_csv(rating_dataset_path, sep='::', engine='python').dropna()

In [2]:
from surprise import Dataset, Reader

# Wrap the (userId, movieId, rating) columns, in that order, in a Surprise
# Dataset; the Reader declares ratings as lying between 0.5 and 5.0.
elaborated_data = Dataset.load_from_df(
    rating_dataset[['userId', 'movieId', 'rating']],
    Reader(rating_scale=(0.5, 5.0)),
)

In [3]:
import numpy as np
from surprise import AlgoBase, accuracy
from surprise.model_selection import KFold, train_test_split


def test_algorithm(algorithm: AlgoBase, data=None, test_size: float = 0.2, n_splits: int = 5, random_state=None):
    """Evaluate a Surprise algorithm with a hold-out split and with K-fold cross-validation.

    The algorithm is first fitted on a single train/test split and scored on the
    held-out part, then re-fitted and scored once per fold of a K-fold split of
    the same dataset. RMSE, MAE and MSE are computed in both phases.

    Parameters
    ----------
    algorithm : AlgoBase
        The Surprise algorithm instance to evaluate. It is re-fitted in place
        several times (once for the hold-out split and once per fold).
    data : optional
        Dataset to evaluate on. When ``None`` (the default) the notebook-level
        ``elaborated_data`` is used, which matches the original behaviour.
    test_size : float, default 0.2
        Forwarded to ``train_test_split`` for the hold-out evaluation.
    n_splits : int, default 5
        Number of folds forwarded to ``KFold``.
    random_state : optional
        Forwarded to both ``train_test_split`` and ``KFold`` so the splits can
        be made reproducible. ``None`` (the default) keeps the library's
        unseeded behaviour. Note that only the splits are seeded here; any
        randomness inside ``algorithm`` itself is not controlled by this value.

    Returns
    -------
    tuple of (dict, dict)
        ``(split_predictions_measure, cross_predictions_measure)``. Both dicts
        have the keys ``'rmse'``, ``'mae'`` and ``'mse'``. In the first dict
        each value is the score on the hold-out split; in the second each
        value is a NumPy array holding one score per fold, in fold order.
    """
    if data is None:
        # Fall back to the dataset built earlier in the notebook.
        data = elaborated_data

    # Single dispatch table so both evaluation phases compute the same metrics
    # in the same order.
    metrics = {
        'rmse': accuracy.rmse,
        'mae': accuracy.mae,
        'mse': accuracy.mse,
    }

    # --- Hold-out evaluation -------------------------------------------------
    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
    algorithm.fit(train)
    split_predictions = algorithm.test(test)
    split_predictions_measure = {
        name: metric(split_predictions, verbose=False)
        for name, metric in metrics.items()
    }

    # --- K-fold cross-validation --------------------------------------------
    # Scores are collected in plain lists and converted to arrays once at the
    # end, instead of re-allocating an array with np.append on every fold.
    fold_scores = {name: [] for name in metrics}
    kf = KFold(n_splits=n_splits, random_state=random_state)
    for k_train, k_test in kf.split(data):
        algorithm.fit(k_train)
        k_predictions = algorithm.test(k_test)
        for name, metric in metrics.items():
            fold_scores[name].append(metric(k_predictions, verbose=False))

    cross_predictions_measure = {
        name: np.array(scores) for name, scores in fold_scores.items()
    }

    return (split_predictions_measure, cross_predictions_measure)

In [4]:
from surprise import SVD

# Evaluate an SVD model constructed with no arguments. The returned pair
# (hold-out metrics, per-fold metrics) is the last expression of the cell and
# is therefore shown as the cell output.
test_algorithm(SVD())

({'rmse': 0.8718380184130274,
  'mae': 0.6840324472869662,
  'mse': 0.7601015303503544},
 {'rmse': array([0.87355617, 0.8761734 , 0.87538284, 0.87161462, 0.87376793]),
  'mae': array([0.68539516, 0.68725865, 0.68721544, 0.68498816, 0.68702616]),
  'mse': array([0.76310038, 0.76767982, 0.76629512, 0.75971205, 0.76347039])})