In [1]:
import pandas as pd

In [2]:
movies_df = pd.read_csv('data/movies.csv')
ratings_df = pd.read_csv('data/ratings.csv')
tags_df = pd.read_csv('data/tags.csv')
links_df = pd.read_csv('data/links.csv')

In [3]:
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, NMF, SlopeOne, CoClustering
from surprise.model_selection import train_test_split
from surprise import Reader, Dataset, accuracy

In [4]:
data = ratings_df[['userId', 'movieId', 'rating']]
min_scale = ratings_df.rating.min()
max_scale = ratings_df.rating.max()

# load data into surprise dataset
reader = Reader(rating_scale=(min_scale, max_scale))
data = Dataset.load_from_df(data, reader)

In [5]:
# split data
trainset, testset = train_test_split(data, test_size=0.2, random_state=123)

In [6]:
# List of model - check performance
algorithms = [KNNBasic, SlopeOne, SVD, NMF, KNNWithMeans, KNNWithZScore, KNNBaseline, CoClustering]

results = []

# Train and matrices for each model
for algo in algorithms:
    model = algo()
    model.fit(trainset)
    predictions = model.test(testset)
    
    mae = accuracy.mae(predictions, verbose=False)
    rmse = accuracy.rmse(predictions, verbose=False)
    fcp = accuracy.fcp(predictions, verbose=False)
    
    results.append({
        'Model': algo.__name__,
        'MAE': mae,
        'RMSE': rmse,
        'FCP': fcp
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)
print(results_df)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
           Model       MAE      RMSE       FCP
0       KNNBasic  0.722961  0.945738  0.678015
1       SlopeOne  0.685771  0.898052  0.665431
2            SVD  0.668687  0.871615  0.665716
3            NMF  0.705711  0.919496  0.656933
4   KNNWithMeans  0.681930  0.894327  0.655998
5  KNNWithZScore  0.676957  0.894385  0.655771
6    KNNBaseline  0.664862  0.871826  0.680364
7   CoClustering  0.729185  0.939416  0.647089


## KNNBaseline

In [7]:
full_train = data.build_full_trainset()
algo = SVD()
algo.fit(full_train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x15c111990>

In [8]:
full_testset = full_train.build_anti_testset()
predictions = algo.test(full_testset)

In [9]:
accuracy.mae(predictions, verbose=False)

0.37583577932049866

In [10]:
accuracy.rmse(predictions, verbose=False)

0.4853174092152505

In [14]:
# take too long for output
#accuracy.fcp(predictions)