In [1]:
# Import necessary package.
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from surprise import Reader, Dataset
from surprise import SVD, evaluate
from surprise import NMF, KNNBasic

In [None]:
### Data Preparation.

In [2]:
# Load the two raw CSV tables (paths are relative to the notebook's working
# directory): the anime catalogue and the per-user ratings.
anime_data = pd.read_csv(filepath_or_buffer='data/anime.csv')
rating_data = pd.read_csv(filepath_or_buffer='data/rating.csv')

In [3]:
# Clean the raw ratings (no user x item matrix is built here):
#   1. keep only the first row for every (user_id, anime_id) pair;
#   2. discard every row whose rating is -1.
# NOTE(review): -1 lies outside the 1-10 scale declared for the Reader, so it
# is presumably a "no rating given" marker -- confirm against the dataset docs.
rating_data = rating_data.drop_duplicates(
    subset=['user_id', 'anime_id'],
    keep='first',
)
# Row labels of the -1 ratings, kept in `indexs` for later inspection.
indexs = list(
    rating_data.loc[rating_data['rating'] == -1].index
)
rating_data = rating_data.drop(labels=indexs, axis=0)

In [4]:
# Wrap the cleaned ratings in a Surprise Dataset.
# The Reader declares the rating scale as 1.0 to 10.0; the DataFrame columns
# are passed in the order user, item, rating.
reader = Reader(rating_scale=(1.0, 10.0))
data = Dataset.load_from_df(
    df=rating_data[['user_id', 'anime_id', 'rating']],
    reader=reader,
)

In [12]:
# Evaluate SVD with 2-fold cross-validation, reporting RMSE.
#
# Seed both random number generators before the first stochastic step so a
# Restart & Run All reproduces the same numbers: per the Surprise FAQ, the
# cross-validation folds are randomly generated and some algorithms randomly
# initialise their parameters (sometimes with numpy).
# NOTE(review): `Dataset.split` and `evaluate` are deprecated in newer
# Surprise releases in favour of `surprise.model_selection.cross_validate`
# -- verify against the installed version before upgrading.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# SVD
algo = SVD()
data.split(n_folds=2)
# Keep the call as the last expression so the per-fold scores are displayed.
evaluate(algo=algo, data=data, measures=['RMSE'])

Evaluating RMSE of algorithm SVD.

------------
Fold 1
RMSE: 1.1831
------------
Fold 2
RMSE: 1.1850
------------
------------
Mean RMSE: 1.1840
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [1.1831236491937385, 1.1849668529344668]})

In [6]:
# NMF (Non-negative Matrix Factorization), scored by RMSE on 2 folds.
# NOTE(review): `split` is called again here, which presumably reshuffles the
# ratings, so these folds may differ from the ones used for other algorithms
# -- confirm before comparing scores directly.
# NOTE(review): `Dataset.split` and `evaluate` are deprecated in newer
# Surprise releases in favour of `surprise.model_selection.cross_validate`.
algo = NMF()
data.split(n_folds=2)
# Last expression of the cell, so the result dict is displayed.
evaluate(algo=algo, data=data, measures=['RMSE'])



Evaluating RMSE of algorithm NMF.

------------
Fold 1
RMSE: 2.2006
------------
Fold 2
RMSE: 2.2189
------------
------------
Mean RMSE: 2.2097
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [2.2005717695674427, 2.2188522699385946]})

In [None]:
# KNN (basic k-nearest-neighbours collaborative filtering), scored by RMSE on
# 2 folds.
# NOTE(review): the saved output of this cell stops right after the
# "Evaluating RMSE of algorithm KNNBasic." header and the cell has no
# execution count, so the run apparently never finished. KNNBasic builds a
# pairwise similarity matrix (user-based by default), which may be too large
# for this dataset -- consider item-based similarities or a subsample; confirm.
# NOTE(review): `Dataset.split` and `evaluate` are deprecated in newer
# Surprise releases in favour of `surprise.model_selection.cross_validate`.
algo = KNNBasic()
data.split(n_folds=2)
# Last expression of the cell, so the result dict is displayed.
evaluate(algo=algo, data=data, measures=['RMSE'])

Evaluating RMSE of algorithm KNNBasic.

