In [None]:
import sys
# Put the parent directory on the import path; the `source` package imported
# in the next cell is presumably located there (verify against repo layout).
sys.path.append('..')

In [None]:
from source.code.utils import preprocessing

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw ratings table from a semicolon-separated CSV.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` is its documented replacement: malformed rows are
# skipped and a warning is emitted for each of them (same as the old default
# combination error_bad_lines=False / warn_bad_lines=True).
ratings = pd.read_csv(
    'data/BX-Book-Ratings.csv',
    sep=';',
    header=0,
    on_bad_lines='warn',
    encoding='Windows-1251',
    low_memory=False,
)

In [None]:
# Show column dtypes, non-null counts and memory usage of the ratings frame.
ratings.info()

In [None]:
# Load the raw books table from a semicolon-separated CSV.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` is its documented replacement: malformed rows are
# skipped and a warning is emitted for each of them.
books = pd.read_csv(
    'data/BX-Books.csv',
    sep=';',
    header=0,
    on_bad_lines='warn',
    encoding='Windows-1251',
    low_memory=False,
)

In [None]:
# Show column dtypes, non-null counts and memory usage of the books frame
# as loaded, before any dtype conversion.
books.info()

In [None]:
# Cast the publication year to integers.
# `np.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError. The builtin
# yields the same dtype.
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)

In [None]:
# Re-inspect the books frame to confirm the dtype of 'Year-Of-Publication'
# after the cast in the previous cell.
books.info()

In [None]:
# Load the raw users table from a semicolon-separated CSV.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` is its documented replacement: malformed rows are
# skipped and a warning is emitted for each of them.
users = pd.read_csv(
    'data/BX-Users.csv',
    sep=';',
    header=0,
    on_bad_lines='warn',
    encoding='Windows-1251',
    low_memory=False,
)

In [None]:
# Show column dtypes, non-null counts and memory usage of the users frame.
users.info()

# Preprocessing phase

In [None]:
# Bundle the three raw frames under the keys passed on to preprocessing().
data_dict = {
    'books': books,
    'users': users,
    'ratings': ratings,
}

In [None]:
# Run the project's preprocessing helper on the bundled raw frames.
# NOTE(review): the two thresholds look like minimum rating counts per book
# and per user -- confirm against source.code.utils.preprocessing.
preprocessed_data_dict = preprocessing(
    data_dict=data_dict,
    is_explicit=True,
    book_ratings_count_threshold=2,
    user_ratings_count_threshold=2,
)

In [None]:
# Preview the first rows of the preprocessed ratings (original column names).
preprocessed_data_dict['ratings'].head()

In [None]:
# Rename the rating columns to the userID / itemID / rating names that are
# selected further down when the Surprise dataset is built.
surprise_column_names = {
    'User-ID': 'userID',
    'ISBN': 'itemID',
    'Book-Rating': 'rating',
}
preprocessed_data_dict['ratings'] = preprocessed_data_dict['ratings'].rename(
    columns=surprise_column_names
)

In [None]:
# Preview the first rows again to verify the renamed columns.
preprocessed_data_dict['ratings'].head()

# Surprise experiments

In [None]:
from surprise import Dataset
from surprise import NMF
from surprise import Reader
from surprise import SVD
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms.knns import KNNBasic

## Data reader

In [None]:
# Derive the rating scale from the data instead of hard-coding (1, 5).
# Surprise clips every prediction to `rating_scale`, so a scale narrower than
# the actual ratings distorts RMSE/MAE.
# NOTE(review): Book-Crossing explicit ratings are published on a 1-10 scale;
# unless preprocessing() rescales them, (1, 5) was too narrow -- confirm.
observed_ratings = preprocessed_data_dict['ratings']['rating']
reader = Reader(rating_scale=(observed_ratings.min(), observed_ratings.max()))

In [None]:
# Build the Surprise dataset from the user, item and rating columns, passed
# in exactly that order, together with the reader defined earlier.
data = Dataset.load_from_df(
    preprocessed_data_dict['ratings'][['userID', 'itemID', 'rating']],
    reader,
)

## SVD

In [None]:
# SVD matrix factorisation with library defaults.
# The factors are initialised randomly, so a fixed `random_state` is set to
# make the fitted model reproducible across kernel restarts.
algo_svd = SVD(random_state=42)

In [None]:
# 5-fold cross-validation of SVD, reporting RMSE and MAE on all CPU cores.
# A bare `cv=5` builds an unseeded, shuffled KFold, so every run (and every
# algorithm) is scored on different random splits. A seeded KFold makes the
# folds reproducible; using the same seed in other cross_validate calls puts
# the compared algorithms on identical splits.
cross_validate(
    algo_svd,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    verbose=True,
    n_jobs=-1,
)

## NMF

In [None]:
# Non-negative matrix factorisation with library defaults.
# The factors are initialised randomly, so a fixed `random_state` is set to
# make the fitted model reproducible across kernel restarts.
algo_nmf = NMF(random_state=42)

In [None]:
# 5-fold cross-validation of NMF, reporting RMSE and MAE on all CPU cores.
# A bare `cv=5` builds an unseeded, shuffled KFold, so every run (and every
# algorithm) is scored on different random splits. A seeded KFold makes the
# folds reproducible; using the same seed in other cross_validate calls puts
# the compared algorithms on identical splits.
cross_validate(
    algo_nmf,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    verbose=True,
    n_jobs=-1,
)

## KNN (Item-based)

In [None]:
# Similarity settings for the item-based variant: cosine similarity computed
# between items (user_based=False) rather than between users.
sim_options = dict(name='cosine', user_based=False)

In [None]:
# Basic k-NN collaborative filter with k=5, configured with whichever
# `sim_options` dict is currently in scope (item-based when run in order).
algo_knn = KNNBasic(k=5, sim_options=sim_options)

In [None]:
# 5-fold cross-validation of the k-NN model, reporting RMSE and MAE in a
# single process (n_jobs=1).
# A bare `cv=5` builds an unseeded, shuffled KFold, so every run (and every
# algorithm) is scored on different random splits. A seeded KFold makes the
# folds reproducible; using the same seed in other cross_validate calls puts
# the compared algorithms on identical splits.
cross_validate(
    algo_knn,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    verbose=True,
    n_jobs=1,
)

## KNN (User-based)

In [None]:
# Similarity settings for the user-based variant: cosine similarity computed
# between users. `user_based` defaults to True in Surprise; it is spelled out
# here so this cell mirrors the item-based configuration and the intent does
# not depend on a library default.
sim_options = {
    'name': 'cosine',
    'user_based': True
}

In [None]:
# Basic k-NN collaborative filter with k=5, configured with whichever
# `sim_options` dict is currently in scope (user-based when run in order).
# Note: this rebinds `algo_knn`, replacing the item-based model defined earlier.
algo_knn = KNNBasic(k=5, sim_options=sim_options)

In [None]:
# 5-fold cross-validation of the k-NN model, reporting RMSE and MAE in a
# single process (n_jobs=1).
# A bare `cv=5` builds an unseeded, shuffled KFold, so every run (and every
# algorithm) is scored on different random splits. A seeded KFold makes the
# folds reproducible; using the same seed in other cross_validate calls puts
# the compared algorithms on identical splits.
cross_validate(
    algo_knn,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    verbose=True,
    n_jobs=1,
)