# Test for Recommender System using SciKit-Surprise

### Import libraries

In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
import time
import datetime
%load_ext autoreload
%autoreload 2

In [2]:
# import scikit-surprise stuff
from surprise import SVD
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import cross_validate, GridSearchCV

In [3]:
# import pandas for DataFrame
import pandas as pd

In [4]:
# import custom stuff
from utility import *

### Import train and test set

In [6]:
# Prepare the train set in the format scikit-surprise expects.
# The loader has to be *called* with a file path, which the next cell does;
# binding the bare `loadData2df` function object to `data_df` here would only
# leave a non-DataFrame value in that name until it is overwritten.

In [7]:
# Relative path to the CSV file holding the training ratings.
datafilepath = "../data/data_train.csv"
# Load the file through the project helper from `utility`; judging by the helper's
# name the result is presumably a pandas DataFrame — TODO confirm in utility.py.
data_df = loadData2df(datafilepath)

In [8]:
# Convert the loaded frame into the dataset object used by the rest of the notebook.
# `data_ds` is later passed to `gridsearch.fit` and exposes `build_full_trainset()`,
# so it is presumably a surprise `Dataset` — TODO confirm in utility.loadData2ds.
data_ds = loadData2ds(data_df)

In [9]:
# Relative path to the sample submission file.
predinfilepath = "../data/sample_submission.csv"
# Loaded with the same helper as the training data; `pred_df` is later handed to
# `generatePredictions` and `exportPredictions`.
# NOTE(review): presumably this file lists the entries to predict — verify its format.
pred_df = loadData2df(predinfilepath)

In [10]:
# Build a trainset from the complete dataset (no hold-out split), intended for
# fitting the final model once its hyper-parameters have been chosen.
full_train_ds = data_ds.build_full_trainset()

### Select and train algorithm

#### Preselected algorithm

In [11]:
#algo = SVD(n_factors=20, n_epochs=10, init_mean=0, init_std_dev=0.1, lr_all=0.007, reg_all=0.02, verbose = True)

In [None]:
#algo.fit(full_train_ds)

#### Algorithm from gridsearch

In [12]:
# Hyper-parameter grid for SVD: only the number of latent factors is varied
# (2, 4, ..., 24); every other setting is pinned to a single value.
grid = {
    "n_factors": list(range(2, 25, 2)),
    "lr_all": [0.007],
    "reg_all": [0.02],
    "n_epochs": [10],
    "biased": [True],
}

# 5-fold cross-validated grid search scored by RMSE, using all available cores.
# refit=True asks the search to refit the best configuration after the search.
gridsearch = GridSearchCV(
    algo_class=SVD,
    param_grid=grid,
    measures=['RMSE'],
    cv=5,
    n_jobs=-1,
    refit=True,
    joblib_verbose=2,
)

In [None]:
# Run the cross-validated grid search over every combination in `grid`.
# Long-running: the recorded log shows 12.5 min elapsed after 33 tasks on 4 workers.
gridsearch.fit(data_ds)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 12.5min


In [None]:
# Report the best RMSE found by the search and the parameter combination that
# achieved it (both are looked up under the lowercase key 'rmse').
print(gridsearch.best_score['rmse'])
print(gridsearch.best_params['rmse'])

In [None]:
# Collect the grid-search cross-validation results into a DataFrame so they can
# be inspected and written to disk.
results_cv = pd.DataFrame.from_dict(gridsearch.cv_results)

In [16]:
# Base file name for the cross-validation results.
results_cv_path = "../data/cvresults/cv.csv"
# Project helper from `utility`; per the recorded output it inserts a
# `_YYYY-MM-DD_HHMM` stamp before the file extension.
results_cv_path = addDateAndTime(results_cv_path)
# NOTE(review): the visible export cell writes to a different, hard-coded path,
# so this timestamped path is only printed — confirm whether it should be used.
print(results_cv_path)

../data/cvresults/cv_2018-12-16_1857.csv


In [None]:
# Write the cross-validation results table to a fixed CSV file.
# NOTE(review): this hard-coded path differs from the timestamped `results_cv_path`
# computed earlier, so re-running overwrites the same file — confirm this is intended.
results_cv.to_csv(r"../data/SVD-cv-nfact-regall-2.csv")

In [None]:
# Take the SVD instance configured with the best-RMSE hyper-parameters and train
# it on the complete training set built earlier (`full_train_ds`).
# NOTE(review): because the search was created with refit=True, this estimator
# may already have been fitted on the full data; the explicit fit keeps the
# cell self-contained — confirm against the surprise GridSearchCV docs.
algo = gridsearch.best_estimator["rmse"]
algo.fit(full_train_ds)

### Make predictions

In [None]:
# Project helper from `utility`: called with the trained model and the submission frame.
# NOTE(review): the return value is discarded and `pred_df` is exported afterwards,
# so the helper presumably fills `pred_df` in place — verify in utility.py.
generatePredictions(algo,pred_df)

### Export predictions and algo parameters

In [None]:
# Persist the model configuration and the predictions via the project helpers,
# passing add_date/add_time=True to both.
# NOTE(review): the flags presumably timestamp the output file names and the
# destinations are chosen inside the helpers — confirm both in utility.py.
exportAlgoParameters(algo,add_date=True,add_time=True)
exportPredictions(pred_df,add_date=True,add_time=True)