In [28]:
import random
from pprint import pprint

import numpy as np
import pandas as pd
from IPython.display import display
from surprise import Dataset, Reader
from surprise.model_selection import GridSearchCV
from surprise.prediction_algorithms import SVD, KNNBasic, KNNWithMeans, KNNWithZScore

In [8]:
# Describe the layout of the ratings file: one "user item rating" triple per
# line, tab-separated, with ratings on a 1-5 scale.
# NOTE(review): the file carries a .csv extension but is parsed with a tab
# separator - confirm this matches how the interim file was written.
reader = Reader(
    rating_scale=(1, 5),
    sep='\t',
    line_format='user item rating',
)

# Load the training ratings into a Surprise Dataset using the layout above.
train_data = Dataset.load_from_file(
    file_path='../data/interim/train.csv',
    reader=reader,
)

In [19]:
# Hyper-parameter search space, keyed by Surprise algorithm class.
# Every leaf value is a list of candidate settings to be tried by the grid
# search that consumes this mapping.
param_grid = {
    SVD: {
        'n_epochs': [5, 10, 20],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.02, 0.04, 0.06],
        'n_factors': [20, 50, 100],
    },
    # The three KNN variants are searched over exactly the same space, so the
    # space is written once instead of being copy-pasted per class. The dict
    # literal is evaluated anew for every class, so each algorithm still gets
    # its own independent dict/list objects (no shared mutable state).
    **{
        knn_algorithm: {
            'k': [20, 40],
            'min_k': [1, 5],
            'sim_options': {
                'name': ['cosine', 'msd', 'pearson'],
                'user_based': [True, False],
            },
            # Single candidate: presumably only there to keep the KNN fits
            # from printing progress messages - it does not widen the search.
            'verbose': [False],
        }
        for knn_algorithm in (KNNBasic, KNNWithMeans, KNNWithZScore)
    },
}

In [20]:
# Seed used for every search so the cross-validated scores can be reproduced
# after a kernel restart (Surprise's FAQ recommends seeding both `random` and
# NumPy's global RNG for reproducible experiments).
RANDOM_SEED = 0

# Accuracy measures computed for every parameter combination.
CV_MEASURES = ['rmse', 'mae', 'mse', 'fcp']

# Fitted GridSearchCV object per algorithm class, filled in by the loop below.
grid_search_results = dict()

for algorithm, current_param_grid in param_grid.items():
    print(f'Current algorithm: {algorithm.__name__}')

    # Re-seed before each search so one algorithm's result does not depend on
    # how much randomness the previously searched algorithms consumed.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    # 5-fold cross-validation over every combination in the algorithm's grid.
    grid_search = GridSearchCV(algorithm, current_param_grid, measures=CV_MEASURES, cv=5)
    grid_search.fit(train_data)
    grid_search_results[algorithm] = grid_search

Current algorithm: SVD
Current algorithm: KNNBasic
Current algorithm: KNNWithMeans
Current algorithm: KNNWithZScore


In [35]:
# One row per algorithm holding its best cross-validated score for each
# measure, as reported by that algorithm's grid search.
result_dataframe = pd.DataFrame(
    [
        {
            'Algorithm': algorithm_cls.__name__,
            'RMSE': search.best_score['rmse'],
            'MAE': search.best_score['mae'],
            'MSE': search.best_score['mse'],
            'FCP': search.best_score['fcp'],
        }
        for algorithm_cls, search in grid_search_results.items()
    ],
    # Pin the column order explicitly so it does not depend on the records.
    columns=['Algorithm', 'RMSE', 'MAE', 'MSE', 'FCP'],
)
display(result_dataframe)

Unnamed: 0,Algorithm,RMSE,MAE,MSE,FCP
0,SVD,0.939384,0.74334,0.882475,0.693238
1,KNNBasic,0.985601,0.77897,0.971427,0.700289
2,KNNWithMeans,0.941704,0.739734,0.88682,0.695249
3,KNNWithZScore,0.944415,0.741464,0.89194,0.69568


In [43]:
# Choose the winning algorithm from the measured scores instead of hard-coding
# it: the class whose best cross-validated RMSE is lowest wins. For the results
# recorded in this notebook that is SVD, but the choice now follows the data if
# the grids or the dataset change.
best_algo_cls = min(
    grid_search_results,
    key=lambda algo_cls: grid_search_results[algo_cls].best_score['rmse'],
)

# Estimator instance and parameter set that achieved that best RMSE.
# NOTE(review): the searches are created without `refit`, so per the Surprise
# docs this estimator presumably still needs `.fit()` before it can predict.
best_algo = grid_search_results[best_algo_cls].best_estimator['rmse']
best_params = grid_search_results[best_algo_cls].best_params['rmse']

In [44]:
# Report which algorithm was selected and the hyper-parameters it was selected
# with. `pprint` comes from the notebook's import cell at the top, so this cell
# no longer hides a dependency in the middle of the notebook.
print(f'chosen algorithm: {best_algo.__class__.__name__}')
print('with parameters:')
pprint(best_params)

chosen algorithm: SVD
with parameters:
{'lr_all': 0.005, 'n_epochs': 20, 'n_factors': 100, 'reg_all': 0.06}


In [45]:
# Summary statistics (count, mean, std, quartiles) of the best scores across
# the compared algorithms; only the numeric score columns are summarised.
result_dataframe.describe()

Unnamed: 0,RMSE,MAE,MSE,FCP
count,4.0,4.0,4.0,4.0
mean,0.952776,0.750877,0.908166,0.696114
std,0.02198,0.018786,0.042352,0.00298
min,0.939384,0.739734,0.882475,0.693238
25%,0.941124,0.741032,0.885734,0.694746
50%,0.94306,0.742402,0.88938,0.695464
75%,0.954711,0.752248,0.911812,0.696832
max,0.985601,0.77897,0.971427,0.700289
