# Slope One on Movielens Dataset


## Preparation

In [1]:
%pip install -r ../requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [50]:
import sys, os
sys.path.append(os.path.abspath("../"))

import pandas as pd

from core.utils import load_algo, load_cv
from api.model import recommend_top_k

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

## Model Training

In [22]:
%run ../core/train_slope_one.py

1.85756546332813

## Model Usage

In [52]:
predictions, algo = load_algo("slope_one_full")

In [55]:
uid = str(100)
iid = str(11)

algo.predict(uid, iid, r_ui=4.0)

Prediction(uid='100', iid='11', r_ui=4.0, est=3.8456792276240215, details={'was_impossible': False})

In [58]:
uid = str(100)
iid = str(115)

algo.predict(uid, iid)

Prediction(uid='100', iid='115', r_ui=None, est=3.501556983616962, details={'was_impossible': True, 'reason': 'User and/or item is unknown.'})

In [61]:
uid = str(100)
k = 20

top_20 = recommend_top_k(predictions, k=k)

pd.DataFrame(top_20[uid], index=(f"Top {n}" for n in range(1, k + 1)))

Unnamed: 0,movieId,est_r,title,genres,imdbId,tmdbId
Top 1,5746,5,Galaxy of Terror (Quest) (1981),Action|Horror|Mystery|Sci-Fi,82431,28893
Top 2,5764,5,Looker (1981),Drama|Horror|Sci-Fi|Thriller,82677,21874
Top 3,5919,5,Android (1982),Sci-Fi,83557,38849
Top 4,6835,5,Alien Contamination (1980),Action|Horror|Sci-Fi,82000,39916
Top 5,7899,5,Master of the Flying Guillotine (Du bi quan wa...,Action,72913,49636
Top 6,1046,5,Beautiful Thing (1996),Drama|Romance,115640,10938
Top 7,3851,5,I'm the One That I Want (2000),Comedy,251739,38548
Top 8,113275,5,The Hundred-Foot Journey (2014),Comedy|Drama,2980648,228194
Top 9,113394,5,"Pretty One, The (2013)",Comedy|Drama,2140577,174337
Top 10,137595,5,Magic Mike XXL (2015),Comedy|Drama,2268016,264999


## Model Testing

In [None]:
%cd ../core
%python test_slope_one.py

In [49]:
cv_dict = load_cv("cv_slope_one")

cv_df = pd.DataFrame(cv_dict, index=(f"Fold {k}" for k in range(1, 11)))

cv_df.loc["Mean"] = cv_df.mean(axis=0)
cv_df.loc["Std"] = cv_df.std(axis=0)

cv_df.drop(columns=["fit_time", "test_time"]).transpose()

Unnamed: 0,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,Fold 6,Fold 7,Fold 8,Fold 9,Fold 10,Mean,Std
test_rmse,0.891047,0.913585,0.898217,0.879436,0.902256,0.88124,0.907897,0.896901,0.88903,0.888254,0.894786,0.010518
test_mae,0.682181,0.698055,0.685295,0.672142,0.690271,0.672226,0.688539,0.68265,0.683091,0.678806,0.683325,0.007528
test_prec,0.715247,0.696523,0.695111,0.697301,0.685635,0.697258,0.699038,0.693665,0.70076,0.699643,0.698018,0.007015
test_rec,0.670862,0.673821,0.677203,0.691066,0.678756,0.678838,0.682636,0.674199,0.678281,0.667363,0.677303,0.006208
test_f1,0.668839,0.664292,0.665994,0.674426,0.660571,0.667477,0.669024,0.663788,0.667988,0.659359,0.666176,0.004188
