Using matrix factorization predict recommendations.For this
- Build User Based Model
- Build Item Based Model
- Use Grid Serach
- Data Set:
    - ./data/rating.csv

In [1]:
import pandas as pd
import numpy as np
import surprise as surprise

In [2]:
#1. Load the ratings file
ratings_ds = pd.read_csv('../data/ratings.csv')
ratings_ds.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [3]:
#2. Drop irrelevant Colums
ratings_ds.drop('timestamp', axis = 1, inplace=True)
ratings_ds.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [4]:
#3.Rename the column as per surprise expectations : {'user', 'item','rating'}
ratings_ds.rename(columns={'userId':'user', 'movieId':'item'}, inplace=True)
ratings_ds.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [5]:
#4. Create the surprise object and training set
Reader = surprise.dataset.Reader(line_format='user item rating',rating_scale=(1,5))
surprise_ds = surprise.dataset.Dataset.load_from_df(ratings_ds,reader=Reader)
surprise_train_set = surprise_ds.build_full_trainset()

In [6]:
#5 .Use SVD to create an user and item based model
from surprise import SVD

In [7]:
model = SVD(n_factors= 20)
model.fit(surprise_train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f9601faadc0>

In [8]:
model.predict(uid=1, iid=31, r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.387539641796759, details={'was_impossible': False})

In [9]:
#6. Use NMF for alternate
from surprise import NMF

In [10]:
modelOne = NMF(n_factors=20, biased= True)
modelOne.fit(surprise_train_set)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x7f9601faa100>

In [11]:
modelOne.predict(uid=1, iid=31, r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=1.4508720851802441, details={'was_impossible': False})

In [12]:
#7.Evaluate performance using Cross Validate
from surprise.model_selection import cross_validate

In [13]:
algo = SVD(n_factors= 20)
result = cross_validate(algo = algo,data=surprise_ds, cv=5, measures = ['RMSE','MAE'],return_train_measures=True)

In [14]:
print(result['test_rmse'].mean())

0.8921621672635585


In [15]:
print(result['test_mae'].mean())

0.6870267117505653


In [16]:
algoOne = NMF(n_factors= 20,biased = True)
resultOne = cross_validate(algo = algoOne,data=surprise_ds, cv=5, measures = ['RMSE','MAE'],return_train_measures=True)

In [17]:
print(resultOne['test_rmse'].mean())

1.3381786152526882


In [18]:
print(resultOne['test_mae'].mean())

1.026668314605684


In [19]:
from surprise.model_selection import GridSearchCV

In [20]:
param_grid = {'n_factors' :[15,20,25,30]}
algo = SVD

In [21]:
grid_search = GridSearchCV(algo, param_grid,measures=['RMSE','MAE'],cv=5)

In [24]:
grid_search.fit(surprise_ds)

In [25]:
print(grid_search.best_params['rmse'])

{'n_factors': 15}


In [26]:
print(grid_search.best_params['mae'])

{'n_factors': 25}


In [27]:
print(grid_search.best_score['rmse'])

0.890556324852214


In [28]:
print(grid_search.best_score['mae'])

0.685864606086614
