### Using Surprise Determine the recommendation for Based On collaborate filtering
- Build User Based Model
- Build Item Based Model
- Use Grid Serach
- Data Set:
  - ./data/rating.csv

In [1]:
pip install surprise

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib  as pyplot
import surprise

In [3]:
#1. Read the csv file using panda
user_item_ds = pd.read_csv('./data/ratings.csv')
user_item_ds.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
#2.Drop colums which is not to be mapped to {user, item, rating}
user_item_ds.drop(axis =1 ,columns=['timestamp'], inplace=True)

In [5]:
user_item_ds.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [6]:
#3. Rename colums to match {user, item, rating}
user_item_ds.rename(columns={"userId": "user", "movieId": "item"}, inplace=True)

In [7]:
user_item_ds.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [8]:
#4. Create the Surprise Reader object
Reader  = surprise.dataset.Reader(line_format='user item rating',rating_scale=(1, 5))

In [63]:
# 5. Build the surprise dataset
data = surprise.dataset.Dataset.load_from_df(user_item_ds,Reader)


In [10]:
#6.Build The train set
mr_data_set = data.build_full_trainset()


<surprise.trainset.Trainset at 0x7fb295325220>

In [11]:
from surprise.prediction_algorithms.knns import KNNBasic


In [12]:
# Do a user based collaberative recommendation engine

In [13]:
#7. Use KnnBasic model with k=40 and hyperparameters {cosine, user_based}
knnBasic = KNNBasic(k = 40 ,min_k=1, sim_options= {'name':'cosine', 'user_based': True})

In [14]:
knnBasic.fit(mr_data_set)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fb29535a940>

In [15]:
#predict for user Id
knnBasic.predict(uid= 1, iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=3.1834796860227086, details={'actual_k': 40, 'was_impossible': False})

In [16]:
# Do a item based collaberative recommendation engine

In [17]:
knnBasic = KNNBasic(k = 40 ,min_k=1, sim_options= {'name':'cosine', 'user_based': False})

In [18]:
knnBasic.fit(mr_data_set)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fb2971add00>

In [19]:
 knnBasic.predict(uid= 1, iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=2.547471538910294, details={'actual_k': 20, 'was_impossible': False})

In [20]:
from surprise.model_selection import GridSearchCV
from surprise.prediction_algorithms import knns as knns

In [21]:
param_grid = {'k':[10,20], 'sim_options':{'name':['msd','cosine'],'user_based':[False]}}

In [22]:
#Do a grid search to find the best parameter
gridSearch = GridSearchCV(knns.KNNWithMeans, param_grid, measures=['RMSE','MAE'], cv=5)

In [24]:
gridSearch.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix.

In [32]:
gridSearch.best_score['rmse']
gridSearch.best_params['rmse']

{'k': 20, 'sim_options': {'name': 'msd', 'user_based': False}}

In [33]:
gridSearch.best_score['mae']
gridSearch.best_params['mae']

{'k': 20, 'sim_options': {'name': 'msd', 'user_based': False}}

In [50]:
#Item based filtering
model_opt  = knns.KNNWithMeans(k=20,sim_options =  {'name': 'msd', 'user_based': False} )

In [51]:
model_opt.fit(mr_data_set)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fb1cc48f9d0>

In [52]:
model_opt.predict(uid = 1 , iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=2.2031583019543524, details={'actual_k': 20, 'was_impossible': False})

In [58]:
#Predict Similar users to userid = 1
model_opt  = knns.KNNWithMeans(k=20,sim_options =  {'name': 'msd', 'user_based': True} )
model_opt.fit(mr_data_set)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fb1cb6f1b20>

In [60]:

model_opt.get_neighbors(mr_data_set.to_inner_uid(1), k=5)

[8, 32, 67, 95, 98]

In [62]:
uids = []
for i in [8, 32, 67, 95, 98]:
    uids.append(mr_data_set.to_raw_uid(i))
uids

[9, 33, 68, 96, 99]