In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/772.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.0/772.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3097764 sha256=f4d6af0bbab0b3be509577c5f375b22afe2c668bfd03af9c94140fdad6dfb71c
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-

In [2]:
import pandas as pd
import numpy as np
import surprise
import os

In [3]:
os.chdir("/content/drive/MyDrive/Cases/filmtrust")
ratings = pd.read_csv("ratings.txt",sep=' ',names = ['uid','iid','rating'])
ratings.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [4]:
lowest_rating = ratings['rating'].min()
highest_rating = ratings['rating'].max()
print("Ratings range between {0} and {1}".format(lowest_rating,highest_rating))

Ratings range between 0.5 and 4.0


In [5]:
reader = surprise.Reader(rating_scale = (lowest_rating,highest_rating))
data = surprise.Dataset.load_from_df(ratings,reader)
type(data)

surprise.dataset.DatasetAutoFolds

In [6]:
similarity_options = {'name': 'cosine', 'user_based': True}
# Default k = 40
algo = surprise.KNNBasic(sim_options = similarity_options)
output = algo.fit(data.build_full_trainset())

Computing the cosine similarity matrix...
Done computing similarity matrix.


Expected rating for user 50 for item 217:

In [7]:
pred = algo.predict(uid='50',iid='217')
print(pred.est)

3.0028030537791928


In [8]:
pred

Prediction(uid='50', iid='217', r_ui=None, est=3.0028030537791928, details={'was_impossible': True, 'reason': 'User and/or item is unknown.'})

Total Items:

In [9]:
iids = ratings['iid'].unique()
print(iids)

[   1    2    3 ... 2069 2070 2071]


The list of items rated by user 50:

In [10]:
u_iid = ratings[ratings['uid']==50]['iid'].unique()
print(u_iid)

[  8 211   3   2 219 234  12 254 250 207  11 253 236  84  10   7 233  13
   1   5   6 252 241 216 257 206   4 217   9 215 213  17 255 220 121 245
 239 251 235]


In [11]:
len(u_iid)

39

List of the items not rated by user 50:

In [12]:
iids_to_predict = np.setdiff1d(iids, u_iid)
print(iids_to_predict)

[  14   15   16 ... 2069 2070 2071]


In [13]:
len(iids_to_predict)

2032

Extracting the estimated rating from iids_to_predict

In [None]:
testset = [[50,iid,0.] for iid in iids_to_predict]
testset

In [15]:
testset = [[50,iid,0.] for iid in iids_to_predict]
predictions = algo.test(testset)

In [16]:
type(predictions)

list

In [17]:
predictions[:5]

[Prediction(uid=50, iid=14, r_ui=0.0, est=1.0249112823172175, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=50, iid=15, r_ui=0.0, est=2.3010819030660024, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=50, iid=16, r_ui=0.0, est=3.365656247496976, details={'actual_k': 4, 'was_impossible': False}),
 Prediction(uid=50, iid=18, r_ui=0.0, est=3.4750887176827825, details={'actual_k': 2, 'was_impossible': False}),
 Prediction(uid=50, iid=19, r_ui=0.0, est=2.9501774353655654, details={'actual_k': 2, 'was_impossible': False})]

In [18]:
len(predictions)

2032

In [19]:
predictions[4].est

2.9501774353655654

In [20]:
pred_ratings = np.array([pred.est for pred in predictions])
pred_ratings

array([1.02491128, 2.3010819 , 3.36565625, ..., 2.5       , 3.        ,
       3.        ])

In [21]:
iids_to_predict

array([  14,   15,   16, ..., 2069, 2070, 2071])

In [None]:
predicted_rating = pd.DataFrame({'ItemID':iids_to_predict,
                                 'Est_Ratings': pred_ratings})
predicted_rating.sort_values('Est_Ratings', ascending=False)

Getting the item with highest expected rating

In [23]:
sorted_exp = predicted_rating.sort_values(by=['Est_Ratings','ItemID'], ascending=[False, True])
sorted_exp.head(10)

Unnamed: 0,ItemID,Est_Ratings
20,35,4.0
39,54,4.0
53,68,4.0
81,97,4.0
91,107,4.0
95,111,4.0
102,118,4.0
119,136,4.0
145,162,4.0
202,228,4.0


Tuning for best K

In [None]:
from surprise.model_selection import GridSearchCV
from surprise.model_selection.split import KFold

In [None]:
param_grid = {'k': np.arange(30,70,10)}
param_grid

{'k': array([30, 40, 50, 60])}

In [None]:
kfold = KFold(n_splits=5, random_state=23, shuffle=True)
gs = GridSearchCV(surprise.KNNBasic, param_grid,
                  measures=['rmse', 'mae'], cv=kfold)

In [None]:
gs.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

Best Score:

In [None]:
print(gs.best_score['rmse'])

0.8641633357915124


Best Parameter:

In [None]:
print(gs.best_params['rmse'])

{'k': 60}


We can now use the algorithm that yields the best rmse:

In [None]:
algo = gs.best_estimator['rmse']
algo.fit(data.build_full_trainset())

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f58b3c225f0>

The recommendations can be generated for any user with the object **algo**.