In [37]:
import pandas as pd
import numpy as np
import surprise
from surprise import SVD

In [38]:
ratings =pd.read_csv('ratings.txt',sep=' ',names = ['uid','iid','rating'])
ratings.head()

Unnamed: 0,uid,iid,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [39]:
lowest_rating = ratings['rating'].min()
highest_rating = ratings['rating'].max()
print("rating ranges between {0} and {1}" .format(lowest_rating, highest_rating))

rating ranges between 0.5 and 4.0


In [41]:
reader = surprise.Reader(rating_scale= (lowest_rating,highest_rating))
data = surprise.Dataset.load_from_df(ratings,reader)
type(data)

surprise.dataset.DatasetAutoFolds

In [42]:
similarity_options = {'name':'cosine','user_based': True}
#Default k=40
algo = surprise.KNNBasic(sim_options=similarity_options)
output = algo.fit(data.build_full_trainset())

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [43]:
pred = algo.predict(uid='100', iid='217')
print(pred.est)

#Total items
iids =ratings['iid'].unique()
print(iids)

3.0028030537791928
[   1    2    3 ... 2069 2070 2071]


In [44]:
# items rated by UID 50
u_iid =ratings[ratings['uid']==100]['iid'].unique()
print(u_iid)

iids_to_predict  = np.setdiff1d(iids, u_iid)    #Not rated by UID 50
print(iids_to_predict)

[215]
[   1    2    3 ... 2069 2070 2071]


In [45]:
#Extracting the estimated ratings from iids_to_predict
testset = [[100,iid,0.] for iid in iids_to_predict]
testset

testset = [[100,iid,0.] for iid in iids_to_predict]
predictions = algo.test(testset)
predictions[:5]

[Prediction(uid=100, iid=1, r_ui=0.0, est=2.95, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid=100, iid=2, r_ui=0.0, est=2.95, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid=100, iid=3, r_ui=0.0, est=3.0375, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid=100, iid=4, r_ui=0.0, est=3.325, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid=100, iid=5, r_ui=0.0, est=3.075, details={'actual_k': 40, 'was_impossible': False})]

In [46]:
(predictions[4].iid,predictions[4].est)

(5, 3.075)

In [47]:
pred_ratings=[(predictions[i].iid,predictions[i].est) for i in range(0,len(predictions))]
predicted_rating=pd.DataFrame(pred_ratings,columns=['iid','est_rating'])
exp_ratings=predicted_rating.sort_values(by='est_rating',ascending=False)

In [48]:
sorted_exp=exp_ratings.sort_values(by=['est_rating','iid'],ascending=[False,True])
sorted_exp.head(10)

Unnamed: 0,iid,est_rating
36,37,4.0
42,43,4.0
51,52,4.0
60,61,4.0
67,68,4.0
68,69,4.0
94,95,4.0
96,97,4.0
104,105,4.0
125,126,4.0


In [33]:
from surprise.model_selection import GridSearchCV
from surprise.model_selection.split import KFold

param_grid={'lr_all':np.linspace(0.001,1,3),'reg_all':np.linspace(.01,0.8,5),'n_factors':[40,30]}
kfold=KFold(random_state=23,n_splits = 5,shuffle=True)
gcv = GridSearchCV(surprise.KNNBasic,param_grid,measures=['rmse','mae'],cv=kfold)
gcv

<surprise.model_selection.search.GridSearchCV at 0x7f5f1f6e9d10>

In [34]:
gcv.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd

In [35]:
print(gcv.best_score['rmse'])
print(gcv.best_score['mae'])

0.8651450573129178
0.6545346264246396


In [36]:
print(gcv.best_params['rmse'])
print(gcv.best_params['mae'])

{'lr_all': 0.001, 'reg_all': 0.01, 'n_factors': 40}
{'lr_all': 0.001, 'reg_all': 0.01, 'n_factors': 40}
