# Libraries

In [None]:
# Mount Google Drive at /content/drive; the ratings CSV and the saved models
# used later in this notebook live under /content/drive/MyDrive/Project.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import pickle

In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 230kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617640 sha256=e3c4bcbdd4c109cc6012d64e3577374d7275ca7183fc3051fa0587a65fbde2e6
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [None]:
from surprise import Dataset
from surprise import Reader
from surprise import KNNWithMeans, KNNBasic, KNNWithZScore
from surprise import accuracy
from surprise.model_selection import train_test_split, GridSearchCV,cross_validate

# Loading Data

In [None]:
# Read the ratings table and wrap the three columns Surprise needs
# (user, item, rating -- in that order) in a Dataset on a 1-10 rating scale.
ratings = pd.read_csv('/content/drive/MyDrive/Project/ratings.csv')
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(
    ratings[['userID', 'ISBN', 'bookRating']],
    reader,
)

In [None]:
# Sanity check on the loaded frame: (number of rating rows, number of columns).
ratings.shape

(57496, 3)

# User Based CF

# KNNBasic

In [None]:
# Baseline: user-based KNNBasic with the library defaults, scored with 5-fold CV.
# 'user_based' must be a plain bool here; the list form ([True]) is only meant for
# GridSearchCV parameter grids and merely happened to work because a non-empty
# list is truthy.
algo = KNNBasic(sim_options={'user_based': True})
cv_results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# Average every metric and timing over the folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.8358  1.8191  1.8311  1.8324  1.8091  1.8255  0.0099  
MAE (testset)     1.3942  1.3943  1.4017  1.3955  1.3714  1.3914  0.0104  
Fit time          0.23    0.19    0.16    0.15    0.16    0.18    0.03    
Test time         0.48    0.44    0.44    0.46    0.54    0.47    0.04    


test_rmse    1.825510
test_mae     1.391413
fit_time     0.179707
test_time    0.471357
dtype: float64

# KNNWithZScore

In [None]:
# User-based KNNWithZScore with default settings, scored with 5-fold CV.
algo = KNNWithZScore(sim_options={'user_based': True})
cv_results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)
# Per-metric average across the five folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6627  1.6639  1.6455  1.6405  1.6626  1.6551  0.0100  
MAE (testset)     1.2153  1.2292  1.2151  1.2105  1.2275  1.2195  0.0074  
Fit time          0.26    0.29    0.28    0.26    0.27    0.27    0.01    
Test time         0.51    0.61    0.51    0.54    0.53    0.54    0.03    


test_rmse    1.655052
test_mae     1.219507
fit_time     0.272417
test_time    0.539673
dtype: float64

# KNNWithMeans

In [None]:
# User-based KNNWithMeans with default settings, scored with 5-fold CV.
algo = KNNWithMeans(sim_options={'user_based': True})
cv_results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)
# Per-metric average across the five folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6415  1.6407  1.6505  1.6535  1.6477  1.6468  0.0050  
MAE (testset)     1.2261  1.2242  1.2233  1.2410  1.2246  1.2278  0.0066  
Fit time          0.19    0.19    0.18    0.19    0.19    0.19    0.00    
Test time         0.47    0.55    0.50    0.53    0.45    0.50    0.04    


test_rmse    1.646762
test_mae     1.227849
fit_time     0.185535
test_time    0.500838
dtype: float64

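Comparing the three algorithms for the user-based approach shows that KNNWithMeans gives the lowest RMSE (1.647), although KNNWithZScore has a slightly lower MAE and KNNBasic a marginally shorter fit time. KNNWithMeans is therefore used for the user-based model.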

# Checking for best parameters for KNNWithMeans

In [None]:
# Exhaustive search over neighbourhood size, minimum neighbours and similarity
# measure for user-based KNNWithMeans, scored with 3-fold CV.
param_grid = {
    'k': [40, 45, 50],
    'min_k': [1, 3, 5, 10],
    'sim_options': {
        'name': ['pearson', 'cosine', 'msd'],
        'user_based': [True],
    },
}
gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE first, then the parameter combination that achieved it.
for report in (gs.best_score, gs.best_params):
    print(report['rmse'])

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the c

# Splitting into train and test data

In [None]:
# Hold out 15% of the ratings for the final evaluation; the fixed seed keeps the
# split repeatable across runs.
# NOTE(review): the grid searches in this notebook are fitted on the full `data`,
# so this test set is not unseen by the tuning step -- the test RMSE is likely
# optimistic.
trainset, testset = train_test_split(data, random_state=2, test_size=0.15)

# Training and testing the model with best parameters

In [None]:
# Final user-based model: KNNWithMeans with k=45, min_k=10 and cosine similarity.
algo = KNNWithMeans(
    k=45,
    min_k=10,
    sim_options={'name': 'cosine', 'user_based': True},
)
# fit() returns the fitted algorithm, so training and prediction can be chained.
test_pred = algo.fit(trainset).test(testset)
print("User-based Model : Test Set")
accuracy.rmse(test_pred, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
User-based Model : Test Set
RMSE: 1.5068


1.5068138482913844

# Saving the model


In [None]:
# Persist the fitted user-based model to Drive. The context manager guarantees the
# file handle is flushed and closed even if pickling fails part-way through.
with open('/content/drive/MyDrive/Project/User_based.sav', 'wb') as model_file:
    pickle.dump(algo, model_file)

In [None]:
# Spot-check the fitted model: predict one (user, book) pair by raw ids.
uid, iid = 187145, '0671014919'
pred = algo.predict(uid, iid, verbose=True)

user: 187145     item: 0671014919 r_ui = None   est = 8.30   {'actual_k': 5, 'was_impossible': False}


# Item based CF

# KNNBasic

In [None]:
# Item-based KNNBasic (user_based=False) with default settings, scored with 5-fold CV.
algo = KNNBasic(sim_options={'user_based': False})
cv_results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)
# Per-metric average across the five folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6481  1.6543  1.6415  1.6317  1.6428  1.6437  0.0075  
MAE (testset)     1.1988  1.2169  1.1988  1.1946  1.2076  1.2034  0.0080  
Fit time          1.52    1.17    1.33    1.15    1.06    1.24    0.16    
Test time         1.39    1.24    1.37    1.30    1.30    1.32    0.05    


test_rmse    1.643680
test_mae     1.203353
fit_time     1.242654
test_time    1.319575
dtype: float64

# KNNWithZScore

In [None]:
# Item-based KNNWithZScore (user_based=False) with default settings, scored with 5-fold CV.
algo = KNNWithZScore(sim_options={'user_based': False})
cv_results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)
# Per-metric average across the five folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6665  1.6652  1.6630  1.6623  1.6749  1.6664  0.0045  
MAE (testset)     1.2505  1.2437  1.2441  1.2440  1.2520  1.2469  0.0036  
Fit time          1.36    1.65    1.43    1.40    1.56    1.48    0.11    
Test time         1.58    1.35    1.50    1.42    1.51    1.47    0.08    


test_rmse    1.666393
test_mae     1.246855
fit_time     1.482361
test_time    1.471711
dtype: float64

# KNNWithMeans

In [None]:
# Item-based KNNWithMeans (user_based=False) with default settings, scored with 5-fold CV.
algo = KNNWithMeans(sim_options={'user_based': False})
cv_results = cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)
# Per-metric average across the five folds.
pd.DataFrame(cv_results).mean()

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6652  1.6146  1.6679  1.6427  1.6756  1.6532  0.0222  
MAE (testset)     1.2484  1.2155  1.2502  1.2328  1.2405  1.2375  0.0126  
Fit time          1.60    1.16    1.49    1.37    1.14    1.35    0.18    
Test time         1.38    1.28    1.31    1.35    1.42    1.35    0.05    


test_rmse    1.653203
test_mae     1.237484
fit_time     1.352931
test_time    1.349051
dtype: float64

In the item-based case, the KNNBasic algorithm gives the best performance in terms of both accuracy and fit time.

# Checking For Best Parameters

In [None]:
# Exhaustive search over neighbourhood size, minimum neighbours and similarity
# measure for item-based KNNBasic, scored with 3-fold CV.
param_grid = {
    'k': [40, 45, 50],
    'min_k': [1, 3, 5, 10],
    'sim_options': {
        'name': ['pearson', 'cosine', 'msd'],
        'user_based': [False],
    },
}
gs = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE first, then the parameter combination that achieved it.
for report in (gs.best_score, gs.best_params):
    print(report['rmse'])

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the c

# Training and testing the model with best parameters

In [None]:
# Final item-based model: KNNBasic with k=50, min_k=3 and cosine similarity.
algo = KNNBasic(
    k=50,
    min_k=3,
    sim_options={'name': 'cosine', 'user_based': False},
)
# fit() returns the fitted algorithm, so training and prediction can be chained.
test_pred = algo.fit(trainset).test(testset)
print("Item-based Model : Test Set")
accuracy.rmse(test_pred, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Item-based Model : Test Set
RMSE: 1.5378


1.5377613792620537

# Saving the Item-based model

In [None]:
# Persist the fitted item-based model to Drive. The context manager guarantees the
# file handle is flushed and closed even if pickling fails part-way through.
with open('/content/drive/MyDrive/Project/Item_based.sav', 'wb') as model_file:
    pickle.dump(algo, model_file)