In [3]:
import numpy as np
import pandas as pd
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise import NormalPredictor
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
import matplotlib.pyplot as plt
import seaborn as sns
from surprise.reader import Reader
from surprise.dataset import Dataset
from sklearn.metrics import accuracy_score
from surprise import accuracy

ModuleNotFoundError: No module named 'surprise'

In [None]:
df = pd.read_csv("E:\DATA SCIENCE\ASSIGNMENT\Recomondation System/book.csv", encoding = "ISO-8859-1")

In [None]:
df

In [None]:
df1 = df.iloc[:,1:]

In [None]:
df1

In [None]:
df1.columns = ['userID', 'title', 'bookRating']

In [None]:
df1.head()

In [None]:
print('# of records: %d\n# of books: %d\n# of users: %d' % (len(df1), len(df1['title'].unique()), len(df1['userID'].unique())))

In [None]:
palette = sns.color_palette("RdBu", 10)

In [2]:
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='bookRating', data=df1, palette=palette)
ax.set_title('Distribution of book ratings')

plt.show()

NameError: name 'plt' is not defined

### The majority of ratings is between 5 and 10. Most often users tend to rate books for 8. Second most frequent score is 7.

In [11]:
df1.bookRating.unique()

array([ 5,  3,  6,  8,  7, 10,  9,  4,  1,  2], dtype=int64)

In [12]:
reader = Reader(rating_scale=(1, 10))

In [13]:
data = Dataset.load_from_df(df1[['userID', 'title', 'bookRating']], reader)

In [14]:
trainset, testset = train_test_split(data, test_size=.20)

### Finding the best algorithm for our Recommendation System

In [15]:
benchmark = []

In [16]:
for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), BaselineOnly(), CoClustering()]:
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


In [17]:
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SVDpp,1.682381,7.853478,0.048304
SVD,1.688545,0.707892,0.036645
BaselineOnly,1.704505,0.041307,0.033645
KNNBaseline,1.707185,0.11726,0.040642
KNNWithMeans,1.819341,0.128084,0.040308
SlopeOne,1.822441,1.43278,0.060962
KNNBasic,1.823646,0.081516,0.07732
CoClustering,1.850626,1.909819,0.033313
NMF,1.91039,1.634655,0.03931
NormalPredictor,2.464297,0.015989,0.082949


##### SVD () algorithm gave us the least RMSE, therefore, we will train and predict with SVD

## 1. Building Recommendation System using surprice

In [18]:
algo = SVD()

In [19]:
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2bfb84d6910>

In [28]:
predictions = algo.test(testset)

In [29]:
accuracy.rmse(predictions, verbose=True)

RMSE: 1.6616


1.6615620940504578

In [31]:
def recommendation(userID):
    user = df1.copy()
    already_read = df1[df1['userID'] == userID]['title'].unique()
    user = user.reset_index()
    user = user[~user['title'].isin(already_read)]
    user['Estimate_Score']=user['title'].apply(lambda x: algo.predict(userID, x).est)
    #user = user.drop('title', axis = 1)
    user = user.sort_values('Estimate_Score', ascending=False)
    print(user.head(10))

## 2. Building Recommendation System using Correlation

In [32]:
rating = pd.pivot_table(df1, index='userID', values='bookRating', columns='title', fill_value=0)

In [34]:
rating

title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
278849,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
278851,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,7,0,0
278852,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
corr = np.corrcoef(rating.T)

In [40]:
corr.shape

(9659, 9659)

In [41]:
book_list=  list(rating)

In [42]:
book_titles =[] 

In [43]:
for i in range(len(book_list)):
    book_titles.append(book_list[i])

In [44]:
book_titles

[' Jason, Madison &amp',
 ' Other Stories;Merril;1985;McClelland &amp',
 ' Repairing PC Drives &amp',
 "'48",
 "'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities",
 '...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR',
 '01-01-00: A Novel of the Millennium',
 '1,401 More Things That P*Ss Me Off',
 '10 Commandments Of Dating',
 '100 Great Fantasy Short, Short Stories',
 '1001 Brilliant Ways to Checkmate',
 '101 Bright Ideas: Esl Activities for All Ages',
 '101 Dalmatians',
 '101 Essential Tips: House Plants',
 '101 Telephone Jokes',
 '101 Wacky Computer Jokes',
 '101 Ways to Make Money at Home',
 '11 Edward Street',
 '11th Hour',
 '13 99 Euros',
 '14,000 Things to Be Happy About',
 '1984',
 '1st to Die: A Novel',
 '20 Jahre 40 bleiben. Jung und schÃ?Â¶n in den besten Jahren.',
 '20 Years of Censored News',
 '20,000 Leagues Under the Sea (Wordsworth Collection)',
 '20,001 Names for Baby',
 '2001 Spanish and English Idioms/2001 Modismos Espanoles E Ingleses: 

In [45]:
def get_recommendation(books_list):
    book_similarities = np.zeros(corr.shape[0])
    
    for book in books_list:
        book_index = book_titles.index(book)
        book_similarities += corr[book_index] 
        book_preferences = []
    for i in range(len(book_titles)):
        book_preferences.append((book_titles[i],book_similarities[i]))
        
    return sorted(book_preferences, key= lambda x: x[1], reverse=True)

### You can enter any User ID and get the recommendations & estimated score

In [46]:
recommendation(276747)

      index  userID                                              title  \
3449   3449     882                         Still Life with Woodpecker   
2443   2443      82                         Still Life with Woodpecker   
5580   5580    2442                Els conys saborosos (Tros de paper)   
5578   5578    2442              Evangeli gris (Biblioteca A tot vent)   
9317   9317  161807                                            On Edge   
956     956  277538  Wall Street's Picks for 2000: An Insider's Gui...   
4379   4379    1768                           Dark Victory (Star Trek)   
2791   2791     388                                             Druids   
6087   6087    2977           Blankets of Sand: Poems of War and Exile   
1942   1942  278418                          Instances of the Number 3   

      bookRating  Estimate_Score  
3449          10        8.297269  
2443          10        8.297269  
5580          10        8.275922  
5578          10        8.269880  
9317      

### You can enter your favourite book and get the recommendation on what you might like

In [47]:
my_fav_books = ['Classical Mythology','Clara Callan']

In [48]:
print('The books you might like: \n' , get_recommendation(my_fav_books)[:10])

The books you might like: 
 [('Clara Callan', 0.9995414947271893), ('Decision in Normandy', 0.9995414947271893), ('Classical Mythology', 0.9995414947271892), ("A Dangerous Place: California's Unsettling Fate (Images of America)", -0.0009170105456212511), ('Angle of Repose (Contemporary American Fiction)', -0.0009170105456212511), ("Jacob Marley's Christmas Carol", -0.0009170105456212511), ('Of Love and Shadows', -0.0009170105456212511), ('On Secret Service', -0.0009170105456212511), ('Other worlds: The comical history of the states and empires of the moon and sun (Science fiction master series)', -0.0009170105456212511), ('Possessing the Secret of Joy', -0.0009170105456212511)]


In [None]:
conda install -c conda-forge scikit-surprise