In [2]:
# Importing necessary library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
### Load the CSV data into DataFrames

In [4]:
# Read books_data.csv into a DataFrame, then show its column names.
books_data = pd.read_csv('books_data.csv')
books_data.columns

Index(['Title', 'description', 'authors', 'image', 'previewLink', 'publisher',
       'publishedDate', 'infoLink', 'categories', 'ratingsCount'],
      dtype='object')

In [5]:
# Read only the first 1,000,000 rows of books_rating.csv, then show its column names.
books_rating = pd.read_csv('books_rating.csv', nrows=1_000_000)
books_rating.columns

Index(['Id', 'Title', 'Price', 'User_id', 'profileName', 'review/helpfulness',
       'review/score', 'review/time', 'review/summary', 'review/text'],
      dtype='object')

In [6]:
# Normalise the books_data column names to snake_case.
books_data = books_data.rename(
    columns={
        "Title": 'title',
        'previewLink': 'preview_link',
        'publishedDate': 'published_date',
        'infoLink': 'info_link',
        'ratingsCount': 'ratings_count',
    }
)
books_data.columns

Index(['title', 'description', 'authors', 'image', 'preview_link', 'publisher',
       'published_date', 'info_link', 'categories', 'ratings_count'],
      dtype='object')

In [7]:
# Disable column-width truncation so long cell values (such as URLs) are shown in full.
pd.set_option('display.max_colwidth', None)

# Preview the image URL of the first five books.
books_data['image'].head()

0              http://books.google.com/books/content?id=DykPAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
1    http://books.google.com/books/content?id=IjvHQsCn_pgC&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api
2              http://books.google.com/books/content?id=2tsDAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
3              http://books.google.com/books/content?id=aRSIgJlq6JwC&printsec=frontcover&img=1&zoom=1&source=gbs_api
4                                                                                                                NaN
Name: image, dtype: object

In [8]:
# Rename the books_rating columns to snake_case ('review/score' becomes 'rating').

books_rating = books_rating.rename(
    columns={
        'Id': 'id',
        'Title': 'title',
        'Price': 'price',
        'User_id': 'user_id',
        'profileName': 'profile_name',
        'review/helpfulness': 'review_helpfulness',
        'review/score': 'rating',
        'review/time': 'review_time',
        'review/summary': 'review_summary',
        'review/text': 'review_text',
    }
)
books_rating.columns


Index(['id', 'title', 'price', 'user_id', 'profile_name', 'review_helpfulness',
       'rating', 'review_time', 'review_summary', 'review_text'],
      dtype='object')

In [9]:
# Drop the columns that are not needed for the rating matrix, then discard
# every review that has no user_id.
books_rating_arranged = (
    books_rating
    .drop(columns=['review_helpfulness', 'review_time', 'review_summary', 'review_text', 'price'])
    .dropna(subset=['user_id'])
)

In [10]:
# Count the (non-null) ratings each title received; the count is used further
# down to eliminate books with too few reviews.

number_rating = (
    books_rating_arranged
    .groupby('title')['rating']
    .count()
    .reset_index(name='total_ratings')
)

In [11]:
# Attach each title's total_ratings to every individual rating row (join on title).
books_rating_filtered = pd.merge(books_rating_arranged, number_rating, on='title')

In [12]:
# Shape of the merged ratings table before any filtering
books_rating_filtered.shape

(808363, 6)

In [13]:
# Keep only the rows of books that received at least 200 ratings.
has_enough_ratings = books_rating_filtered['total_ratings'].ge(200)
books_rating_filtered = books_rating_filtered.loc[has_enough_ratings]

In [14]:
# Shape of the ratings table after keeping only books with total_ratings >= 200
books_rating_filtered.shape

(292306, 6)

In [15]:
# Preview the filtered ratings table.
# NOTE(review): the rendered output includes profile_name values, some of which
# look like e-mail addresses; consider clearing this output before sharing.
books_rating_filtered

Unnamed: 0,id,title,user_id,profile_name,rating,total_ratings
926,B0007H4QBK,Economics in one lesson,A2FYWUHFF21Q8F,Mark Twian,5.0,314
927,B0007H4QBK,Economics in one lesson,A5P9PV92PRYEE,gary@clearbridge.com,5.0,314
928,B0007H4QBK,Economics in one lesson,A1D4N3NVIN55PK,fmj30cal,4.0,314
929,B0007H4QBK,Economics in one lesson,A34AW9TMV7F69T,GE,3.0,314
930,B0007H4QBK,Economics in one lesson,A1IHQR1NT6CKVR,"Bogey62 ""Bill""",5.0,314
...,...,...,...,...,...,...
805835,084230052X,The Atonement Child,A2YVBNX4Q4CQV8,JoyAnne,5.0,259
805836,084230052X,The Atonement Child,A2WI388LZQM42A,E. Gabriella,5.0,259
805837,084230052X,The Atonement Child,A8POSLLBQUG4V,JB,5.0,259
805838,084230052X,The Atonement Child,A1V8BSW4REJHBN,M.D.C,4.0,259


In [16]:
# Keep only the first row for each (user_id, title) pair so every pair
# contributes a single rating to the matrix built below.
# The frame is reassigned rather than modified with inplace=True: it was produced
# by boolean filtering, and mutating such a frame in place can raise
# SettingWithCopyWarning. Reassignment also keeps the cell safe to re-run.
books_rating_filtered = books_rating_filtered.drop_duplicates(subset=['user_id', 'title'])
books_rating_filtered.shape

(245139, 6)

In [17]:
# Build a title x user_id matrix of ratings; pairs without a rating are NaN.
# pivot_table aggregates with the mean by default, which leaves values unchanged
# when each (title, user_id) pair occurs only once.
matrix_ratings = pd.pivot_table(
    books_rating_filtered,
    values='rating',
    index='title',
    columns='user_id',
)

In [18]:
# (number of titles, number of users)
matrix_ratings.shape

(526, 147766)

In [20]:
# Replace the NaN left by the pivot (no rating for that title/user pair) with 0.
# Done in place on the existing matrix object.
matrix_ratings.fillna(0, inplace=True)

In [21]:
# Display the zero-filled title x user_id rating matrix.
matrix_ratings

user_id,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZLYAJWAPX91,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
from scipy.sparse import csr_matrix

# Convert the dense rating matrix (rows = titles, columns = users) to SciPy's
# compressed sparse row format.
book_sparse = csr_matrix(matrix_ratings.to_numpy())

In [24]:
from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour search; no metric is passed, so the estimator's
# default metric is used.
model = NearestNeighbors(algorithm= 'brute')

# Fit on the sparse matrix: every row (one title) is a sample.
model.fit(book_sparse)

In [35]:
# Query the fitted model with the ratings row at position 200 (as a 1 x n_users
# array) and ask for its 6 nearest rows.
query_vector = matrix_ratings.iloc[[200]].to_numpy()
distance, suggestion = model.kneighbors(query_vector, n_neighbors=6)

In [26]:
# Distances from the query row to each returned neighbour.
# NOTE(review): this cell's execution count is lower than that of the kneighbors
# cell, so the displayed output may be stale; re-run to confirm.
distance

array([[ 0.        ,  5.        , 64.90762667, 67.12674579, 69.007246  ,
        69.05070601]])

In [29]:
# Row positions (into matrix_ratings) of the returned neighbours.
# NOTE(review): the displayed first neighbour is 237 although the kneighbors query
# uses row 200; the output looks stale, so re-run to confirm.
suggestion

array([[237, 128, 299, 439, 365, 277]], dtype=int64)

In [38]:
# Display the rating matrix again (same frame as displayed earlier in the notebook).
matrix_ratings

user_id,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZLYAJWAPX91,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# Print the titles of the suggested neighbours: one pandas Index per query row.
for neighbor_rows in suggestion:
    print(matrix_ratings.index[neighbor_rows])

Index(['Jane Eyre (Signet classics)', 'Jane Eyre (Simple English)',
       'Jane Eyre (New Windmill)', 'Jane Eyre (Everyman's Classics)',
       'Jane Eyre: Complete and Unabridged (Puffin Classics)', 'Jane Eyre'],
      dtype='object', name='title')


In [40]:
# Collect the suggested titles (one pandas Index per query row) and print the list.
book_titles = [matrix_ratings.index[neighbor_rows] for neighbor_rows in suggestion]
print(book_titles)

[Index(['Jane Eyre (Signet classics)', 'Jane Eyre (Simple English)',
       'Jane Eyre (New Windmill)', 'Jane Eyre (Everyman's Classics)',
       'Jane Eyre: Complete and Unabridged (Puffin Classics)', 'Jane Eyre'],
      dtype='object', name='title')]


In [41]:
# Look up the positional row in books_data of each suggested title.
# A title with no matching row in books_data is skipped and recorded in
# titles_without_metadata, instead of raising IndexError on the empty match.
ids_index = []
titles_without_metadata = []
for name in book_titles[0]:
    matches = np.where(books_data['title'] == name)[0]
    if matches.size == 0:
        titles_without_metadata.append(name)
        continue
    # Several rows may share the same title; keep the first match.
    ids = matches[0]
    ids_index.append(ids)

In [43]:
# Print the full books_data row of every suggested book.
for idx in ids_index:
    # NOTE: despite its name, `url` holds the whole row (a Series), not only a link.
    url = books_data.iloc[idx]
    print(url)

title                                                                                                                                  Jane Eyre (Signet classics)
description       The classic 1847 novel traces the doomed love affair between an orphaned, independent-minded governess and her brooding employer, Mr. Rochester.
authors                                                                                                                                       ['Charlotte Brontë']
image                                                        http://books.google.com/books/content?id=A3bVnQEACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
preview_link                                                  http://books.google.com/books?id=A3bVnQEACAAJ&dq=Jane+Eyre+(Signet+classics)&hl=&cd=1&source=gbs_api
publisher                                                                                                                                                   Signet
published_date        