In [1]:
# Importing necessary library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
### Load csv data to tables

In [3]:
books_data = pd.read_csv('books_data.csv')
books_data.columns

Index(['Title', 'description', 'authors', 'image', 'previewLink', 'publisher',
       'publishedDate', 'infoLink', 'categories', 'ratingsCount'],
      dtype='object')

In [4]:
type(books_data.authors.drop_duplicates)

method

In [5]:
books_rating = pd.read_csv('books_rating.csv',  nrows=1000000)
books_rating.columns

Index(['Id', 'Title', 'Price', 'User_id', 'profileName', 'review/helpfulness',
       'review/score', 'review/time', 'review/summary', 'review/text'],
      dtype='object')

In [6]:
books_data.rename(columns={"Title": 'title',
                           'previewLink': 'preview_link',
                           'publishedDate': 'published_date',
                           'infoLink': 'info_link',
                           'ratingsCount': 'ratings_count'},inplace=True)
books_data.columns

Index(['title', 'description', 'authors', 'image', 'preview_link', 'publisher',
       'published_date', 'info_link', 'categories', 'ratings_count'],
      dtype='object')

In [7]:
# showing all cell's content

pd.set_option('display.max_colwidth', None)
books_data.head()['image']

0              http://books.google.com/books/content?id=DykPAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
1    http://books.google.com/books/content?id=IjvHQsCn_pgC&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api
2              http://books.google.com/books/content?id=2tsDAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
3              http://books.google.com/books/content?id=aRSIgJlq6JwC&printsec=frontcover&img=1&zoom=1&source=gbs_api
4                                                                                                                NaN
Name: image, dtype: object

In [8]:
#Rename of books_rating columns

books_rating.rename(columns={'Id':'id',
                            'Title': 'title',
                            'Price':'price',
                            'User_id':'user_id',
                            'profileName':'profile_name',
                            'review/helpfulness':'review_helpfulness',
                            'review/score':'rating',
                            'review/time':'review_time',
                            'review/summary':'review_summary',
                            'review/text':'review_text'},inplace=True)
books_rating.columns


Index(['id', 'title', 'price', 'user_id', 'profile_name', 'review_helpfulness',
       'rating', 'review_time', 'review_summary', 'review_text'],
      dtype='object')

In [9]:
books_rating_arranged = books_rating.drop(columns=['review_helpfulness', 'review_time', 'review_summary', 'review_text', 'price'])
books_rating_arranged = books_rating_arranged.dropna(subset=['user_id'])

In [10]:
#outliers drop: eliminate books with less than n reviews

number_rating = books_rating_arranged.groupby('title')['rating'].count().reset_index()
number_rating.rename(columns={'rating': 'total_ratings'}, inplace = True)

In [11]:
books_rating_filtered = books_rating_arranged.merge(number_rating ,on='title')

In [12]:
#books_rating table shape reviews no filtering
books_rating_filtered.shape

(808363, 6)

In [13]:
books_rating_filtered = books_rating_filtered[books_rating_filtered['total_ratings'] >= 200]

In [14]:
#books_rating table shape reviews after filtering
books_rating_filtered.shape

(292306, 6)

In [15]:
books_rating_filtered

Unnamed: 0,id,title,user_id,profile_name,rating,total_ratings
926,B0007H4QBK,Economics in one lesson,A2FYWUHFF21Q8F,Mark Twian,5.0,314
927,B0007H4QBK,Economics in one lesson,A5P9PV92PRYEE,gary@clearbridge.com,5.0,314
928,B0007H4QBK,Economics in one lesson,A1D4N3NVIN55PK,fmj30cal,4.0,314
929,B0007H4QBK,Economics in one lesson,A34AW9TMV7F69T,GE,3.0,314
930,B0007H4QBK,Economics in one lesson,A1IHQR1NT6CKVR,"Bogey62 ""Bill""",5.0,314
...,...,...,...,...,...,...
805835,084230052X,The Atonement Child,A2YVBNX4Q4CQV8,JoyAnne,5.0,259
805836,084230052X,The Atonement Child,A2WI388LZQM42A,E. Gabriella,5.0,259
805837,084230052X,The Atonement Child,A8POSLLBQUG4V,JB,5.0,259
805838,084230052X,The Atonement Child,A1V8BSW4REJHBN,M.D.C,4.0,259


In [16]:
books_rating_filtered.drop_duplicates(['user_id','title'], inplace=True)
books_rating_filtered.shape

(245139, 6)

In [17]:
matrix_ratings = books_rating_filtered.pivot_table(columns='user_id', index='title', values= 'rating')

In [18]:
matrix_ratings.shape

(526, 147766)

In [19]:
matrix_ratings.fillna(0, inplace=True)

In [20]:
matrix_ratings

user_id,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZLYAJWAPX91,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
## Authors influention

In [22]:
title_authors = books_data.drop(columns=['description', 'image', 'preview_link', 'publisher',
       'published_date', 'info_link', 'categories', 'ratings_count'])
title_authors

Unnamed: 0,title,authors
0,Its Only Art If Its Well Hung!,['Julie Strain']
1,Dr. Seuss: American Icon,['Philip Nel']
2,Wonderful Worship in Smaller Churches,['David R. Ray']
3,Whispers of the Wicked Saints,['Veronica Haddon']
4,"Nation Dance: Religion, Identity and Cultural Difference in the Caribbean",['Edward Long']
...,...,...
212399,The Orphan Of Ellis Island (Time Travel Adventures),['Elvira Woodruff']
212400,Red Boots for Christmas,
212401,Mamaw,['Wild Wild Cabbage']
212402,The Autograph Man,['Zadie Smith']


In [23]:
## deleting parenthesis, and seperate by commas

title_authors['authors_splited'] = title_authors.authors.str[1:-1].str.split(', ')
title_authors = title_authors.drop(columns=['authors'])
title_authors

Unnamed: 0,title,authors_splited
0,Its Only Art If Its Well Hung!,['Julie Strain']
1,Dr. Seuss: American Icon,['Philip Nel']
2,Wonderful Worship in Smaller Churches,['David R. Ray']
3,Whispers of the Wicked Saints,['Veronica Haddon']
4,"Nation Dance: Religion, Identity and Cultural Difference in the Caribbean",['Edward Long']
...,...,...
212399,The Orphan Of Ellis Island (Time Travel Adventures),['Elvira Woodruff']
212400,Red Boots for Christmas,
212401,Mamaw,['Wild Wild Cabbage']
212402,The Autograph Man,['Zadie Smith']


In [24]:
#explode seperate multiple authors
title_authors = title_authors.explode('authors_splited')
title_authors

Unnamed: 0,title,authors_splited
0,Its Only Art If Its Well Hung!,'Julie Strain'
1,Dr. Seuss: American Icon,'Philip Nel'
2,Wonderful Worship in Smaller Churches,'David R. Ray'
3,Whispers of the Wicked Saints,'Veronica Haddon'
4,"Nation Dance: Religion, Identity and Cultural Difference in the Caribbean",'Edward Long'
...,...,...
212400,Red Boots for Christmas,
212401,Mamaw,'Wild Wild Cabbage'
212402,The Autograph Man,'Zadie Smith'
212403,Student's Solutions Manual for Johnson/Mowry's Mathematics: A Practical Odyssey,'David B. Johnson'


In [25]:
title_authors

Unnamed: 0,title,authors_splited
0,Its Only Art If Its Well Hung!,'Julie Strain'
1,Dr. Seuss: American Icon,'Philip Nel'
2,Wonderful Worship in Smaller Churches,'David R. Ray'
3,Whispers of the Wicked Saints,'Veronica Haddon'
4,"Nation Dance: Religion, Identity and Cultural Difference in the Caribbean",'Edward Long'
...,...,...
212400,Red Boots for Christmas,
212401,Mamaw,'Wild Wild Cabbage'
212402,The Autograph Man,'Zadie Smith'
212403,Student's Solutions Manual for Johnson/Mowry's Mathematics: A Practical Odyssey,'David B. Johnson'


In [26]:
#pd.pivot_table(title_authors, values="authors_splited")

In [27]:
#merging ratings with authors

matrix_ratings_authors = matrix_ratings.merge(title_authors, on='title')
matrix_ratings_authors = matrix_ratings_authors.set_index('title')
matrix_ratings_authors

Unnamed: 0_level_0,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3,authors_splited
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Crossway Bibles'
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Charles C. Mann'
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Eric Flint'
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'George Orwell'
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Jules Verne'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Emily Brontë'
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Arthur Pike & David Pike'
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Robert M. Pirsig'
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Oscar Wilde'


In [28]:
pattern = r'authors\w+'
#final_matrix = final_matrix.drop(list(final_matrix.filter(regex=pattern)))
authors_matrix = matrix_ratings_authors.filter(regex=pattern)
authors_matrix

Unnamed: 0_level_0,authors_splited
title,Unnamed: 1_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",'Crossway Bibles'
1491: New Revelations of the Americas Before Columbus,'Charles C. Mann'
1632 (The Assiti Shards),'Eric Flint'
1984,'George Orwell'
"20, 000 Leagues Under the Sea",'Jules Verne'
...,...
Wuthering Heights (Riverside editions),'Emily Brontë'
Year of Wonders (Turtleback School & Library Binding Edition),'Arthur Pike & David Pike'
Zen and the Art of Motorcycle Maintenance,'Robert M. Pirsig'
the Picture of Dorian Gray,'Oscar Wilde'


In [29]:

authors_matrix = pd.get_dummies(authors_matrix)

In [30]:
authors_matrix = authors_matrix.groupby('title').sum()
authors_matrix = authors_matrix.mul(20)

In [31]:
authors_matrix

Unnamed: 0_level_0,"authors_splited_""Alex Prud'homme""","authors_splited_""Ellen O'Connor""","authors_splited_""Kate O'Beirne""",authors_splited_'A. J. Russell',authors_splited_'Abraham Verghese',authors_splited_'Adam Smith',authors_splited_'Aerie Books Ltd',authors_splited_'Agatha Christie',authors_splited_'Alan Paton',authors_splited_'Aldous Huxley',...,authors_splited_'William Manchester',authors_splited_'William Queen',authors_splited_'William S. Burroughs',authors_splited_'William Shakespeare',authors_splited_'Óscar Wilde',authors_splited_Booksellers,authors_splited_Chicago',authors_splited_Firm,authors_splited_Inc',authors_splited_Jr.'
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1491: New Revelations of the Americas Before Columbus,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1632 (The Assiti Shards),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1984,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"20, 000 Leagues Under the Sea",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Year of Wonders (Turtleback School & Library Binding Edition),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Zen and the Art of Motorcycle Maintenance,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
the Picture of Dorian Gray,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
matrix_ratings_authors.drop(columns=['authors_splited'])

Unnamed: 0_level_0,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZLYAJWAPX91,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
final_matrix = matrix_ratings_authors.merge(authors_matrix, on='title')

In [37]:
# select numeric columns
numeric_cols = final_matrix.select_dtypes(include=[float, int]).columns

# drop non-numeric columns
final_matrix = final_matrix[numeric_cols]

In [39]:
final_matrix = final_matrix.astype(int)
final_matrix.dtypes

A0015610VMNR0JC9XVL1     int32
A00274963RTZUW5BU5ROI    int32
A00538832OF17R8Q8JHTB    int32
A00540411RKGTDNU543WS    int32
A00878773S2MNB00COHKV    int32
                         ...  
AZZVOB0B882KK            int32
AZZVZL4QEHEHO            int32
AZZWKE7JW54GB            int32
AZZXSP27F21T6            int32
AZZYLDF6HREX3            int32
Length: 147766, dtype: object

In [None]:
#index = final_matrix.index.get_loc('Jane Eyre (Signet classics)')
#print(index)


In [40]:
def recommendation_model_rat_auth(book_title, num_neighbours) :
    book_sparse = csr_matrix(final_matrix)
    model = NearestNeighbors(algorithm= 'brute')
    model.fit(book_sparse)
    index = final_matrix.index.get_loc(book_title)
    suggestion = model.kneighbors(final_matrix.iloc[index,:].values.reshape(1,-1), n_neighbors=num_neighbours )[1]
    book_titles = [] 
    for i in suggestion:
        book_titles.append(final_matrix.index[i])
    return book_titles

In [42]:
recommendation_model_rat_auth('20, 000 Leagues Under the Sea' , 20)

[Index(['20, 000 Leagues Under the Sea',
        'Twenty thousand leagues under the sea, (The Windermere series)',
        'Twenty Thousand Leagues Under the Sea (Caxton Edition)',
        'Twenty Thousand Leagues Under the Sea (Caxton Edition)',
        'Twenty thousand leagues under the sea',
        'Twenty Thousand Leagues Under the Sea (Caxton Edition)',
        'Twenty Thousand Leagues Under the Sea (Caxton Edition)',
        'Twenty Thousand Leagues Under the Sea (Thorndike Press Large Print Perennial Bestsellers Series)',
        'Roman Catholicism', 'The Saboteurs (Men at War Series)',
        'The Saboteurs (Men at War Series)', 'Pirate (Hawke Series)',
        'The Conspiracy Club', 'Second Sight (The Arcane Society, Book 1)',
        'Trojan Odyssey: A Dirk Pitt Novel', 'Cranford',
        'The bridge of San Luis Rey', '4 Blondes', 'The Sea Wolf',
        'Death Match'],
       dtype='object', name='title')]

In [None]:
book_sparse = csr_matrix(final_matrix)

In [None]:
model = NearestNeighbors(algorithm= 'brute')

model.fit(book_sparse)

In [None]:
distance, suggestion = model.kneighbors(final_matrix.iloc[200,:].values.reshape(1,-1), n_neighbors=6 )

In [None]:
distance

array([[ 0.,  0.,  0.,  0.,  0., 10.]])

In [None]:
suggestion

array([[198, 199, 197, 196, 200, 195]], dtype=int64)

In [None]:
final_matrix

user_id,A0015610VMNR0JC9XVL1,A00274963RTZUW5BU5ROI,A00538832OF17R8Q8JHTB,A00540411RKGTDNU543WS,A00878773S2MNB00COHKV,A00891092QIVH4W1YP46A,A0092581WFYQNV4KMUZ3,A01023015VSQI0VE22HU,A01038432MVI9JXYTTK5T,A010809536IK2VS9SAU9Q,...,AZZLYAJWAPX91,AZZQV95X90WT7,AZZR4T996J02D,AZZUIE66HZNY1,AZZUTPP7O8M98,AZZVOB0B882KK,AZZVZL4QEHEHO,AZZWKE7JW54GB,AZZXSP27F21T6,AZZYLDF6HREX3
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(ESV) English Standard Version Large Print Bible. Premium Bonded Leather, Black, Red Letter Text (English Language)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1491: New Revelations of the Americas Before Columbus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632 (The Assiti Shards),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"20, 000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wuthering Heights (Riverside editions),0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Year of Wonders (Turtleback School & Library Binding Edition),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the Picture of Dorian Gray,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
for i in suggestion:
    print(final_matrix.iloc[i])

user_id                                               A0015610VMNR0JC9XVL1  \
title                                                                        
Jane Eyre (Signet classics)                                            0.0   
Jane Eyre (Simple English)                                             0.0   
Jane Eyre (New Windmill)                                               0.0   
Jane Eyre (Everyman's Classics)                                        0.0   
Jane Eyre: Complete and Unabridged (Puffin Classics)                   0.0   
Jane Eyre                                                              0.0   

user_id                                               A00274963RTZUW5BU5ROI  \
title                                                                         
Jane Eyre (Signet classics)                                             0.0   
Jane Eyre (Simple English)                                              0.0   
Jane Eyre (New Windmill)                                   

In [None]:
book_titles = [] 
for i in suggestion:
    book_titles.append(final_matrix.index[i])
print(book_titles)

[Index(['Jane Eyre (Signet classics)', 'Jane Eyre (Simple English)',
       'Jane Eyre (New Windmill)', 'Jane Eyre (Everyman's Classics)',
       'Jane Eyre: Complete and Unabridged (Puffin Classics)', 'Jane Eyre'],
      dtype='object', name='title')]


In [None]:
ids_index = []
for name in book_titles[0]: 
    ids = np.where(books_data['title'] == name)[0][0]
    ids_index.append(ids)

In [None]:
for idx in ids_index:
    url = books_data.iloc[idx]['title']
    print(url)

Jane Eyre (Signet classics)
Jane Eyre (Simple English)
Jane Eyre (New Windmill)
Jane Eyre (Everyman's Classics)
Jane Eyre: Complete and Unabridged (Puffin Classics)
Jane Eyre
