In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [66]:
#load data
books = pd.read_csv('./datasets/books.csv', sep=';', encoding='utf-8', on_bad_lines='skip')

In [67]:
books.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL
0,001V,Cây Cam Ngọt Của Tôi,José Mauro de Vasconcelos,2020,NXB Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...
1,002V,Một Ví Dụ Xoàng (Tái Bản 2023),Nguyễn Bình Phương,2023,Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...


In [68]:
books.shape 
# => (số hàng, số cọc)

(100, 6)

In [69]:
# Analyzing Data
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL'],
      dtype='object')

In [70]:
books.rename(columns={
    'Book-Title': 'title',
    'Book-Author': 'author',
    'Year-Of-Publication': 'year',
    'Publisher': 'publisher',
    'Image-URL': 'img_url',
}, inplace=True)

In [71]:
books.head(2)

Unnamed: 0,ISBN,title,author,year,publisher,img_url
0,001V,Cây Cam Ngọt Của Tôi,José Mauro de Vasconcelos,2020,NXB Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...
1,002V,Một Ví Dụ Xoàng (Tái Bản 2023),Nguyễn Bình Phương,2023,Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...


In [72]:
users = pd.read_csv('./datasets/users.csv', sep=';', encoding='utf-8', on_bad_lines='skip')

In [73]:
users.head(2)

Unnamed: 0,User-ID,Location,Age
0,97,Hà Nội,38.0
1,135,Lạng Sơn,


In [74]:
users.shape

(200, 3)

In [75]:
ratings = pd.read_csv('./datasets/ratings.csv', sep=';', encoding='utf-8', on_bad_lines='skip')

In [76]:
ratings.head(5)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,53,092V,3
1,12,080V,2
2,187,051E,1
3,138,066E,1
4,174,055E,4


In [77]:
print(books.shape)
print(users.shape)
print(ratings.shape)

(100, 6)
(200, 3)
(2037, 3)


In [78]:
ratings.rename(columns={
    'User-ID': 'user_id',
    'Book-Rating': 'rating',
}, inplace=True)

In [79]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
0,53,092V,3
1,12,080V,2
2,187,051E,1
3,138,066E,1
4,174,055E,4


In [80]:
ratings['user_id'].value_counts()

user_id
100    22
79     21
54     20
26     20
108    20
       ..
66      4
173     4
167     3
141     3
176     3
Name: count, Length: 200, dtype: int64

In [81]:
ratings['user_id'].unique().shape

(200,)

In [82]:
x = ratings['user_id'].value_counts() > 5

In [83]:
x[x].shape

(183,)

In [84]:
y = x[x].index

In [85]:
y

Index([100,  79,  54,  26, 108, 186,  13,  53,  83,  58,
       ...
        28, 126, 175, 199,  98, 154, 193,  12, 132, 184],
      dtype='int64', name='user_id', length=183)

In [86]:
ratings = ratings[ratings['user_id'].isin(y)]

In [87]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
0,53,092V,3
1,12,080V,2
2,187,051E,1
3,138,066E,1
4,174,055E,4


In [88]:
ratings.shape

(1963, 3)

In [89]:
books.head(2)

Unnamed: 0,ISBN,title,author,year,publisher,img_url
0,001V,Cây Cam Ngọt Của Tôi,José Mauro de Vasconcelos,2020,NXB Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...
1,002V,Một Ví Dụ Xoàng (Tái Bản 2023),Nguyễn Bình Phương,2023,Hội Nhà Văn,https://cdn0.fahasa.com/media/catalog/product/...


In [90]:
ratings_with_books = ratings.merge(books, on = 'ISBN')

In [91]:
ratings_with_books.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,img_url
0,53,092V,3,Kiếp Ba Khía,Trần Bảo Định,2020,NXB Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...
1,12,080V,2,"Không Đầu, Không Cuối, Không Biết Tại Sao",Nhiều Tác Giả,2023,NXB Văn Học,https://cdn0.fahasa.com/media/catalog/product/...
2,187,051E,1,SAT Math Prep: Over 400 Practice Questions + O...,Kaplan Test Prep,2020,Kaplan Publishing,https://cdn0.fahasa.com/media/catalog/product/...
3,138,066E,1,Will,"Will Smith, Mark Manson",2021,Penguin,https://cdn0.fahasa.com/media/catalog/product/...
4,174,055E,4,"The Scorch Trials (Maze Runner, Book 2)",James Dashner,2011,Ember,https://cdn0.fahasa.com/media/catalog/product/...


In [92]:
ratings_with_books.shape

(1963, 8)

In [93]:
num_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()

In [94]:
num_rating.head()

Unnamed: 0,title,rating
0,A Clash Of Kings,23
1,"Boy Erased: A Memoir of Identity, Faith, and F...",22
2,Búi Thông Thơ Dại,16
3,Bắt Đầu Cất Lên Tiếng Cười,31
4,Bức Thư Tình Có Vị Ngọt Của Bánh,10


In [95]:
num_rating.rename(columns={
    'rating': 'num_of_rating'
}, inplace=True)

In [96]:
num_rating.head()

Unnamed: 0,title,num_of_rating
0,A Clash Of Kings,23
1,"Boy Erased: A Memoir of Identity, Faith, and F...",22
2,Búi Thông Thơ Dại,16
3,Bắt Đầu Cất Lên Tiếng Cười,31
4,Bức Thư Tình Có Vị Ngọt Của Bánh,10


In [97]:
final_rating = ratings_with_books.merge(num_rating, on = 'title')

In [98]:
final_rating.head(2)

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,img_url,num_of_rating
0,53,092V,3,Kiếp Ba Khía,Trần Bảo Định,2020,NXB Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...,30
1,12,080V,2,"Không Đầu, Không Cuối, Không Biết Tại Sao",Nhiều Tác Giả,2023,NXB Văn Học,https://cdn0.fahasa.com/media/catalog/product/...,28


In [99]:
final_rating.shape

(1963, 9)

In [100]:
final_rating = final_rating[final_rating['num_of_rating'] >= 3]

In [101]:
final_rating.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,img_url,num_of_rating
0,53,092V,3,Kiếp Ba Khía,Trần Bảo Định,2020,NXB Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...,30
1,12,080V,2,"Không Đầu, Không Cuối, Không Biết Tại Sao",Nhiều Tác Giả,2023,NXB Văn Học,https://cdn0.fahasa.com/media/catalog/product/...,28
2,187,051E,1,SAT Math Prep: Over 400 Practice Questions + O...,Kaplan Test Prep,2020,Kaplan Publishing,https://cdn0.fahasa.com/media/catalog/product/...,29
3,138,066E,1,Will,"Will Smith, Mark Manson",2021,Penguin,https://cdn0.fahasa.com/media/catalog/product/...,34
4,174,055E,4,"The Scorch Trials (Maze Runner, Book 2)",James Dashner,2011,Ember,https://cdn0.fahasa.com/media/catalog/product/...,23


In [102]:
final_rating.sample(10)

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,img_url,num_of_rating
1575,89,024V,5,Hôm Nay Thương Nhớ Rơi Vỡ Lòng Ai - Tặng Kèm B...,A Tòn,2023,Phụ Nữ Việt Nam,https://cdn0.fahasa.com/media/catalog/product/...,22
341,85,024V,1,Hôm Nay Thương Nhớ Rơi Vỡ Lòng Ai - Tặng Kèm B...,A Tòn,2023,Phụ Nữ Việt Nam,https://cdn0.fahasa.com/media/catalog/product/...,22
1006,139,055E,3,"The Scorch Trials (Maze Runner, Book 2)",James Dashner,2011,Ember,https://cdn0.fahasa.com/media/catalog/product/...,23
1264,193,054E,2,Wonder Woman: Warbringer,Leigh Bardugo,2017,Random House LCC US,https://cdn0.fahasa.com/media/catalog/product/...,23
1893,154,065E,5,Father Of Lions,Louise Callaghan,2020,Forge Trade,https://cdn0.fahasa.com/media/catalog/product/...,21
49,104,008V,2,Vui Vẻ Không Quạu Nha 2 - Một Cuốn Sách Buồn… ...,Ở Đây Zui Nè,2021,\tNXB Phụ Nữ Việt Nam,https://cdn0.fahasa.com/media/catalog/product/...,15
1805,42,077V,5,Mùa Đi Qua Phố,Nhiều Tác Giả,2022,Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...,24
1343,189,060E,3,Legend. by Marie Lu,Marie Lu,2012,Puffin Books,https://cdn0.fahasa.com/media/catalog/product/...,21
723,68,096V,3,"Nè Cô Bé, Đừng Ngồi Trên Cửa Sổ",Lê Đắc Hoàng Hựu,2020,NXB Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...,31
239,87,088V,0,Tớ Thích Cậu Hơn Cả Harvard (Tái Bản 2023),Lan Rùa,2023,Dân Trí,https://cdn0.fahasa.com/media/catalog/product/...,25


In [103]:
final_rating.shape

(1963, 9)

In [104]:
final_rating.drop_duplicates(['user_id', 'title'], inplace=True)

In [105]:
final_rating.shape

(1750, 9)

In [106]:
# Converting To Pivot Table
final_rating

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,img_url,num_of_rating
0,53,092V,3,Kiếp Ba Khía,Trần Bảo Định,2020,NXB Tổng Hợp TPHCM,https://cdn0.fahasa.com/media/catalog/product/...,30
1,12,080V,2,"Không Đầu, Không Cuối, Không Biết Tại Sao",Nhiều Tác Giả,2023,NXB Văn Học,https://cdn0.fahasa.com/media/catalog/product/...,28
2,187,051E,1,SAT Math Prep: Over 400 Practice Questions + O...,Kaplan Test Prep,2020,Kaplan Publishing,https://cdn0.fahasa.com/media/catalog/product/...,29
3,138,066E,1,Will,"Will Smith, Mark Manson",2021,Penguin,https://cdn0.fahasa.com/media/catalog/product/...,34
4,174,055E,4,"The Scorch Trials (Maze Runner, Book 2)",James Dashner,2011,Ember,https://cdn0.fahasa.com/media/catalog/product/...,23
...,...,...,...,...,...,...,...,...,...
1957,152,051E,0,SAT Math Prep: Over 400 Practice Questions + O...,Kaplan Test Prep,2020,Kaplan Publishing,https://cdn0.fahasa.com/media/catalog/product/...,29
1958,143,069E,0,Impact: A Step-By-Step Plan To Create The Worl...,"Christen Brandt, Tammy Tibbetts",2023,PublicAffairs,https://cdn0.fahasa.com/media/catalog/product/...,32
1959,109,010V,5,Dài Hơn Một Mùa Hè,Lu,2023,Phụ Nữ Việt Nam,https://cdn0.fahasa.com/media/catalog/product/...,15
1960,72,088V,4,Tớ Thích Cậu Hơn Cả Harvard (Tái Bản 2023),Lan Rùa,2023,Dân Trí,https://cdn0.fahasa.com/media/catalog/product/...,25


In [107]:
book_pivot = final_rating.pivot_table(columns='user_id', index='title', values='rating')

In [108]:
book_pivot

user_id,1,2,3,4,5,6,7,8,9,10,...,189,190,191,192,193,194,195,196,199,200
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Clash Of Kings,,,,,,,,,,,...,,4.0,3.0,,,1.0,5.0,,,
"Boy Erased: A Memoir of Identity, Faith, and Family",,,,,,,,,,,...,0.0,,0.0,,,5.0,,,,0.0
Búi Thông Thơ Dại,,,,,,,,,,0.0,...,,,,,,,,,,
Bắt Đầu Cất Lên Tiếng Cười,,,,,,,,,,,...,,,,,,,,,,
Bức Thư Tình Có Vị Ngọt Của Bánh,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Đi Hết Một Đời Anh Vẫn Là Của Em (Tái Bản),,,,,,,,,,,...,,,,,,,,,,
Điều Còn Lại Giữa Chúng Ta,,,,,,,,,,,...,,,,,,,,,,
Đổi Nụ Cười Lấy Bên Người Trăm Năm - Tặng Kèm Bookmark + 2 Postcard,,,,,,,,1.0,,,...,,,,,,,,,,
"Đợi Một Danh Phận, Cả Đời Oán Hận",,,,,,,,,,,...,,,,,,,,,,


In [109]:
book_pivot.shape

(100, 183)

In [110]:
book_pivot.fillna(0, inplace=True)

In [111]:
book_pivot

user_id,1,2,3,4,5,6,7,8,9,10,...,189,190,191,192,193,194,195,196,199,200
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Clash Of Kings,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.0,3.0,0.0,0.0,1.0,5.0,0.0,0.0,0.0
"Boy Erased: A Memoir of Identity, Faith, and Family",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
Búi Thông Thơ Dại,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bắt Đầu Cất Lên Tiếng Cười,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bức Thư Tình Có Vị Ngọt Của Bánh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Đi Hết Một Đời Anh Vẫn Là Của Em (Tái Bản),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Điều Còn Lại Giữa Chúng Ta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Đổi Nụ Cười Lấy Bên Người Trăm Năm - Tặng Kèm Bookmark + 2 Postcard,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Đợi Một Danh Phận, Cả Đời Oán Hận",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [112]:
from scipy.sparse import csr_matrix

In [113]:
book_sparse = csr_matrix(book_pivot)

In [114]:
book_sparse

<100x183 sparse matrix of type '<class 'numpy.float64'>'
	with 1456 stored elements in Compressed Sparse Row format>

In [115]:
# Model Building
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm='brute')

In [116]:
model.fit(book_sparse)

In [117]:
distance, suggestion = model.kneighbors(book_pivot.iloc[12, :].values.reshape(1, -1), n_neighbors=6)

In [118]:
distance

array([[ 0.        , 14.2126704 , 14.2126704 , 14.24780685, 14.31782106,
        14.52583905]])

In [119]:
suggestion

array([[12, 90,  9, 68, 20, 57]], dtype=int64)

In [120]:
for i in range(len(suggestion)):
    print(book_pivot.index[suggestion[i]])

Index(['Compact First for Schools Workbook Without Answers with Audio-CD',
       'Will', 'Chưa Kịp Lớn Đã Trưởng Thành (Tái Bản 2023)',
       'Sông (Tái Bàn 2020)', 'Dear, Darling', 'Những Ta'],
      dtype='object', name='title')


In [121]:
book_pivot.index[3]

'Bắt Đầu Cất Lên Tiếng Cười'

In [122]:
book_pivot.index

Index(['A Clash Of Kings',
       'Boy Erased: A Memoir of Identity, Faith, and Family',
       'Búi Thông Thơ Dại', 'Bắt Đầu Cất Lên Tiếng Cười',
       'Bức Thư Tình Có Vị Ngọt Của Bánh', 'Chim Trời Bay Về Sau Cơn Mưa',
       'Chuyện Kể Rằng Có Nàng Và Tôi', 'Chí Phèo',
       'Chúng Ta Rồi Sẽ Hạnh Phúc, Theo Những Cách Khác Nhau',
       'Chưa Kịp Lớn Đã Trưởng Thành (Tái Bản 2023)',
       'Chắc Gì Ta Đã Yêu Nhau (Tái Bản 2022)',
       'Chọn Cô Đơn Giữa Biển Người Vô Tận',
       'Compact First for Schools Workbook Without Answers with Audio-CD',
       'Con Chim Nhỏ Gắp Cọng Rơm Vàng',
       'Cuối Con Đường Sẽ Gặp Một Người Thương - Tặng Kèm Bookmark',
       'Cây Cam Ngọt Của Tôi', 'Có Một Ngày, Bố Mẹ Sẽ Già Đi',
       'Cô Bé Ngủ Trên Dây Điện', 'DO LUNCH OR BE LUNCH',
       'Danh Tác Việt Nam - Đôi Mắt', 'Dear, Darling',
       'Disruption Proof: Empower People, Create Value, Drive Change',
       'Donald Trump Và Cô Bé Sài Gòn', 'Dài Hơn Một Mùa Hè',
       'English Gramm

In [123]:
books_name = book_pivot.index

In [124]:
import pickle
pickle.dump(model, open('./artifacts/model.pkl', 'wb'))
pickle.dump(books_name, open('./artifacts/books_name.pkl', 'wb'))
pickle.dump(final_rating, open('./artifacts/final_rating.pkl', 'wb'))
pickle.dump(book_pivot, open('./artifacts/book_pivot.pkl', 'wb'))

In [125]:
def recommend_book(book_name):
    book_id = np.where(book_pivot.index == book_name)[0][0]
    distance, suggestion = model.kneighbors(book_pivot.iloc[book_id, :].values.reshape(1, -1), n_neighbors=6)

    for i in range(len(suggestion)):
        books = book_pivot.index[suggestion[i]]
        for j in books:
            print(j)

In [126]:
book_name = 'Cây Cam Ngọt Của Tôi'
recommend_book(book_name)

Cây Cam Ngọt Của Tôi
Sông (Tái Bàn 2020)
Những Ngày Buồn Chóng Qua
Dear, Darling
Chọn Cô Đơn Giữa Biển Người Vô Tận
Về Nghe Yêu Kể


In [127]:
book_name = 'The Infinite Leader: Balancing The Demands Of Modern Business Leadership (Kogan Page Inspire)'
recommend_book(book_name)

The Infinite Leader: Balancing The Demands Of Modern Business Leadership (Kogan Page Inspire)
Chưa Kịp Lớn Đã Trưởng Thành (Tái Bản 2023)
Sông (Tái Bàn 2020)
Dear, Darling
Những Ta
Về Nghe Yêu Kể


In [128]:
book_name = 'DO LUNCH OR BE LUNCH'
recommend_book(book_name)

DO LUNCH OR BE LUNCH
Chưa Kịp Lớn Đã Trưởng Thành (Tái Bản 2023)
Sông (Tái Bàn 2020)
The Infinite Leader: Balancing The Demands Of Modern Business Leadership (Kogan Page Inspire)
Dear, Darling
Những Ta
