# Book Recommendation

## Library

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics.pairwise import cosine_similarity as cs
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import pickle5 as pickle
from typing import Dict, Text
import warnings
warnings.filterwarnings('ignore')

2023-05-14 01:14:32.918804: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-14 01:14:33.248815: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-14 01:14:33.248851: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-05-14 01:14:34.370253: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

## Dataset Overview


1. **Books data**: Gives a detailed overview of each book, including its title, publication year, and author.
2. **Users data**: Gives a detailed overview of each user, such as the user's location and age.
3. **Ratings data**: Gives a detailed overview of the rating each user gave to each book.

## Preprocessing

### Books Data

In [2]:
# Load the first five columns of the books file (ISBN, title, author, year, publisher).
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; `on_bad_lines='warn'`
# keeps the previous behaviour of skipping malformed rows while still reporting them.
df_books1 = pd.read_csv('data/Books.csv', sep=',', on_bad_lines='warn', usecols=[0, 1, 2, 3, 4])

In [3]:
df_books1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 5 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271360 non-null  object
 1   Book-Title           271360 non-null  object
 2   Book-Author          271359 non-null  object
 3   Year-Of-Publication  271360 non-null  object
 4   Publisher            271358 non-null  object
dtypes: object(5)
memory usage: 10.4+ MB


In [4]:
# Try the integer cast directly. If any value is not a valid integer literal the
# column is left unchanged and the error text is printed rather than raised, so the
# offending value can be inspected in the next cell.
try:
    df_books1['Year-Of-Publication'] = df_books1['Year-Of-Publication'].astype(int)
except Exception as cast_error:
    print(cast_error)


invalid literal for int() with base 10: 'DK Publishing Inc'


In [5]:
df_books1[df_books1['Year-Of-Publication'] == 'DK Publishing Inc']

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
209538,078946697X,"DK Readers: Creating the X-Men, How It All Beg...",2000,DK Publishing Inc,http://images.amazon.com/images/P/078946697X.0...
221678,0789466953,"DK Readers: Creating the X-Men, How Comic Book...",2000,DK Publishing Inc,http://images.amazon.com/images/P/0789466953.0...


In [6]:
# Values that cannot be parsed as a number (e.g. 'DK Publishing Inc', seen above)
# become NaN instead of raising, so they can be dropped in the next step.
df_books1['Year-Of-Publication'] = pd.to_numeric(df_books1['Year-Of-Publication'],errors='coerce')

In [7]:
# Drop every row that has a missing value in ANY column (not only the coerced year:
# rows with a missing author or publisher are removed too), then cast the year to int.
# Chaining the two steps avoids assigning a column on the frame returned by dropna(),
# which can trigger pandas' SettingWithCopyWarning.
df_books1 = df_books1.dropna().astype({'Year-Of-Publication': int})

In [8]:
df_books1.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company


In [9]:
# Second book catalogue, used for its category labels.
# NOTE(review): read_csv treats the first line as the header by default, and the
# column names are overwritten right after this cell. If the file has no header
# row, its first record is silently consumed here — confirm against the raw CSV.
df_books2 = pd.read_csv('data/BooksWithCategory.csv')

In [10]:
# Assign readable column names. The `inplace` argument of set_axis was deprecated in
# pandas 1.5 and removed in 2.0, so the renamed frame is bound back to the name instead.
df_books2 = df_books2.set_axis(
    ['ASIN', 'Filename', 'Image Url', 'Book-Title', 'Author', 'Category ID', 'Category'],
    axis=1,
)

In [11]:
df_books2.head()

Unnamed: 0,ASIN,Filename,Image Url,Book-Title,Author,Category ID,Category
0,1623439671,1623439671.jpg,http://ecx.images-amazon.com/images/I/61t-hrSw...,Doug the Pug 2016 Wall Calendar,Doug the Pug,3,Calendars
1,B00O80WC6I,B00O80WC6I.jpg,http://ecx.images-amazon.com/images/I/41X-KQqs...,"Moleskine 2016 Weekly Notebook, 12M, Large, Bl...",Moleskine,3,Calendars
2,761182187,0761182187.jpg,http://ecx.images-amazon.com/images/I/61j-4gxJ...,365 Cats Color Page-A-Day Calendar 2016,Workman Publishing,3,Calendars
3,1578052084,1578052084.jpg,http://ecx.images-amazon.com/images/I/51Ry4Tsq...,Sierra Club Engagement Calendar 2016,Sierra Club,3,Calendars
4,1578052076,1578052076.jpg,http://ecx.images-amazon.com/images/I/619KxYEq...,Sierra Club Wilderness Calendar 2016,Sierra Club,3,Calendars


In [12]:
# Keep only the title and category columns; the rest is not used by the merge below.
df_books2 = df_books2.drop(columns=['ASIN', 'Filename', 'Image Url', 'Author'])

In [13]:
df_books2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207571 entries, 0 to 207570
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Book-Title   207571 non-null  object
 1   Category ID  207571 non-null  int64 
 2   Category     207571 non-null  object
dtypes: int64(1), object(2)
memory usage: 4.8+ MB


In [14]:
# Inner join on the exact title string: only books present in both catalogues survive.
# A title that occurs several times on either side yields one row per combination,
# so the result can contain repeated titles/ISBNs.
df_books = df_books1.merge(df_books2, on='Book-Title')

### Users Data

In [15]:
# Load the first three columns of the users file (User-ID, Location, Age).
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; `on_bad_lines='warn'`
# keeps the previous behaviour of skipping malformed rows while still reporting them.
df_users = pd.read_csv('data/Users.csv', sep=',', on_bad_lines='warn', usecols=[0, 1, 2])

In [16]:
df_users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 278858 entries, 0 to 278857
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   User-ID   278858 non-null  int64  
 1   Location  278858 non-null  object 
 2   Age       168096 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 6.4+ MB


In [17]:
df_users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


### Ratings Data

In [18]:
# Load the first three columns of the ratings file (User-ID, ISBN, Book-Rating).
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; `on_bad_lines='warn'`
# keeps the previous behaviour of skipping malformed rows while still reporting them.
df_ratings = pd.read_csv('data/Ratings.csv', sep=',', on_bad_lines='warn', usecols=[0, 1, 2])

In [19]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149780 entries, 0 to 1149779
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   User-ID      1149780 non-null  int64 
 1   ISBN         1149780 non-null  object
 2   Book-Rating  1149780 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 26.3+ MB


In [20]:
df_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


### Data Cleaning

In [21]:
df_books

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Category ID,Category
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,15,Literature & Fiction
1,0801319536,Classical Mythology,Mark P. O. Morford,1998,John Wiley &amp; Sons,15,Literature & Fiction
2,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,15,Literature & Fiction
3,080410753X,The Kitchen God's Wife,Amy Tan,1992,Ivy Books,15,Literature & Fiction
4,080410753x,The Kitchen God's Wife,Amy Tan,1992,Ivy Books,15,Literature & Fiction
...,...,...,...,...,...,...,...
14620,0911647155,Cutting (Western Horseman Books),Leon Harrel,2002,Western Horseman,26,Sports & Outdoors
14621,0691027641,Makers of Modern Strategy from Machiavelli to ...,Peter Paret,1986,Princeton University Press,12,History
14622,0395957699,One Man's Garden,Henry Mitchell,1999,Mariner Books,8,"Crafts, Hobbies & Home"
14623,0881924989,The Cactus Family,Edward F. Anderson,2001,Timber Press (OR),8,"Crafts, Hobbies & Home"


In [22]:
# Keep only ratings whose ISBN exists in the merged book catalogue, and report
# the row count on either side of the filter.
print(f'Number of data before cleaning : {len(df_ratings)}')
df_ratings = df_ratings.loc[df_ratings['ISBN'].isin(df_books['ISBN'])]
print(f'Number of data after cleaning : {len(df_ratings)}')

Number of data before cleaning : 1149780
Number of data after cleaning : 96495


In [23]:
df_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
2,276727,0446520802,0
8,276744,038550120X,7
10,276746,0425115801,0
19,276747,0671537458,9
24,276748,0747558167,6
...,...,...,...
1149751,276690,0440439884,0
1149763,276704,0395404258,0
1149768,276704,0446605409,0
1149771,276704,0743211383,7


In [24]:
# Attach book metadata to every rating (inner join on ISBN). An ISBN that appears
# more than once in df_books produces one output row per match, so the result can
# be longer than df_ratings; duplicates are removed in the next cell.
df_official = pd.merge(df_ratings, df_books, on='ISBN')

In [25]:
# One row per (user, ISBN) pair; when the merge produced repeats, the last one is kept.
df_official = df_official.drop_duplicates(subset=['User-ID', 'ISBN'], keep='last')

In [26]:
df_official

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Category ID,Category
0,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
1,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
2,638,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
3,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
4,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
...,...,...,...,...,...,...,...,...,...
115568,276556,055337849x,10,The Brothers K,David James Duncan,1996,Bantam,15,Literature & Fiction
115569,276581,0312311362,0,Radiance: A Novel,Carter Scholz,2003,Picador USA,24,Science Fiction & Fantasy
115570,276641,0151006555,0,Bay of Tigers: An Odyssey through War-torn Angola,Pedro Rosa Mendes,2003,Harcourt,29,Travel
115571,276688,0060168307,0,Sacred Clowns,Tony Hillerman,1994,Harpercollins,15,Literature & Fiction


In [27]:
df_official

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Category ID,Category
0,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
1,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
2,638,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
3,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
4,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
...,...,...,...,...,...,...,...,...,...
115568,276556,055337849x,10,The Brothers K,David James Duncan,1996,Bantam,15,Literature & Fiction
115569,276581,0312311362,0,Radiance: A Novel,Carter Scholz,2003,Picador USA,24,Science Fiction & Fantasy
115570,276641,0151006555,0,Bay of Tigers: An Odyssey through War-torn Angola,Pedro Rosa Mendes,2003,Harcourt,29,Travel
115571,276688,0060168307,0,Sacred Clowns,Tony Hillerman,1994,Harpercollins,15,Literature & Fiction


In [28]:
df_official

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Category ID,Category
0,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
1,278418,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
2,638,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
3,3363,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
4,7158,0446520802,10,The Notebook,Nicholas Sparks,1996,Warner Books,22,Romance
...,...,...,...,...,...,...,...,...,...
115568,276556,055337849x,10,The Brothers K,David James Duncan,1996,Bantam,15,Literature & Fiction
115569,276581,0312311362,0,Radiance: A Novel,Carter Scholz,2003,Picador USA,24,Science Fiction & Fantasy
115570,276641,0151006555,0,Bay of Tigers: An Odyssey through War-torn Angola,Pedro Rosa Mendes,2003,Harcourt,29,Travel
115571,276688,0060168307,0,Sacred Clowns,Tony Hillerman,1994,Harpercollins,15,Literature & Fiction


### Collaborative Filtering

In [29]:
# Reduce the merged frame to the three columns used for collaborative filtering
# (User-ID, Book-Rating, Book-Title) and keep one rating per (user, title) pair,
# preferring the last occurrence. drop() returns a new frame, so df_official is
# left untouched.
df1 = (
    df_official
    .drop(columns=['Publisher', 'Category ID', 'ISBN', 'Year-Of-Publication', 'Book-Author', 'Category'])
    .drop_duplicates(subset=['User-ID', 'Book-Title'], keep='last')
)
# Independent copy for the ">= 50 ratings" variant further down. df1 is already
# unique per (User-ID, Book-Title), so a second de-duplication would be a no-op.
df2 = df1.copy()

In [30]:
df1

Unnamed: 0,User-ID,Book-Rating,Book-Title
0,276727,0,The Notebook
2,638,0,The Notebook
3,3363,0,The Notebook
5,8253,10,The Notebook
6,9939,0,The Notebook
...,...,...,...
115568,276556,10,The Brothers K
115569,276581,0,Radiance: A Novel
115570,276641,0,Bay of Tigers: An Odyssey through War-torn Angola
115571,276688,0,Sacred Clowns


In [31]:
# Title-by-user rating matrix: one row per book title, one column per user.
# pivot_table aggregates with the mean by default; df1 holds at most one rating per
# (User-ID, Book-Title) pair, so each cell is simply that rating (NaN where absent).
book_pivot=df1.pivot_table(columns='User-ID',index='Book-Title',values='Book-Rating')

In [32]:
book_pivot

User-ID,2,8,9,16,17,26,32,53,67,75,...,278672,278723,278732,278769,278771,278781,278843,278846,278849,278851
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
$30 Film School,,,,,,,,,,,...,,,,,,,,,,
'Salem's Lot,,,,,,,,,,,...,,,,,,,,,,
'Til Death Do Us Part,,,,,,,,,,,...,,,,,,,,,,
'Tis: A Memoir,,,,,,,,,,,...,,,,,,,,,,
"1,000 Vegetarian Recipes",,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zodiac,,,,,,,,,,,...,,,,,,,,,,
Zodiac Unmasked: The Identity of America's Most Elusive Serial Killer Revealed,,,,,,,,,,,...,,,,,,,,,,
Zohar: The Book of Splendor: Basic Readings from the Kabbalah,,,,,,,,,,,...,,,,,,,,,,
"Zone Food Blocks: The Quick and Easy, Mix-and-Match Counter for Staying in the Zone",,,,,,,,,,,...,,,,,,,,,,


In [33]:
# Replace "user never rated this title" (NaN) with 0.
# NOTE(review): 0 is also the most frequent real value of Book-Rating (see the
# value_counts below), so after this step "not rated" and "rated 0" are
# indistinguishable to the model.
book_pivot.fillna(0,inplace=True)

In [34]:
df1['Book-Rating'].value_counts()

0     55288
8      9749
10     9128
9      7289
7      6318
5      3569
6      2642
4       652
3       456
2       192
1       133
Name: Book-Rating, dtype: int64

In [35]:
# Compressed sparse row copy of the pivot, used to fit the neighbour model.
book_sparse=csr_matrix(book_pivot)

In [36]:
# Exhaustive (brute-force) nearest-neighbour search over book rating vectors.
# No metric is passed, so scikit-learn's default (Minkowski with p=2, i.e. Euclidean
# distance) is used.
# NOTE(review): with mostly-zero rating vectors, Euclidean distance tends to favour
# other rarely-rated books; consider whether metric='cosine' was intended — confirm.
model=NearestNeighbors(algorithm='brute')

In [37]:
# Index every book row so neighbours can be queried by rating vector.
model.fit(book_sparse)

In [38]:
# Smoke test: query the six nearest rows for the book at row position 237.
# The position is hard-coded (presumably an arbitrary example); the result holds
# row positions into book_pivot, not titles.
distances,suggestions=model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1),n_neighbors=6)

In [39]:
suggestions

array([[ 237, 5112, 2014, 5105, 5101, 5114]])

In [40]:
# `suggestions` has one row per query; translate each row of positions into titles.
for i, neighbour_positions in enumerate(suggestions):
    print(book_pivot.index[neighbour_positions])


Index(['A Storm in Flanders: The Ypres Salient, 1914-1918: Tragedy and Triumph on the Western Front',
       'The Alfred Hitchcock Presents Companion',
       'For the President's Eyes Only: Secret Intelligence and the American Presidency from Washington to Bush',
       'The African Queen', 'The Affair', 'The Allure Of Men'],
      dtype='object', name='Book-Title')


In [41]:
def recommend_book(book_name, n_neighbors=6, pivot=None, knn=None):
    """Print the titles whose rating vectors are closest to ``book_name``.

    Parameters
    ----------
    book_name : str
        Title to look up; it must match an entry of the pivot's index exactly.
    n_neighbors : int, default 6
        Number of neighbours to request. In the outputs shown in this notebook the
        looked-up book comes back as its own first neighbour, so 6 gives the book
        itself plus five others.
    pivot : pandas.DataFrame, optional
        Title-by-user rating matrix. Defaults to the notebook-level ``book_pivot``,
        resolved at call time (so it uses whichever pivot is bound when called).
    knn : object with a ``kneighbors`` method, optional
        Fitted neighbour model. Defaults to the notebook-level ``model``, also
        resolved at call time.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in the pivot index. The exception type is
        the one the previous implementation raised; it now carries a clear message.
    """
    if pivot is None:
        pivot = book_pivot
    if knn is None:
        knn = model

    matches = np.flatnonzero(pivot.index == book_name)
    if matches.size == 0:
        raise IndexError(f"Book title not found in the rating matrix: {book_name!r}")
    bookid = matches[0]

    # Never ask for more neighbours than there are rows (assumes knn was fitted on
    # the same rows as `pivot`); scikit-learn raises in that case.
    k = min(n_neighbors, len(pivot))
    query = pivot.iloc[bookid, :].values.reshape(1, -1)
    distances, suggestions = knn.kneighbors(query, n_neighbors=k)

    # A single query row produces a single row of neighbour positions.
    print("Recommended Books Are :")
    print(pivot.index[suggestions[0]])
            

In [42]:
recommend_book("The Brothers K")

Recommended Books Are :
Index(['The Brothers K', 'In Tuscany',
       'After the Ball: How America Will Conquer Its Fear and Hatred of Gays in the 90's',
       'Going the Other Way: Lessons from a Life in and out of Major-League Baseball',
       'QED: The Strange Theory of Light and Matter',
       'Forbidden Grief: The Unspoken Pain of Abortion'],
      dtype='object', name='Book-Title')


In [43]:
# Persist the fitted neighbour model. The context manager guarantees the file is
# flushed and closed even if pickling fails (the bare open() call leaked the handle).
# Note: the estimator only returns row positions; mapping them back to titles also
# needs book_pivot's index, which is not saved here.
with open('model/book_cb.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

#### Books with at least 50 ratings

In [44]:
df2

Unnamed: 0,User-ID,Book-Rating,Book-Title
0,276727,0,The Notebook
2,638,0,The Notebook
3,3363,0,The Notebook
5,8253,10,The Notebook
6,9939,0,The Notebook
...,...,...,...
115568,276556,10,The Brothers K
115569,276581,0,Radiance: A Novel
115570,276641,0,Bay of Tigers: An Odyssey through War-torn Angola
115571,276688,0,Sacred Clowns


In [45]:
# Baseline for comparison: at this point `book_pivot` and `model` are still the
# unfiltered versions built above; the filtered ones are only created in later cells.
recommend_book("The Notebook")

Recommended Books Are :
Index(['The Notebook', 'The Kissing Hand', 'A Cottage in Portugal',
       'Nicole Brown Simpson: The Private Diary of a Life Interrupted',
       'The Interpretation of Dreams (Oxford World's Classics)',
       'The Sisters: The Saga of the Mitford Family'],
      dtype='object', name='Book-Title')


In [46]:
# Switch to snake_case column names for the filtered-variant pipeline below.
df2 = df2.rename(
    columns={'User-ID': 'user_id', 'Book-Title': 'book_title', 'Book-Rating': 'book_rating'}
)

In [47]:
# Count how many ratings each title received.
number_rating = (
    df2.groupby('book_title')['book_rating']
    .count()
    .reset_index()
    .rename(columns={'book_rating': 'number_of_ratings'})
)

# Attach the per-title count to every rating row, then keep only titles with at
# least 50 ratings. De-duplicating as part of the chain (instead of in place on a
# boolean-mask slice) avoids pandas' SettingWithCopyWarning; the stray no-op
# `final_rating.shape` expression was removed.
final_rating = df2.merge(number_rating, on='book_title')
final_rating = (
    final_rating[final_rating['number_of_ratings'] >= 50]
    .drop_duplicates(['user_id', 'book_title'])
)

In [58]:
# Rebuild the title-by-user matrix from the filtered ratings, with 0 for
# "not rated". This rebinds `book_pivot`, replacing the unfiltered matrix.
book_pivot = (
    final_rating
    .pivot_table(columns='user_id', index='book_title', values="book_rating")
    .fillna(0)
)
book_pivot

user_id,8,16,17,26,32,67,75,77,91,95,...,278633,278653,278672,278723,278732,278769,278771,278781,278843,278846
book_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010: Odyssey Two,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Case of Need,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Civil Action,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Where the Red Fern Grows,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
White Teeth: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wild Swans: Three Daughters of China,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Women Who Run with the Wolves,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
# Sparse copy of the filtered pivot; rebinds `book_sparse`.
book_sparse = csr_matrix(book_pivot)

In [50]:
# Fresh brute-force neighbour model fitted on the filtered matrix; rebinds `model`,
# so later recommend_book calls use this one. Metric is left at scikit-learn's
# default (Minkowski with p=2, i.e. Euclidean).
model = NearestNeighbors(algorithm='brute')
model.fit(book_sparse)

In [51]:
# Smoke test on the filtered model. No n_neighbors is passed, so the estimator's
# own default is used. The hard-coded position 237 requires the filtered pivot to
# have at least 238 rows. The result is not displayed in this cell.
distances, suggestions = model.kneighbors(book_pivot.iloc[237, :].values.reshape(1, -1))

In [52]:
def recommend_book(book_name, n_neighbors=6, pivot=None, knn=None):
    """Print the titles whose rating vectors are closest to ``book_name``.

    Parameters
    ----------
    book_name : str
        Title to look up; it must match an entry of the pivot's index exactly.
    n_neighbors : int, default 6
        Number of neighbours to request. In the outputs shown in this notebook the
        looked-up book comes back as its own first neighbour, so 6 gives the book
        itself plus five others.
    pivot : pandas.DataFrame, optional
        Title-by-user rating matrix. Defaults to the notebook-level ``book_pivot``,
        resolved at call time (so it uses whichever pivot is bound when called).
    knn : object with a ``kneighbors`` method, optional
        Fitted neighbour model. Defaults to the notebook-level ``model``, also
        resolved at call time.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in the pivot index. The exception type is
        the one the previous implementation raised; it now carries a clear message.
    """
    if pivot is None:
        pivot = book_pivot
    if knn is None:
        knn = model

    matches = np.flatnonzero(pivot.index == book_name)
    if matches.size == 0:
        raise IndexError(f"Book title not found in the rating matrix: {book_name!r}")
    bookid = matches[0]

    # Never ask for more neighbours than there are rows (assumes knn was fitted on
    # the same rows as `pivot`); scikit-learn raises in that case.
    k = min(n_neighbors, len(pivot))
    query = pivot.iloc[bookid, :].values.reshape(1, -1)
    distances, suggestions = knn.kneighbors(query, n_neighbors=k)

    # A single query row produces a single row of neighbour positions.
    print("Recommended Books Are :")
    print(pivot.index[suggestions[0]])
            

In [53]:
recommend_book("The Notebook")

Recommended Books Are :
Index(['The Notebook', 'Reasonable Doubt', 'Flight of the Old Dog',
       'Now and Forever', 'Special Delivery', 'Mixed Blessings'],
      dtype='object', name='book_title')


In [54]:
# One representative row per title among the books that passed the >= 50 ratings
# filter (the first occurrence of each title is kept).
cek_book = final_rating.drop_duplicates(subset=['book_title'])

In [55]:
cek_book

Unnamed: 0,user_id,book_rating,book_title,number_of_ratings
0,276727,0,The Notebook,619
619,276744,7,A Painted House,818
1437,276746,0,Lightning,256
1693,276747,9,Waiting to Exhale,132
1829,276755,5,The Pillars of the Earth,231
...,...,...,...,...
75834,13552,8,The Mask,72
77705,16695,0,Not Without My Daughter,60
79071,18398,5,Timequake,68
80854,23792,8,The Presence,66


In [56]:
# Same idea for the unfiltered ratings: one row per distinct title in df1.
cek_book2 = df1.drop_duplicates(subset=['Book-Title'])

In [59]:
# Sanity check: cek_book2 was just de-duplicated on Book-Title, so this is 0 by construction.
cek_book2.duplicated(subset=['Book-Title']).sum()

0