In [1]:
# Importing Libraries
import pandas as pd

In [2]:
# Adjusting pandas dataframe settings
pd.set_option('display.max_columns', 500)

In [3]:
# Reading Dataframes
books = pd.read_csv("../input/book-recommendation-dataset/Books.csv", low_memory='False')
ratings = pd.read_csv('../input/book-recommendation-dataset/Ratings.csv', low_memory='False')
users = pd.read_csv("../input/book-recommendation-dataset/Users.csv", low_memory='False')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Merging Dataframes
temp_df = books.merge(ratings, how='left', on='ISBN')
final_df = temp_df.merge(users, how='left', on='User-ID')
final_df.shape

(1032345, 12)

In [5]:
# Data Preprocessing
final_df.drop(['Image-URL-S', 'Image-URL-M','Image-URL-L'], axis=1, inplace=True)
final_df.dropna(inplace=True)

final_df['User-ID'] = final_df['User-ID'].astype('int')
final_df=final_df[final_df["Book-Rating"]>0]

In [6]:
# Analysis of Data

final_df.columns
final_df['User-ID'].nunique() # 40543 unique Readers
final_df['ISBN'].nunique() # 119945 unique ISBN number (books)
final_df['Book-Title'].nunique() # 109210 unique Books.
final_df['Book-Author'].nunique() # 50883 unique Author.
final_df['Publisher'].nunique() # 9749 unique Publisher.
final_df['Book-Rating'].nunique() # 10 unique Rating (1-10).

final_df.groupby('User-ID')['Book-Title'].agg('count').sort_values(ascending=False)

User-ID
98391     5689
153662    1845
235105    1020
171118     962
16795      959
          ... 
120886       1
120862       1
120861       1
120852       1
278852       1
Name: Book-Title, Length: 40543, dtype: int64

In [7]:
final_df.groupby('Book-Author')['ISBN'].agg('count').sort_values(ascending=False)

Book-Author
Stephen King         3488
Nora Roberts         2138
John Grisham         1670
James Patterson      1559
J. K. Rowling        1375
                     ... 
James Hawkins           1
James Henry Gray        1
James Henry Rubin       1
James Hoffman           1
Ã?Â?pictÃ?Â¨te          1
Name: ISBN, Length: 50883, dtype: int64

In [8]:
final_df.groupby('ISBN').agg({'Book-Title':'count'}).sort_values(by='Book-Title',ascending=False).head(100)

Unnamed: 0_level_0,Book-Title
ISBN,Unnamed: 1_level_1
0971880107,473
0316666343,473
0385504209,373
0312195516,227
059035342X,224
...,...
0385492081,82
0385486804,82
0449212602,81
0375700757,80


In [9]:
final_df.groupby('User-ID')['Book-Rating'].agg('count').sort_values(ascending=False)

User-ID
98391     5689
153662    1845
235105    1020
171118     962
16795      959
          ... 
120886       1
120862       1
120861       1
120852       1
278852       1
Name: Book-Rating, Length: 40543, dtype: int64

In [10]:
# We can calculate the ratings of the books and eliminate the rare books by determining a threshold which is 85 in this case by removing the main dataframe.
# Finally, we have common books which are rated more than 85.
rating_book = pd.DataFrame(final_df['Book-Title'].value_counts())
final_df['Book-Title'].value_counts().mean() # 2.4688

rare_books = rating_book[rating_book['Book-Title'] <= 85].index

common_books = final_df[~final_df["Book-Title"].isin(rare_books)]
common_books['Book-Title'].value_counts().mean() # 139.6589

139.65891472868216

In [11]:
# Creating User-Item Matrix Dataframe
user_book_matrix = common_books.pivot_table(index=['User-ID'], columns=['Book-Title'], values='Book-Rating')
user_book_matrix.shape

(8742, 129)

In [12]:
# Item based collaborative filtering engine provide us a relations between items that is books in this case, by looking their correlations each other.
# If the correlations are quite similar, that means these books are similar according to maybe their properties, genres. Hence, we can recommend them to user.
book_name = "Bridget Jones's Diary"

def book_to_ISBN(name):
    """
    This function gives us the ISBN number of the book for the name of the book which you want to learn ISBN number.
    :param name: the name of the book
    :return: ISBN; the books are identified by their respective ISBN
    """
    ISBN = final_df[final_df['Book-Title'] == name][['ISBN']].iloc[0,0]
    return ISBN

ISBN_no = book_to_ISBN(book_name)

book_df = user_book_matrix[book_name]
book_df.sort_values(ascending=False)

User-ID
143253    10.0
110912    10.0
125519    10.0
48306     10.0
179733    10.0
          ... 
278798     NaN
278800     NaN
278836     NaN
278844     NaN
278846     NaN
Name: Bridget Jones's Diary, Length: 8742, dtype: float64

In [13]:
# Let's look at the correlations between books with selected book and sort the best.
similar_to_book_name = user_book_matrix.corrwith(book_df)

corr_book_name = pd.DataFrame(similar_to_book_name, columns=['Correlation'])
corr_book_name.dropna(inplace=True)
corr_book_name.sort_values(by='Correlation',ascending=False).head(50)

Unnamed: 0_level_0,Correlation
Book-Title,Unnamed: 1_level_1
Dreamcatcher,1.0
Bridget Jones's Diary,1.0
Timeline,0.997176
Outlander,0.973124
The Alienist,0.967533
The Bonesetter's Daughter,0.901388
Interview with the Vampire,0.887412
Bel Canto: A Novel,0.886621
"The Subtle Knife (His Dark Materials, Book 2)",0.885615
White Oleander : A Novel,0.881529


In [14]:
# Creating recommendation list.
recommended_list = corr_book_name.reset_index().sort_values(by=('Correlation'),ascending=False)['Book-Title'].head(10).to_list()
recommended_list

['Dreamcatcher',
 "Bridget Jones's Diary",
 'Timeline',
 'Outlander',
 'The Alienist',
 "The Bonesetter's Daughter",
 'Interview with the Vampire',
 'Bel Canto: A Novel',
 'The Subtle Knife (His Dark Materials, Book 2)',
 'White Oleander : A Novel']