Book Recommendation System

Importing the necessary Python libraries and the dataset

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import neighbors
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the books dataset into `df`.
# The file is opened with errors='ignore' so that bytes which cannot be
# decoded are dropped instead of aborting the read.
try:
    with open('books.csv', 'r', errors='ignore') as file:
        df = pd.read_csv(file)
except Exception as e:
    print("Error:", e)
    print("Failed to read the CSV file.")
    # Re-raise after reporting: every later cell needs `df`, so carrying on
    # would only resurface as a confusing NameError further down.
    raise


In [None]:
# Preview the first rows of the loaded dataset.
df.head()

Data Exploration

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the dataset's columns.
df.describe()

In [None]:
# Restrict to heavily rated books (more than 1,000,000 ratings) and order
# them from the highest to the lowest average rating.
heavily_rated = df['ratings_count'] > 1000000
top_ten = df.loc[heavily_rated].sort_values(by='average_rating', ascending=False)

# Only the ten best-rated of those books are plotted.
data = top_ten.head(10)

# Horizontal bar chart: one bar per title, bar length = average rating.
plt.figure(figsize=(10, 10))
sns.barplot(data=data, x="average_rating", y="title", palette='inferno')
plt.title('Top 10 Books by Average Rating')
plt.xlabel('Average Rating')
plt.ylabel('Title')
plt.show()

In [None]:
# Count the titles per author, then keep the ten authors with the most
# titles, indexed by author name.
books_per_author = df.groupby('authors')['title'].count().reset_index()
most_books = (
    books_per_author
    .sort_values('title', ascending=False)
    .head(10)
    .set_index('authors')
)

# Horizontal bar chart of the per-author title counts.
plt.figure(figsize=(15, 10))
ax = sns.barplot(x=most_books['title'], y=most_books.index, palette='inferno')
ax.set_title("Top 10 authors with most books")
ax.set_xlabel("Total number of books")

# Write each bar's rounded length just past its right-hand end,
# at the vertical middle of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_y = bar.get_y() + bar.get_height() / 2
    ax.text(bar_length + 0.2, label_y, str(round(bar_length)), fontsize=12, color='black')

plt.show()

In [None]:
# The ten books with the largest number of ratings, indexed by title.
most_rated = (
    df.sort_values('ratings_count', ascending=False)
    .head(10)
    .set_index('title')
)

# Horizontal bar chart: one bar per title, bar length = number of ratings.
plt.figure(figsize=(15, 10))
ax = sns.barplot(x=most_rated['ratings_count'], y=most_rated.index, palette='inferno')

# Write each bar's rounded length just past its right-hand end,
# at the vertical middle of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_y = bar.get_y() + bar.get_height() / 2
    ax.text(bar_length + 0.2, label_y, str(round(bar_length)), fontsize=12, color='black')

plt.title('Top 10 Most Rated Books')
plt.xlabel('Number of Ratings')
plt.ylabel('Book Title')
plt.show()

In [None]:
# Make sure the rating column is numeric before plotting its distribution.
# Bracket assignment is used because attribute-style assignment on a
# DataFrame is easy to mistake for setting a plain Python attribute.
df['average_rating'] = df['average_rating'].astype(float)

fig, ax = plt.subplots(figsize=[15, 10])
# sns.distplot is deprecated and scheduled for removal from seaborn; a
# density-scaled histogram with a KDE overlay is its documented replacement.
# cut=3 lets the KDE curve extend past the data range as distplot's did.
sns.histplot(df['average_rating'], kde=True, stat='density', kde_kws=dict(cut=3), ax=ax)
ax.set_title('Average rating distribution for all books', fontsize=20)
ax.set_xlabel('Average rating', fontsize=13)
plt.show()

In [None]:
# Scatter plot of how many ratings a book has against its average rating.
# relplot returns a seaborn FacetGrid, which is what `ax` holds here.
ax = sns.relplot(
    data=df,
    x="average_rating",
    y="ratings_count",
    marker='o',
    color='red',
    sizes=(100, 200),
    height=7,
)
plt.title("Relation between Rating counts and Average Ratings", fontsize=15)
ax.set_axis_labels("Average Rating", "Ratings Count")

In [None]:
# Scatter plot of page count against average rating.
# relplot is a figure-level function: it builds its own figure (sized through
# `height`), so no plt.figure() call is made first; one would only add an
# empty extra figure to the cell output.
# NOTE(review): the column is addressed as "  num_pages" with two leading
# spaces, presumably mirroring the CSV header; verify against the dataset.
ax = sns.relplot(data=df, x="average_rating", y="  num_pages", color='red', sizes=(100, 200), height=7, marker='o')
plt.title("Relation between Number of Pages and Average Ratings", fontsize=15)
ax.set_axis_labels("Average Rating", "Number of Pages")

Data Preparation

In [None]:
# Work on a copy so that columns added during data preparation do not modify `df`.
df2 = df.copy()

In [None]:
# Label every book with the one-point-wide band its average rating falls in.
# Bands are (lower, upper], except the first one, which also includes 0.
rating = df2['average_rating']
for lower in range(5):
    upper = lower + 1
    above_lower = (rating >= lower) if lower == 0 else (rating > lower)
    in_band = above_lower & (rating <= upper)
    df2.loc[in_band, 'rating_between'] = f"between {lower} and {upper}"


In [None]:
# One-hot encode the two categorical columns: the rating band and the
# language code each become a frame of indicator columns.
rating_df, language_df = (
    pd.get_dummies(df2[column]) for column in ('rating_between', 'language_code')
)

In [None]:
# Assemble the feature table column-wise: rating-band indicators, language
# indicators, then the two numeric columns.
feature_parts = [
    rating_df,
    language_df,
    df2[['average_rating', 'ratings_count']],
]
features = pd.concat(feature_parts, axis=1)

Final Step: Feature Scaling and Nearest-Neighbour Recommendations

In [None]:
# Rescale every feature column with MinMaxScaler so that no single column
# dominates the distance computation of the neighbour search.
# MinMaxScaler comes from the notebook's top import cell; it is not
# re-imported here so that all dependencies stay declared in one place.
min_max_scaler = MinMaxScaler()
# After this line `features` is the scaled array returned by fit_transform,
# no longer the DataFrame built during data preparation.
features = min_max_scaler.fit_transform(features)

In [None]:
# Fit a ball-tree nearest-neighbour index on the scaled features, then query
# it with the same rows: `dist` and `idlist` hold, for every book, the
# distances to and the row positions of its 6 nearest neighbours.
model = neighbors.NearestNeighbors(algorithm='ball_tree', n_neighbors=6).fit(features)
dist, idlist = model.kneighbors(X=features)

In [None]:
def BookRecommender(book_name, books=None, neighbor_ids=None):
    """Return the titles of the precomputed nearest neighbours of a book.

    Args:
        book_name: Exact title to look up. When several rows share the
            title, the first one is used.
        books: DataFrame with a 'title' column. Defaults to the
            notebook-level ``df2``.
        neighbor_ids: Indexable of neighbour rows, where entry ``i`` holds
            the positional row numbers of the neighbours of the book at
            position ``i``. Defaults to the notebook-level ``idlist``.

    Returns:
        A list of titles, one per entry in the matching neighbour row, in
        the order given by that row. NOTE(review): the queried book itself
        is presumably part of the list, since ``idlist`` comes from querying
        the model with its own training rows; confirm if it should be
        filtered out.

    Raises:
        IndexError: If no row has the title ``book_name``.
    """
    if books is None:
        books = df2
    if neighbor_ids is None:
        neighbor_ids = idlist

    titles = books['title']
    # Work with row positions rather than index labels: the neighbour ids are
    # positions into the feature matrix, so this stays correct even when the
    # frame does not carry a default 0..n-1 index.
    matches = (titles == book_name).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No book titled {book_name!r} in the dataset")

    position = matches[0]
    return titles.iloc[neighbor_ids[position]].tolist()
    
# Example query: recommendations for one specific title. The lookup is an
# exact string match, so the title is passed exactly as written here.
query_title = 'Harry Potter and the Half-Blood Prince (Harry Potter  #6)'
BookNames = BookRecommender(query_title)
BookNames