<a href="https://colab.research.google.com/github/AbdAziz1/Book-Recommender-System/blob/main/Books_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the three raw tables (books, users, ratings) from the /content directory.
books, users, ratings = map(
    pd.read_csv,
    ('/content/Books.csv', '/content/Users.csv', '/content/Ratings.csv'),
)

In [None]:
# Preview the first two rows of the books table.
books.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...


In [None]:
# Preview the first two rows of the users table.
users.head(2)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0


In [None]:
# Preview the first two rows of the ratings table.
ratings.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5


In [None]:
# Count the missing values in each column of the books table.
books.isna().sum()


ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    1
Publisher              3
Image-URL-S            1
Image-URL-M            1
Image-URL-L            1
dtype: int64

In [None]:
# Count the missing values in each column of the ratings table.
ratings.isna().sum()


User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [None]:
# Count the missing values in each column of the users table.
users.isna().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

# Popularity Recommender System

In [None]:
# Attach book metadata to every rating by joining the two tables on ISBN.
# No `how` is given, so pandas' default join type applies.
ratings_with_names = pd.merge(ratings, books, on='ISBN')

In [None]:
# Number of rating rows recorded for each book title, with the title as a column.
num_rating_df = (
    ratings_with_names
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
)

In [None]:
# Give the count column a descriptive name.
num_rating_df = num_rating_df.rename(columns={'Book-Rating': 'num_ratings'})

In [None]:
# Coerce 'Book-Rating' to a numeric dtype; values that cannot be parsed become NaN.
# The merged table itself is updated here.
ratings_with_names['Book-Rating'] = pd.to_numeric(
    ratings_with_names['Book-Rating'], errors='coerce'
)

# Mean rating per title, returned as a two-column frame:
# 'Book-Title' and 'avg_rating'.
avg_rating_df = (
    ratings_with_names
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_rating'})
)

In [None]:
# Preview the first rows of the per-title average ratings.
avg_rating_df.head()

Unnamed: 0,Book-Title,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,2.25
1,Apple Magic (The Collector's series),0.0
2,Beyond IBM: Leadership Marketing and Finance ...,0.0
3,Dark Justice,10.0
4,Earth Prayers From around the World: 365 Pray...,5.0


In [None]:
# Combine the rating counts and the average ratings into one table keyed by title.
popularity_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')
popularity_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Apple Magic (The Collector's series),1,0.000000
2,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
3,Dark Justice,1,10.000000
4,Earth Prayers From around the World: 365 Pray...,10,5.000000
...,...,...,...
127069,Ã?Â?berallnie. AusgewÃ?Â¤hlte Gedichte 1928 - ...,1,10.000000
127070,Ã?Â?bermorgen.,1,0.000000
127071,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
127072,Ã?Â?stlich der Berge.,3,2.666667


In [None]:
# Top 50 titles by average rating among those with at least 250 ratings.
popular_df = (
    popularity_df
    .loc[popularity_df['num_ratings'] >= 250]
    .sort_values('avg_rating', ascending=False)
    .head(50)
)

In [None]:
# Attach the author and medium cover-image URL to each popular title, keeping
# one row per title (the first match from `books`).
#
# Only the three popularity columns are taken from `popular_df` before the
# merge. This makes the cell safe to re-run: without the selection, a second
# run would merge a frame that already carries 'Book-Author' / 'Image-URL-M',
# pandas would suffix the clashing columns with _x / _y, and the final column
# selection would raise a KeyError.
popular_df = (
    popular_df[['Book-Title', 'num_ratings', 'avg_rating']]
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')
    [['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]
)

# Collaborative Filtering Based Recommender System

In [None]:
# Boolean flag per user: True when the user has more than 200 (non-null) ratings.
x = ratings_with_names.groupby('User-ID')['Book-Rating'].count() > 200
# IDs of the users that pass the threshold.
users_200 = x.index[x.to_numpy()]

In [None]:
# Keep only the ratings made by the users selected in `users_200`.
filtered_ratings = ratings_with_names.loc[
    ratings_with_names['User-ID'].isin(users_200)
]

In [None]:
# Boolean flag per title: True when the selected users gave it at least 50
# (non-null) ratings.
y = filtered_ratings.groupby('Book-Title')['Book-Rating'].count() >= 50
# Titles that pass the threshold.
famous_books = y.index[y.to_numpy()]

In [None]:
# Restrict the active-user ratings to the titles selected in `famous_books`.
final_ratings = filtered_ratings.loc[
    filtered_ratings['Book-Title'].isin(famous_books)
]

In [None]:
# Title-by-user rating matrix: one row per book title, one column per user.
# No aggfunc is given, so pandas' default aggregation is used when a user has
# several ratings for the same title.
pt = pd.pivot_table(
    final_ratings,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
)

In [None]:
# (number of titles, number of users) in the pivot table.
pt.shape

(600, 649)

In [None]:
# Replace the missing (title, user) entries with 0 so the matrix is fully numeric.
pt = pt.fillna(0)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Pairwise cosine similarity between the rows of `pt` (one row per book title).
similarity_score = cosine_similarity(pt)

In [None]:
# Shape of the similarity matrix (titles x titles).
similarity_score.shape

(600, 600)

# Recommender Function

In [None]:
def recommend(book_name, top_n=5):
    """Print the titles most similar to ``book_name``.

    Looks up ``book_name`` in the index of the global pivot table ``pt`` and
    ranks every *other* title by its entry in the matching row of the global
    ``similarity_score`` matrix, highest score first. Titles with equal
    scores keep their ``pt`` order.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.
    top_n : int, default 5
        Maximum number of titles to print.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(
            f"{book_name!r} was not found in pt.index; "
            "only titles present in the pivot table can be looked up"
        )
    index = matches[0]

    # Stable descending sort of the similarity row.
    ranked = np.argsort(-similarity_score[index], kind='stable')
    # Drop the queried book by position rather than assuming it sorts first:
    # another title with the same top score could otherwise push the book
    # itself into its own recommendations.
    ranked = ranked[ranked != index][:top_n]

    for i in ranked:
        print(pt.index[i])


In [None]:
# Example: print the titles most similar to '1984'.
recommend('1984')

The Handmaid's Tale
Animal Farm
The Vampire Lestat (Vampire Chronicles, Book II)
Brave New World
The Hours : A Novel


In [None]:
import pickle

# Persist the popularity table. The `with` block guarantees the file handle
# is flushed and closed once the dump finishes.
with open('popular.pkl', 'wb') as f:
    pickle.dump(popular_df, f)

In [None]:
# Persist the pivot table, the books table and the similarity matrix.
# Each file is opened in a `with` block so its handle is flushed and closed
# as soon as the dump finishes.
with open('pt.pkl', 'wb') as f:
    pickle.dump(pt, f)
with open('books.pkl', 'wb') as f:
    pickle.dump(books, f)
with open('similarity_scores.pkl', 'wb') as f:
    pickle.dump(similarity_score, f)