<a href="https://colab.research.google.com/github/ShivaniPandey2/HOTEL-BOOKING-ANALYSIS/blob/main/book_recommender_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the three raw CSV exports (books, users, ratings) into DataFrames.
csv_paths = (
    '/content/Books (1).csv',
    '/content/Users (1).csv',
    '/content/Ratings (1).csv',
)
books, users, ratings = map(pd.read_csv, csv_paths)

In [None]:
# Peek at the medium-size cover URL stored on the row labelled 1.
books.loc[1, 'Image-URL-M']

'http://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg'

In [None]:
# Preview the first five user rows.
users.head(n=5)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [None]:
# Preview the first five rating rows.
ratings.head(n=5)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [None]:
# Print (rows, columns) for each raw table: books, then ratings, then users.
for frame in (books, ratings, users):
    print(frame.shape)

(129038, 8)
(1149780, 3)
(278858, 3)


In [None]:
# Missing values per column in the books table.
books.isna().sum()

Unnamed: 0,0
ISBN,0
Book-Title,0
Book-Author,1
Year-Of-Publication,0
Publisher,2
Image-URL-S,1
Image-URL-M,1
Image-URL-L,1


In [None]:
# Missing values per column in the users table.
users.isna().sum()

Unnamed: 0,0
User-ID,0
Location,0
Age,110762


In [None]:
# Missing values per column in the ratings table.
ratings.isna().sum()

Unnamed: 0,0
User-ID,0
ISBN,0
Book-Rating,0


In [None]:
# Number of book rows that exactly repeat an earlier row.
books.duplicated(keep='first').sum()

0

In [None]:
# Number of rating rows that exactly repeat an earlier row.
ratings.duplicated(keep='first').sum()

0

In [None]:
# Number of user rows that exactly repeat an earlier row.
users.duplicated(keep='first').sum()

0

## Popularity Based Recommender System

In [None]:
# Attach book metadata to every rating; the inner join drops ratings whose
# ISBN has no row in `books`.
ratings_with_name = pd.merge(ratings, books, how='inner', on='ISBN')

In [None]:
# Count the non-null ratings each title received. The rating column is
# selected before counting so only that one column is aggregated.
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
    .rename(columns={'Book-Rating': 'num_ratings'})
)
num_rating_df

Unnamed: 0,Book-Title,num_ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Beyond IBM: Leadership Marketing and Finance ...,1
2,Dark Justice,1
3,Earth Prayers From around the World: 365 Pray...,10
4,Final Fantasy Anthology: Official Strategy Gu...,4
...,...,...
116673,Ã?Â?berallnie. AusgewÃ?Â¤hlte Gedichte 1928 - ...,1
116674,Ã?Â?bermorgen.,1
116675,Ã?Â?rger mit Produkt X. Roman.,4
116676,Ã?Â?stlich der Berge.,3


In [None]:
# Coerce 'Book-Rating' to numeric (unparseable values become NaN) and drop
# the rows that end up without a rating.
ratings_with_name = (
    ratings_with_name
    .assign(**{
        'Book-Rating': pd.to_numeric(ratings_with_name['Book-Rating'], errors='coerce')
    })
    .dropna(subset=['Book-Rating'])
)

# Mean rating per title, exposed under the column name 'avg_rating'.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_rating'})
)

# Plain-text dump of the result for inspection.
print(avg_rating_df)


                                               Book-Title  avg_rating
0        A Light in the Storm: The Civil War Diary of ...    2.250000
1        Beyond IBM: Leadership Marketing and Finance ...    0.000000
2                                            Dark Justice   10.000000
3        Earth Prayers From around the World: 365 Pray...    5.000000
4        Final Fantasy Anthology: Official Strategy Gu...    5.000000
...                                                   ...         ...
116673  Ã?Â?berallnie. AusgewÃ?Â¤hlte Gedichte 1928 - ...   10.000000
116674                                     Ã?Â?bermorgen.    0.000000
116675                     Ã?Â?rger mit Produkt X. Roman.    5.250000
116676                              Ã?Â?stlich der Berge.    2.666667
116677                                  Ã?Â?thique en toc    4.000000

[116678 rows x 2 columns]


In [None]:
# Combine the rating count and the mean rating into one row per title.
popular_df = pd.merge(num_rating_df, avg_rating_df, how='inner', on='Book-Title')
popular_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
2,Dark Justice,1,10.000000
3,Earth Prayers From around the World: 365 Pray...,10,5.000000
4,Final Fantasy Anthology: Official Strategy Gu...,4,5.000000
...,...,...,...
116673,Ã?Â?berallnie. AusgewÃ?Â¤hlte Gedichte 1928 - ...,1,10.000000
116674,Ã?Â?bermorgen.,1,0.000000
116675,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
116676,Ã?Â?stlich der Berge.,3,2.666667


In [None]:
# Keep titles with at least 250 ratings, then take the 50 with the highest
# mean rating.
popular_df = (
    popular_df
    .loc[popular_df['num_ratings'] >= 250]
    .sort_values(by='avg_rating', ascending=False)
    .head(50)
)

In [None]:
# Add author and cover URL to each popular title. A title can match several
# rows in `books`, so only the first match per title is kept.
popular_df = pd.merge(popular_df, books, on='Book-Title').drop_duplicates(subset='Book-Title')
popular_df = popular_df[
    ['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']
]

In [None]:
# Cover URL stored on the row labelled 0 of the popularity table.
popular_df.loc[0, 'Image-URL-M']

'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg'

## Collaborative Filtering Based Recommender System

In [None]:
# IDs of users who have rated more than 200 books.
ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
padhe_likhe_users = ratings_per_user[ratings_per_user > 200].index

In [None]:
# Keep only the ratings made by the users selected in `padhe_likhe_users`.
from_selected_user = ratings_with_name['User-ID'].isin(padhe_likhe_users)
filtered_rating = ratings_with_name.loc[from_selected_user]

In [None]:
# Titles that received at least 50 ratings from the selected users.
ratings_per_book = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
famous_books = ratings_per_book[ratings_per_book >= 50].index

In [None]:
# Restrict the selected users' ratings to the titles in `famous_books`.
is_famous_book = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating.loc[is_famous_book]

In [None]:
# Book x user rating matrix: one row per title, one column per user. Repeated
# (title, user) pairs are averaged, which is pivot_table's default.
pt = pd.pivot_table(
    final_ratings,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)

In [None]:
# A missing cell means the user never rated that title; store it as 0.
pt = pt.fillna(0)

In [None]:
# Display the filled book x user rating matrix.
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271448,271705,273979,274004,274061,274301,274308,275970,277427,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Pairwise cosine similarity between the rows (books) of the rating matrix.
# With Y=None the rows of X are compared against each other.
similarity_scores = cosine_similarity(X=pt, Y=None)

In [None]:
# Expect a square matrix with one row and one column per book in `pt`.
np.shape(similarity_scores)

(593, 593)

In [None]:
def recommend(book_name, top_n=4):
    """Return the books whose rating pattern is most similar to ``book_name``.

    The title is looked up in the module-level pivot table ``pt``. Every other
    title is ranked by its entry in the matching row of ``similarity_scores``,
    and display data for the best matches is read from ``books``.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.
    top_n : int, default 4
        Maximum number of recommendations to return.

    Returns
    -------
    list of list
        One ``[title, author, image_url_m]`` entry per recommendation, most
        similar first. An entry is an empty list when the title has no row in
        ``books``.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the row of the queried book; fail with a readable message rather
    # than an opaque "index 0 is out of bounds" error.
    matches = np.where(pt.index == book_name)[0]
    if len(matches) == 0:
        raise IndexError(f"book {book_name!r} not found in the pivot table")
    index = int(matches[0])

    # Exclude the queried book explicitly. Relying on it sorting first breaks
    # when its self-similarity is not the unique maximum (an all-zero rating
    # row, or another book with a tied score).
    scores = similarity_scores[index]
    candidates = (i for i in range(len(scores)) if i != index)
    similar_items = sorted(candidates, key=lambda i: scores[i], reverse=True)[:top_n]

    columns = ['Book-Title', 'Book-Author', 'Image-URL-M']
    data = []
    for i in similar_items:
        # A title may appear several times in `books`; use its first row.
        temp_df = books.loc[books['Book-Title'] == pt.index[i], columns]
        data.append(temp_df.iloc[0].tolist() if len(temp_df) else [])

    return data

In [None]:
# Demo: recommendations for a title present in `pt`.
recommend(book_name='1984')

[["The Handmaid's Tale",
  'Margaret Atwood',
  'http://images.amazon.com/images/P/0449212602.01.MZZZZZZZ.jpg'],
 ['Animal Farm',
  'George Orwell',
  'http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg'],
 ['The Vampire Lestat (Vampire Chronicles, Book II)',
  'ANNE RICE',
  'http://images.amazon.com/images/P/0345313860.01.MZZZZZZZ.jpg'],
 ['Brave New World',
  'Aldous Huxley',
  'http://images.amazon.com/images/P/0060809833.01.MZZZZZZZ.jpg']]

In [None]:
# Title stored at position 545 of the pivot table's index.
# NOTE(review): looks like a leftover debugging lookup - confirm whether this cell is still needed.
pt.index[545]

'Timeline'

In [None]:
import pickle

# Persist the popularity table. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('popular.pkl', 'wb') as popular_file:
    pickle.dump(popular_df, popular_file)

In [None]:
# Preview `books` with one row per title. The result is only displayed;
# `books` itself is left unchanged.
books.drop_duplicates(subset='Book-Title')

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
129032,020163354X,"The Implementation (TCP/IP Illustrated, Volume 2)",Gary R. Wright,1995,Addison-Wesley Professional,http://images.amazon.com/images/P/020163354X.0...,http://images.amazon.com/images/P/020163354X.0...,http://images.amazon.com/images/P/020163354X.0...
129033,044652803X,"The Lady of the Sorrows (The Bitterbynde, Book 2)",Cecilia Dart-Thornton,2002,Aspect,http://images.amazon.com/images/P/044652803X.0...,http://images.amazon.com/images/P/044652803X.0...,http://images.amazon.com/images/P/044652803X.0...
129034,0130810819,"UNIX Network Programming, Volume 2: Interproce...",W. Richard Stevens,1998,Prentice Hall PTR,http://images.amazon.com/images/P/0130810819.0...,http://images.amazon.com/images/P/0130810819.0...,http://images.amazon.com/images/P/0130810819.0...
129035,0345445775,"Stormrider (Rigante Series, Book 4)",DAVID GEMMELL,2002,Del Rey,http://images.amazon.com/images/P/0345445775.0...,http://images.amazon.com/images/P/0345445775.0...,http://images.amazon.com/images/P/0345445775.0...


In [None]:
# Persist the rating matrix, the books table and the similarity matrix.
# Each file is written inside a context manager so its handle is closed as
# soon as the dump finishes.
artifacts = {
    'pt.pkl': pt,
    'books.pkl': books,
    'similarity_scores.pkl': similarity_scores,
}
for filename, artifact in artifacts.items():
    with open(filename, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

# **GitHub Link**