In [1]:
import numpy as np
import pandas as pd
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by pandas in the
# cells below are hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## Importing Datasets

In [2]:
# Read the three raw CSV tables (books, users, ratings) from the data/ folder.
books, users, ratings = map(
    pd.read_csv,
    ('data/books.csv', 'data/users.csv', 'data/ratings.csv'),
)

In [3]:
# Preview the first rows of the books table (head() defaults to 5 rows).
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [4]:
# Preview the first rows of the users table (head() defaults to 5 rows).
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [5]:
# Preview the first rows of the ratings table (head() defaults to 5 rows).
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


## Cleaning Datasets

In [6]:
# Report the number of missing values per column for each table,
# closing every report with a divider line.
for heading, frame in (
    ("Books:\n\n", books),
    ("Users:\n\n", users),
    ("Ratings\n\n", ratings),
):
    print(heading, frame.isnull().sum())
    print("-----------------------------------")

Books:

 ISBN                   0
Book-Title             0
Book-Author            1
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64
-----------------------------------
Users:

 User-ID          0
Location         0
Age         110762
dtype: int64
-----------------------------------
Ratings

 User-ID        0
ISBN           0
Book-Rating    0
dtype: int64
-----------------------------------


In [7]:
# Count fully duplicated rows in each table (books, users, ratings in turn).
for frame in (books, users, ratings):
    print(frame.duplicated().sum())

0
0
0


In [8]:
# Show the (rows, columns) size of each table (books, users, ratings in turn).
for frame in (books, users, ratings):
    print(frame.shape)

(271360, 8)
(278858, 3)
(1149780, 3)


## Popularity-Based Recommender

In [9]:
# Attach book metadata to every rating; the inner join keeps only the ratings
# whose ISBN also appears in the books table.
ratings_name = pd.merge(ratings, books, how="inner", on="ISBN")

In [10]:
# Count how many ratings each book title received.
# 'Book-Rating' is selected before aggregating so that only that column is
# counted, instead of counting every column of the merged frame and then
# discarding all but one. The result is a new frame (no in-place rename).
num_rating = (
    ratings_name.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)
print(num_rating)

                                               Book-Title  num_ratings
0        A Light in the Storm: The Civil War Diary of ...            4
1                                   Always Have Popsicles            1
2                    Apple Magic (The Collector's series)            1
3        Ask Lily (Young Women of Faith: Lily Series, ...            1
4        Beyond IBM: Leadership Marketing and Finance ...            1
...                                                   ...          ...
241066                                      Ã?Â?lpiraten.            2
241067                     Ã?Â?rger mit Produkt X. Roman.            4
241068                                Ã?Â?sterlich leben.            1
241069                              Ã?Â?stlich der Berge.            3
241070                                  Ã?Â?thique en toc            2

[241071 rows x 2 columns]


In [11]:
# Compute the average rating of each book title.
# The rating column is selected *before* aggregating: calling .mean() on the
# whole grouped frame also tries to average the text columns that came from
# the books table (author, publisher, image URLs, ...), which raises
# TypeError on pandas >= 2.0 and was only silently skipped on older versions.
avg_rating = (
    ratings_name.groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_ratings')
)
print(avg_rating)

                                               Book-Title  avg_ratings
0        A Light in the Storm: The Civil War Diary of ...     2.250000
1                                   Always Have Popsicles     0.000000
2                    Apple Magic (The Collector's series)     0.000000
3        Ask Lily (Young Women of Faith: Lily Series, ...     8.000000
4        Beyond IBM: Leadership Marketing and Finance ...     0.000000
...                                                   ...          ...
241066                                      Ã?Â?lpiraten.     0.000000
241067                     Ã?Â?rger mit Produkt X. Roman.     5.250000
241068                                Ã?Â?sterlich leben.     7.000000
241069                              Ã?Â?stlich der Berge.     2.666667
241070                                  Ã?Â?thique en toc     4.000000

[241071 rows x 2 columns]


In [12]:
# Combine the per-title rating count and mean rating into a single table.
popular_book = pd.merge(num_rating, avg_rating, on='Book-Title')
print(popular_book)

                                               Book-Title  num_ratings  \
0        A Light in the Storm: The Civil War Diary of ...            4   
1                                   Always Have Popsicles            1   
2                    Apple Magic (The Collector's series)            1   
3        Ask Lily (Young Women of Faith: Lily Series, ...            1   
4        Beyond IBM: Leadership Marketing and Finance ...            1   
...                                                   ...          ...   
241066                                      Ã?Â?lpiraten.            2   
241067                     Ã?Â?rger mit Produkt X. Roman.            4   
241068                                Ã?Â?sterlich leben.            1   
241069                              Ã?Â?stlich der Berge.            3   
241070                                  Ã?Â?thique en toc            2   

        avg_ratings  
0          2.250000  
1          0.000000  
2          0.000000  
3          8.000000  
4

In [13]:
# Thresholds for the popularity-based list.
MIN_RATINGS = 500  # a title needs at least this many ratings to qualify
TOP_N = 50         # keep at most this many of the best-rated qualifying titles

# Keep only the per-title statistics on the left side of the merge. On a
# second run of this cell `popular_book` already carries 'Book-Author' and
# 'Image-URL-M'; merging that with `books` would produce '_x'/'_y' suffixed
# columns and the final column selection would raise KeyError. Restricting the
# columns here makes the cell safe to re-run with the same result.
top_rated = (
    popular_book.loc[
        popular_book['num_ratings'] >= MIN_RATINGS,
        ['Book-Title', 'num_ratings', 'avg_ratings'],
    ]
    .sort_values('avg_ratings', ascending=False)
    .head(TOP_N)
)

# Attach author and cover image. A title can match several rows in `books`,
# so only the first match per title is kept.
popular_book = (
    top_rated.merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')
    [['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_ratings']]
)

In [14]:
# Cover-image URLs of the popular books, i.e. the titles that were rated
# at least 500 times (one row per title).
cover_urls = popular_book['Image-URL-M']
print("Number of popular books: ", len(cover_urls))
print(cover_urls)

Number of popular books:  37
0      http://images.amazon.com/images/P/0439064872.0...
3      http://images.amazon.com/images/P/059035342X.0...
5      http://images.amazon.com/images/P/0446310786.0...
13     http://images.amazon.com/images/P/0385504209.0...
19     http://images.amazon.com/images/P/0316666343.0...
20     http://images.amazon.com/images/P/0142001740.0...
26     http://images.amazon.com/images/P/0312195516.0...
27     http://images.amazon.com/images/P/0452282152.0...
28     http://images.amazon.com/images/P/0446672211.0...
29     http://images.amazon.com/images/P/0151008116.0...
33     http://images.amazon.com/images/P/0345417623.0...
40     http://images.amazon.com/images/P/0671027360.0...
45     http://images.amazon.com/images/P/0316601950.0...
46     http://images.amazon.com/images/P/0446610038.0...
48     http://images.amazon.com/images/P/0446605239.0...
53     http://images.amazon.com/images/P/0345337662.0...
55     http://images.amazon.com/images/P/0312278586.0...
57