In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the book catalogue. low_memory=False tells pandas not to parse the file in
# chunks, so each column gets one inferred dtype instead of possibly mixed types.
books = pd.read_csv('Books.csv', low_memory = False)

In [3]:
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271360 non-null  object
 1   Book-Title           271360 non-null  object
 2   Book-Author          271358 non-null  object
 3   Year-Of-Publication  271360 non-null  object
 4   Publisher            271358 non-null  object
 5   Image-URL-S          271360 non-null  object
 6   Image-URL-M          271360 non-null  object
 7   Image-URL-L          271357 non-null  object
dtypes: object(8)
memory usage: 16.6+ MB


In [4]:
books.duplicated().sum()

np.int64(0)

In [5]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [6]:
# Discard every catalogue row that has at least one missing value.
books = books.dropna(how='any')

In [7]:
# Load the ratings table from Ratings.csv.
rating = pd.read_csv('Ratings.csv')

In [8]:
rating.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [9]:
rating.duplicated().sum()

np.int64(0)

In [10]:
rating.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149780 entries, 0 to 1149779
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   User-ID      1149780 non-null  int64 
 1   ISBN         1149780 non-null  object
 2   Book-Rating  1149780 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 26.3+ MB


In [11]:
rating.shape

(1149780, 3)

In [12]:
rating.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [13]:
# Load the users table from Users.csv.
user = pd.read_csv('Users.csv')

In [14]:
user.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [15]:
user.shape

(278858, 3)

In [16]:
user.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [17]:
user.duplicated().sum()

np.int64(0)

# Popularity-based recommendation system

### Merge ratings and books based on ISBN

In [18]:
# Attach catalogue metadata to every rating. The default inner join keeps only
# ratings whose ISBN also appears in `books`.
books_with_ratings = pd.merge(rating, books, on='ISBN')
books_with_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,276726,0155061224,5,Rites of Passage,Judith Rae,2001,Heinle,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...
2,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
3,276729,052165615X,3,Help!: Level 1,Philip Prowse,1999,Cambridge University Press,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...
4,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,Cambridge University Press,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...
...,...,...,...,...,...,...,...,...,...,...
1031123,276704,0876044011,0,Edgar Cayce on the Akashic Records: The Book o...,Kevin J. Todeschi,1998,A.R.E. Press (Association of Research &amp; Enlig,http://images.amazon.com/images/P/0876044011.0...,http://images.amazon.com/images/P/0876044011.0...,http://images.amazon.com/images/P/0876044011.0...
1031124,276704,1563526298,9,Get Clark Smart : The Ultimate Guide for the S...,Clark Howard,2000,Longstreet Press,http://images.amazon.com/images/P/1563526298.0...,http://images.amazon.com/images/P/1563526298.0...,http://images.amazon.com/images/P/1563526298.0...
1031125,276706,0679447156,0,Eight Weeks to Optimum Health: A Proven Progra...,Andrew Weil,1997,Alfred A. Knopf,http://images.amazon.com/images/P/0679447156.0...,http://images.amazon.com/images/P/0679447156.0...,http://images.amazon.com/images/P/0679447156.0...
1031126,276709,0515107662,10,The Sherbrooke Bride (Bride Trilogy (Paperback)),Catherine Coulter,1996,Jove Books,http://images.amazon.com/images/P/0515107662.0...,http://images.amazon.com/images/P/0515107662.0...,http://images.amazon.com/images/P/0515107662.0...


### Create new columns (total number of ratings and average rating)

In [19]:
# Number of ratings each title received, as a two-column frame
# (Book-Title, Total_number_of_Ratings).
num_ratings = (
    books_with_ratings
    .groupby('Book-Title', as_index=False)
    .agg(Total_number_of_Ratings=('Book-Rating', 'count'))
)
num_ratings

Unnamed: 0,Book-Title,Total_number_of_Ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241060,Ã?Â?lpiraten.,2
241061,Ã?Â?rger mit Produkt X. Roman.,4
241062,Ã?Â?sterlich leben.,1
241063,Ã?Â?stlich der Berge.,3


In [20]:
# Mean rating per title, rounded to two decimals, as a two-column frame
# (Book-Title, Average_Rating).
avg_ratings = (
    books_with_ratings
    .groupby('Book-Title', as_index=False)
    .agg(Average_Rating=('Book-Rating', 'mean'))
    .round(2)
)
avg_ratings

Unnamed: 0,Book-Title,Average_Rating
0,A Light in the Storm: The Civil War Diary of ...,2.25
1,Always Have Popsicles,0.00
2,Apple Magic (The Collector's series),0.00
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.00
4,Beyond IBM: Leadership Marketing and Finance ...,0.00
...,...,...
241060,Ã?Â?lpiraten.,0.00
241061,Ã?Â?rger mit Produkt X. Roman.,5.25
241062,Ã?Â?sterlich leben.,7.00
241063,Ã?Â?stlich der Berge.,2.67


In [21]:
# Combine the per-title rating count and mean rating into one frame.
new_ratings = pd.merge(num_ratings, avg_ratings, on='Book-Title')
new_ratings

Unnamed: 0,Book-Title,Total_number_of_Ratings,Average_Rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.25
1,Always Have Popsicles,1,0.00
2,Apple Magic (The Collector's series),1,0.00
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.00
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.00
...,...,...,...
241060,Ã?Â?lpiraten.,2,0.00
241061,Ã?Â?rger mit Produkt X. Roman.,4,5.25
241062,Ã?Â?sterlich leben.,1,7.00
241063,Ã?Â?stlich der Berge.,3,2.67


### Books + Ratings (merged data)

In [22]:
# Join the per-title statistics with the catalogue, keep the first catalogue row
# found for each title, and retain only the columns used for display.
merged = (
    new_ratings
    .merge(books, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    .loc[:, [
        'ISBN',
        'Book-Title',
        'Book-Author',
        'Year-Of-Publication',
        'Total_number_of_Ratings',
        'Average_Rating',
        'Image-URL-M',
        'Image-URL-L',
    ]]
)
merged

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Total_number_of_Ratings,Average_Rating,Image-URL-M,Image-URL-L
0,0590567330,A Light in the Storm: The Civil War Diary of ...,Karen Hesse,1999,4,2.25,http://images.amazon.com/images/P/0590567330.0...,http://images.amazon.com/images/P/0590567330.0...
1,0964147726,Always Have Popsicles,Rebecca Harvin,1994,1,0.00,http://images.amazon.com/images/P/0964147726.0...,http://images.amazon.com/images/P/0964147726.0...
2,0942320093,Apple Magic (The Collector's series),Martina Boudreau,1984,1,0.00,http://images.amazon.com/images/P/0942320093.0...,http://images.amazon.com/images/P/0942320093.0...
3,0310232546,"Ask Lily (Young Women of Faith: Lily Series, ...",Nancy N. Rue,2001,1,8.00,http://images.amazon.com/images/P/0310232546.0...,http://images.amazon.com/images/P/0310232546.0...
4,0962295701,Beyond IBM: Leadership Marketing and Finance ...,Lou Mobley,1989,1,0.00,http://images.amazon.com/images/P/0962295701.0...,http://images.amazon.com/images/P/0962295701.0...
...,...,...,...,...,...,...,...,...
270284,3499232499,Ã?Â?lpiraten.,Janwillem van de Wetering,2002,2,0.00,http://images.amazon.com/images/P/3499232499.0...,http://images.amazon.com/images/P/3499232499.0...
270285,325721538X,Ã?Â?rger mit Produkt X. Roman.,Joan Aiken,1987,4,5.25,http://images.amazon.com/images/P/325721538X.0...,http://images.amazon.com/images/P/325721538X.0...
270286,3451274973,Ã?Â?sterlich leben.,Anselm GrÃ?Â¼n,2001,1,7.00,http://images.amazon.com/images/P/3451274973.0...,http://images.amazon.com/images/P/3451274973.0...
270287,3442725739,Ã?Â?stlich der Berge.,David Guterson,2000,3,2.67,http://images.amazon.com/images/P/3442725739.0...,http://images.amazon.com/images/P/3442725739.0...


In [51]:
# Titles with at least 250 ratings, ordered by mean rating (highest first),
# limited to the top 50.
popular = (
    merged
    .loc[merged['Total_number_of_Ratings'] >= 250]
    .sort_values(by='Average_Rating', ascending=False)
    .head(50)
)
popular

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Total_number_of_Ratings,Average_Rating,Image-URL-M,Image-URL-L
89968,0439136350,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,428,5.85,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
89950,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,387,5.82,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...
89978,0590353403,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,278,5.74,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...
89955,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,347,5.5,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...
89940,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,556,5.18,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...
214743,0345339681,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,281,5.01,http://images.amazon.com/images/P/0345339681.0...,http://images.amazon.com/images/P/0345339681.0...
209874,0345339703,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986,368,4.95,http://images.amazon.com/images/P/0345339703.0...,http://images.amazon.com/images/P/0345339703.0...
89985,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,575,4.9,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...
237355,0345339711,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,1986,260,4.88,http://images.amazon.com/images/P/0345339711.0...,http://images.amazon.com/images/P/0345339711.0...
246683,0446310786,To Kill a Mockingbird,Harper Lee,1988,510,4.7,http://images.amazon.com/images/P/0446310786.0...,http://images.amazon.com/images/P/0446310786.0...


# Collaborative filtering recommendation system

In [52]:
# Active users: those with at least 50 ratings in the merged ratings/books table.
# `a` is a boolean Series indexed by User-ID; Active_users keeps the IDs flagged True.
a = books_with_ratings.groupby('User-ID')['Book-Rating'].count() >= 50
Active_users = a.index[a.to_numpy()]
Active_users

Index([   243,    254,    507,    638,    643,    741,    882,    929,   1025,
         1211,
       ...
       277928, 277965, 278026, 278137, 278144, 278188, 278418, 278582, 278633,
       278843],
      dtype='int64', name='User-ID', length=3058)

In [53]:
# Restrict the merged table to ratings made by the active users.
Active_users_books = books_with_ratings.loc[
    books_with_ratings['User-ID'].isin(Active_users)
]
Active_users_books

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
298,276925,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
299,276925,0060520507,0,"Sushi for Beginners : A Novel (Keyes, Marian)",Marian Keyes,2003,William Morrow,http://images.amazon.com/images/P/0060520507.0...,http://images.amazon.com/images/P/0060520507.0...,http://images.amazon.com/images/P/0060520507.0...
300,276925,0060930934,0,Wasted : A Memoir of Anorexia and Bulimia,Marya Hornbacher,1999,Perennial,http://images.amazon.com/images/P/0060930934.0...,http://images.amazon.com/images/P/0060930934.0...,http://images.amazon.com/images/P/0060930934.0...
301,276925,0060951303,0,La casa de los espÃ­ritus,Isabel Allende,1995,Rayo,http://images.amazon.com/images/P/0060951303.0...,http://images.amazon.com/images/P/0060951303.0...,http://images.amazon.com/images/P/0060951303.0...
302,276925,0140154078,6,The Music of Chance,Paul Auster,1993,Penguin Books,http://images.amazon.com/images/P/0140154078.0...,http://images.amazon.com/images/P/0140154078.0...,http://images.amazon.com/images/P/0140154078.0...
...,...,...,...,...,...,...,...,...,...,...
1031094,276688,0836236688,10,Dilbert: Seven Years Of Highly Defective Peopl...,Scott Adams,1997,Andrews McMeel Publishing,http://images.amazon.com/images/P/0836236688.0...,http://images.amazon.com/images/P/0836236688.0...,http://images.amazon.com/images/P/0836236688.0...
1031095,276688,0892966548,10,Killer Market: A Deborah Knott Mystery (Debora...,Margaret Maron,1997,Warner Books Inc,http://images.amazon.com/images/P/0892966548.0...,http://images.amazon.com/images/P/0892966548.0...,http://images.amazon.com/images/P/0892966548.0...
1031096,276688,1551669315,6,The Third Wife,Jasmine Cresswell,2002,Mira,http://images.amazon.com/images/P/1551669315.0...,http://images.amazon.com/images/P/1551669315.0...,http://images.amazon.com/images/P/1551669315.0...
1031097,276688,1575660792,7,Gray Matter,Shirley Kennett,1996,Kensington Publishing Corporation,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...


In [54]:
# Famous books: titles that received at least 50 ratings from active users.
# `q` is a boolean Series indexed by Book-Title; famous_books keeps the titles flagged True.
q = Active_users_books.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = q.index[q.to_numpy()]
famous_books

Index(['1984', '1st to Die: A Novel', '2010: Odyssey Two', '204 Rosewood Lane',
       '24 Hours', '2nd Chance', '4 Blondes', '84 Charing Cross Road',
       'A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))',
       'A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash',
       ...
       'Women Who Run with the Wolves',
       'Word Freak: Heartbreak, Triumph, Genius, and Obsession in the World of Competitive Scrabble Players',
       'Writ of Execution', 'Wuthering Heights',
       'Wuthering Heights (Penguin Classics)', 'Year of Wonders',
       'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=1360)

In [71]:
# Build filtered_table explicitly so this cell works on a fresh kernel: keep only
# the active users' ratings of the famous books.
filtered_table = Active_users_books[Active_users_books['Book-Title'].isin(famous_books)]
# Keep the first rating for each (user, title) pair so a pair appears at most once.
filtered_table = filtered_table.drop_duplicates(['User-ID', 'Book-Title'])
filtered_table

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
298,276925,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
303,276925,0140327592,0,Matilda,Roald Dahl,1990,Viking Penguin Inc,http://images.amazon.com/images/P/0140327592.0...,http://images.amazon.com/images/P/0140327592.0...,http://images.amazon.com/images/P/0140327592.0...
310,276925,0316666343,0,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown",http://images.amazon.com/images/P/0316666343.0...,http://images.amazon.com/images/P/0316666343.0...,http://images.amazon.com/images/P/0316666343.0...
313,276925,0385504209,8,The Da Vinci Code,Dan Brown,2003,Doubleday,http://images.amazon.com/images/P/0385504209.0...,http://images.amazon.com/images/P/0385504209.0...,http://images.amazon.com/images/P/0385504209.0...
335,276925,0804106304,0,The Joy Luck Club,Amy Tan,1994,Prentice Hall (K-12),http://images.amazon.com/images/P/0804106304.0...,http://images.amazon.com/images/P/0804106304.0...,http://images.amazon.com/images/P/0804106304.0...
...,...,...,...,...,...,...,...,...,...,...
1031065,276688,0553575104,6,In Pursuit of the Proper Sinner,Elizabeth George,2000,Bantam Books,http://images.amazon.com/images/P/0553575104.0...,http://images.amazon.com/images/P/0553575104.0...,http://images.amazon.com/images/P/0553575104.0...
1031072,276688,0679419810,0,Strip Tease,Carl Hiaasen,1993,Random House Inc,http://images.amazon.com/images/P/0679419810.0...,http://images.amazon.com/images/P/0679419810.0...,http://images.amazon.com/images/P/0679419810.0...
1031076,276688,0679751521,0,Midnight in the Garden of Good and Evil,John Berendt,1999,Vintage Books USA,http://images.amazon.com/images/P/0679751521.0...,http://images.amazon.com/images/P/0679751521.0...,http://images.amazon.com/images/P/0679751521.0...
1031081,276688,068484267X,0,Angela's Ashes: A Memoir,Frank McCourt,1999,Scribner,http://images.amazon.com/images/P/068484267X.0...,http://images.amazon.com/images/P/068484267X.0...,http://images.amazon.com/images/P/068484267X.0...


In [72]:
# Title-by-user rating matrix: one row per Book-Title, one column per User-ID.
# aggfunc='mean' is the pivot_table default, written out here for clarity.
pt = filtered_table.pivot_table(
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
    aggfunc='mean',
)

In [73]:
pt.isnull().sum()

User-ID
243       1307
254       1289
507       1330
638       1324
643       1359
          ... 
278188    1335
278418    1138
278582    1345
278633    1322
278843    1340
Length: 2992, dtype: int64

In [74]:
# Replace missing (user, title) entries with 0.
pt = pt.fillna(0)

In [75]:
from sklearn.metrics.pairwise import cosine_similarity

In [76]:
# Cosine similarity between every pair of rows of pt. pt is indexed by Book-Title,
# so score[i][j] compares the rating vectors of titles i and j.
score = cosine_similarity(pt)
score

array([[1.        , 0.04025687, 0.06331401, ..., 0.0469498 , 0.02513917,
        0.01745824],
       [0.04025687, 1.        , 0.01711353, ..., 0.03636981, 0.12535399,
        0.10090323],
       [0.06331401, 0.01711353, 1.        , ..., 0.07303747, 0.        ,
        0.        ],
       ...,
       [0.0469498 , 0.03636981, 0.07303747, ..., 1.        , 0.04144573,
        0.02158691],
       [0.02513917, 0.12535399, 0.        , ..., 0.04144573, 1.        ,
        0.08206063],
       [0.01745824, 0.10090323, 0.        , ..., 0.02158691, 0.08206063,
        1.        ]])

In [77]:
def recommend(book_name, top_n=4):
    """Return the books whose rating vectors are most similar to `book_name`.

    Similarities are read from the precomputed `score` matrix, which is indexed
    by position in `pt.index`; book details are looked up in `merged`.

    Parameters
    ----------
    book_name : str
        Title to look up in `pt.index`.
    top_n : int, default 4
        Maximum number of recommendations to return.

    Returns
    -------
    list of list, or str
        One entry per recommended book, most similar first, in the form
        [title, author, medium image URL, total number of ratings, average rating].
        An entry is an empty list when the title has no row in `merged`.
        If `book_name` is not in `pt.index`, a "not found" message string is
        returned instead of a list.
    """
    if book_name not in pt.index:
        return f"'{book_name}' not found in the dataset"

    # Position of the queried title in pt, and therefore in the similarity matrix.
    index = np.where(pt.index == book_name)[0][0]
    similarities = score[index]

    # Exclude the queried book by position rather than by slicing off the first
    # sorted element: if another title ties for the highest similarity, the
    # queried book is not guaranteed to sort first.
    candidates = [pos for pos in range(len(similarities)) if pos != index]
    candidates.sort(key=lambda pos: similarities[pos], reverse=True)

    fields = [
        'Book-Title',
        'Book-Author',
        'Image-URL-M',
        'Total_number_of_Ratings',
        'Average_Rating',
    ]

    data = []
    for pos in candidates[:max(top_n, 0)]:
        # De-duplicate once per recommended title instead of once per field.
        match = merged[merged['Book-Title'] == pt.index[pos]].drop_duplicates('Book-Title')
        item = []
        for field in fields:
            item.extend(match[field].values)
        data.append(item)

    return data

In [62]:
import pickle

# Persist the popularity table. The context manager closes the file handle once
# the dump has finished, so the data is flushed to disk.
with open('popular.pkl', 'wb') as fh:
    pickle.dump(popular, fh)

In [63]:
# Persist the pivot table, the book details table and the similarity matrix.
# Each file is opened in a context manager so its handle is closed after the dump.
for filename, obj in (('pt.pkl', pt), ('merged.pkl', merged), ('score.pkl', score)):
    with open(filename, 'wb') as fh:
        pickle.dump(obj, fh)