In [1]:
import numpy as np
import pandas as pd

In [2]:
books = pd.read_csv('books.csv')
users = pd.read_csv('users.csv')
ratings = pd.read_csv('ratings.csv')

In [3]:
books['Image-URL-M'][1]

'https://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg'

In [4]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [5]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [6]:
print(books.shape)
print(ratings.shape)
print(users.shape)

(39591, 8)
(1149780, 3)
(278858, 3)


In [7]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            1
Year-Of-Publication    1
Publisher              1
Image-URL-S            1
Image-URL-M            1
Image-URL-L            1
dtype: int64

In [8]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [9]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [10]:
books.duplicated().sum()

0

In [11]:
ratings.duplicated().sum()

0

In [12]:
users.duplicated().sum()

0

## Popularity Based Recommender System

In [13]:
ratings_with_name = ratings.merge(books,on='ISBN')

In [14]:
num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_rating_df.rename(columns={'Book-Rating':'num_ratings'},inplace=True)
num_rating_df

Unnamed: 0,Book-Title,num_ratings
0,Tales of Terror and Suspense,1
1,#NAME?,3
2,$oft Money: The True Power in Our Nation's Cap...,5
3,"' Sie belieben wohl zu scherzen, Mr. Feynman.'...",1
4,' Small g'. Eine Sommeridylle.,2
...,...,...
9334,why I'm like this : True Stories,11
9335,Â¿QuiÃ©n se ha llevado mi queso?,2
9336,"Â¿QuÃ© me quieres, amor?",9
9337,Ã?Â?ber die Freiheit.,1


In [15]:
avg_rating_df = ratings_with_name.groupby('Book-Title', as_index=False)['Book-Rating'].mean()
avg_rating_df.rename(columns={'Book-Rating': 'avg_rating'}, inplace=True)

In [16]:
popular_df = num_rating_df.merge(avg_rating_df,on='Book-Title')
popular_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,Tales of Terror and Suspense,1,0.000000
1,#NAME?,3,5.000000
2,$oft Money: The True Power in Our Nation's Cap...,5,5.400000
3,"' Sie belieben wohl zu scherzen, Mr. Feynman.'...",1,9.000000
4,' Small g'. Eine Sommeridylle.,2,4.500000
...,...,...,...
9334,why I'm like this : True Stories,11,4.272727
9335,Â¿QuiÃ©n se ha llevado mi queso?,2,7.500000
9336,"Â¿QuÃ© me quieres, amor?",9,3.000000
9337,Ã?Â?ber die Freiheit.,1,7.000000


In [17]:
popular_df = popular_df[popular_df['num_ratings']>=250].sort_values('avg_rating',ascending=False).head(50)

In [18]:
popular_df = popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Image-URL-M','num_ratings','avg_rating']]

In [19]:
popular_df['Image-URL-M'][0]

'https://images.amazon.com/images/P/043935806X.01.MZZZZZZZ.jpg'

## Collaborative Filtering Based Recommender System

In [44]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 200
padhe_likhe_users = x[x].index

In [63]:
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(padhe_likhe_users)]
print("ratings_with_name shape:", ratings_with_name.shape)
print(ratings_with_name.columns)
print(ratings_with_name.head())

ratings_with_name shape: (80211, 10)
Index(['User-ID', 'ISBN', 'Book-Rating', 'Book-Title', 'Book-Author',
       'Year-Of-Publication', 'Publisher', 'Image-URL-S', 'Image-URL-M',
       'Image-URL-L'],
      dtype='object')
   User-ID        ISBN  Book-Rating  \
0   276725  034545104X            0   
1   276744  038550120X            7   
2   276746  055356451X            0   
3   276751  3596218098            8   
4   276762  3453092007            8   

                                   Book-Title       Book-Author  \
0                        Flesh Tones: A Novel        M. J. Rose   
1                             A Painted House      JOHN GRISHAM   
2                                  Night Sins         TAMI HOAG   
3  Reise nach Ixtlan. Die Lehre des Don Juan.  Carlos Castaneda   
4                            Die zweite Haut.       Dean Koontz   

   Year-Of-Publication                 Publisher  \
0               2002.0          Ballantine Books   
1               2001.0           

In [64]:
# Active users
user_counts = ratings_with_name['User-ID'].value_counts()
active_users = user_counts[user_counts > 10].index
print("Active users found:", len(active_users))

# Filter users
filtered_ratings = ratings_with_name[ratings_with_name['User-ID'].isin(active_users)]
print("After filtering users:", filtered_ratings.shape)

# Popular books
book_counts = filtered_ratings['Book-Title'].value_counts()
popular_books = book_counts[book_counts > 5].index
print("Popular books found:", len(popular_books))

# Final filtering
filtered_ratings = filtered_ratings[filtered_ratings['Book-Title'].isin(popular_books)]
print("After filtering books:", filtered_ratings.shape)


Active users found: 1343
After filtering users: (39738, 10)
Popular books found: 1585
After filtering books: (28222, 10)


In [65]:
pt = filtered_ratings.pivot_table(
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating'
).fillna(0)

print("Pivot table shape:", pt.shape)

Pivot table shape: (1585, 1325)


In [66]:
y = filtered_rating.groupby('Book-Title').count()['Book-Rating']>=50
famous_books = y[y].index

In [67]:
final_ratings = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]

In [50]:
pt = final_ratings.pivot_table(index='Book-Title',columns='User-ID',values='Book-Rating')

In [68]:
pt.fillna(0,inplace=True)

In [52]:
pt

User-ID
Book-Title


In [56]:
print("Pivot table shape:", pt.shape)

Pivot table shape: (1585, 1325)


In [57]:
from sklearn.metrics.pairwise import cosine_similarity

if pt.shape[0] > 0 and pt.shape[1] > 0:
    similarity_scores = cosine_similarity(pt)
    print("similarity_scores shape:", similarity_scores.shape)
else:
    print("Pivot table is empty — cannot compute similarity scores.")

similarity_scores shape: (1585, 1585)


In [59]:
def recommend(book_name):
    if book_name not in pt.index:
        print(f"'{book_name}' not found in pivot table index.")
        return []
    
    index = np.where(pt.index == book_name)[0][0]
    similar_items = sorted(list(enumerate(similarity_scores[index])), key=lambda x: x[1], reverse=True)[1:5]
    
    data = []
    for i in similar_items:
        item = []
        temp_df = books[books['Book-Title'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Image-URL-M'].values))
        
        data.append(item)
    return data

In [78]:
#Checking titles of the books
print(list(pt.index[:1000]))

['09-Nov', '16 Lighthouse Road', '20,000 Leagues Under the Sea (Wordsworth Collection)', '204 Rosewood Lane', '30-Minute Meals', '31 Songs.', '311 Pelican Court', '7b', 'A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))', 'A 3rd Serving of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))', 'A 4th Course of Chicken Soup for the Soul: 101 More Stories to Open the Heart and Rekindle the Spirit', 'A 5th Portion of Chicken Soup for the Soul : 101 Stories to Open the Heart and Rekindle the Spirit', 'A 6th Bowl of Chicken Soup for the Soul (Chicken Soup for the Soul)', 'A Book Without Covers', 'A Child Called \\It\\": One Child\'s Courage to Survive"', 'A Cold Day for Murder', "A Cook's Tour", 'A Cup of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))', 'A Cup of Comfort: Stories That Warm Your Heart, Lift Your Spirit, and Enrich Your Life', 'A Darker Dream', 'A Fine Balance', 'A Gesture Life', 'A Gift To Last', 'A Go

In [80]:
print([title for title in pt.index if 'Boy Still Missing' in title])

['Boy Still Missing : A Novel']


In [74]:
recommend('204 Rosewood Lane')

[['311 Pelican Court',
  'Debbie Macomber',
  'https://images.amazon.com/images/P/0739437631.01.MZZZZZZZ.jpg'],
 ['Between Friends',
  'Debbie Macomber',
  'https://images.amazon.com/images/P/155166674X.01.MZZZZZZZ.jpg'],
 ['Girls Night',
  'Stef Ann Holm',
  'https://images.amazon.com/images/P/1551669498.01.MZZZZZZZ.jpg'],
 ['Deadly Intent',
  'Christiane Heggan',
  'https://images.amazon.com/images/P/1551666480.01.MZZZZZZZ.jpg']]

In [81]:
recommend('Boy Still Missing : A Novel')

[["The Shelters of Stone (Earth's Children, Book 5)",
  'JEAN M. AUEL',
  'https://images.amazon.com/images/P/055328942X.01.MZZZZZZZ.jpg'],
 ['Made in America: An Informal History of the English Language in the United States',
  'Bill Bryson',
  'https://images.amazon.com/images/P/068810312X.01.MZZZZZZZ.jpg'],
 ['Cry Wolf',
  'TAMI HOAG',
  'https://images.amazon.com/images/P/055356160X.01.MZZZZZZZ.jpg'],
 ['Night Sins',
  'TAMI HOAG',
  'https://images.amazon.com/images/P/055356451X.01.MZZZZZZZ.jpg']]