#### Collect Datasets

In [82]:
import numpy as np
import pandas as pd

In [83]:
# Load the three raw tables.
# - ISBN is the key later used to join ratings to books, so it is read as text in
#   both files: the key dtype then never depends on type inference, and ISBNs that
#   consist only of digits keep their leading zeros.
# - low_memory=False makes pandas parse Books.csv in one pass instead of in chunks,
#   so every column gets a single, consistently inferred dtype.
books = pd.read_csv('Datasets/Books.csv', low_memory=False, dtype={'ISBN': str})
users = pd.read_csv('Datasets/Users.csv')
ratings = pd.read_csv('Datasets/Ratings.csv', dtype={'ISBN': str})

Display the first few rows of each dataset:
* Books.csv
* Users.csv
* Ratings.csv

In [84]:
# Preview the first five rows of the books table
books.head(n=5)


Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [85]:
# Preview the first five rows of the users table
users.head(n=5)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [86]:
# Preview the first five rows of the ratings table
ratings.head(n=5)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [87]:
# Print each dataset's (rows, columns) shape, in the order: books, ratings, users
for frame in (books, ratings, users):
    print(frame.shape)

(271360, 8)
(1149780, 3)
(278858, 3)


In [88]:
# Pair every dataset with a display name so the checks can label their output
dataframes = [(books, 'Books'), (users, 'Users'), (ratings, 'Ratings')]

# Report the number of missing (null) entries per column, one dataset at a time
for frame, label in dataframes:
    null_counts = frame.isnull().sum()
    print(f'Counting values in {label}:\n{null_counts}\n')

Counting values in Books:
ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

Counting values in Users:
User-ID          0
Location         0
Age         110762
dtype: int64

Counting values in Ratings:
User-ID        0
ISBN           0
Book-Rating    0
dtype: int64



In [89]:
# Report how many rows in each dataset are exact copies of an earlier row
for frame, label in dataframes:
    print(f'Count of duplicated rows in {label}.csv: {frame.duplicated().sum()}')

Count of duplicated rows in Books.csv: 0
Count of duplicated rows in Users.csv: 0
Count of duplicated rows in Ratings.csv: 0


### Popularity Based Recommendation 
A recommendation system that suggests items to users based on their overall popularity or the total number of interactions or ratings an item has received.

$$\text{Avg rating} = \frac{1}{N} \sum_{i=1}^{N} \text{Rating}_i$$

Where,
- $N$ is the total number of ratings for the specific *item*
- $\text{Rating}_i$ represents the $i$-th rating given to that *item*

In [90]:
# Attach book metadata to every rating by joining on the shared 'ISBN' column.
# This is an inner join, so ratings whose ISBN is absent from `books` are dropped.
ratings_with_name = ratings.merge(right=books, how='inner', on='ISBN')

ratings_with_name

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
...,...,...,...,...,...,...,...,...,...,...
1031131,276688,0517145553,0,Mostly Harmless,Douglas Adams,1995,Random House Value Pub,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...
1031132,276688,1575660792,7,Gray Matter,Shirley Kennett,1996,Kensington Publishing Corporation,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...
1031133,276690,0590907301,0,Triplet Trouble and the Class Trip (Triplet Tr...,Debbie Dadey,1997,Apple,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...
1031134,276704,0679752714,0,A Desert of Pure Feeling (Vintage Contemporaries),Judith Freeman,1997,Vintage Books USA,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...


In [91]:
# Count how many ratings each book title received and expose the count
# in a column named 'num_ratings'
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
    .rename(columns={'Book-Rating': 'num_ratings'})
)

num_rating_df


Unnamed: 0,Book-Title,num_ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241066,Ã?Â?lpiraten.,2
241067,Ã?Â?rger mit Produkt X. Roman.,4
241068,Ã?Â?sterlich leben.,1
241069,Ã?Â?stlich der Berge.,3


In [92]:
# Mean rating per book title, stored in a column named 'avg_rating'.
# NOTE(review): many ratings in this data are 0 — confirm whether 0 is a real score
# or stands for "no explicit rating", because zeros pull these averages down.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_rating'})
)

avg_rating_df

Unnamed: 0,Book-Title,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,2.250000
1,Always Have Popsicles,0.000000
2,Apple Magic (The Collector's series),0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,0.000000
...,...,...
241066,Ã?Â?lpiraten.,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,5.250000
241068,Ã?Â?sterlich leben.,7.000000
241069,Ã?Â?stlich der Berge.,2.666667


In [93]:
# Put each title's rating count and mean rating side by side in one table
popular_df = num_rating_df.merge(right=avg_rating_df, how='inner', on='Book-Title')

popular_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [94]:
# Keep only titles with at least 250 ratings, rank them by mean rating (best first),
# and cap the result at 25,000 rows (the cap only matters if more titles than that qualify)
has_enough_ratings = popular_df['num_ratings'] >= 250
popular_df = (
    popular_df[has_enough_ratings]
    .sort_values('avg_rating', ascending=False)
    .head(25000)
)

popular_df


Unnamed: 0,Book-Title,num_ratings,avg_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.737410
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
...,...,...,...
227701,Vinegar Hill (Oprah's Book Club (Paperback)),265,2.245283
233635,Whispers,286,2.199301
143377,Presumed Innocent,294,2.139456
94382,Isle of Dogs,288,2.000000


In [95]:
# Columns of the final popularity table, in display order
popular_columns = [
    'ISBN',
    'Book-Title',
    'Book-Author',
    'Year-Of-Publication',
    'Publisher',
    'Image-URL-L',
    'num_ratings',
    'avg_rating',
]

# Look up book details by title, keep the first matching `books` row per title,
# and round the mean rating to one decimal place
popular_books = (
    popular_df
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')[popular_columns]
    .round({'avg_rating': 1})
)

popular_books

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,num_ratings,avg_rating
0,0439136350,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,428,5.9
3,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,387,5.8
5,0590353403,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic,http://images.amazon.com/images/P/0590353403.0...,278,5.7
9,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,347,5.5
13,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,556,5.2
...,...,...,...,...,...,...,...,...
716,0380730138,Vinegar Hill (Oprah's Book Club (Paperback)),A. Manette Ansay,1998,Perennial,http://images.amazon.com/images/P/0380730138.0...,265,2.2
717,0440216745,Whispers,BELVA PLAIN,1994,Dell,http://images.amazon.com/images/P/0440216745.0...,286,2.2
727,0446359866,Presumed Innocent,Scott Turow,1989,Warner Books,http://images.amazon.com/images/P/0446359866.0...,294,2.1
733,0425182908,Isle of Dogs,Patricia Cornwell,2002,Berkley Publishing Group,http://images.amazon.com/images/P/0425182908.0...,288,2.0


In [96]:
# Serialize the popular-books table to a pickle file in the working directory
popular_pickle_path = 'popular.pkl'
popular_books.to_pickle(popular_pickle_path)

In [97]:
# Preview the first five rows of the in-memory table that was saved as 'popular.pkl'
popular_books.head(n=5)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,num_ratings,avg_rating
0,0439136350,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,428,5.9
3,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,387,5.8
5,0590353403,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic,http://images.amazon.com/images/P/0590353403.0...,278,5.7
9,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,347,5.5
13,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,556,5.2


### Collaborative Filtering
A recommendation system to provide personalized recommendations to users based on their interactions and preferences.

In [98]:
# Select the active users: those with more than 200 ratings in the merged table.
# `filtered_users` ends up holding the User-IDs that are kept, not the ones removed.
ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
filtered_users = ratings_per_user[ratings_per_user > 200].index

In [99]:
# Restrict the ratings to rows whose User-ID is in `filtered_users`
# (the users with more than 200 ratings)
is_active_user = ratings_with_name['User-ID'].isin(filtered_users)
filtered_rating = ratings_with_name[is_active_user]

In [100]:
# Titles that received at least 50 ratings from the active users
ratings_per_title = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
famous_books = ratings_per_title[ratings_per_title >= 50].index
famous_books

Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',
       'A Bend in the Road', 'A Case of Need',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Civil Action', 'A Day Late and a Dollar Short', 'A Fine Balance',
       ...
       'Winter Solstice', 'Wish You Well', 'Without Remorse',
       'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',
       'Year of Wonders', 'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=706)

In [101]:
# Keep only the ratings that belong to one of the famous titles
is_famous_title = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating[is_famous_title]

In [102]:
# Reshape into a book x user matrix: one row per title, one column per user.
# Each cell holds the rating, averaged when a user has several ratings for one title.
pt = final_ratings.pivot_table(
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
    aggfunc='mean',
)

In [103]:
# A user who never rated a title appears as NaN in the pivot table; store 0 there instead
pt = pt.fillna(0)

# Show the completed book x user matrix
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


So altogether we have 706 books and 810 users whose ratings can be used to generate recommendations.

#### Cosine Similarity
A metric used to measure how similar two vectors, such as documents or items in a recommendation system, are in a multi-dimensional space.

$$\text{Cosine Similarity}(A, B) = \frac{A \cdot B}{\lVert A \rVert \, \lVert B \rVert}$$

Where,
- $A \cdot B$ represents the dot product of vectors $A$ and $B$
- $\lVert A \rVert$ represents the Euclidean norm (magnitude) of vector $A$
- $\lVert B \rVert$ represents the Euclidean norm (magnitude) of vector $B$

In [104]:
from sklearn.metrics.pairwise import cosine_similarity

# Each row of `pt` is one book's vector of per-user ratings (unrated cells hold 0),
# so comparing rows pairwise gives a book-to-book similarity matrix:
# entry [i, j] is the cosine similarity between the i-th and j-th titles of `pt.index`.
similarity_scores = cosine_similarity(pt)

# Show the matrix dimensions: square, one row and one column per book
similarity_scores.shape

(706, 706)

In [105]:
# Implement collaborative filtering using cosine similarity
def recommend(book_name, top_n=4):
    """Return the books most similar to `book_name`.

    Similarity is read from the precomputed `similarity_scores` matrix, whose
    rows and columns follow the order of `pt.index`.

    Parameters
    ----------
    book_name : str
        Title to look up; it must be one of the titles in `pt.index`.
    top_n : int, default 4
        Maximum number of recommendations to return.

    Returns
    -------
    list of list
        One `[title, author, large image URL]` entry per recommended book,
        most similar first. The details come from the first row of `books`
        carrying that title; the entry is empty if `books` has no such title.

    Raises
    ------
    IndexError
        If `book_name` is not one of the titles in `pt.index`.
    """
    # Locate the row of the similarity matrix that belongs to the input book
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f'{book_name!r} is not among the titles available for recommendation')
    index = matches[0]

    # Exclude the input book by position instead of assuming it sorts first:
    # when another book ties with it on score, the stable sort may place that
    # book ahead, and slicing off the first element would then drop the wrong one.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    ]
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:max(top_n, 0)]

    data = []
    for position, _score in similar_items:
        # All `books` rows with this title, reduced to the first one
        title_rows = books[books['Book-Title'] == pt.index[position]]
        first_row = title_rows.drop_duplicates('Book-Title')

        item = []
        for column in ('Book-Title', 'Book-Author', 'Image-URL-L'):
            item.extend(list(first_row[column].values))
        data.append(item)

    return data

In [106]:
# Example query: the titles most similar to 'Life of Pi'
recommend(book_name='Life of Pi')

[['Tell No One',
  'Harlan Coben',
  'http://images.amazon.com/images/P/0440236703.01.LZZZZZZZ.jpg'],
 ['The Lovely Bones: A Novel',
  'Alice Sebold',
  'http://images.amazon.com/images/P/0316666343.01.LZZZZZZZ.jpg'],
 ['Fried Green Tomatoes at the Whistle Stop Cafe',
  'Fannie Flagg',
  'http://images.amazon.com/images/P/0070212570.01.LZZZZZZZ.jpg'],
 ['Fall On Your Knees (Oprah #45)',
  'Ann-Marie MacDonald',
  'http://images.amazon.com/images/P/0743237188.01.LZZZZZZZ.jpg']]

In [107]:
# Query one of the titles returned for 'Life of Pi' to see whether the two
# books show up in each other's recommendations
recommend(book_name='The Lovely Bones: A Novel')

[["Where the Heart Is (Oprah's Book Club (Paperback))",
  'Billie Letts',
  'http://images.amazon.com/images/P/0446672211.01.LZZZZZZZ.jpg'],
 ['Good in Bed',
  'Jennifer Weiner',
  'http://images.amazon.com/images/P/0743418174.01.LZZZZZZZ.jpg'],
 ["The Book of Ruth (Oprah's Book Club (Paperback))",
  'Jane Hamilton',
  'http://images.amazon.com/images/P/0385265700.01.LZZZZZZZ.jpg'],
 ['Life of Pi',
  'Yann Martel',
  'http://images.amazon.com/images/P/0151008116.01.LZZZZZZZ.jpg']]

In [74]:
import pickle

# Save the pivot table, the books table and the similarity matrix as pickle files.
# Each file is opened in a `with` block so it is flushed and closed as soon as its
# dump finishes, instead of leaving the handle open until garbage collection.
artifacts = {
    'pt.pkl': pt,
    'books.pkl': books,
    'similarity_scores.pkl': similarity_scores,
}
for filename, artifact in artifacts.items():
    with open(filename, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)

### BooksDataset.csv
Generate a new BooksDataset.csv for the search function

In [108]:
# Titles of the books in the pivot table (its row index)
book_titles = pt.index
book_titles

Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',
       'A Bend in the Road', 'A Case of Need',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Civil Action', 'A Day Late and a Dollar Short', 'A Fine Balance',
       ...
       'Winter Solstice', 'Wish You Well', 'Without Remorse',
       'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',
       'Year of Wonders', 'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=706)

In [109]:
# One-column DataFrame of the pivot table's book titles.
# It is built from `pt.index` rather than from the previous value of `book_titles`,
# so re-running this cell always produces the same frame.
book_titles = pd.DataFrame({'Book-Title': pt.index})
book_titles

Unnamed: 0,Book-Title
0,1984
1,1st to Die: A Novel
2,2nd Chance
3,4 Blondes
4,A Bend in the Road
...,...
701,Year of Wonders
702,You Belong To Me
703,Zen and the Art of Motorcycle Maintenance: An ...
704,Zoya


In [110]:
# Columns to keep in the exported dataset, in output order
columns_to_include = ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-L']

# Keep the rows of `books` whose title appears in `book_titles`,
# then narrow the result to the wanted columns
merged_books = books.merge(book_titles, how='inner', on='Book-Title')[columns_to_include]

In [111]:
# Keep a single row per title: the first one encountered
merged_books = merged_books.drop_duplicates(subset='Book-Title', keep='first')

merged_books

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L
0,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...
5,0440234743,The Testament,John Grisham,1999,Dell,http://images.amazon.com/images/P/0440234743.0...
12,0452264464,Beloved (Plume Contemporary Fiction),Toni Morrison,1994,Plume,http://images.amazon.com/images/P/0452264464.0...
13,0971880107,Wild Animus,Rich Shapero,2004,Too Far,http://images.amazon.com/images/P/0971880107.0...
14,0345402871,Airframe,Michael Crichton,1997,Ballantine Books,http://images.amazon.com/images/P/0345402871.0...
...,...,...,...,...,...,...
2487,0399141200,That Camden Summer,Lavyrle Spencer,1996,Putnam Pub Group,http://images.amazon.com/images/P/0399141200.0...
2491,0440213991,Guardian Angel,Sara Paretsky,1996,Dell Publishing Company,http://images.amazon.com/images/P/0440213991.0...
2495,0449223604,M Is for Malice,Sue Grafton,1998,Fawcett Books,http://images.amazon.com/images/P/0449223604.0...
2496,0688084915,Nothing Lasts Forever,Sidney Sheldon,1994,Harpercollins Childrens Books,http://images.amazon.com/images/P/0688084915.0...


In [112]:
# Write the deduplicated book list to a CSV file, leaving out the DataFrame index
output_csv = 'BooksDataset.csv'
merged_books.to_csv(output_csv, index=False)