# Request book cover images

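This notebook downloads the cover image of every book in an Amazon book-reviews dataset that has an image URL, and saves each cover as a `.jpg` file in the local `images` folder, named after the book's `index` value.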

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os

In [47]:
# Load the books metadata. The file is the Kaggle "books_data.csv" with an
# extra index column added so it can be merged with the project's other df.
# Original dataset:
# https://www.kaggle.com/datasets/mohamedbakhet/amazon-books-reviews?select=books_data.csv
amz_books = pd.read_csv(filepath_or_buffer="books_data_with_index.csv")

In [5]:
# Keep only the rows whose image column holds a value (drops NaN images)
amz_books = amz_books.loc[amz_books["image"].notna()]

In [7]:
# Per-column count of missing values; the image column should now show 0
amz_books.isna().sum()

Title                 1
description       23372
authors            5302
image                 0
previewLink           0
publisher         30146
publishedDate       924
infoLink              0
categories         8445
ratingsCount     113202
index                 0
dtype: int64

In [None]:
# Download every cover image and store it as <folder_path>/<index>.jpg.
# The file name uses the dataset's "index" column so each saved cover can be
# matched back to its book row.
folder_path = 'images'

# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would block the whole loop indefinitely.
request_timeout_s = 10

# image.save() does not create missing directories, so make sure it exists.
os.makedirs(folder_path, exist_ok=True)

no_images_saved = 0
no_images_not_saved = 0

# One Session lets requests reuse connections instead of reconnecting per image.
with requests.Session() as session:
    # Iterate only over the two columns that are needed rather than building
    # a full row Series for every book with iterrows().
    for url, book_index in zip(amz_books["image"], amz_books["index"]):
        image_url = str(url) + ".jpg"
        save_name = os.path.join(folder_path, str(book_index) + ".jpg")

        try:
            # Send a GET request to the image URL
            image_response = session.get(image_url, timeout=request_timeout_s)

            # Anything other than 200 OK is counted as a failed download
            if image_response.status_code != 200:
                no_images_not_saved += 1
                continue

            image = Image.open(BytesIO(image_response.content))

            # JPEG cannot store palette/alpha modes (e.g. "P", "RGBA");
            # convert those so the save below does not raise.
            if image.mode not in ("L", "RGB", "CMYK"):
                image = image.convert("RGB")

            # Save the image to the specified folder
            image.save(save_name)
        except (requests.RequestException, OSError):
            # Network failures, timeouts, and undecodable/unwritable images
            # (Pillow reports these as OSError subclasses) should not abort
            # the remaining downloads; count the book as not saved instead.
            no_images_not_saved += 1
            continue

        no_images_saved += 1

# Report the outcome so the run leaves a visible summary in the notebook
print(f"images saved: {no_images_saved}, images not saved: {no_images_not_saved}")