In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time
from requests.exceptions import RequestException

In [2]:
# Function to extract book title 
def get_bookTitle(soup):
    try:
        title = soup.find("span", attrs={"id": 'productTitle'})
        title_value = title.text
        title_string = title_value.strip()
    except AttributeError:
        title_string = ""
    return title_string

# Function to extract author name 
def get_authorName(soup):
    try:
        name = soup.find('span', attrs={'class': 'author notFaded'}).text
        name_string = name.strip()
    except AttributeError:
        name_string = ""
    return name_string

# Function to extract book price 
def get_bookPrice(soup):
    try:
        price = soup.find("span", attrs={"class":'a-price'}).text.strip()
    except AttributeError:
        price = ""
    return price

# Function to get book type 
def get_bookType(soup):
    try:
        typeOfBook = soup.find("span", attrs={"class": 'slot-title'}).text.strip()
    except AttributeError:
        typeOfBook = ""
    return typeOfBook 

# Function to extract book ratings 
def get_bookRating(soup):
    try:
        rating = soup.find("span", attrs={"class": 'a-icon-alt'}).text.strip()
    except AttributeError:
        rating = ""
    return rating

# Function to extract review count 
def get_reviewCount(soup):
    try:
        reviews = soup.find('span', attrs={'id': 'acrCustomerReviewText'}).text.strip()
    except AttributeError:
        reviews = ""
    return reviews

In [3]:
if __name__ == '__main__':
    
    HEADERS = { 'User-Agent':'',
               'Accept-Language': 'en-GB,en;q=0.9'}
    
    # Webpage URL
    URL = "https://www.amazon.co.uk/s?k=data+books&crid=3TFWSUVPVID3R&sprefix=data+books%2Caps%2C86&ref=nb_sb_noss_1"
    
    # HTTP Request
    try:
        webpage = requests.get(URL, headers=HEADERS)
        webpage.raise_for_status()  # Raise HTTPError for bad responses (4xx and 5xx)
    except RequestException as e:
        print(f"Failed to retrieve the main page: {e}")
        exit()

    # Soup object containing all data extracted from Amazon
    soup = BeautifulSoup(webpage.content, "html.parser")  # Converting to HTML format from bytes
    
    # Fetching links as List of Tag objects 
    links = soup.find_all("a", attrs={'class': "a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal"})
    
    # Link list 
    links_list = []
    
    # Loop to extract links from tag objects
    for link in links:
        href = link.get('href')
        if href and href.startswith("/"):
            href = "https://www.amazon.co.uk" + href
        links_list.append(href)
    
    d = {"title": [], "name": [], "price": [], "typeOfBook": [], "rating": [], "reviews": []}
    
    # Loop to extract book details from each link  
    for link in links_list:
        try:
            new_webpage = requests.get(link, headers=HEADERS)
            new_webpage.raise_for_status()  # Raise HTTPError for bad responses
            new_soup = BeautifulSoup(new_webpage.content, "html.parser")
            
            # Function calls to display all book details
            d['title'].append(get_bookTitle(new_soup))
            d['name'].append(get_authorName(new_soup))
            d['price'].append(get_bookPrice(new_soup))
            d['typeOfBook'].append(get_bookType(new_soup))
            d['rating'].append(get_bookRating(new_soup))
            d['reviews'].append(get_reviewCount(new_soup))
            
            # Delay between requests
            time.sleep(2)  # Sleep for 2 seconds
        except RequestException as e:
            print(f"Failed to retrieve the page: {e}")
            continue
        
    amazon_df = pd.DataFrame.from_dict(d)
    amazon_df['title'].replace('', np.nan, inplace=True)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)

In [4]:
amazon_df 

Unnamed: 0,title,name,price,typeOfBook,rating,reviews
0,The Art of Statistics: Learning from Data (Pel...,David Spiegelhalter \n(Author),£10.11£10.11,Kindle Edition,4.5 out of 5 stars,"3,612 ratings"
1,Storytelling with Data: A Data Visualization G...,Cole Nussbaumer Knaflic \n(Author),£23.49,Kindle Edition,4.6 out of 5 stars,"4,644 ratings"
2,Data Analytics for Absolute Beginners: A Decon...,Oliver Theobald \n(Author),£11.79,Kindle Edition,4.3 out of 5 stars,464 ratings
3,"Data Scientist Coloring Book. A Funny, Unique,...",Witty Jobs Junior \n(Author),£6.85,Paperback,4.5 out of 5 stars,2 ratings
4,Metalworker's Data Book: No. 42 (Workshop Prac...,Harold Hall \n(Author),£7.25,Paperback,4.6 out of 5 stars,289 ratings
5,Microsoft Power BI Data Analyst Certification ...,"Orrin Edenfield \n(Author),",£33.99£33.99,Kindle Edition,4.0 out of 5 stars,44 ratings
6,How to Win the Premier League: The Inside Stor...,Ian Graham \n(Author),£16.99,Kindle Edition,Previous slide of product details,
7,"Becoming a Data Head: How to Think, Speak, and...","Alex J. Gutman \n(Author),",£23.09,Kindle Edition,4.6 out of 5 stars,310 ratings
8,Engineers′ Data Book,Clifford Matthews \n(Author),£16.25£16.25,Kindle Edition,4.6 out of 5 stars,176 ratings
9,Data Visualisation: A Handbook for Data Driven...,Andy Kirk \n(Author),£44.23£44.23,Kindle Edition,4.4 out of 5 stars,192 ratings
