Implement the web scraping on Amazon website or any shopping site by importing the requests and the Beautiful Soup.

Date: 08-05-2024

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
# Function to extract Product Title
def get_title(soup):
    try:
        title = soup.find("span", attrs={"id": "productTitle"})
        title_string = title.get_text(strip=True)
    except AttributeError:
        title_string=""
    return title_string

# Function to extract Product Price
def get_price(soup):
    try:
        price = soup.find("span", attrs= {"class": "a-offscreen"})
        if price is not None:
            price_string = price.get_text(strip=True)
        else:
            price_string=""
    except AttributeError:
        price_string = ""
    return price_string

# Function to extract Product Rating
def get_rating(soup):
    try:
        rating = soup.find("span", attrs= {"class": "a-icon-alt"})
        if rating is not None:
            rating_string = rating.get_text(strip=True)
        else:
            rating_string=""
    except AttributeError:
        rating_string= ""
    return rating_string

# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        if review_count is not None:
            review_count_string = review_count.get_text(strip=True)
        else:
            review_count_string = ""
    except AttributeError:
        review_count_string = ""
    return review_count_string            
        
# Function to extract Availability Status
def get_availability(soup):
    try:
        availability = soup.find("span", attrs= {"id": "availability"})
        if availability is not None:
            availability_string = availability.get_text(strip=True)
        else:
            availability_string=""
    except AttributeError:
        availability_string= ""
    return availability_string


In [3]:
if __name__ == "__main__":
    #Headers for request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0: Win64; x64) AppleWebKit/537.36 (KHTML. like gecko) Chrome/124.0.0. Safari/537.36",
        "Accept-Language" : "en-US, en;q=0.5"
    }
    
    # The webpage URL
    url = "https://www.amazon.in/HP-i3-1215U-15-6-inch-Graphics-Speakers/dp/B0BP2M7CCS/ref=sr_1_3?crid=35WC0HWGXROL0&dib=eyJ2IjoiMSJ9.56MtnFAWzi2JuWv0CsiCCEgiOOspPkYKMu6MtkzHHhBliQBI2aXAyG3LZZcipgYaZoiMlADV5clC6QvogMdjTQYkKlY3fP6RYRaPWEJlEVkIm_ccR5YUXECyuMUAg2VMXBE5sECZezvW0_crTB3TrMumaNnIkkfaALDxf_hJGBkpI1FpPVADfgnF9PC2ZCoqZ1Dvk1Wn9Mcm7sn-EUeoIoMN6JJDI8Jy9lohlpBd8TA.JaN_SH_POT9zbz5G_tCyjwFLX2whaet7oWwkL4Esv78&dib_tag=se&keywords=laptop&qid=1715512814&sprefix=lap%2Caps%2C289&sr=8-3"

    # HTTP Request
    webpage = requests.get(url, headers=headers)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "lxml")

    # Function calls to display all necessary product information 
    print("Product Title = ", get_title(soup))
    print("Product Price = ", get_price(soup))
    print("Product Rating = ", get_rating(soup))
    print("Number of Product Reviews = ", get_review_count(soup))
    print("Availability = ", get_availability(soup))


Product Title =  HP Laptop 15s, 12th Gen Intel Core i3, 15.6-inch (39.6 cm), 8GB DDR4, 512GB SSD, Thin & Light, Dual Speakers (Win 11, MSO 2021, Silver, 1.69 kg), fq5007TU / FQ5327TU
Product Price =  ₹35,879.00
Product Rating =  4.0 out of 5 stars
Number of Product Reviews =  810 ratings
Availability =  
