In [3]:
# Install into the running kernel's environment. The explicit %pip magic does not
# depend on IPython's automagic setting or on `pip` being free of a variable binding.
# NOTE(review): selenium is not imported by the scraping cell below — confirm it is still needed.
%pip install selenium

Note: you may need to restart the kernel to use updated packages.


In [16]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def _first_token(element, default):
    """Return the first whitespace-separated token of ``element.text``.

    ``default`` is returned when the element is missing or when its text is
    empty or whitespace-only (a case where ``text.split()[0]`` would raise
    ``IndexError``).
    """
    if element is None:
        return default
    tokens = element.text.split()
    return tokens[0] if tokens else default


def _stripped_text(element, default):
    """Return ``element.text`` without surrounding whitespace, or ``default`` if missing."""
    return element.text.strip() if element is not None else default


def _parse_product(product, page_url):
    """Build one record (dict) from a single search-result container.

    ``page_url`` is the URL the container was fetched from; it is used as the
    base for resolving the product link.
    """
    link = product.find("a", {"class": "a-link-normal"})
    href = link.get("href") if link is not None else None
    # urljoin resolves relative hrefs against the page's host and leaves
    # already-absolute hrefs untouched (plain concatenation would corrupt them).
    product_url = urljoin(page_url, href) if href else "N/A"

    name = product.find("span", {"class": "a-size-medium a-color-base a-text-normal"})
    price = product.find("span", {"class": "a-offscreen"})
    rating = product.find("span", {"class": "a-icon-alt"})
    # NOTE(review): this takes the first span with class "a-size-base" in the
    # container — confirm against the live markup that it is the review count.
    reviews = product.find("span", {"class": "a-size-base"})

    return {
        "Product URL": product_url,
        "Product Name": _stripped_text(name, "N/A"),
        "Product Price": _stripped_text(price, "N/A"),
        "Rating": _first_token(rating, "N/A"),
        "Number of Reviews": _first_token(reviews, "0"),
    }


def scrape_amazon_products(url, num_pages, timeout=10):
    """Scrape product records from consecutive search-result pages.

    Args:
        url: Search-results URL. A ``page=<n>`` query parameter is appended
            for every request (using ``?`` or ``&`` as appropriate).
        num_pages: Maximum number of pages to request, starting at page 1.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook.

    Returns:
        A list of dicts with the keys "Product URL", "Product Name",
        "Product Price", "Rating" and "Number of Reviews". Fields that cannot
        be found are "N/A" (or "0" for the review count).

    Scraping stops early, keeping what was collected so far, when a response
    is not OK or a page contains no result containers. Exceptions raised by
    ``requests.get`` itself are not caught.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # Only use "&" when the URL already carries a query string.
    separator = "&" if "?" in url else "?"

    product_data = []

    for page in range(1, num_pages + 1):
        page_url = f"{url}{separator}page={page}"
        response = requests.get(page_url, headers=headers, timeout=timeout)

        if not response.ok:
            # An error page has no result containers; report the real cause
            # instead of parsing it.
            print(f"Request for page {page} failed with HTTP status {response.status_code}. Exiting scraping.")
            break

        soup = BeautifulSoup(response.content, "html.parser")
        products = soup.find_all("div", {"data-component-type": "s-search-result"})

        if not products:
            print(f"No product containers found on page {page}. Exiting scraping.")
            break

        product_data.extend(_parse_product(product, page_url) for product in products)

    return product_data

def export_to_csv(data, filename):
    """Write scraped product records to ``filename`` as a UTF-8 CSV file.

    Args:
        data: Iterable of dicts keyed by the column names listed below.
        filename: Path of the CSV file to create; an existing file is overwritten.

    A header row is written first, followed by one row per record.
    """
    columns = ["Product URL", "Product Name", "Product Price", "Rating", "Number of Reviews"]

    # newline="" lets the csv module control line endings itself.
    with open(filename, mode="w", newline="", encoding="utf-8") as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        table.writerows(data)

# Run configuration: upper bound on result pages, and the search to crawl.
num_pages_to_scrape = 20
amazon_url = "https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1"

# Fetch the records (one dict per product found on the result pages).
scraped_products = scrape_amazon_products(amazon_url, num_pages_to_scrape)

# Report how many records were collected; num_pages_to_scrape is the number of
# pages requested, which the scraper may not have reached if it stopped early.
print(f"Scraped {len(scraped_products)} products from {num_pages_to_scrape} pages.")

# Persist the records next to the notebook.
export_to_csv(scraped_products, "amazon_products.csv")


Scraped 426 products from 20 pages.
