In [1]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [2]:
# Listing page on amazon.in that the scraper starts from.
AMAZON_URL = (
    "https://www.amazon.in/s"
    "?rh=n%3A6612025031&fs=true&ref=lp_6612025031_sar"
)

# Request headers carrying a desktop Chrome User-Agent string and an
# English Accept-Language preference.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/119.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

In [3]:
def get_seller_from_product_page(product_url, timeout=10):
    """
    Fetches the seller name from the product's detail page.

    Args:
        product_url: URL of the product detail page to download.
        timeout: Seconds to wait for the server before abandoning the
            request. Defaults to 10.

    Returns:
        The stripped text of the ``<a id="sellerProfileTriggerId">`` element.
        "Unknown" is returned when the response status is not 200, when the
        element is missing or empty, or when any exception is raised while
        fetching or parsing (the error is printed, not re-raised).
    """
    try:
        # requests applies no timeout unless one is passed, so a stalled
        # connection would otherwise block the caller indefinitely.
        response = requests.get(product_url, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            return "Unknown"

        soup = BeautifulSoup(response.text, "html.parser")
        seller_tag = soup.find("a", id="sellerProfileTriggerId")
        if seller_tag is None:
            return "Unknown"

        # An anchor with no visible text is treated like a missing seller so
        # callers always get a non-empty value.
        return seller_tag.text.strip() or "Unknown"
    except Exception as e:
        # Best-effort lookup: a failure here must not abort the caller's loop.
        print(f"Error fetching seller for {product_url}: {e}")
        return "Unknown"

In [4]:
def _parse_search_result(product):
    """
    Extracts the listing fields from one search-result container.

    Args:
        product: BeautifulSoup tag for a single
            ``div[data-component-type="s-search-result"]`` element.

    Returns:
        A ``(details, product_url)`` tuple. ``details`` is a dict with the
        keys 'Product Name', 'Price' and 'Rating', each set to "N/A" when the
        corresponding element is missing. ``product_url`` is the absolute
        detail-page URL, or None when the container has no link with an href.
    """
    title_tag = product.find("h2")
    price_tag = product.find("span", class_="a-price-whole")
    rating_tag = product.find("span", class_="a-icon-alt")

    # The rating is the first whitespace-separated token of the label text;
    # an empty label yields no tokens, so fall back to "N/A" instead of
    # raising IndexError.
    rating_words = rating_tag.text.split() if rating_tag else []

    details = {
        'Product Name': title_tag.text.strip() if title_tag else "N/A",
        'Price': price_tag.text.strip() if price_tag else "N/A",
        'Rating': rating_words[0] if rating_words else "N/A",
    }

    # href=True skips anchors that carry the class but no href attribute,
    # which would otherwise raise KeyError on lookup.
    link_tag = product.find("a", class_="a-link-normal", href=True)
    if link_tag is None:
        return details, None

    # urljoin resolves relative hrefs against the site root and leaves
    # already-absolute hrefs untouched (plain concatenation would corrupt them).
    return details, urljoin("https://www.amazon.in", link_tag["href"])


def scrape_amazon_products(timeout=10):
    """
    Scrapes product details (Product Name, Price, Rating, and Seller Name) from Amazon's search results page.

    Downloads AMAZON_URL, parses every search-result container, looks up the
    seller of each product via get_seller_from_product_page, and writes the
    rows out through save_to_csv.

    Args:
        timeout: Seconds to wait for the listing page before abandoning the
            request. Defaults to 10.

    Returns:
        A list of dicts with the keys 'Product Name', 'Price', 'Rating' and
        'Seller Name'. An empty list is returned when the listing page does
        not answer with status 200 or when fetching/parsing raises. A failure
        while saving the CSV is reported but does not discard the scraped rows.
    """
    try:
        print("Fetching Amazon page...")
        # Pass an explicit timeout: requests would otherwise wait forever on
        # a stalled connection.
        response = requests.get(AMAZON_URL, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            print("Failed to retrieve page. Status code:", response.status_code)
            return []

        soup = BeautifulSoup(response.text, "html.parser")

        # Finding all product containers on the page
        product_containers = soup.find_all("div", {"data-component-type": "s-search-result"})
        print(f"Found {len(product_containers)} products.")

        products = []
        for product in product_containers:
            product_details, product_url = _parse_search_result(product)

            # The seller is only shown on the detail page, so it costs one
            # extra request per product that has a link.
            if product_url:
                product_details['Seller Name'] = get_seller_from_product_page(product_url)
            else:
                product_details['Seller Name'] = "Unknown"

            products.append(product_details)
    except Exception as e:
        print("Error occurred while scraping Amazon:", str(e))
        return []

    # Saving is handled separately so that a write failure (e.g. a locked or
    # read-only file) does not throw away data that was already scraped.
    try:
        save_to_csv(products)
        print("Data saved to Amazon_Products_Scrape.csv")
    except Exception as e:
        print("Error occurred while saving CSV:", str(e))

    return products

In [5]:
def save_to_csv(products, filename='Amazon_Products_Scrape.csv'):
    """
    Saves the extracted product data into a CSV file with proper column names.

    Args:
        products: Iterable of dicts whose keys are drawn from
            "Product Name", "Price", "Rating" and "Seller Name".
        filename: Path of the CSV file to create or overwrite. Defaults to
            'Amazon_Products_Scrape.csv' in the current working directory.

    Raises:
        ValueError: If a row contains a key outside the four column names
            (raised by csv.DictWriter).
        OSError: If the file cannot be opened for writing.
    """
    fieldnames = ["Product Name", "Price", "Rating", "Seller Name"]
    # newline='' lets the csv module control line endings itself, which avoids
    # blank lines between rows on Windows.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(products)

In [6]:
def main():
    """
    Runs the scraper once and prints how many products it returned.
    """
    scraped = scrape_amazon_products()
    total = len(scraped)
    print(f"Number of products found: {total}")

# Kick off the scrape when this code is executed as the main module.
if __name__ == "__main__":
    main()

Fetching Amazon page...
Found 33 products.
Data saved to Amazon_Products_Scrape.csv
Number of products found: 33
