In [4]:
import requests
from bs4 import BeautifulSoup
import os

# Create a session so the browser-like headers below are sent with every
# request made through it, and connections/cookies are reused across calls.
session = requests.Session()

# Headers to mimic a browser request.
# Accept-Encoding is deliberately NOT overridden: requests advertises only the
# encodings it can actually decode. Hard-coding "br" invites a Brotli-compressed
# reply, which arrives undecoded (and therefore unparseable) when no Brotli
# library is installed in the environment.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.amazon.com/",
}

# Merge the custom headers into the session defaults
session.headers.update(headers)

# Amazon search terms; one product is scraped for each of these 5 categories
categories = [
    "laptops",
    "headphones",
    "smartphones",
    "watches",
    "tablets",
]

# Create the directory the product images are saved into (no error if it
# already exists, so the cell can be re-run safely)
os.makedirs("images", exist_ok=True)

# Seconds to wait on connect/read before giving up. requests has no default
# timeout, so without this a stalled connection would hang the run forever.
REQUEST_TIMEOUT = 10

# For each category: fetch the search results page, take the first product,
# print its title and image URL, and save its thumbnail under images/.
for category in categories:
    # Let requests build the query string so the search term is URL-encoded
    # (keeps working for terms containing spaces or special characters).
    try:
        response = session.get(
            "https://www.amazon.com/s",
            params={"k": category},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # A network failure for one category must not abort the others.
        print(f"Failed to retrieve content for category '{category}': {exc}")
        continue

    # Only a 200 response carries a results page worth parsing
    if response.status_code != 200:
        print(f"Failed to retrieve content for category '{category}'. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    # Get one product per category
    products = soup.find_all('div', {'data-component-type': 's-search-result'}, limit=1)

    print(f"\nProduct in category: {category.capitalize()}\n")

    if not products:
        # A 200 page without result cards (e.g. a bot-check page) would
        # otherwise produce a header followed by silence.
        print(f"No products found for category '{category}'.")
        continue

    for idx, product in enumerate(products):
        # Extract product title; a result card without an <h2> yields None
        # instead of raising AttributeError.
        heading = product.h2
        title = heading.text.strip() if heading else None

        # Extract image URL; .get() tolerates an <img> lacking a src attribute
        img_tag = product.find('img', class_='s-image')
        img_url = img_tag.get('src') if img_tag else None

        # Save the image
        if img_url:
            img_filename = f"images/{category}_{idx + 1}.jpg"  # Save with category name and index
            try:
                # Go through the session so the browser-like headers are sent
                # for the image too, and reject error responses so an error
                # page is never written to disk as a .jpg.
                img_response = session.get(img_url, timeout=REQUEST_TIMEOUT)
                img_response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Failed to download image for category '{category}': {exc}")
            else:
                with open(img_filename, 'wb') as f:
                    f.write(img_response.content)
                print(f"Image saved as: {img_filename}")

        print(f"Product Title: {title}")
        print(f"Image URL: {img_url}")
        print("-" * 50)



Product in category: Laptops

Image saved as: images/laptops_1.jpg
Product Title: HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 64 GB Storage, 14-inch Micro-edge HD Display, Windows 11 Home, Thin & Portable, 4K Graphics, One Year of Microsoft 365 (14-dq0040nr, Snowflake White)
Image URL: https://m.media-amazon.com/images/I/815uX7wkOZS._AC_UY218_.jpg
--------------------------------------------------

Product in category: Headphones

Image saved as: images/headphones_1.jpg
Product Title: Beats Studio Pro - Wireless Bluetooth Noise Cancelling Headphones - Personalized Spatial Audio, USB-C Lossless Audio, Apple & Android Compatibility, Up to 40 Hours Battery Life - Black
Image URL: https://m.media-amazon.com/images/I/61u-OaDSfQL._AC_UY218_.jpg
--------------------------------------------------

Product in category: Smartphones

Image saved as: images/smartphones_1.jpg
Product Title: SAMSUNG Galaxy S24 Ultra Cell Phone, 256GB AI Smartphone, Unlocked Android, 200MP, 100x Zoom Cameras, Long 