# In [19]:  (IPython/Jupyter cell prompt left over from export; commented out so the file parses)
import json
import logging
import os
import urllib.parse

import requests
from bs4 import BeautifulSoup

# Root logger, with its level set once at import time; every function in this
# module logs through it.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Scheme + host that search URLs are built from.
BASE_URL = "https://www.amazon.com"

# SECURITY NOTE(review): this URL embeds a proxy username and password in
# source control. It is kept only as a fallback so existing deployments keep
# working. Set AMAZON_PROXY_URL in the environment, rotate the credential,
# and then delete this literal.
_DEFAULT_PROXY_URL = (
    "http://brd-customer-hl_557ac006-zone-web_unlocker1:zrd756846jna"
    "@brd.superproxy.io:33335"
)

# An unset or empty environment variable falls back to the default above.
_PROXY_URL = os.environ.get("AMAZON_PROXY_URL") or _DEFAULT_PROXY_URL

# Both schemes go through the same proxy endpoint.
PROXIES = {
    "http": _PROXY_URL,
    "https": _PROXY_URL,
}

# Browser-like request headers; the User-Agent and sec-ch-ua values identify
# the client as Microsoft Edge 135 on Windows.
HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "en-US,en;q=0.9,en-IN;q=0.8",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
    "sec-ch-ua": '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
}

def initializerequest(url):
    """Issue a GET for *url* through the configured proxy.

    The request is sent with the module-level ``HEADERS`` and ``PROXIES``,
    with certificate verification turned off and a 10-second timeout.

    Args:
        url: Address to fetch.

    Returns:
        The ``requests`` response object, or ``None`` if the call raised any
        exception (the exception is logged at error level).
    """
    try:
        return requests.get(
            url,
            headers=HEADERS,
            proxies=PROXIES,
            timeout=10,
            # NOTE(review): verification is disabled; presumably required by
            # the proxy in use -- confirm before enabling it.
            verify=False,
        )
    except Exception as exc:
        logger.error(f"Request error: {exc}")
    return None

def _parse_amazon_product(product, url):
    """Turn one search-result element into a result record.

    Args:
        product: BeautifulSoup tag for a single ``div`` with
            ``role="listitem"``.
        url: The search-page URL the element was found on.

    Returns:
        The record dict, or ``None`` when the image, title, price or link
        markup is missing from the element.
    """
    image_tag = product.find("img", {"class": "s-image"})
    title_box = product.find("div", {"data-cy": "title-recipe"})
    price_box = product.find("span", {"class": "a-price"})
    link_tag = product.find("a")
    if (
        image_tag is None
        or title_box is None
        or price_box is None
        or link_tag is None
    ):
        return None

    # Title and price text are read from the first <span> inside their boxes.
    title_span = title_box.find("span")
    price_span = price_box.find("span")
    href = link_tag.get("href")
    if title_span is None or price_span is None or href is None:
        return None

    return {
        "Website Name": "Amazon",
        "Website URL": url,
        "Product Title": title_span.text.strip(),
        "Product Price": price_span.text.strip(),
        "Product Price Currency": "$",
        "Product Images": [image_tag.get("src")],
        # urljoin resolves relative hrefs against BASE_URL and leaves
        # already-absolute hrefs intact, unlike plain string concatenation.
        "Product Link": urllib.parse.urljoin(BASE_URL, href),
        "Selling Type": "Fixed",
        "Product Description": "",
    }


def get_amazon_products(response, url):
    """Extract product records from one Amazon search-results page.

    Args:
        response: The value returned by ``initializerequest`` -- a response
            object, or ``None`` when the request failed.
        url: The URL that was requested; used in the warning message and
            copied into every record as ``"Website URL"``.

    Returns:
        A list of record dicts, one per ``div[role=listitem]`` element that
        has image, title, price and link markup. The list is empty when the
        response is missing or its status code is not 200.
    """
    results = []
    if not response or response.status_code != 200:
        logger.warning(f"Failed to fetch: {url}")
        return results

    soup = BeautifulSoup(response.text, "html.parser")
    skipped = 0
    for product in soup.find_all("div", {"role": "listitem"}):
        record = _parse_amazon_product(product, url)
        if record is None:
            # Elements without the full set of fields are dropped rather
            # than emitted half-filled.
            skipped += 1
            continue
        results.append(record)

    if skipped:
        logger.debug("Skipped %d incomplete listing(s) on %s", skipped, url)
    return results

def scrape_amazon(keyword, max_pages=5):
    """Scrape Amazon search results for *keyword* across several pages.

    Pages ``1`` through ``max_pages`` are requested one after another with
    ``initializerequest`` and parsed with ``get_amazon_products``. A page
    that fails to load or yields nothing simply contributes no records.

    Args:
        keyword: Search phrase; it is URL-encoded before use.
        max_pages: Number of result pages to request. Defaults to 5.

    Returns:
        A list of product record dicts on success, or
        ``{"error": <message>}`` if an exception escaped the loop
        (the exception is logged at error level).
    """
    try:
        # The encoded keyword is identical for every page, so build it once.
        query = urllib.parse.quote_plus(keyword)
        all_results = []
        for page in range(1, max_pages + 1):
            url = f"{BASE_URL}/s?k={query}&page={page}&refresh={page}&ref=sr_pg_{page}"
            response = initializerequest(url)
            all_results.extend(get_amazon_products(response, url))
        return all_results
    except Exception as e:
        logger.error(f"Scrape error: {e}")
        return {"error": str(e)}

def _json_response(status_code, payload):
    """Wrap *payload* in the ``statusCode``/``body`` dict the handler returns."""
    return {
        'statusCode': status_code,
        'body': json.dumps(payload),
    }


def lambda_handler(event, context):
    """
    AWS Lambda handler function to scrape Amazon.com for a given keyword.

    Example event:
    {
        "query": "wireless headphones"
    }

    Args:
        event: Mapping that carries the search phrase under ``"query"``.
        context: Lambda context object; not used.

    Returns:
        A dict with ``statusCode`` and a JSON-encoded ``body``:
        * 400 when ``"query"`` is missing, empty, blank, or not a string;
        * 500 when scraping reports an error or an exception is raised;
        * 200 with the list of product records otherwise.
    """
    try:
        query = event.get('query', '')

        # Surrounding whitespace carries no meaning for a search phrase, and
        # a blank phrase is treated the same as a missing one.
        if isinstance(query, str):
            query = query.strip()

        if not query:
            return _json_response(400, {'error': 'Missing "query" parameter'})

        # Reject non-string values up front: they are a client error, not a
        # server failure surfacing later from URL encoding.
        if not isinstance(query, str):
            return _json_response(400, {'error': '"query" must be a string'})

        results = scrape_amazon(query)

        # scrape_amazon signals failure by returning {"error": ...}.
        if isinstance(results, dict) and "error" in results:
            return _json_response(500, results)

        return _json_response(200, results)
    except Exception as e:
        logger.exception("Unhandled exception in lambda_handler")
        return _json_response(500, {'error': str(e)})
