# In [1]:
from bs4 import BeautifulSoup
import requests 
import time
import datetime
import smtplib
import csv

# In [2]:
def fetch_page(url, headers, timeout=10):
    """Fetch the raw body of a web page.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent with the request (e.g. a User-Agent).
        timeout: Seconds to wait for the server before giving up.
            ``requests`` applies no timeout by default, so without one a
            stalled connection would block the caller indefinitely.

    Returns:
        The response body as bytes, or ``None`` if the request failed
        (connection error, timeout, or an HTTP error status).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
    except requests.RequestException as e:
        # Best-effort: report the failure and let the caller skip this page.
        print(f"Error fetching page: {e}")
        return None
    return response.content

def extract_text(element, default='Not available'):
    """Return the stripped text of ``element``, or ``default`` if it is falsy.

    Args:
        element: Object exposing ``get_text(strip=...)``, or a falsy value
            such as ``None`` when nothing was found.
        default: Value handed back when ``element`` is falsy.
    """
    if not element:
        return default
    return element.get_text(strip=True)

def extract_product_details(soup, product_id):
    """Read the title, feature bullets and rating text from a parsed page.

    Args:
        soup: Parsed page exposing ``find``.
        product_id: Accepted for the caller's convenience; not used by the
            lookups performed here.

    Returns:
        A ``(title, description, rating)`` tuple. Each entry is the text of
        the matching element, or the ``extract_text`` default when the
        element is missing.
    """
    title_node = soup.find(id='productTitle')
    bullets_node = soup.find('div', {'id': 'feature-bullets'})
    reviews_node = soup.find(id='averageCustomerReviews')
    return (
        extract_text(title_node),
        extract_text(bullets_node),
        extract_text(reviews_node),
    )

def extract_size_price(soup):
    """Collect a size -> price mapping from consecutively numbered elements.

    Elements are looked up by id as ``size_name_0``/``size_name_0_price``,
    ``size_name_1``/``size_name_1_price`` and so on. The walk stops at the
    first index where either element of the pair is missing.

    Returns:
        A dict mapping each size label to its price text (empty when the
        page has no ``size_name_0`` pair).
    """
    prices_by_size = {}
    index = 0
    while True:
        size_node = soup.find(id=f'size_name_{index}')
        price_node = soup.find(id=f'size_name_{index}_price')

        # Both halves of the pair must be present to record an entry.
        if not (size_node and price_node):
            return prices_by_size

        size_label = extract_text(size_node).split("\n")[0].strip()
        price_text = extract_text(price_node)
        price_text = price_text.replace("1 option from ", "").strip()

        prices_by_size[size_label] = price_text
        index += 1

def _product_id_from_url(url):
    """Return the path segment that follows '/dp/' in ``url``, or None."""
    _, marker, tail = url.partition('/dp/')
    if not marker:
        return None
    # The ID ends at the next path, query or fragment delimiter.
    for delimiter in ('/', '?', '#'):
        tail = tail.split(delimiter)[0]
    return tail or None


def scrape_and_save(urls, headers, output_file):
    """Scrape product details from several URLs and write them to a CSV file.

    One row is written per size/price pair found on each page. URLs that do
    not contain a ``/dp/<id>`` segment, and pages that cannot be fetched, are
    skipped so that one bad entry does not abort the whole run.

    Args:
        urls: Iterable of product page URLs containing a ``/dp/<id>`` segment.
        headers: HTTP headers passed through to ``fetch_page``.
        output_file: Path of the CSV file to create; an existing file is
            overwritten.
    """
    fieldnames = [
        'Product ID', 'Product Name', 'Description',
        'Store', 'Rating', 'Size', 'Price',
    ]
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for url in urls:
            # Validate the URL before spending a network request on it.
            product_id = _product_id_from_url(url)
            if product_id is None:
                print(f"Skipping {url}: no product ID found after '/dp/'")
                continue

            page_content = fetch_page(url, headers)
            if not page_content:
                # fetch_page has already reported the failure.
                continue

            soup = BeautifulSoup(page_content, "html.parser")
            product_title, product_description, rating = extract_product_details(soup, product_id)
            size_price_dict = extract_size_price(soup)

            if not size_price_dict:
                # Rows are emitted per size; make the empty case visible
                # instead of silently dropping the product.
                print(f"Warning: no size/price entries found for {url}; no rows written")

            for size, price in size_price_dict.items():
                writer.writerow({
                    'Product ID': product_id,
                    'Product Name': product_title,
                    'Description': product_description,
                    'Store': 'Amazon UK',
                    'Rating': rating,
                    'Size': size,
                    'Price': price,
                })
            print(f"Processed {url}")

# Example usage
# NOTE: these statements run at module level, so executing this file/cell
# performs the network requests and (re)creates the output CSV immediately.

# Product pages to scrape. scrape_and_save derives each product ID from the
# path segment that follows '/dp/'.
urls = [
    'https://www.amazon.co.uk/Amazon-Brand-Sensitive-Unscented-wipes/dp/B07V2N4SJY/ref=zg_bs_g_baby_d_sccl_1/257-9862628-6554964?psc=1',
    'https://www.amazon.co.uk/WaterWipes-Sensitive-Newborn-Biodegradable-Unscented/dp/B08MXSBRSB/ref=zg_bs_g_baby_d_sccl_2/257-9862628-6554964?psc=1',
]

# Request headers sent with every fetch; the User-Agent is a desktop Chrome
# string (presumably so the request looks like a browser -- confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
}

# Destination CSV; opened in write mode by scrape_and_save, so any existing
# file at this path is overwritten.
output_file = 'products.csv'
scrape_and_save(urls, headers, output_file)

# Output:
# Processed https://www.amazon.co.uk/Amazon-Brand-Sensitive-Unscented-wipes/dp/B07V2N4SJY/ref=zg_bs_g_baby_d_sccl_1/257-9862628-6554964?psc=1
# Processed https://www.amazon.co.uk/WaterWipes-Sensitive-Newborn-Biodegradable-Unscented/dp/B08MXSBRSB/ref=zg_bs_g_baby_d_sccl_2/257-9862628-6554964?psc=1
