### 1- import libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import time
import datetime
import csv
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter


### 2- requests handling

Some background information:
- The User-Agent request header is a characteristic string that lets servers and network peers identify the application, operating system, vendor, and/or version of the requesting user agent. 

In [2]:
# Shared HTTP session whose adapter retries failed requests automatically
session = requests.Session()

# Retry policy: at most 3 retries, backoff factor 0.3, retried on 500/502/503/504 responses
retry = Retry(
    total=3,
    backoff_factor=0.3,
    status_forcelist=[500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retry)

# Register the same retrying adapter for both URL schemes
for scheme in ('http://', 'https://'):
    session.mount(scheme, adapter)

# Product page to scrape
URL = 'https://www.amazon.co.uk/Johnsons-Cotton-Buds-200/dp/B09843WY1B/ref=zg_bs_g_baby_d_sccl_3/257-9862628-6554964'

# Browser-like User-Agent string passed along in the request headers
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
headers = {'User-Agent': USER_AGENT}

# Fetching the page content
# requests waits indefinitely by default; an explicit timeout keeps the cell
# from hanging forever if the server stops responding.
REQUEST_TIMEOUT_SECONDS = 10
response = session.get(URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)


def _text_or_default(parsed_page, element_id, default):
    """Return the stripped text of the element with id ``element_id``.

    Args:
        parsed_page: BeautifulSoup document to search.
        element_id: value of the HTML ``id`` attribute to look up.
        default: string returned when no such element exists.

    Returns:
        The element's text with surrounding whitespace stripped, or ``default``.
    """
    element = parsed_page.find(id=element_id)
    return element.get_text(strip=True) if element is not None else default


if response.status_code == 200:
    soup = BeautifulSoup(response.content, "html.parser")

    # Extracting product details
    # The product ID is the path segment that follows '/dp/' in the URL.
    product_id = URL.split('/dp/')[1].split('/')[0]
    product_title = _text_or_default(soup, 'productTitle', 'Title not available')
    product_description = _text_or_default(soup, 'feature-bullets', 'Description not available')
    rating = _text_or_default(soup, 'averageCustomerReviews', 'Rating not available')
    store = 'Amazon UK'

    # Size and price extraction
    # Variants are looked up by the ids size_name_0, size_name_1, ... until
    # either the size element or its matching price element is missing.
    i = 0
    size_price_dict = {}

    while True:
        size_element = soup.find(id=f'size_name_{i}')
        price_element = soup.find(id=f'size_name_{i}_price')

        if size_element is None or price_element is None:
            break

        size = size_element.get_text(strip=True)
        # Drop the "1 option from " prefix so only the price text remains.
        price = price_element.get_text(strip=True).replace("1 option from ", "")

        if size and price:
            size_price_dict[size] = price

        i += 1

    # When no size variants were found, still emit one row so the scraped
    # title/description/rating are not lost in a header-only CSV.
    size_price_rows = list(size_price_dict.items()) or [
        ('Size not available', 'Price not available')
    ]

    # Saving to CSV
    filename = f"amazon_product_{product_id}_{datetime.datetime.now().strftime('%Y-%m-%d')}.csv"
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Product ID', 'Product Name', 'Description', 'Store', 'Rating', 'Size', 'Price'])
        for size, price in size_price_rows:
            writer.writerow([product_id, product_title, product_description, store, rating, size, price])

    print(f"Data saved to {filename}")

else:
    print(f"Failed to retrieve the URL, status code: {response.status_code}")


Data saved to amazon_product_B09843WY1B_2024-10-05.csv
