# Amazon product
This notebook extracts Amazon product information from a product URL. Because the page structure varies between products, the details are looked up under several alternative element IDs and classes.

In [110]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def _clean_text(node):
    """Return the text of a parsed HTML node, trimmed, or None if node is None.

    Besides ordinary whitespace, leading/trailing left-to-right and
    right-to-left marks (U+200E / U+200F) are removed; ``str.strip()`` alone
    does not treat them as whitespace, so they would otherwise stay glued to
    the scraped value.
    """
    if node is None:
        return None
    return node.text.strip().strip("\u200e\u200f").strip()


def _fetch_page_source(url):
    """Load ``url`` in headless Chrome and return the rendered HTML."""
    # Automatically download and use the correct ChromeDriver version
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode (no UI)
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        time.sleep(3)  # Wait for page to load
        return driver.page_source
    finally:
        # Always shut the browser down, even when loading the page fails,
        # so no Chrome/ChromeDriver process is left behind.
        driver.quit()


def get_amazon_product_details(url):
    """Scrape an Amazon product page and return its details as a dict.

    Args:
        url: Address of the product page to load.

    Returns:
        A dict with the keys "Product Name", "Price", "Manufacturer",
        "Dimensions", "Weight", "Warranty" and "Color" (each a string, or
        None when not found on the page), "Features" (list of bullet-point
        strings, possibly empty) and "Other Information" (dict of every
        remaining specification row, keyed by its header text).

    Raises:
        Any exception raised while starting the browser or loading the page
        propagates to the caller; the browser is closed first.
    """
    soup = BeautifulSoup(_fetch_page_source(url), "html.parser")

    product_details = {
        "Product Name": _clean_text(soup.find("span", {"id": "productTitle"})),
        "Price": _clean_text(soup.find("span", {"class": "a-price-whole"})),
        "Manufacturer": None,
        "Dimensions": None,
        "Weight": None,
        "Warranty": None,
        "Color": None,
        "Features": [],
        "Other Information": {},
    }

    # (substring looked for in the row header, field it fills); the first
    # matching entry wins, so the order here is significant.
    # NOTE(review): rows headed "Colour" do not match "Color" and therefore
    # land in "Other Information"; the script in this file reads them from
    # there, so confirm with callers before widening this match.
    known_fields = (
        ("Manufacturer", "Manufacturer"),
        ("Dimensions", "Dimensions"),
        ("Item Weight", "Weight"),
        ("Warranty", "Warranty"),
        ("Color", "Color"),
    )

    # Possible table IDs for product details
    table_ids = ["productDetails_detailBullets_sections1", "productDetails_techSpec_section_1"]

    for table_id in table_ids:
        details_table = soup.find("table", {"id": table_id})
        if not details_table:
            continue
        for row in details_table.find_all("tr"):
            key = _clean_text(row.find("th"))
            value = _clean_text(row.find("td"))
            if key is None or value is None:
                # Layout/spacer rows without a header or a value cell
                # carry no specification; skip them instead of crashing.
                continue
            for needle, field in known_fields:
                if needle in key:
                    product_details[field] = value
                    break
            else:
                product_details["Other Information"][key] = value

    # Extract bullet points under "Features"
    feature_list = soup.find("ul", {"class": "a-unordered-list a-vertical a-spacing-mini"})
    if feature_list:
        product_details["Features"] = [
            _clean_text(item)
            for item in feature_list.find_all("span", {"class": "a-list-item"})
        ]

    return product_details

# Example product page to scrape.
amazon_url = "https://www.amazon.in/Semi-Automatic-AR10SAMBGL124D-Black-Grey-Programs-Pulsator/dp/B0DK5S3PRZ/"
product_info = get_amazon_product_details(amazon_url)

# Alias kept so that anything referring to `product_details` keeps working.
product_details = product_info

# Start from the warranty found in the specification table (may be None) and
# let a "Warranty:" feature bullet override it when one is present.
warranty_label = "Warranty:"
warranty_text = product_info["Warranty"]

for feature in product_info.get("Features", []):
    if feature.startswith(warranty_label):
        # Drop only the leading label; slicing (rather than str.replace)
        # leaves any later "Warranty: " inside the sentence untouched and
        # also works when no space follows the colon.
        warranty_text = feature[len(warranty_label):].strip()

other_info = product_info["Other Information"]

# Specification rows copied verbatim from "Other Information"; a row that is
# missing on the page becomes None in the table.
other_info_columns = [
    "Brand",
    "Model",
    "Capacity",
    "Installation Type",
    "Part Number",
    "Special Features",
    "Control Console",
    "Access Location",
    "Included Components",
    "Batteries Required",
    "Country of Origin",
]

product_row = {
    "Product Name": product_info["Product Name"],
    "Price": product_info["Price"],
    "Manufacturer": product_info["Manufacturer"],
    "Dimensions": product_info["Dimensions"],
    "Weight": product_info["Weight"],
    "Warranty": warranty_text,
    "Colour": other_info.get("Colour"),
    "Features": "; ".join(product_info["Features"]),
}
product_row.update({column: other_info.get(column) for column in other_info_columns})

product_df = pd.DataFrame([product_row])

# Display the DataFrame
product_df

Unnamed: 0,Product Name,Price,Manufacturer,Dimensions,Weight,Warranty,Colour,Features,Brand,Model,Capacity,Installation Type,Part Number,Special Features,Control Console,Access Location,Included Components,Batteries Required,Country of Origin
0,Acer 10.0 Kg 5 Star Semi-Automatic Top Loading...,11999.0,‎Dixon Technologies (India) Limited,91.5 x 101.5 x 56 Centimeters,28 kg,2 years comprehensive and 5 years on motor.,‎Black-Grey,Semi Automatic Top Load washing machine: User-...,‎acer,‎AR10SAMBGL124D,‎10 Kilograms,‎Freestanding,‎AR10SAMBGL124D,"‎Protective Rat Mesh, Auto Restart, High Effic...",‎Push Button,‎Top Load,"‎DRAIN CLAMP, DRAIN PIPE, SAFETY COVER, INLET ...",‎No,‎India
