# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Define headers
# Browser-like request headers sent with every HTTP request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Avast/131.0.0.0",
    "Accept-Language": "en-US,en;q=0.9",
    # All search and product URLs in this script are on www.amazon.in, so the
    # Referer names that same site rather than amazon.com.
    "Referer": "https://www.amazon.in/",
}

# Function to fetch product title
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with surrounding whitespace removed.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped title text, or ``"N/A"`` when the lookup raises
        ``AttributeError`` (for example because the element is missing).
    """
    try:
        title_node = soup.find("span", attrs={"id": "productTitle"})
        return title_node.text.strip()
    except AttributeError:
        # find() yielded None, so there was no .text to read.
        return "N/A"

# Function to fetch product price
def get_price(soup):
    """Return the whole-number part of the product price as displayed.

    Finds the "price to pay" ``<span>`` and, inside it, the
    ``a-price-whole`` ``<span>``; the stripped text of the inner element is
    returned unchanged (no numeric conversion is attempted).

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped price text, or ``"N/A"`` when either lookup raises
        ``AttributeError`` (for example because an element is missing).
    """
    # NOTE(review): the outer lookup passes a multi-word class string;
    # presumably it must match the element's class attribute exactly - confirm.
    price_box_class = "a-price aok-align-center reinventPricePriceToPayMargin priceToPay"
    try:
        price_box = soup.find("span", attrs={"class": price_box_class})
        whole_part = price_box.find("span", attrs={"class": "a-price-whole"})
        return whole_part.text.strip()
    except AttributeError:
        return "N/A"

# Function to fetch product rating
def get_rating(soup):
    """Return the rating text found in a parsed product page.

    Reads the text of the ``<span class="a-icon-alt">`` element returned by
    ``soup.find`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped rating text, or ``"N/A"`` when the lookup raises
        ``AttributeError`` (for example because the element is missing).
    """
    try:
        rating_node = soup.find("span", attrs={"class": "a-icon-alt"})
        return rating_node.text.strip()
    except AttributeError:
        return "N/A"

# Function to fetch product availability
def get_availability(soup):
    """Return the availability message found in a parsed product page.

    Reads the text of the ``<span class="a-size-medium a-color-success">``
    element returned by ``soup.find`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped availability text, or ``"Not Available"`` when the
        lookup raises ``AttributeError`` (for example because the element is
        missing).
    """
    try:
        status_node = soup.find("span", attrs={"class": "a-size-medium a-color-success"})
        return status_node.text.strip()
    except AttributeError:
        # Unlike the other extractors, the fallback here is "Not Available".
        return "Not Available"

# Function to fetch product ID (ASIN)
def get_product_id(soup):
    """Return the ASIN listed in the product's details table.

    Finds the ``<th>`` whose string contains ``"ASIN"`` and returns the
    stripped text of the ``<td>`` sibling that follows it.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped cell text, or ``"N/A"`` when any step raises
        ``AttributeError`` (for example because the row is missing).
    """

    def is_asin_header(label):
        # The label may be None; only a non-empty string containing "ASIN" qualifies.
        return label and "ASIN" in label

    try:
        header_cell = soup.find("th", string=is_asin_header)
        value_cell = header_cell.find_next_sibling("td")
        return value_cell.text.strip()
    except AttributeError:
        return "N/A"

# Function to fetch released year
def get_released_year(soup):
    """Return the "Date First Available" value from the details table.

    Finds the ``<th>`` whose string contains ``"Date First Available"`` and
    returns the stripped text of the ``<td>`` sibling that follows it. The
    cell text is returned as-is; no year is parsed out of it.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped cell text, or ``"N/A"`` when any step raises
        ``AttributeError`` (for example because the row is missing).
    """

    def is_release_header(label):
        # The label may be None; only a non-empty matching string qualifies.
        return label and "Date First Available" in label

    try:
        header_cell = soup.find("th", string=is_release_header)
        value_cell = header_cell.find_next_sibling("td")
        return value_cell.text.strip()
    except AttributeError:
        return "N/A"

# Function to fetch special features
def get_special_features(soup):
    """Return the "Special Features" value from the details table.

    Finds the ``<th>`` whose string contains ``"Special Features"`` and
    returns the stripped text of the ``<td>`` sibling that follows it.

    Args:
        soup: Parsed page object exposing ``find`` (a BeautifulSoup tree in
            this script).

    Returns:
        The stripped cell text, or ``"N/A"`` when any step raises
        ``AttributeError`` (for example because the row is missing).
    """

    def is_features_header(label):
        # The label may be None; only a non-empty matching string qualifies.
        return label and "Special Features" in label

    try:
        header_cell = soup.find("th", string=is_features_header)
        value_cell = header_cell.find_next_sibling("td")
        return value_cell.text.strip()
    except AttributeError:
        return "N/A"

# Function to scrape a single page (using the specific URL with page number)
def scrape_page(url, headers, data, timeout=15):
    """Scrape one search-results page and append each product's details to *data*.

    The search page at *url* is fetched, every product link on it is
    followed, and one value per column is appended to the lists in *data*.
    Failures are reported with ``print`` and never raised, so a single bad
    page or product does not stop the overall run.

    Args:
        url: Search-results URL to fetch.
        headers: HTTP headers sent with every request.
        data: Dict mapping the column names "Title", "Price", "Rating",
            "Availability", "Product_ID", "Released_Year" and
            "Special_Features" to lists; mutated in place.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        None. Results are delivered by mutating *data*.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, connection reset, timeout): skip this page.
        print(f"Failed to fetch page: {url}, Error: {e}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch page: {url}, Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, "html.parser")

    # Find product links (if any). Anchors without an href are skipped, and
    # urljoin leaves already-absolute hrefs intact instead of gluing the
    # site prefix in front of them.
    links = soup.find_all("a", attrs={"class": "a-link-normal s-no-outline"})
    links_list = [
        urljoin("https://www.amazon.in", href)
        for href in (link.get("href") for link in links)
        if href
    ]

    # Loop through product links and fetch details
    for link in links_list:
        try:
            product_page = requests.get(link, headers=headers, timeout=timeout)
            if product_page.status_code != 200:
                # An error/blocked page would only yield a row of "N/A" values.
                print(f"Failed to fetch product: {link}, Status code: {product_page.status_code}")
                continue
            product_soup = BeautifulSoup(product_page.content, "html.parser")

            # Extract every field first, then append, so that a failure
            # half-way through cannot leave the columns with unequal lengths
            # (which would make pd.DataFrame(data) fail later).
            row = {
                "Title": get_title(product_soup),
                "Price": get_price(product_soup),
                "Rating": get_rating(product_soup),
                "Availability": get_availability(product_soup),
                "Product_ID": get_product_id(product_soup),
                "Released_Year": get_released_year(product_soup),
                "Special_Features": get_special_features(product_soup),
            }
            for column, value in row.items():
                data[column].append(value)
        except Exception as e:
            # Best-effort per product: report and move on to the next link.
            print(f"Error fetching details for {link}: {e}")
        finally:
            # Pause between product requests, after failures as well, to
            # avoid hammering the site.
            time.sleep(random.uniform(1, 3))

# Main code
if __name__ == "__main__":
    # Search-results URL template. It has two "{}" placeholders (the
    # "ref=sr_pg_" marker and the "page=" parameter); both are filled with
    # the page number.
    base_url = "https://www.amazon.in/s?k=Original+samsung+headphones+only+wired+and+wireless&i=electronics&crid=1LT4VD15B1ANP&qid=1737550580&sprefix=original+samsung+headphones+only+wired+and+wireless%2Celectronics%2C236&ref=sr_pg_{}&page={}&xpid=eVYBOpAtBAROW"

    # One empty list per output column, in the order the CSV columns appear.
    data = {
        column: []
        for column in (
            "Title",
            "Price",
            "Rating",
            "Availability",
            "Product_ID",
            "Released_Year",
            "Special_Features",
        )
    }

    # Walk search-result pages 1 through 20; scrape_page fills `data` in place.
    first_page, last_page = 1, 20
    for page in range(first_page, last_page + 1):
        print(f"Scraping page {page}...")
        url = base_url.format(page, page)
        scrape_page(url, headers, data)

    # Write everything collected to a CSV file without the index column.
    output_file = "amazon_data_with_special_features.csv"
    df = pd.DataFrame(data)
    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")