In [None]:
from selenium import webdriver
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time

def get_edge_options(proxy=None):
    """Build the Edge ``Options`` object used by the scraper.

    Applies the window/automation-related switches below and, when *proxy*
    is truthy, routes traffic through it via ``--proxy-server``.
    """
    edge_opts = Options()

    # Command-line switches, applied in this order.
    for flag in (
        '--start-maximized',
        '--disable-blink-features=AutomationControlled',
    ):
        edge_opts.add_argument(flag)

    # Experimental options, applied in this order.
    for option_name, option_value in (
        ("excludeSwitches", ["enable-automation"]),
        ('useAutomationExtension', False),
    ):
        edge_opts.add_experimental_option(option_name, option_value)

    # The proxy switch is only added when a proxy was actually supplied.
    if proxy:
        edge_opts.add_argument(f'--proxy-server={proxy}')

    return edge_opts

def switch_proxy(driver, new_proxy):
    """Quit *driver* and return a fresh Edge driver configured with *new_proxy*.

    The options for the replacement are built before the old driver is shut
    down; the caller must use the returned driver from then on.
    """
    replacement_options = get_edge_options(new_proxy)
    driver.quit()
    return webdriver.Edge(options=replacement_options)

def get_product_data(driver, url):
    """Open *url* in *driver* and scrape the product fields from the page.

    Returns a dict with the keys 'title', 'price', 'rating', 'reviews' and
    'availability', each filled by the matching ``get_*`` helper.
    """
    driver.get(url)
    time.sleep(2)  # fixed pause so the page has time to load

    page = BeautifulSoup(driver.page_source, "html.parser")

    # Values are evaluated top to bottom, so the helpers run in the same
    # order as the keys are listed.
    return {
        'title': get_title(page),
        'price': get_price(page),
        'rating': get_rating(page),
        'reviews': get_review_count(page),
        'availability': get_availability(page),
    }

# Functions for scraping the data
def get_title(soup):
    """Return the stripped text of the ``span`` with id ``productTitle``.

    Returns "" when no such element is found or when an AttributeError is
    raised while looking it up or reading its text.
    """
    try:
        node = soup.find("span", attrs={"id": 'productTitle'})
        if not node:
            return ""
        return node.text.strip()
    except AttributeError:
        return ""

def get_price(soup):
    """Return the stripped text of the first ``span`` with class ``a-price-whole``.

    Returns "" when no such element is found or when an AttributeError is
    raised while looking it up or reading its text.
    """
    price_text = ""
    try:
        node = soup.find("span", class_="a-price-whole")
        if node:
            price_text = node.text.strip()
    except AttributeError:
        price_text = ""
    return price_text

def get_rating(soup):
    """Return the stripped text of the first ``span`` with class ``a-icon-alt``.

    Returns "" when no such element is found or when an AttributeError is
    raised while looking it up or reading its text.
    """
    try:
        node = soup.find("span", class_="a-icon-alt")
        if node:
            return node.text.strip()
        return ""
    except AttributeError:
        return ""

def get_review_count(soup):
    """Return the stripped text of the ``span`` with id ``acrCustomerReviewText``.

    Returns "" when no such element is found or when an AttributeError is
    raised while looking it up or reading its text.
    """
    review_text = ""
    try:
        node = soup.find("span", id="acrCustomerReviewText")
        if node:
            review_text = node.text.strip()
    except AttributeError:
        review_text = ""
    return review_text

def get_availability(soup):
    """Return the stripped text of the first ``span`` inside ``div#availability``.

    Returns "Not Available" when the div is missing, or when an
    AttributeError is raised along the way (for example when the div
    contains no ``span``).
    """
    try:
        container = soup.find("div", id="availability")
        if not container:
            return "Not Available"
        return container.find("span").text.strip()
    except AttributeError:
        return "Not Available"

def main(search_term, *, proxies=None, max_products=3,
         output_path="amazon_data.csv"):
    """Search amazon.in for *search_term* and save the first results to CSV.

    Each proxy is tried in turn with a fresh Edge driver. The first proxy
    for which the whole run succeeds (search page, product pages, CSV
    written) ends the loop; a failure is printed and the next proxy is tried.

    Args:
        search_term: Free-text query; it is URL-encoded before use.
        proxies: Proxy URLs to try in order. Defaults to the built-in list.
        max_products: Number of search results to visit (default 3).
        output_path: Destination CSV file (default "amazon_data.csv").
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus, urljoin

    if proxies is None:
        # NOTE: the second entry is a placeholder; replace it with a real
        # proxy or pass ``proxies=`` explicitly.
        proxies = ["http://13.234.24.116", "http://another.proxy:port"]

    base_url = "https://www.amazon.in"
    # quote_plus encodes spaces as '+' and also escapes characters such as
    # '&', '+' or '#' that would otherwise corrupt the query string.
    search_url = f"{base_url}/s?k={quote_plus(search_term)}"
    columns = ("title", "price", "rating", "reviews", "availability")

    for proxy in proxies:
        # Reset per attempt so the cleanup below never touches a driver that
        # a previous iteration has already quit.
        driver = None
        try:
            driver = webdriver.Edge(options=get_edge_options(proxy))

            driver.get(search_url)
            time.sleep(3)

            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ".s-result-item"))
            )

            soup = BeautifulSoup(driver.page_source, "html.parser")
            products = soup.find_all(
                "div", attrs={"data-component-type": "s-search-result"}
            )

            data = {column: [] for column in columns}

            # Visit each product page; a failure on one product is reported
            # and does not abort the others.
            for product in products[:max_products]:
                try:
                    link = product.find("a", class_="a-link-normal")
                    href = link.get("href") if link else None
                    if not href:
                        continue

                    # urljoin handles both relative and absolute hrefs.
                    product_url = urljoin(base_url, href)
                    print(".....")

                    product_data = get_product_data(driver, product_url)

                    for key in data:
                        data[key].append(product_data[key])

                    time.sleep(1)  # Wait between products
                except Exception as e:
                    print(f"Error processing product: {str(e)}")
                    continue

            # Drop rows without a title, then save. The column is reassigned
            # rather than modified with inplace=True, which is unreliable on
            # a column selection under pandas Copy-on-Write.
            df = pd.DataFrame.from_dict(data)
            df['title'] = df['title'].replace('', np.nan)
            df = df.dropna(subset=['title'])
            df.to_csv(output_path, header=True, index=False)
            print(f"Data saved to {output_path}")
            break  # Exit the proxy loop if successful

        except Exception as e:
            print(f"An error occurred with proxy {proxy}: {str(e)}")
        finally:
            # Quit exactly once per driver, on success and on failure alike.
            if driver is not None:
                driver.quit()
    else:
        # Reached only when no proxy attempt hit the ``break`` above.
        print("All proxies failed; no data was saved.")

# Script entry point: when this file is executed directly (rather than
# imported), run the scraper with the search term "laptop".
if __name__ == "__main__":
    main("laptop")