In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Accumulator for scraped rows; extract_product_details() appends one dict
# per product to this list.
product_data = []

# Search-results page the scrape starts from.
base_url = 'https://www.amazon.in/s?k=pants+for+men&crid=123K8GN55J4S4&sprefix=pan%2Caps%2C231&ref=nb_sb_ss_ts-doa-p_1_3'

# Build a Chrome session; the '--headless' flag is optional and only
# suppresses the visible browser window.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Open the first results page.
driver.get(base_url)

def _first_match_text(product, selector, label, default, use_inner_text=False):
    """Return the text of the first element under *product* matching *selector*.

    Args:
        product: WebElement for one search-result item.
        selector: CSS selector evaluated relative to *product*.
        label: Field name used in the error message printed on failure.
        default: Value returned when nothing matches or the lookup fails.
        use_inner_text: Read the ``innerText`` attribute instead of ``.text``.

    Returns:
        The element text, or *default* when there is no match, the
        ``innerText`` attribute is missing, or Selenium raises.
    """
    # Imported locally so this block does not depend on a file-level import.
    from selenium.common.exceptions import WebDriverException

    try:
        matches = product.find_elements(By.CSS_SELECTOR, selector)
        if not matches:
            return default
        if use_inner_text:
            raw = matches[0].get_attribute("innerText")
            # get_attribute returns None when the attribute is absent.
            return raw.strip() if raw is not None else default
        return matches[0].text
    except WebDriverException as e:
        # e.g. the element went stale while the page was re-rendering.
        print(f"Error extracting {label}: {e}")
        return default


def _clean_title(title):
    """Keep only the part of *title* before the first '|' and then the first '-'."""
    for separator in ('|', '-'):
        if separator in title:
            title = title.split(separator, 1)[0].strip()
    return title


def extract_product_details():
    """Scrape every product on the current results page into ``product_data``.

    Waits up to 15 seconds for the result items to be present, then appends
    one dict per product with the keys ``'Title'``, ``'Price'``, ``'Rating'``
    and ``'Reviews'`` to the module-level ``product_data`` list. Result items
    without a title are skipped; the other fields fall back to
    ``'No price'``, ``'No rating'`` and ``'No reviews'``.

    Raises:
        selenium.common.exceptions.TimeoutException: if no result items
            appear within the wait period.
    """
    products = WebDriverWait(driver, 15).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.s-main-slot .s-result-item'))
    )
    for product in products:
        title = _first_match_text(
            product, 'span.a-size-base-plus.a-color-base', 'title', ''
        )
        # Skip result items that have no title (or whose title lookup failed).
        if not title:
            continue

        product_data.append({
            'Title': _clean_title(title),
            'Price': _first_match_text(
                product, 'span.a-price-whole', 'price', 'No price'
            ),
            # .text is not used here; the original reads innerText for ratings.
            'Rating': _first_match_text(
                product, 'span.a-icon-alt', 'rating', 'No rating',
                use_inner_text=True,
            ),
            'Reviews': _first_match_text(
                product, 'span.a-size-base.s-underline-text', 'reviews', 'No reviews'
            ),
        })

# Loop through all result pages. The loop runs inside try/finally so the
# Chrome process is always shut down, even when a page fails to load or a
# Selenium call raises part-way through the scrape.
i = 0
try:
    while True:
        extract_product_details()

        i += 1
        print(f"Page {i}")

        # find_elements returns an empty list (instead of raising) when the
        # pagination control is absent.
        next_button = driver.find_elements(By.CSS_SELECTOR, '.s-pagination-next')

        if not next_button:
            print('No "Next" button found. Exiting.')
            break

        # get_attribute may return None; treat that as "no classes".
        next_button_class = next_button[0].get_attribute('class') or ''

        if 's-pagination-disabled' in next_button_class:
            print('Next button is disabled. Exiting.')
            break

        # Click the "Next" button
        next_button[0].click()

        # Wait for the next page to load
        time.sleep(10)
finally:
    # Close the driver after scraping (or after a failure)
    driver.quit()

# Convert the data into a pandas DataFrame
df = pd.DataFrame(product_data)

# Save the data to an Excel file
df.to_excel('amazon_M-cloth_data.xlsx', index=False)

print('Data saved to amazon_M-cloth_data.xlsx')


Page 1
Page 2
Page 3
Page 4
Page 5
Page 6
Page 7
Next button is disabled. Exiting.
Data saved to amazon_M-cloth_data.xlsx
