In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
from selenium.common.exceptions import WebDriverException, NoSuchElementException

# Set up the WebDriver with headless option
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run headless for performance
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Start with the first page
base_url = 'https://www.bigbasket.com/ps/?q=shampoo&nc=as&page=1&filter=%5B%7B%22name%22%3A%22Pack+Size%22%2C%22type%22%3A%22packsize%22%2C%22values%22%3A%5B%7B%22id%22%3A9999%2C%22name%22%3A%221+L%22%2C%22image%22%3A%22%22%2C%22slug%22%3A%221+L%22%2C%22level%22%3A0%2C%22url%22%3A%221+L%22%2C%22is_selected%22%3Afalse%7D%5D%7D%5D'
driver.get(base_url)

# Initialize an empty list to store product data
product_data = []
max_products = 150  # Set limit to stop after collecting a certain number of products

def extract_product_details():
    products = driver.find_elements(By.CSS_SELECTOR, 'li.PaginateItems___StyledLi2-sc-1yrbjdr-1.kUiNOF')

    for product in products:
        if len(product_data) >= max_products:  # Stop if we have enough data
            return False

        product_info = {}

        try:
            # Extract product link
            link_elements = product.find_elements(By.CSS_SELECTOR, 'a')
            product_link = link_elements[0].get_attribute('href') if link_elements else 'No link'
            product_info['link'] = product_link
        except NoSuchElementException:
            product_info['link'] = 'No link'

        # Now go to the individual product page to extract details
        if product_info['link'] != 'No link':
            try:
                # Open product link in a new tab
                driver.execute_script(f"window.open('{product_info['link']}', '_blank');")
                driver.switch_to.window(driver.window_handles[1])

                # Wait for the product page to load
                time.sleep(3)

                # Extract title
                try:
                    title_element = driver.find_element(By.CSS_SELECTOR, 'h1.product-title')
                    product_info['title'] = title_element.text if title_element else 'No title'
                except NoSuchElementException:
                    product_info['title'] = 'No title'

                # Extract price
                try:
                    price_element = driver.find_element(By.CSS_SELECTOR, 'span.price')
                    product_info['price'] = price_element.text if price_element else 'No price'
                except NoSuchElementException:
                    product_info['price'] = 'No price'

                # Extract ratings
                try:
                    rating_element = driver.find_element(By.CSS_SELECTOR, 'span.rating')
                    product_info['rating'] = rating_element.text if rating_element else 'No rating'
                except NoSuchElementException:
                    product_info['rating'] = 'No rating'

                # Extract number of reviews
                try:
                    reviews_element = driver.find_element(By.CSS_SELECTOR, 'span.reviews')
                    product_info['reviews'] = reviews_element.text if reviews_element else 'No reviews'
                except NoSuchElementException:
                    product_info['reviews'] = 'No reviews'

                print(f"Scraped details from product page: {product_info}")

                # Close the product tab and switch back to the search results tab
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

                # Wait for the page to return to the main search
                time.sleep(3)

            except WebDriverException as e:
                print(f"Error while scraping product page: {e}")
                product_info['title'] = 'No title'
                product_info['price'] = 'No price'
                product_info['rating'] = 'No rating'
                product_info['reviews'] = 'No reviews'

        # Append the product info to the list
        product_data.append(product_info)

        # Print product information to show the scraping progress
        print(f"Scraped product {len(product_data)}: {product_info}")

    return True  # Continue scraping


# Scroll the page and extract products until we reach the max product limit
scroll_pause_time = 4  # Increased pause time for smoother scrolling

while len(product_data) < max_products:
    try:
        extract_success = extract_product_details()

        if not extract_success:
            break

        # Scroll down to load more products
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait for the page to load new products
        time.sleep(scroll_pause_time)

    except WebDriverException as e:
        print(f"WebDriverException occurred: {e}")
        break

# Close the driver after scraping
driver.quit()

# Convert the product data into a DataFrame and save it to an Excel file
df = pd.DataFrame(product_data)
df.to_excel('bb_sham.xlsx', index=False)  # Use .to_excel() for Excel format
print('Data saved to bb_sham.xlsx.')



Scraped details from product page: {'link': 'https://www.bigbasket.com/pd/40158272/tresemme-keratin-smooth-shampoo-1-l/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=1&t_s=Keratin+Smooth+Shampoo', 'title': 'No title', 'price': 'No price', 'rating': 'No rating', 'reviews': 'No reviews'}
Scraped product 1: {'link': 'https://www.bigbasket.com/pd/40158272/tresemme-keratin-smooth-shampoo-1-l/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=1&t_s=Keratin+Smooth+Shampoo', 'title': 'No title', 'price': 'No price', 'rating': 'No rating', 'reviews': 'No reviews'}
Scraped details from product page: {'link': 'https://www.bigbasket.com/pd/40246844/meera-hair-fall-care-shampoo-with-shikakai-badam-reduces-split-ends-cleanses-scalp-1-l/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=2&t_s=Hair+Fall+Care+Shampoo+-+With+Shikakai+%2526+Badam%252C+For+Strong+%2526+Healthy+Hair%252C+For+Men+%2526+Women', 'title': 'No title', 'price': 'No price', 'rating': 'No rating', 'reviews': 'No reviews'}
Scraped product 2: {'link': 'https://

KeyboardInterrupt: 