In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
from selenium.common.exceptions import WebDriverException

# Set up the WebDriver with headless option
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Uncomment for headless mode
options.add_argument('--no-sandbox')  # Recommended option for headless mode
options.add_argument('--disable-dev-shm-usage')  # Disable shared memory usage for better stability
options.add_argument('--remote-debugging-port=9222')  # Prevents Chrome from disconnecting DevTools

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Start with the first page
base_url = 'https://www.bigbasket.com/ps/?q=icecreams&nc=as'
driver.get(base_url)

# Initialize an empty list to store product data
product_data = []
max_products = 150  # Set limit to stop after collecting a certain number of products

def extract_product_details():
    products = driver.find_elements(By.CSS_SELECTOR, 'div.SKUDeck___StyledDiv-sc-1e5d9gk-0.eA-dmzP')

    for product in products:
        if len(product_data) >= max_products:  # Stop if we have enough data
            return False

        product_info = {}

        try:
            # Extract product link (using correct CSS selector)
            link_elements = product.find_elements(By.CSS_SELECTOR, 'a')
            product_link = link_elements[0].get_attribute('href') if link_elements else 'No link'
            product_info['link'] = product_link
        except Exception:
            product_info['link'] = 'No link'

        # Now go to the individual product page to extract details
        if product_info['link'] != 'No link':
            try:
                # Open product link in a new tab
                driver.execute_script(f"window.open('{product_info['link']}', '_blank');")
                driver.switch_to.window(driver.window_handles[1])

                # Wait for the product page to load
                time.sleep(3)

                # Extract title
                try:
                    title_elements = driver.find_elements(By.CSS_SELECTOR, 'h1.Description___StyledH-sc-82a36a-2.bofYPK')
                    product_info['title'] = title_elements[0].text if title_elements else 'No title'
                except Exception:
                    product_info['title'] = 'No title'

                # Extract price
                try:
                    price_elements = driver.find_elements(By.CSS_SELECTOR, 'td.Description___StyledTd-sc-82a36a-4.fLZywG')
                    product_info['price'] = price_elements[0].text if price_elements else 'No price'
                except Exception:
                    product_info['price'] = 'No price'

                # Extract ratings
                try:
                    rating_elements = driver.find_elements(By.XPATH, '//*[@id="siteLayout"]/div/div/section[1]/div[2]/section[1]/div[2]/div/span/span')
                    product_info['rating'] = rating_elements[0].text if rating_elements else 'No rating'
                except Exception:
                    product_info['rating'] = 'No rating'

                # Extract number of reviews
                try:
                    reviews_elements = driver.find_elements(By.CSS_SELECTOR, 'p.leading-md.text-black.m-0')
                    product_info['reviews'] = reviews_elements[0].text if reviews_elements else 'No reviews'
                except Exception:
                    product_info['reviews'] = 'No reviews'

                print(f"Scraped details from product page: {product_info}")

                # Close the product tab and switch back to the search results tab
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

                # Wait for the page to return to the main search
                time.sleep(3)

            except Exception as e:
                print(f"Error while scraping product page: {e}")
                product_info['title'] = 'No title'
                product_info['price'] = 'No price'
                product_info['rating'] = 'No rating'
                product_info['reviews'] = 'No reviews'

        # Append the product info to the list
        product_data.append(product_info)

        # Print product information to show the scraping progress
        print(f"Scraped product {len(product_data)}: {product_info}")

    return True  # Continue scraping


# Scroll the page and extract products until we reach the max product limit
scroll_pause_time = 4  # Increased pause time for smoother scrolling

while len(product_data) < max_products:
    try:
        extract_success = extract_product_details()

        if not extract_success:
            break

        # Scroll down to load more products
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait for the page to load new products
        time.sleep(scroll_pause_time)

    except WebDriverException as e:
        print(f"WebDriverException occurred: {e}")
        # Optionally: exit loop or handle the exception
        break

# Close the driver after scraping
driver.quit()

# Convert the product data into a DataFrame and save it to an Excel file
df = pd.DataFrame(product_data)
df.to_excel('bb_icecream.xlsx', index=False)  # Use .to_excel() for Excel format
print('Data saved to bb_icecream.xlsx.')

Scraped details from product page: {'link': 'https://www.bigbasket.com/pd/40300878/hangyo-sitaphal-ice-cream-made-with-real-fruit-125-ml/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=1&t_s=Sitaphal+Ice+Cream+-+Made+With+Real+Fruit', 'title': 'Hangyo Sitaphal Ice Cream - Made With Real Fruit, 125 ml', 'price': 'Price: ₹50', 'rating': '3.7', 'reviews': '44 Ratings & 0 Reviews'}
Scraped product 1: {'link': 'https://www.bigbasket.com/pd/40300878/hangyo-sitaphal-ice-cream-made-with-real-fruit-125-ml/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=1&t_s=Sitaphal+Ice+Cream+-+Made+With+Real+Fruit', 'title': 'Hangyo Sitaphal Ice Cream - Made With Real Fruit, 125 ml', 'price': 'Price: ₹50', 'rating': '3.7', 'reviews': '44 Ratings & 0 Reviews'}
Scraped details from product page: {'link': 'https://www.bigbasket.com/pd/40300871/hangyo-belgian-chocolate-ice-cream-1-l-tub/?nc=cl-prod-list&t_pos_sec=1&t_pos_item=2&t_s=Belgian+Chocolate+Ice+Cream', 'title': 'No title', 'price': 'No price', 'rating': 'No rating', 're

KeyboardInterrupt: 

In [3]:
# Convert the product data into a DataFrame and save it to an Excel file
df = pd.DataFrame(product_data)
df.to_excel('bb_icecream.xlsx', index=False)  # Use .to_excel() for Excel format
print('Data saved to bb_icecream.xlsx.')

Data saved to bb_icecream.xlsx.
