In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Scraper state shared with the helper functions below.
product_data = []  # one dict per scraped product card
page_number = 1  # 1-based index of the results page currently being scraped

# Search-results URL for "sunscreen", split per query parameter for readability;
# the adjacent literals concatenate to one string.
base_url = (
    'https://www.flipkart.com/search?q=sunscreen'
    '&sid=g9b%2Cema%2C5la%2Cxrh'
    '&as=on&as-show=on'
    '&otracker=AS_QueryStore_OrganicAutoSuggest_1_4_na_na_na'
    '&otracker1=AS_QueryStore_OrganicAutoSuggest_1_4_na_na_na'
    '&as-pos=1&as-type=RECENT'
    '&suggestionId=sunscreen%7CSunscreen'
    '&requestId=c93be26e-86cf-41a0-a352-a8c29290930b'
    '&as-backfill=on'
)

# Launch Chrome through a driver binary resolved by webdriver_manager.
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # optional: run without a visible window
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Open the first results page.
driver.get(base_url)

# (output key, CSS selector relative to a product card, placeholder when absent).
# Order matters: it fixes the key order of each row appended to product_data.
_PRODUCT_FIELDS = (
    ('title', 'a.wjcEIp', 'No title'),
    ('price', 'div.Nx9bqj', 'No price'),
    ('rating', 'div.XQDdHH', 'No rating'),
    ('reviews', 'span.Wphh3N', 'No reviews'),
)


def _first_text(product, selector, default):
    """Return the text of the first element under *product* matching *selector*.

    Returns *default* when nothing matches or when the lookup raises.
    """
    try:
        matches = product.find_elements(By.CSS_SELECTOR, selector)
        return matches[0].text if matches else default
    except Exception:
        # Deliberate best-effort: one unreadable field must not abort the
        # whole page, so any lookup failure degrades to the placeholder.
        return default


def extract_product_details():
    """Append one row per product card on the current page to ``product_data``.

    Waits up to 10 seconds for elements matching ``div.slAVV4`` to be present,
    prints a progress line using the global ``page_number``, then records the
    ``title``, ``price``, ``rating`` and ``reviews`` of every card. A field
    that cannot be read is stored as its ``'No ...'`` placeholder string.

    Uses the module-level ``driver``; mutates the module-level
    ``product_data``. Returns ``None``.

    Raises:
        Whatever ``WebDriverWait.until`` raises when no card appears in time
        (``TimeoutException`` per the Selenium documentation); this is not
        caught here.
    """
    products = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.slAVV4'))
    )

    # Progress indicator: shows which page is being scraped.
    print(f'Scraping page {page_number}...')

    for product in products:
        product_data.append({
            key: _first_text(product, selector, default)
            for key, selector, default in _PRODUCT_FIELDS
        })


# Function to handle pagination
def get_next_page():
    """Click the pagination link that advances to the next results page.

    Uses the module-level ``driver``.

    Returns:
        True if a link was clicked (after a fixed 5-second pause to let the
        next page load); False if no usable link was found or the click
        failed for any reason.
    """
    try:
        links = driver.find_elements(By.CSS_SELECTOR, 'a._9QVEpD')
        # NOTE(review): this class looks like a generic pagination-link style,
        # so from page 2 onward it may also match a "Previous" link that comes
        # first in the document -- confirm against the live page. Skipping any
        # match labelled "Previous" is harmless if only one link ever matches
        # and prevents bouncing back to the prior page if two do.
        next_link = next(
            (link for link in links if 'previous' not in link.text.casefold()),
            None,
        )
        if next_link is None:
            return False  # No forward link: this is the last page
        next_link.click()
    except Exception:
        # Deliberate best-effort: any failure to locate or click the link is
        # reported as "no next page" so the caller can stop and save its data.
        return False

    time.sleep(5)  # Wait for the next page to load
    return True


# Scrape result pages one after another, stopping at MAX_PAGES or when there
# is no further page, then write everything collected to an Excel file.
MAX_PAGES = 30

try:
    while page_number <= MAX_PAGES:
        extract_product_details()

        # Check the page cap *before* trying to advance, so the last allowed
        # page does not trigger a click and a 5-second wait for a page that
        # would never be scraped. Either stop condition reports the total.
        if page_number >= MAX_PAGES or not get_next_page():
            print(f'Scraping completed. Total pages scraped: {page_number}')
            break

        page_number += 1
finally:
    # Always shut the browser down, even if a page failed to load; otherwise
    # the Chrome process is left running after an exception.
    driver.quit()

# Convert the product data into a DataFrame and save it to an Excel file
df = pd.DataFrame(product_data)
df.to_excel('flipkart_sunscreen.xlsx', index=False)  # Use .to_excel() for Excel format
print('Data saved to flipkart_sunscreen.xlsx.')

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Data saved to flipkart_sunscreen.xlsx.
