In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Build the Chrome options: no visible window, plus the two flags that make
# headless runs more stable.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--headless',               # run the browser without a UI
    '--no-sandbox',             # recommended alongside headless mode
    '--disable-dev-shm-usage',  # do not rely on shared memory
):
    options.add_argument(chrome_flag)

# The driver binary is resolved by webdriver_manager at start-up.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Open the "vitamin c" search results. Note that the URL's page parameter is
# set to 4, so scraping begins from that results page.
base_url = 'https://www.netmeds.com/catalogsearch/result/vitamin%20c/all?prod_meds%5Bpage%5D=4'
driver.get(base_url)

# Shared state used by the scraping code below: the collected rows and the
# number of rows after which scraping stops.
product_data = []
max_products = 100

def extract_product_details():
    """Record every not-yet-seen product card on the current page.

    Uses the module-level ``driver`` to find all ``div.cat-item`` elements
    and appends one ``{'title': ..., 'price': ...}`` dict per new product to
    the module-level ``product_data`` list, printing progress as it goes.

    This function is called again after every scroll, and each call sees all
    cards in the DOM, including those recorded by an earlier call. Cards
    whose (title, price) pair is already present in ``product_data`` are
    therefore skipped; without that check the same products would be appended
    on every pass. A consequence is that two distinct cards with an identical
    title and price are stored only once.

    Returns:
        bool: ``False`` once ``product_data`` holds ``max_products`` rows
        (the caller should stop scrolling), ``True`` otherwise.
    """
    products = driver.find_elements(By.CSS_SELECTOR, 'div.cat-item ')

    # Keys of the rows gathered so far, rebuilt from product_data so the
    # function keeps no hidden state of its own.
    seen = {(row.get('title'), row.get('price')) for row in product_data}

    for product in products:
        if len(product_data) >= max_products:  # Stop if we have enough data
            return False

        try:
            # A missing element yields a placeholder rather than an error.
            title_elements = product.find_elements(By.CSS_SELECTOR, 'h3.clsgetname')
            title = title_elements[0].text if title_elements else 'No title'

            price_elements = product.find_elements(By.CSS_SELECTOR, 'span.final-price')
            price = price_elements[0].text if price_elements else 'No price'

            key = (title, price)
            if key in seen:
                # Already recorded on an earlier pass (or earlier in this one).
                continue

            product_info = {'title': title, 'price': price}
            print(f"Title: {title}")
            print(f"Price: {price}")

            product_data.append(product_info)
            seen.add(key)

            # Print product information to show the scraping progress
            print(f"Scraped product {len(product_data)}: {product_info}")

        except Exception as e:
            # Best effort: one unreadable card (e.g. an element that went
            # stale mid-read) must not abort the whole run.
            print(f"Error while scraping product: {e}")
            continue

    # Also report "full" when the limit was reached on the very last card.
    return len(product_data) < max_products

# Scroll the page and extract products until we reach the max product limit,
# or until scrolling stops producing anything new.
scroll_pause_time = 2  # Seconds to wait after each scroll for new products to load
max_idle_scrolls = 3   # Consecutive passes that add nothing before giving up
idle_scrolls = 0

try:
    while len(product_data) < max_products:
        count_before = len(product_data)
        extract_success = extract_product_details()

        if not extract_success:
            break

        if len(product_data) == count_before:
            # Nothing was added on this pass. Tolerate a few such passes in
            # case loading is slow, then stop; otherwise a page with fewer
            # than max_products items would keep this loop running forever.
            idle_scrolls += 1
            if idle_scrolls >= max_idle_scrolls:
                print(f"No new products after {idle_scrolls} scrolls; stopping.")
                break
        else:
            idle_scrolls = 0

        # Scroll down to load more products
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait for the page to load new products
        time.sleep(scroll_pause_time)
finally:
    # Always shut the browser down, even if scraping raised, so no Chrome
    # process is left behind.
    driver.quit()

# Convert the product data into a DataFrame and save it to an Excel file
df = pd.DataFrame(product_data)
df.to_excel('netmeds_c.xlsx', index=False)  # Use .to_excel() for Excel format
print('Data saved to netmeds_c.xlsx.')

Title: Vitaminhaat Vitamin-C with Bioflavonoids & Rosehip Capsule 90's
Price: ₹1007.37
Scraped product 1: {'title': "Vitaminhaat Vitamin-C with Bioflavonoids & Rosehip Capsule 90's", 'price': '₹1007.37'}
Title: Vitaminhaat Vitamin-C Capsule 90's
Price: ₹818.37
Scraped product 2: {'title': "Vitaminhaat Vitamin-C Capsule 90's", 'price': '₹818.37'}
Title: Vitaminhaat Vitamin-C with Bioflavonoids & Rosehip Capsule 60's
Price: ₹1007.37
Scraped product 3: {'title': "Vitaminhaat Vitamin-C with Bioflavonoids & Rosehip Capsule 60's", 'price': '₹1007.37'}
Title: Zenius Vitamin C Lemon Gummies 30's
Price: ₹549.45
Scraped product 4: {'title': "Zenius Vitamin C Lemon Gummies 30's", 'price': '₹549.45'}
Title: Zenius Vitamin C Orange Gummies 30's
Price: ₹549.45
Scraped product 5: {'title': "Zenius Vitamin C Orange Gummies 30's", 'price': '₹549.45'}
Title: Zenius Vitamin C Lychee Gummies 30's
Price: ₹549.45
Scraped product 6: {'title': "Zenius Vitamin C Lychee Gummies 30's", 'price': '₹549.45'}
Title: