In [14]:
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# ---------------- CONFIG ----------------
BASE_URL = "https://dir.indiamart.com/search.mp?ss=industrial+pumps&prdsrc=1"
NUM_PAGES = 8
OUTPUT_FILE = "indiamart_industrial_pumps_selenium.csv"
CATEGORY = "Industrial Pumps"

PAGE_LOAD_WAIT = 3  # seconds to wait after navigation for dynamic content
PAGE_DELAY = 2      # seconds to pause between pages

# City/state keywords used to spot the location line inside a card's text.
# Order matters: the first keyword found in the card wins.
KNOWN_LOCATIONS = (
    "Delhi", "Karnataka", "Maharashtra", "Telangana", "Gujarat", "Tamil Nadu",
    "Andhra Pradesh", "West Bengal", "Kerala", "Bengaluru", "Mumbai", "Hyderabad",
)

# First numeric amount in a price string: digits with optional thousands
# separators and an optional decimal part (e.g. "25,000" or "1,250.50").
_PRICE_RE = re.compile(r"\d[\d,]*(?:\.\d+)?")


# ---------------- PARSING HELPERS ----------------
def parse_price(price_text):
    """Extract the first numeric amount from a price label.

    Handles currency symbols, thousands separators and trailing unit text
    such as "₹ 25,000/Piece". For a range ("₹ 5,000 - 10,000") the first
    (lower) amount is returned.

    Args:
        price_text: Raw text of the price element; may be empty or None.

    Returns:
        An int for whole amounts, a float when the amount has a decimal
        part, or None when the text contains no number.
    """
    if not price_text:
        return None
    match = _PRICE_RE.search(price_text)
    if match is None:
        return None
    digits = match.group(0).replace(",", "")
    return float(digits) if "." in digits else int(digits)


def extract_location(card_text, known_locations=KNOWN_LOCATIONS):
    """Return the first line of *card_text* that mentions a known location.

    Keywords are tried in the order given, so an earlier keyword takes
    priority over a later one even if the later one appears higher up in
    the text. Matching is a case-sensitive substring test.

    Args:
        card_text: Full visible text of a listing card.
        known_locations: Iterable of city/state keywords to look for.

    Returns:
        The matching line with surrounding whitespace stripped, or None if
        no keyword occurs anywhere in the text.
    """
    lines = card_text.split("\n")
    for keyword in known_locations:
        for line in lines:
            if keyword in line:
                return line.strip()
    return None


# ---------------- SCRAPER ----------------
def scrape_card(card):
    """Build one output record from a listing card element.

    Every field is best-effort: if Selenium cannot locate or read an
    element, that field is recorded as None instead of aborting the scrape.
    Only Selenium errors are suppressed, so Ctrl-C and genuine programming
    errors still surface.

    Args:
        card: Selenium WebElement for a single listing card.

    Returns:
        dict with keys product_name, supplier_name, category, location,
        price and product_url.
    """
    # Product name + URL come from the first anchor inside the card.
    try:
        product_tag = card.find_element(By.TAG_NAME, "a")
        product_name = product_tag.text.strip()
        product_url = product_tag.get_attribute("href")
    except WebDriverException:
        product_name = None
        product_url = None

    # Supplier name
    try:
        supplier_name = card.find_element(By.CLASS_NAME, "companyname").text.strip()
    except WebDriverException:
        supplier_name = None

    # Location: there is no dedicated selector, so scan the card's text.
    try:
        location = extract_location(card.text)
    except WebDriverException:
        location = None

    # Price
    try:
        price = parse_price(card.find_element(By.CLASS_NAME, "price").text)
    except WebDriverException:
        price = None

    return {
        "product_name": product_name,
        "supplier_name": supplier_name,
        "category": CATEGORY,
        "location": location,
        "price": price,
        "product_url": product_url,
    }


def build_driver():
    """Create a headless Chrome WebDriver with a desktop-sized window."""
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--disable-gpu")
    return webdriver.Chrome(options=options)


def scrape_listings(driver, base_url=BASE_URL, num_pages=NUM_PAGES):
    """Scrape listing cards from pages 1..num_pages of the search results.

    Args:
        driver: An open Selenium WebDriver.
        base_url: Search URL; "&page=<n>" is appended for each page.
        num_pages: Number of result pages to visit.

    Returns:
        List of record dicts, one per card, in page order.
    """
    records = []
    for page in range(1, num_pages + 1):
        url = f"{base_url}&page={page}"
        print(f"Scraping page {page}...")
        driver.get(url)
        time.sleep(PAGE_LOAD_WAIT)  # wait for dynamic content

        cards = driver.find_elements(By.CLASS_NAME, "card")
        print(f"Found {len(cards)} listings")

        records.extend(scrape_card(card) for card in cards)

        time.sleep(PAGE_DELAY)
    return records


# ---------------- RUN + SAVE CSV ----------------
# __name__ is "__main__" both in a notebook cell and when run as a script,
# so the scrape still runs there; the guard only prevents it on import.
if __name__ == "__main__":
    driver = build_driver()
    try:
        all_data = scrape_listings(driver)
    finally:
        # Always release the browser, even if a page load fails mid-run.
        driver.quit()

    df = pd.DataFrame(all_data)
    df.to_csv(OUTPUT_FILE, index=False)

    print(f"✅ Saved {len(df)} records to {OUTPUT_FILE}")


Scraping page 1...
Found 10 listings
Scraping page 2...
Found 10 listings
Scraping page 3...
Found 10 listings
Scraping page 4...
Found 10 listings
Scraping page 5...
Found 10 listings
Scraping page 6...
Found 10 listings
Scraping page 7...
Found 10 listings
Scraping page 8...
Found 10 listings
✅ Saved 80 records to indiamart_industrial_pumps_selenium.csv
