In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import time

# 1. Start headless Chrome browser
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run without opening a browser window
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)

# 2. Open IndiaMART Tiles page
url = "https://dir.indiamart.com/impcat/floor-tiles.html"
driver.get(url)

# 3. Let JavaScript load content
time.sleep(5)  # adjust if needed

# 4. Extract page source
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")

# 5. Close the browser
driver.quit()

# 6. Extract product listings
products = []
cards = soup.find_all("div", class_=lambda x: x and "prd-list" in x)


for card in cards:
    try:
        product_name = card.find("a", class_="prd-name").get_text(strip=True)
    except:
        product_name = None

    try:
        price = card.find("span", class_="prd-price").get_text(strip=True)
    except:
        price = None

    try:
        seller_location = card.find("span", class_="cmp-city").get_text(strip=True)
    except:
        seller_location = None

    try:
        description = card.find("p", class_="clr5 mt-5").get_text(strip=True)
    except:
        description = None

    products.append({
        "product_name": product_name,
        "price": price,
        "seller_location": seller_location,
        "description": description,
        "category": "Tiles",
        "scraped_at": datetime.now().isoformat()
    })

# 7. Convert to DataFrame
df = pd.DataFrame(products)
df.to_csv("indiamart_tiles_products.csv", index=False)

print(f"✅ Scraped {len(df)} products from IndiaMART (Tiles)")


✅ Scraped 0 products from IndiaMART (Tiles)


In [3]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import time

# Step 1: Set up Selenium Chrome driver
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Runs without opening browser
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)

# Step 2: Go to IndiaMART Tiles page
url = "https://dir.indiamart.com/impcat/floor-tiles.html"
driver.get(url)

# Step 3: Wait for page to load
time.sleep(5)

# Step 4: Get page content
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")

# Step 5: Close browser
driver.quit()

# Step 6: Extract product-like links
products = []
for a in soup.find_all("a"):
    text = a.get_text(strip=True)
    href = a.get("href")

    # Heuristic: check for tile/marble words in text
    if text and "tile" in text.lower():
        products.append({
            "product_name": text,
            "product_link": "https://www.indiamart.com" + href if href and href.startswith("/") else href,
            "category": "Tiles",
            "scraped_at": datetime.now().isoformat()
        })

# Step 7: Save to CSV
df = pd.DataFrame(products)
df.to_csv("indiamart_tiles_products.csv", index=False)

print(f"✅ Scraped {len(df)} tile products from IndiaMART")



✅ Scraped 134 tile products from IndiaMART


In [4]:
import pandas as pd

df = pd.read_csv("indiamart_tiles_products.csv")
df.head(10)


Unnamed: 0,product_name,product_link,category,scraped_at
0,"Vitrified,Ceramic Floor & Wall Tiles",https://www.indiamart.com/indianexporters/tile...,Tiles,2025-08-05T17:15:34.985433
1,Vitrified Floor Tile,https://www.indiamart.com/impcat/vitrified-flo...,Tiles,2025-08-05T17:15:34.985433
2,Ceramic Floor Tiles,https://www.indiamart.com/impcat/ceramic-floor...,Tiles,2025-08-05T17:15:34.985433
3,Parking Tile,https://www.indiamart.com/impcat/parking-tile....,Tiles,2025-08-05T17:15:34.985433
4,Double Charge Tiles,https://www.indiamart.com/impcat/double-charge...,Tiles,2025-08-05T17:15:34.985433
5,GVT Tiles,https://www.indiamart.com/impcat/glazed-vitrif...,Tiles,2025-08-05T17:15:34.985433
6,Designer Floor Tiles,https://www.indiamart.com/impcat/designer-floo...,Tiles,2025-08-05T17:15:34.985433
7,"Ceramic GFT BDW Texas Brown FT Floor Tile, Siz...",https://www.indiamart.com/proddetail/gft-bdw-t...,Tiles,2025-08-05T17:15:34.985433
8,"Vitrified Sahara Gris Floor Tiles, Size: 600x6...",https://www.indiamart.com/proddetail/sahara-gr...,Tiles,2025-08-05T17:15:34.985433
9,"Ceramic Gft Bhf Maksi Dot Silver Floor Tile, S...",https://www.indiamart.com/proddetail/gft-bhf-m...,Tiles,2025-08-05T17:15:34.985433


In [6]:
import pandas as pd

In [7]:
df = pd.read_csv("indiamart_tiles_products.csv")

In [10]:
# 1. Drop missing product names or links
df = df.dropna(subset=["product_name", "product_link"])

In [11]:
df.count()

product_name    134
product_link    134
category        134
scraped_at      134
dtype: int64

In [12]:
df["product_name"] = df ["product_name"].str.strip().str.title()

In [13]:
df["product_link"] = df["product_link"].apply(lambda x: x if str(x).startswith("http") else "https://www.indiamart.com" + str(x))

In [14]:
df["brand"] = "Unknown"
df["price"] = "N/A"
df["seller_location"] = "Unknown"

In [15]:
df.count()

product_name       134
product_link       134
category           134
scraped_at         134
brand              134
price              134
seller_location    134
dtype: int64

In [16]:
df = df[["product_name", 
    "brand", 
    "price", 
    "seller_location", 
    "product_link", 
    "category", 
    "scraped_at"]]

In [20]:
df.head(10)

Unnamed: 0,product_name,brand,price,seller_location,product_link,category,scraped_at
0,"Vitrified,Ceramic Floor & Wall Tiles",Unknown,,Unknown,https://www.indiamart.com/indianexporters/tile...,Tiles,2025-08-05T17:15:34.985433
1,Vitrified Floor Tile,Unknown,,Unknown,https://www.indiamart.com/impcat/vitrified-flo...,Tiles,2025-08-05T17:15:34.985433
2,Ceramic Floor Tiles,Unknown,,Unknown,https://www.indiamart.com/impcat/ceramic-floor...,Tiles,2025-08-05T17:15:34.985433
3,Parking Tile,Unknown,,Unknown,https://www.indiamart.com/impcat/parking-tile....,Tiles,2025-08-05T17:15:34.985433
4,Double Charge Tiles,Unknown,,Unknown,https://www.indiamart.com/impcat/double-charge...,Tiles,2025-08-05T17:15:34.985433
5,Gvt Tiles,Unknown,,Unknown,https://www.indiamart.com/impcat/glazed-vitrif...,Tiles,2025-08-05T17:15:34.985433
6,Designer Floor Tiles,Unknown,,Unknown,https://www.indiamart.com/impcat/designer-floo...,Tiles,2025-08-05T17:15:34.985433
7,"Ceramic Gft Bdw Texas Brown Ft Floor Tile, Siz...",Unknown,,Unknown,https://www.indiamart.com/proddetail/gft-bdw-t...,Tiles,2025-08-05T17:15:34.985433
8,"Vitrified Sahara Gris Floor Tiles, Size: 600X6...",Unknown,,Unknown,https://www.indiamart.com/proddetail/sahara-gr...,Tiles,2025-08-05T17:15:34.985433
9,"Ceramic Gft Bhf Maksi Dot Silver Floor Tile, S...",Unknown,,Unknown,https://www.indiamart.com/proddetail/gft-bhf-m...,Tiles,2025-08-05T17:15:34.985433


In [21]:
df.to_csv("tiles_silver_cleaned.csv", index=False)