In [1]:
# cars24_maruti_scraper_edge.py
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ---------------- CONFIG ----------------
# Filesystem path of the msedgedriver executable passed to the Selenium Edge Service.
EDGE_DRIVER_PATH = r"C:/Drivers/msedgedriver.exe"   # <-- update this to your msedgedriver path
# Listing page to scrape per city; the key is the label written to the "City" column.
CITIES = {
    "Mumbai": "https://www.cars24.com/buy-used-maruti-cars-mumbai/",
    "Bengaluru": "https://www.cars24.com/buy-used-maruti-cars-bangalore/",
    "Ahmedabad": "https://www.cars24.com/buy-used-maruti-cars-ahmedabad/",
}
# Scrolling of a listing page stops as soon as this many links have been collected.
TARGET_PER_CITY = 250            # 👈 at least this many cars per city
# Scrolling also stops after this many consecutive scrolls that add no new links.
MAX_SCROLL_ATTEMPTS_WITHOUT_NEW = 10
# Hard upper bound on scroll iterations for a single city.
MAX_TOTAL_SCROLLS = 300
# Lower/upper bounds, in seconds, of the random pause taken after each scroll.
SCROLL_PAUSE_MIN = 1.0
SCROLL_PAUSE_MAX = 2.5
# ----------------------------------------

# Launch a maximized Edge window driven by the configured msedgedriver binary.
options = webdriver.EdgeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Edge(
    options=options,
    service=Service(EDGE_DRIVER_PATH),
)
# Shared explicit wait (20 second timeout) used when loading listing pages.
wait = WebDriverWait(driver, timeout=20)

def close_cookie_or_popup():
    """Best-effort dismissal of a cookie banner or popup on the current page.

    Looks at every ``<button>`` on the page and clicks the first one whose
    visible text resembles a consent/dismiss control, pauses briefly, and
    returns. Nothing is raised when no such button exists or a click fails;
    the caller simply carries on.
    """
    # Keywords matched as substrings of the lower-cased button text.
    substring_keywords = ("accept", "agree", "allow", "got it", "close", "dismiss")

    try:
        buttons = driver.find_elements(By.TAG_NAME, "button")
    except Exception:
        # Best effort only: a failed lookup must not abort the scrape.
        # ``Exception`` (not a bare except) keeps Ctrl-C working.
        return

    for button in buttons:
        try:
            label = (button.text or "").strip().lower()
            # "ok" must be a whole word: as a substring it also matches
            # unrelated buttons such as "Book now" or "Look inside".
            words = (
                label.replace(",", " ").replace(".", " ").replace("!", " ").split()
            )
            if "ok" in words or any(key in label for key in substring_keywords):
                button.click()
                time.sleep(0.5)
                return
        except Exception:
            # Stale or non-interactable button: try the next candidate.
            continue

def collect_links_from_wrapper():
    """Return the set of car links currently present in the page.

    Anchors inside the listing wrapper are tried first. If that yields no
    links (the lookup failed, or the site's generated class names no longer
    match), every anchor whose href contains ``/buy-used-`` on cars24.com is
    used instead. Query strings are stripped so the same car is not counted
    twice.

    Returns:
        set[str]: de-duplicated hrefs; empty when nothing matched.
    """

    def _gather(selector, require_domain):
        # Collect qualifying hrefs for one CSS selector, skipping anchors
        # that go stale while they are being read.
        found = set()
        for anchor in driver.find_elements(By.CSS_SELECTOR, selector):
            try:
                href = anchor.get_attribute("href")
            except Exception:
                continue
            if not href or "/buy-used-" not in href:
                continue
            if require_domain and "cars24.com" not in href:
                continue
            found.add(href.split("?")[0])
        return found

    try:
        links = _gather(
            "div.styles_carListingContainer__uob_6 div.styles_wrapper__b4UUV a",
            require_domain=False,
        )
    except Exception:
        links = set()

    # find_elements returns an empty list (it does not raise) when the
    # selector matches nothing, so the fallback has to key off an empty
    # result rather than off an exception.
    if not links:
        links = _gather("a[href*='/buy-used-']", require_domain=True)
    return links

def try_click_load_more():
    """Click the first visible "load/show/view/see more" button, if any.

    The button text is matched case-insensitively via an XPath ``translate``
    that lower-cases ASCII letters.

    Returns:
        bool: True when a button was clicked (after a one second pause),
        False when no suitable button was found or every click failed.
    """
    upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    lower = "abcdefghijklmnopqrstuvwxyz"
    phrases = ("load more", "show more", "view more", "see more")
    conditions = " or ".join(
        f"contains(translate(., '{upper}', '{lower}'), '{phrase}')"
        for phrase in phrases
    )

    try:
        candidates = driver.find_elements(By.XPATH, f"//button[{conditions}]")
    except Exception:
        # Best effort: a failed lookup just means "nothing clicked".
        # ``Exception`` rather than a bare except so Ctrl-C still propagates.
        return False

    for button in candidates:
        try:
            if button.is_displayed():
                button.click()
                time.sleep(1.0)
                return True
        except Exception:
            # Stale or intercepted click: move on to the next candidate.
            continue
    return False

def scrape_city(city_name, listing_url):
    """Scroll a city's listing page and collect car detail links.

    The page is opened, any popup is dismissed, and the list is scrolled
    repeatedly (clicking a "load more" button when one appears). Scrolling
    stops when TARGET_PER_CITY links have been gathered, when
    MAX_SCROLL_ATTEMPTS_WITHOUT_NEW consecutive scrolls add nothing new, or
    after MAX_TOTAL_SCROLLS iterations.

    Args:
        city_name: label used in progress messages.
        listing_url: URL of the listing page to open.

    Returns:
        set[str]: every distinct car link seen while scrolling.
    """
    card_selector = "div.styles_carListingContainer__uob_6 div.styles_wrapper__b4UUV a"
    scroll_to_bottom_js = "window.scrollTo(0, document.body.scrollHeight);"

    print(f"\n🚗 Scraping city: {city_name}")
    driver.get(listing_url)
    time.sleep(5)
    close_cookie_or_popup()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.styles_carListingContainer__uob_6")))
    except Exception:
        # Container did not show up in time; continue anyway, the loop below
        # ends by itself once no links keep appearing.
        pass

    collected_links = set()
    attempts_no_change = 0
    total_scrolls = 0

    while attempts_no_change < MAX_SCROLL_ATTEMPTS_WITHOUT_NEW and total_scrolls < MAX_TOTAL_SCROLLS:
        total_scrolls += 1

        # Prefer scrolling the last card into view; fall back to jumping to
        # the bottom of the document when there are no cards or that fails.
        anchors = driver.find_elements(By.CSS_SELECTOR, card_selector)
        scrolled = False
        if anchors:
            try:
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'end'});",
                    anchors[-1],
                )
                scrolled = True
            except Exception:
                scrolled = False
        if not scrolled:
            driver.execute_script(scroll_to_bottom_js)

        time.sleep(random.uniform(SCROLL_PAUSE_MIN, SCROLL_PAUSE_MAX))
        try_click_load_more()

        # Accumulate instead of overwriting: links seen on earlier passes are
        # kept even if the page later drops those cards from the DOM, and
        # progress is judged by genuinely new links.
        previous_count = len(collected_links)
        collected_links |= collect_links_from_wrapper()
        current_count = len(collected_links)
        print(f"[{city_name}] Scroll {total_scrolls}: {current_count} links found")

        if current_count > previous_count:
            attempts_no_change = 0
        else:
            attempts_no_change += 1

        # Stop as soon as the per-city target has been reached.
        if current_count >= TARGET_PER_CITY:
            print(f"✅ {city_name}: Reached {TARGET_PER_CITY}+ cars, stopping scroll")
            break

    print(f"✅ {city_name}: Finished scrolling, found {len(collected_links)} car links")
    return collected_links

def _first_text(*selectors):
    """Return the stripped text of the first CSS selector that resolves, else ""."""
    for selector in selectors:
        try:
            return driver.find_element(By.CSS_SELECTOR, selector).text.strip()
        except Exception:
            # Element missing or unreadable: try the next selector.
            # ``Exception`` rather than a bare except so Ctrl-C still propagates.
            continue
    return ""


def _split_title(title_text):
    """Split a page title into (year, model_name).

    The first whitespace-separated token is treated as the year when it is
    all digits; otherwise the whole title is the model name.
    """
    if not title_text:
        return "", ""
    parts = title_text.split(maxsplit=1)
    if parts[0].isdigit():
        return parts[0], (parts[1] if len(parts) > 1 else "")
    return "", title_text


def scrape_car_detail(city, url):
    """Open a car detail page and extract its attributes.

    Every field is best effort: a field whose element cannot be found or
    read is returned as an empty string rather than raising.

    Args:
        city: city label copied into the "City" field of the result.
        url: detail page URL to load.

    Returns:
        dict[str, str]: keys "City", "Year", "Model Name",
        "Kilometer Driven", "Number of Owners", "Transmission",
        "Fuel Type", "Location" and "Price".
    """
    driver.get(url)
    time.sleep(3.0)

    year, model_name = _split_title(_first_text("h1.sc-braxZu.fjhfdl"))

    # The first four meta paragraphs are read positionally as
    # kilometres, owners, transmission and fuel type.
    meta_values = ["", "", "", ""]
    try:
        meta_ps = driver.find_elements(By.CSS_SELECTOR, "div.styles_carMeta__hm1XQ p.sc-braxZu.kvfdZL")
        for index, element in enumerate(meta_ps[:4]):
            meta_values[index] = element.text.strip()
    except Exception:
        # Keep whatever was read before the failure.
        pass
    kilometer_driven, number_of_owners, transmission, fuel_type = meta_values

    location = _first_text("div.styles_carLocation__UrZVn p.sc-braxZu.dughoY")

    # Try the specific price paragraph first, then any paragraph in the price box.
    price = _first_text(
        "div.styles_price__3yE9i p.sc-braxZu.hhzsvw",
        "div.styles_price__3yE9i p",
    )

    return {
        "City": city,
        "Year": year,
        "Model Name": model_name,
        "Kilometer Driven": kilometer_driven,
        "Number of Owners": number_of_owners,
        "Transmission": transmission,
        "Fuel Type": fuel_type,
        "Location": location,
        "Price": price
    }

# ---------------- MAIN ----------------
# Scrape every configured city, then write all rows to one Excel workbook.
# The save and the browser shutdown sit in ``finally`` blocks so that an
# interruption or crash midway still writes the rows gathered so far and
# never leaves the Edge process running.
all_data = []

try:
    for city, url in CITIES.items():
        links = scrape_city(city, url)
        print(f"🔎 {city}: Scraping {len(links)} detail pages...")
        # Sorted so the visiting order is deterministic between runs.
        for i, car_url in enumerate(sorted(links), start=1):
            try:
                row = scrape_car_detail(city, car_url)
                all_data.append(row)
                print(f"[{city}] Scraped {i}/{len(links)}")
            except Exception as e:
                # One bad page must not abort the whole run.
                print(f"❌ Error scraping {car_url}: {e}")
finally:
    try:
        # Save results
        df = pd.DataFrame(all_data)
        df.to_excel("maruti_cars_multiple_cities.xlsx", index=False)
        print(f"\n🎉 Saved {len(df)} rows to maruti_cars_multiple_cities.xlsx")
    finally:
        # Runs even if writing the workbook fails.
        driver.quit()



🚗 Scraping city: Mumbai
[Mumbai] Scroll 1: 40 links found
[Mumbai] Scroll 2: 60 links found
[Mumbai] Scroll 3: 80 links found
[Mumbai] Scroll 4: 100 links found
[Mumbai] Scroll 5: 120 links found
[Mumbai] Scroll 6: 140 links found
[Mumbai] Scroll 7: 160 links found
[Mumbai] Scroll 8: 180 links found
[Mumbai] Scroll 9: 200 links found
[Mumbai] Scroll 10: 220 links found
[Mumbai] Scroll 11: 240 links found
[Mumbai] Scroll 12: 260 links found
✅ Mumbai: Reached 250+ cars, stopping scroll
✅ Mumbai: Finished scrolling, found 260 car links
🔎 Mumbai: Scraping 260 detail pages...
[Mumbai] Scraped 1/260
[Mumbai] Scraped 2/260
[Mumbai] Scraped 3/260
[Mumbai] Scraped 4/260
[Mumbai] Scraped 5/260
[Mumbai] Scraped 6/260
[Mumbai] Scraped 7/260
[Mumbai] Scraped 8/260
[Mumbai] Scraped 9/260
[Mumbai] Scraped 10/260
[Mumbai] Scraped 11/260
[Mumbai] Scraped 12/260
[Mumbai] Scraped 13/260
[Mumbai] Scraped 14/260
[Mumbai] Scraped 15/260
[Mumbai] Scraped 16/260
[Mumbai] Scraped 17/260
[Mumbai] Scraped 18/26