In [1]:
import time
import pandas as pd
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Build the Chrome options shared by every browser session: the three
# command-line switches below plus a fixed desktop-Chrome user-agent string.
chrome_options = Options()
for chrome_flag in (
    "--headless",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36",
):
    chrome_options.add_argument(chrome_flag)

# Cities to scrape, in processing order
cities = [
    "Pune",
    "Mumbai",
    "New Delhi",
    "Chennai",
    "Ahmedabad",
    "Kolkata",
    "Bengaluru",
    "Hyderabad",
]

# Accumulator for the scraped records (one dict per car listing)
all_cars = list()

# Load a page in Chrome, scroll it step by step, and return the rendered HTML
def get_full_page_source(url, scroll_pause=2, scroll_increment=500,
                         max_scrolls=30, wait_timeout=10):
    """Return the page source of ``url`` after incrementally scrolling it.

    A new Chrome session (configured by the module-level ``chrome_options``)
    is started for every call and is always shut down before returning, even
    when loading or scrolling fails.

    Args:
        url: Address of the page to load.
        scroll_pause: Seconds to sleep after each scroll step.
        scroll_increment: Pixels scrolled per step.
        max_scrolls: Upper bound on the number of scroll steps.
        wait_timeout: Seconds to wait, per step, for listing cards to be
            present in the DOM.

    Returns:
        The HTML of the page as reported by the browser after scrolling.

    Raises:
        selenium.common.exceptions.WebDriverException: for driver failures
            other than the per-step wait timing out.
    """
    # Local import: the file's import block does not bring this name in.
    from selenium.common.exceptions import TimeoutException

    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get(url)

        wait = WebDriverWait(browser, wait_timeout)
        last_height = browser.execute_script("return document.body.scrollHeight")
        last_offset = browser.execute_script("return window.pageYOffset")

        for _ in range(max_scrolls):
            browser.execute_script(f"window.scrollBy(0, {scroll_increment});")
            time.sleep(scroll_pause)

            try:
                wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "styles_outer__NTVth")))
            except TimeoutException:
                # No listing cards showed up within the timeout; keep going
                # and let the end-of-page check below decide when to stop.
                pass

            new_height = browser.execute_script("return document.body.scrollHeight")
            new_offset = browser.execute_script("return window.pageYOffset")

            # Stop only when the page did not grow AND the last scroll step
            # did not move the viewport. Checking the height alone would stop
            # after the first step on any page taller than one increment.
            if new_height == last_height and new_offset == last_offset:
                break
            last_height = new_height
            last_offset = new_offset

        return browser.page_source
    finally:
        # Always release the browser process, including on errors.
        browser.quit()

# Visit each city's listing page and collect one record per car card
for city in cities:
    slug = "-".join(city.lower().split(" "))
    url = f"https://www.cars24.com/buy-used-cars-{slug}/"
    print(f"\nScraping data for: {city} - {url}")

    try:
        html = get_full_page_source(url)
        soup = bs(html, 'html.parser')
        cars = soup.find_all('div', {'class': "styles_outer__NTVth"})

        for car in cars:
            try:
                # The first <span> of a card is used as its title; the code
                # treats the first word as the year and the second as the brand.
                name_tag = car.find('span')
                if not name_tag:
                    continue
                name = name_tag.get_text()
                tokens = name.split()
                year = tokens[0]
                brand = tokens[1]
                # "Brand model" deliberately keeps the brand word in front.
                model = " ".join(tokens[1:])

                # The first three <div>s inside the card's <ul> are read as
                # kilometers, fuel type and transmission, in that order.
                ul = car.find('ul')
                if not ul:
                    continue
                specs = [div.get_text() for div in ul.find_all('div')[:3]]
                specs.extend([""] * (3 - len(specs)))  # pad missing fields
                km, fuel_type, transmission = specs

                price_tag = car.find('div', class_="styles_pricingDetail__Q_3hz")
                price = "Not Available"
                if price_tag:
                    price = price_tag.get_text()

                record = {
                    "Brand model": model,
                    "Brand": brand,
                    "Year": year,
                    "Kilometers driven": km,
                    "Fuel Type": fuel_type,
                    "Transmission Type": transmission,
                    "Location": city,
                    "Price (INR)": price,
                }
                all_cars.append(record)
            except Exception as e:
                # A malformed card is reported and skipped; the loop goes on.
                print(f"    Error parsing a car listing: {e}")
    except Exception as e:
        # A failure for one city must not abort the remaining cities.
        print(f"  Failed to scrape {city}: {e}")

# Turn the collected records into a table, preview it, and write it to CSV
df = pd.DataFrame(all_cars)
print("\nSample scraped data:")
print(df.head(10))
df.to_csv("cars24_multicity_data_final_1.csv", index=False)
print("\n✅ Data saved to 'cars24_multicity_data_final_1.csv'")


Scraping data for: Pune - https://www.cars24.com/buy-used-cars-pune/

Scraping data for: Mumbai - https://www.cars24.com/buy-used-cars-mumbai/

Scraping data for: New Delhi - https://www.cars24.com/buy-used-cars-new-delhi/

Scraping data for: Chennai - https://www.cars24.com/buy-used-cars-chennai/

Scraping data for: Ahmedabad - https://www.cars24.com/buy-used-cars-ahmedabad/

Scraping data for: Kolkata - https://www.cars24.com/buy-used-cars-kolkata/

Scraping data for: Bengaluru - https://www.cars24.com/buy-used-cars-bengaluru/

Scraping data for: Hyderabad - https://www.cars24.com/buy-used-cars-hyderabad/

Sample scraped data:
          Brand model       Brand  Year Kilometers driven Fuel Type  \
0          Tata Tiago        Tata  2019         23.80k km    Petrol   
1  Maruti Wagon R 1.0      Maruti  2017         42.15k km    Petrol   
2         Skoda Rapid       Skoda  2021         20.23k km    Petrol   
3     Maruti Alto K10      Maruti  2016         71.20k km    Petrol   
4  Maru