In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re

In [2]:
def clean_kilometers(km_str):
    # Remove 'km' and any extra spaces
    str = km_str.replace(' km', '').strip()
    return int(str.replace(',', ''))

def convert_price(price_str):
    # Remove the '₹' symbol and 'Lakh' suffix
    price_str = price_str.replace('₹', '').replace(' Lakh', '').strip()

    # Convert the price to float and scale it up
    return int(float(price_str.replace(',', '')) * 100)*1000


In [3]:
# Get user input
brand = input("Enter the brand name (e.g., Toyota): ").strip()
city = input("Enter the city or location (e.g., Mumbai): ").strip()

# Construct the URL
base_url = f'https://www.cars24.com/buy-used-{brand.lower()}-cars-'
city_slug = city.lower().replace(' ', '-')  # Convert city to a URL-friendly slug
cars24_url = f'{base_url}{city_slug}/'

Enter the brand name (e.g., Toyota): honda
Enter the city or location (e.g., Mumbai): new delhi


In [4]:
# Send a GET request to the URL
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(cars24_url, headers=headers)

# Parse the HTML content
soup = BeautifulSoup(response.content, "html.parser")

cars_data = []

In [5]:
# Locate the car elements
car_elements = soup.find_all('div', class_='_2YB7p')

for car_element in car_elements:
    # Extract car model and clean it
    car_model_tag = car_element.find('h3', class_='_11dVb')
    if car_model_tag:
        car_model = car_model_tag.text
        year = car_model.split()[0]
        model = ' '.join(car_model.split()[1:]).replace(f'{brand} ', '')
    else:
        year = model = 'Unknown'

    # Extract car details (km run, fuel type, transmission type)
    details_tag = car_element.find('ul', class_='_3J2G-')
    if details_tag:
        details = details_tag.find_all('li')
        km_run = details[0].text.strip()
        fuel_type = details[2].text.strip()
        transmission = details[4].text.strip()
    else:
        km_run = fuel_type = transmission = 'Unknown'

    # Extract car price
    price_tag = car_element.find('strong', class_='_3RL-I')
    price = price_tag.text if price_tag else 'Unknown'

    # Set location based on user input
    location = city

    cars_data.append({
        'Brand': brand,
        'Manufacturing Year': year,
        'Model': model,
        'Kilometers Driven': km_run,
        'Fuel Type': fuel_type,
        'Transmission Type': transmission,
        'Price': price,
        'Location': location
    })


In [6]:
# Create DataFrame
cars24_df = pd.DataFrame(cars_data)

# Apply cleaning functions to the DataFrame
cars24_df['Kilometers Driven'] = cars24_df['Kilometers Driven'].apply(clean_kilometers)
cars24_df['Price'] = cars24_df['Price'].apply(convert_price)

# Save the DataFrame to a CSV file
file_name = f'{brand.lower()}-{city.lower()}.csv'
cars24_df.to_csv(file_name, index=False)

# Display the DataFrame
print(cars24_df)
print(f'Data saved to {file_name}')

    Brand Manufacturing Year                          Model  \
0   honda               2015    Honda City 1.5L I-VTEC V MT   
1   honda               2020    Honda City 1.5L I-VTE V CVT   
2   honda               2021    Honda City 1.5L I-VTE V CVT   
3   honda               2015    Honda City 1.5L I-VTEC V MT   
4   honda               2018   Honda WR-V 1.2L I-VTEC VX MT   
5   honda               2015      Honda Amaze 1.2L I-VTEC S   
6   honda               2017   Honda WR-V 1.2L I-VTEC VX MT   
7   honda               2019      Honda City 1.5L I-VTEC ZX   
8   honda               2014  Honda City 1.5L I-VTEC VX CVT   
9   honda               2021      Honda City 1.5L I-VTEC ZX   
10  honda               2016      Honda Jazz 1.2L I-VTEC VX   
11  honda               2019      Honda Amaze 1.2L I-VTEC S   
12  honda               2013     Honda Amaze 1.2L I-VTEC EX   
13  honda               2019       Honda Jazz 1.2L I-VTEC V   
14  honda               2020      Honda Amaze 1.2L I-VT