# CP32 — Team C — Cars24 Web Scraping Mini Project

### Project Objective
Develop hands-on skills in web scraping, data cleaning, and structured data presentation by extracting car details from Cars24. The focus is on listings in India, with location handling as specified below.

In [1]:
# Step 0: Dependency Installation (Optional - for new environments)
# Uncomment and run the following lines if you need to install dependencies in a new kernel
# !pip install requests beautifulsoup4 pandas

# Note: For production environments, create a requirements.txt file with:
# requests>=2.25.0
# beautifulsoup4>=4.9.0
# pandas>=1.3.0

# Then install with: pip install -r requirements.txt

print("Dependency installation cell ready (commented for safety)")

Dependency installation cell ready (commented for safety)


In [2]:
# Step 1: Importing Required Libraries and Checking Versions

import requests                      # For sending HTTP requests
from bs4 import BeautifulSoup         # For parsing HTML content
import pandas as pd                   # For data manipulation and analysis
import os                             # For creating project structure
import sys                            # For system information
import urllib.robotparser             # For robots.txt checking

# Print package versions for reproducibility
print("=== Package Versions ===")
print(f"requests: {requests.__version__}")
try:
    import bs4
    print(f"beautifulsoup4: {bs4.__version__}")
except AttributeError:
    print("beautifulsoup4: Version not available")
print(f"pandas: {pd.__version__}")
print(f"Python: {sys.version}")
print("\nLibraries imported successfully!")

=== Package Versions ===
requests: 2.32.3
beautifulsoup4: 4.13.4
pandas: 2.3.0
Python: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 16:37:03) [MSC v.1929 64 bit (AMD64)]

Libraries imported successfully!


In [3]:
# Step 2: Robots.txt Compliance Check
# Demonstrating awareness of scraping rules and ethical guidelines

def check_robots_txt(base_url):
    """Check robots.txt for scraping permissions"""
    try:
        rp = urllib.robotparser.RobotFileParser()
        rp.set_url(f"{base_url}/robots.txt")
        rp.read()
        
        print("=== Robots.txt Analysis ===")
        print(f"Checking robots.txt for: {base_url}")
        
        # Check if we can access the main page
        can_fetch = rp.can_fetch("*", "/")
        print(f"Can fetch main page: {can_fetch}")
        
        # Check specific paths we might need
        search_paths = ["/buy-used-hyundai-cars-mumbai/", "/buy-used-cars/"]
        for path in search_paths:
            can_access = rp.can_fetch("*", path)
            print(f"Can access {path}: {can_access}")
        
        # Get crawl delay if specified
        crawl_delay = rp.crawl_delay("*")
        if crawl_delay:
            print(f"Recommended crawl delay: {crawl_delay} seconds")
        else:
            print("No specific crawl delay specified")
            
        return rp
        
    except Exception as e:
        print(f"Error checking robots.txt: {e}")
        return None

# Check robots.txt for Cars24
base_url = "https://www.cars24.com"
robots_parser = check_robots_txt(base_url)


=== Robots.txt Analysis ===
Checking robots.txt for: https://www.cars24.com
Can fetch main page: False
Can access /buy-used-hyundai-cars-mumbai/: False
Can access /buy-used-cars/: False
No specific crawl delay specified


In [4]:
# Step 3: Basic HTTP Connectivity Test
# Simple connectivity check without detailed exception handling

# Create a session with proper headers
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
})

# Make request
response = session.get("https://www.cars24.com/buy-used-hyundai-cars-mumbai/?sort=bestmatch&serveWarrantyCount=true&listingSource=Homepage_Filters", timeout=10)

print("=== Website Connectivity Test ===")
print(f"✓ HTTP Status: {response.status_code}")
print(f"✓ Content Length: {len(response.content)} bytes")
print(f"✓ Content Type: {response.headers.get('content-type', 'Unknown')}")

# Basic page validation
soup = BeautifulSoup(response.content, 'html.parser')
title = soup.find('title')
if title:
    print(f"✓ Page Title: {title.get_text().strip()}")

print("✓ Successfully connected to Cars24 website")


=== Website Connectivity Test ===
✓ HTTP Status: 200
✓ Content Length: 1241449 bytes
✓ Content Type: text/html; charset=utf-8
✓ Page Title: 433 Hyundai Used Cars in Mumbai | Second Hand Hyundai Cars in Mumbai starting from ₹0.89 lakh - CARS24
✓ Successfully connected to Cars24 website


In [5]:
# Step 4: Project Structure Setup

project_dir = "cars24_hyundai_mumbai"              # name of the project folder
if not os.path.exists(project_dir):                # Check if the directory already exists
    os.makedirs(project_dir)                       # If not, create the directory
    print(f"Project directory '{project_dir}' created successfully.")
else:
    print(f"Project directory '{project_dir}' already exists.")                # Printing a message if it already exists

Project directory 'cars24_hyundai_mumbai' already exists.


In [6]:
# Step 5: Advanced HTTP Handling with Retries, Logging, and Robust Error Handling

import logging
import time
from typing import Tuple, Optional, Dict, Any
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests.exceptions import Timeout, ConnectionError, HTTPError, RequestException

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('cars24_scraper.log', mode='a')
    ]
)
logger = logging.getLogger(__name__)

# Declaring the target website URL
TARGET_URL = "https://www.cars24.com/buy-used-hyundai-cars-mumbai/?sort=bestmatch&serveWarrantyCount=true&listingSource=Homepage_Filters"

def create_robust_session() -> requests.Session:
    """
    Create a session with retry strategy and proper adapters.
    
    Returns:
        requests.Session: Configured session with retry behavior
    """
    session = requests.Session()
    
    # Configure retry strategy with exponential backoff
    retry_strategy = Retry(
        total=3,  # Total number of retries
        backoff_factor=1,  # Exponential backoff factor
        status_forcelist=[429, 500, 502, 503, 504],  # Retry on these status codes
        allowed_methods=["HEAD", "GET", "OPTIONS"]  # Only retry safe methods
    )
    
    # Mount adapter with retry strategy
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    
    # Set headers
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    })
    
    return session

def validate_response_content(response: requests.Response) -> Tuple[bool, str]:
    """
    Validate response content for basic checks.
    
    Args:
        response: The HTTP response object
        
    Returns:
        Tuple[bool, str]: (is_valid, reason)
    """
    # Check content type
    content_type = response.headers.get('content-type', '').lower()
    if 'text/html' not in content_type:
        return False, f"Invalid content type: {content_type}"
    
    # Check minimum content length
    if len(response.content) < 1000:
        return False, f"Content too short: {len(response.content)} bytes"
    
    # Check for common error indicators
    content_lower = response.text.lower()
    error_indicators = ['captcha', 'access denied', 'blocked', 'forbidden']
    for indicator in error_indicators:
        if indicator in content_lower:
            return False, f"Error indicator found: {indicator}"
    
    return True, "Content validation passed"

def get_robust_response(url: str, session: Optional[requests.Session] = None, 
                       timeout_seconds: int = 15) -> Tuple[Optional[requests.Response], Optional[Dict[str, Any]]]:
    """
    Advanced robust HTTP request function with comprehensive error handling.
    
    Args:
        url (str): The target website URL
        session (requests.Session, optional): Pre-configured session. If None, creates new one
        timeout_seconds (int): Request timeout in seconds
        
    Returns:
        Tuple[Optional[requests.Response], Optional[Dict[str, Any]]]: 
            (response, None) on success, (None, error_info) on failure
    """
    
    if session is None:
        session = create_robust_session()
    
    error_info = {
        'url': url,
        'timestamp': time.time(),
        'error_type': None,
        'error_message': None,
        'status_code': None,
        'retry_after': None
    }
    
    try:
        logger.info(f"Attempting to fetch: {url}")
        
        # Make the request
        response = session.get(url, timeout=timeout_seconds)
        
        # Handle rate limiting (429 Too Many Requests)
        if response.status_code == 429:
            retry_after = response.headers.get('Retry-After')
            if retry_after:
                wait_time = int(retry_after)
                logger.warning(f"Rate limited. Waiting {wait_time} seconds as per Retry-After header")
                time.sleep(wait_time)
                # Retry once after waiting
                response = session.get(url, timeout=timeout_seconds)
            else:
                logger.warning("Rate limited but no Retry-After header found")
        
        # Differentiate 4xx vs 5xx errors
        if response.status_code >= 400:
            if 400 <= response.status_code < 500:
                # Client error (4xx)
                error_info.update({
                    'error_type': 'ClientError',
                    'error_message': f'Client error: {response.status_code} {response.reason}',
                    'status_code': response.status_code
                })
                logger.error(f"Client error {response.status_code}: {response.reason}")
                return None, error_info
            else:
                # Server error (5xx)
                error_info.update({
                    'error_type': 'ServerError', 
                    'error_message': f'Server error: {response.status_code} {response.reason}',
                    'status_code': response.status_code
                })
                logger.error(f"Server error {response.status_code}: {response.reason}")
                return None, error_info
        
        # Validate response content
        is_valid, validation_reason = validate_response_content(response)
        if not is_valid:
            error_info.update({
                'error_type': 'ContentValidationError',
                'error_message': f'Content validation failed: {validation_reason}',
                'status_code': response.status_code
            })
            logger.error(f"Content validation failed: {validation_reason}")
            return None, error_info
        
        logger.info(f"Success! Status: {response.status_code}, Content: {len(response.content)} bytes")
        return response, None
        
    except Timeout:
        error_info.update({
            'error_type': 'TimeoutError',
            'error_message': f'Request timed out after {timeout_seconds}s'
        })
        logger.error(f"Timeout after {timeout_seconds}s for {url}")
        return None, error_info
        
    except ConnectionError as e:
        error_info.update({
            'error_type': 'ConnectionError',
            'error_message': f'Connection failed: {str(e)}'
        })
        logger.error(f"Connection error for {url}: {e}")
        return None, error_info
        
    except HTTPError as e:
        status_code = e.response.status_code if e.response else None
        error_info.update({
            'error_type': 'HTTPError',
            'error_message': f'HTTP error: {str(e)}',
            'status_code': status_code
        })
        logger.error(f"HTTP error for {url}: {e}")
        return None, error_info
        
    except RequestException as e:
        error_info.update({
            'error_type': 'RequestException',
            'error_message': f'Request exception: {str(e)}'
        })
        logger.error(f"Request exception for {url}: {e}")
        return None, error_info
        
    except Exception as e:
        error_info.update({
            'error_type': 'UnexpectedError',
            'error_message': f'Unexpected error: {str(e)}'
        })
        logger.critical(f"Unexpected error for {url}: {e}")
        return None, error_info


In [7]:
# Step 6: Testing the Advanced HTTP Function

print("=== Advanced HTTP Function Testing ===")

# Create a robust session
robust_session = create_robust_session()

# Test Case 1: Successful Connection (Expected)
print("\n[Test 1: Successful URL]")
response, error = get_robust_response(TARGET_URL, robust_session)
if response:
    print(f"✓ Test 1 Result: Success! Status: {response.status_code}, Content: {len(response.content)} bytes")
    # Parse title for verification
    soup = BeautifulSoup(response.content, 'html.parser')
    title = soup.find('title')
    if title:
        print(f"✓ Page Title: {title.get_text().strip()}")
else:
    print(f"✗ Test 1 Failed: {error['error_type']} - {error['error_message']}")

# Test Case 2: 404 Not Found (Expected to fail gracefully)
print("\n[Test 2: Invalid/404 URL]")
response_404, error_404 = get_robust_response("https://www.cars24.com/this-path-does-not-exist-12345/", robust_session)
if not response_404:
    print(f"✓ Test 2 Result: Correctly handled 404 error - {error_404['error_type']}: {error_404['error_message']}")
else:
    print("✗ Test 2 Unexpected: Should have failed but didn't")

# Test Case 3: Test retry behavior with a URL that might cause transient errors
print("\n[Test 3: Testing Retry Behavior]")
# Using a URL that might trigger rate limiting or server errors
test_url = "https://httpbin.org/status/500"  # This will return 500 error
response_500, error_500 = get_robust_response(test_url, robust_session)
if not response_500:
    print(f"✓ Test 3 Result: Correctly handled server error - {error_500['error_type']}: {error_500['error_message']}")
else:
    print("✗ Test 3 Unexpected: Should have failed but didn't")

print("\n=== Testing Complete ===")
print("Check 'cars24_scraper.log' file for detailed logs with timestamps.")


2025-10-18 20:40:40,950 - INFO - Attempting to fetch: https://www.cars24.com/buy-used-hyundai-cars-mumbai/?sort=bestmatch&serveWarrantyCount=true&listingSource=Homepage_Filters


=== Advanced HTTP Function Testing ===

[Test 1: Successful URL]


2025-10-18 20:40:41,502 - INFO - Success! Status: 200, Content: 1242683 bytes
2025-10-18 20:40:41,649 - INFO - Attempting to fetch: https://www.cars24.com/this-path-does-not-exist-12345/


✓ Test 1 Result: Success! Status: 200, Content: 1242683 bytes
✓ Page Title: 433 Hyundai Used Cars in Mumbai | Second Hand Hyundai Cars in Mumbai starting from ₹0.89 lakh - CARS24

[Test 2: Invalid/404 URL]


2025-10-18 20:40:41,948 - ERROR - Client error 404: Not Found
2025-10-18 20:40:41,950 - INFO - Attempting to fetch: https://httpbin.org/status/500


✓ Test 2 Result: Correctly handled 404 error - ClientError: Client error: 404 Not Found

[Test 3: Testing Retry Behavior]


2025-10-18 20:40:51,093 - ERROR - Request exception for https://httpbin.org/status/500: HTTPSConnectionPool(host='httpbin.org', port=443): Max retries exceeded with url: /status/500 (Caused by ResponseError('too many 500 error responses'))


✓ Test 3 Result: Correctly handled server error - RequestException: Request exception: HTTPSConnectionPool(host='httpbin.org', port=443): Max retries exceeded with url: /status/500 (Caused by ResponseError('too many 500 error responses'))

=== Testing Complete ===
Check 'cars24_scraper.log' file for detailed logs with timestamps.


In [8]:
# Step 7: Unit Tests with Mocked Responses

import unittest
from unittest.mock import Mock, patch
import requests

class TestRobustHTTPFunction(unittest.TestCase):
    """Unit tests for the robust HTTP function with mocked responses"""
    
    def setUp(self):
        """Set up test fixtures"""
        self.test_url = "https://example.com"
        self.session = create_robust_session()
    
    def test_successful_response(self):
        """Test successful HTTP response"""
        # Mock successful response with sufficient content size
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = b"<html><title>Test Page</title></html>" + b"x" * 1000
        mock_response.headers = {'content-type': 'text/html; charset=utf-8'}
        mock_response.text = "<html><title>Test Page</title></html>" + "x" * 1000
        
        with patch.object(self.session, 'get', return_value=mock_response):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNotNone(response)
            self.assertIsNone(error)
            self.assertEqual(response.status_code, 200)
    
    def test_404_client_error(self):
        """Test 404 client error handling"""
        # Mock 404 response
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.reason = "Not Found"
        mock_response.content = b"<html>Not Found</html>"
        mock_response.headers = {'content-type': 'text/html'}
        mock_response.text = "<html>Not Found</html>"
        
        with patch.object(self.session, 'get', return_value=mock_response):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNone(response)
            self.assertIsNotNone(error)
            self.assertEqual(error['error_type'], 'ClientError')
            self.assertEqual(error['status_code'], 404)
    
    def test_500_server_error(self):
        """Test 500 server error handling"""
        # Mock 500 response
        mock_response = Mock()
        mock_response.status_code = 500
        mock_response.reason = "Internal Server Error"
        mock_response.content = b"<html>Server Error</html>"
        mock_response.headers = {'content-type': 'text/html'}
        mock_response.text = "<html>Server Error</html>"
        
        with patch.object(self.session, 'get', return_value=mock_response):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNone(response)
            self.assertIsNotNone(error)
            self.assertEqual(error['error_type'], 'ServerError')
            self.assertEqual(error['status_code'], 500)
    
    def test_timeout_error(self):
        """Test timeout error handling"""
        with patch.object(self.session, 'get', side_effect=requests.exceptions.Timeout()):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNone(response)
            self.assertIsNotNone(error)
            self.assertEqual(error['error_type'], 'TimeoutError')
    
    def test_connection_error(self):
        """Test connection error handling"""
        with patch.object(self.session, 'get', side_effect=requests.exceptions.ConnectionError()):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNone(response)
            self.assertIsNotNone(error)
            self.assertEqual(error['error_type'], 'ConnectionError')
    
    def test_content_validation_failure(self):
        """Test content validation failure"""
        # Mock response with invalid content type
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = b"Not HTML"
        mock_response.headers = {'content-type': 'application/json'}
        mock_response.text = "Not HTML"
        
        with patch.object(self.session, 'get', return_value=mock_response):
            response, error = get_robust_response(self.test_url, self.session)
            
            self.assertIsNone(response)
            self.assertIsNotNone(error)
            self.assertEqual(error['error_type'], 'ContentValidationError')

# Run the unit tests
if __name__ == '__main__':
    print("=== Running Unit Tests ===")
    unittest.main(argv=[''], exit=False, verbosity=2)


test_404_client_error (__main__.TestRobustHTTPFunction.test_404_client_error)
Test 404 client error handling ... 2025-10-18 20:41:04,501 - INFO - Attempting to fetch: https://example.com
2025-10-18 20:41:04,503 - ERROR - Client error 404: Not Found
ok
test_500_server_error (__main__.TestRobustHTTPFunction.test_500_server_error)
Test 500 server error handling ... 2025-10-18 20:41:04,507 - INFO - Attempting to fetch: https://example.com


2025-10-18 20:41:04,510 - ERROR - Server error 500: Internal Server Error
ok
test_connection_error (__main__.TestRobustHTTPFunction.test_connection_error)
Test connection error handling ... 2025-10-18 20:41:04,517 - INFO - Attempting to fetch: https://example.com
2025-10-18 20:41:04,518 - ERROR - Connection error for https://example.com: 
ok
test_content_validation_failure (__main__.TestRobustHTTPFunction.test_content_validation_failure)
Test content validation failure ... 2025-10-18 20:41:04,522 - INFO - Attempting to fetch: https://example.com
2025-10-18 20:41:04,524 - ERROR - Content validation failed: Invalid content type: application/json
ok
test_successful_response (__main__.TestRobustHTTPFunction.test_successful_response)
Test successful HTTP response ... 2025-10-18 20:41:04,528 - INFO - Attempting to fetch: https://example.com
2025-10-18 20:41:04,530 - INFO - Success! Status: 200, Content: 1037 bytes
ok
test_timeout_error (__main__.TestRobustHTTPFunction.test_timeout_error)
Tes

=== Running Unit Tests ===


In [12]:
# Step 8: Data Extraction
# To be completed by the next team members
# Step 3: Data Extraction
# To be completed by the next team members
"""
Web Scraping Used Hyundai Cars from Cars24 (Mumbai) using Selenium + BeautifulSoup

Requirements:
1. Install Python 3.x
2. Install necessary libraries:
   pip install selenium beautifulsoup4

3. Download ChromeDriver:
   - Go to https://chromedriver.chromium.org/downloads
   - Choose the version that matches your Chrome browser
   - Extract and save the chromedriver.exe somewhere (e.g., C:\chromedriver\chromedriver.exe)

4. Update the 'chrome_service' path below to your ChromeDriver location.

 Notes / Tips:
   - ChromeDriver path must be updated in the script.
   - The Chrome browser must remain open while the script scrolls, otherwise some cars may not load.
   - Slow scrolling is necessary because Cars24 dynamically loads cars as you scroll.
   - The script safely handles missing data using conditional checks.
   - You can later export the 'all_car_data' list to CSV or JSON if needed.
"""

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import time

chrome_service = Service(r"C:\\Users\\PUNIT AYARE\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_service)

all_car_data = []


driver.get(TARGET_URL)
time.sleep(5)  # initial wait


print("🖱️ Slowly scrolling to load all cars...")

scroll_pause = 1.5
scroll_increment = 400
all_cards_count = 0
current_height = 0

while True:
    driver.execute_script(f"window.scrollTo(0, {current_height});")
    time.sleep(scroll_pause)
    current_height += scroll_increment
    
    # Count how many car cards are loaded
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    car_cards = soup.find_all('div', class_='styles_normalCardWrapper__qDZjq')
    
    if len(car_cards) > all_cards_count:
        all_cards_count = len(car_cards)
    else:
        # If no new cards loaded after scroll, check if we reached bottom
        page_height = driver.execute_script("return document.body.scrollHeight")
        if current_height >= page_height:
            break  # all cars loaded

time.sleep(2)  # extra wait for last cards
print(f"✅ Finished scrolling. Total cards loaded: {len(car_cards)}")

# --- Parse page ---
soup = BeautifulSoup(driver.page_source, 'html.parser')
car_cards = soup.find_all('div', class_='styles_normalCardWrapper__qDZjq')
print(f"Found {len(car_cards)} cars on the page")

for card in car_cards:
    try:
        # Name
        car_name_tag = card.find('span', class_='sc-braxZu kjFjan')
        car_name = car_name_tag.text.strip() if car_name_tag else None

        # Variant
        variant_info = card.find('span', class_='sc-braxZu lmmumg')
        variant = variant_info.text.strip() if variant_info else None

        kilometers_info = card.find('p', class_='sc-braxZu kvfdZL')
        kilometers = kilometers_info.text.strip() if kilometers_info else None

        kilometer_tags = card.find_all('p', class_='sc-braxZu kvfdZL')
        fuel = kilometer_tags[1].text.strip() if len(kilometer_tags) > 1 else None
        transmission = kilometer_tags[2].text.strip() if len(kilometer_tags) > 2 else None



        

        # Price
        price_tag = card.find('p', string=lambda t: t and "₹" in t)
        price = price_tag.text.strip() if price_tag else None
        # Find the parent div by class
        price_block = card.find('div', class_='styles_priceWrap__VwWBV')
        if price_block:
            price_tags = price_block.find_all('p')
            price_display = price_tags[0].text.strip() if len(price_tags) > 0 else None
            price_lakh = price_tags[1].text.strip() if len(price_tags) > 1 else None
        else:
             price_display = None
             price_lakh = None

        car_data = {
            "Car_Name": car_name,
            "Variant": variant,
            "Kilometers": kilometers,
            "Fuel_Type": fuel,
            "Transmission": transmission,
            "EMI": price,
            "Price_original": price_display,
            "Price": price_lakh
        }

        all_car_data.append(car_data)
    except Exception as e:
        print(f"❌ Failed to extract a car card: {e}")

driver.quit()

print(f"\n✅ Total cars scraped: {len(all_car_data)}")
for car in all_car_data:
    print(car)


  - Extract and save the chromedriver.exe somewhere (e.g., C:\chromedriver\chromedriver.exe)


🖱️ Slowly scrolling to load all cars...
✅ Finished scrolling. Total cards loaded: 434
Found 434 cars on the page

✅ Total cars scraped: 434
{'Car_Name': '2016 Hyundai Grand i10', 'Variant': 'SPORTZ 1.2 KAPPA VTVT', 'Kilometers': '34.11k km', 'Fuel_Type': 'Petrol', 'Transmission': 'Manual', 'EMI': 'EMI ₹6,549/m*', 'Price_original': '₹3.58L', 'Price': '₹3.35 lakh'}
{'Car_Name': '2018 Hyundai Verna', 'Variant': '1.6 VTVT SX (O) AT', 'Kilometers': '30.23k km', 'Fuel_Type': 'Petrol', 'Transmission': 'Auto', 'EMI': 'EMI ₹13,064/m*', 'Price_original': '₹7.83L', 'Price': '₹6.68 lakh'}
{'Car_Name': '2018 Hyundai Grand i10', 'Variant': 'SPORTZ 1.2 KAPPA VTVT', 'Kilometers': '63.23k km', 'Fuel_Type': 'Petrol', 'Transmission': 'Manual', 'EMI': 'EMI ₹6,843/m*', 'Price_original': '₹3.85L', 'Price': '₹3.50 lakh'}
{'Car_Name': '2014 Hyundai Xcent', 'Variant': 'SX 1.2', 'Kilometers': '96.90k km', 'Fuel_Type': 'Petrol', 'Transmission': 'Manual', 'EMI': 'EMI ₹7,900/m*', 'Price_original': '₹3.17L', 'Price