# Scaper for car listings from auto.bg

In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import os
import re

In [5]:
# change for amount of pages to scrape
pages = 2

In [None]:

headers = {"User-Agent": "Mozilla/5.0"}
base_url = "https://www.auto.bg/obiavi/avtomobili-dzhipove"
output_file = "auto_bg_cars.csv"

if os.path.exists(output_file):
    df_existing = pd.read_csv(output_file)
    existing_ids = set(df_existing["Listing_ID"].astype(str))
    print(f"Loaded {len(existing_ids)} existing listings.")
else:
    df_existing = pd.DataFrame()
    existing_ids = set()
    print("No existing CSV found, starting fresh.")

cars = []


for page in range(1, pages): 
    url = f"{base_url}?page={page}"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    listings = soup.find_all("div", class_="resultItem")
    print(f"Found {len(listings)} listings on page {page}")

    for item in listings:
        try:
            title_tag = item.find("div", class_="link")
            price_tag = item.find("div", class_="price")
            href_tag = item.find("a", href=True)

            title = title_tag.get_text(strip=True) if title_tag else "N/A"
            price = price_tag.get_text(strip=True) if price_tag else "N/A"

            listing_id = None
            if href_tag:
                href = href_tag["href"]
                if not href.startswith("http"):
                    href = "https://www.auto.bg" + href
                details_url = href

                match = re.search(r"/obiava/(\d+)/", href)
                if match:
                    listing_id = match.group(1)
            if listing_id and listing_id in existing_ids:
                print(f"⏭ Skipping existing listing {listing_id}")
                continue

            spec_dict = {}
            if details_url:
                detail_res = requests.get(details_url, headers=headers)
                detail_soup = BeautifulSoup(detail_res.text, "html.parser")
                table = detail_soup.find("table", class_="dowble")

                if table:
                    rows = table.find_all("tr")
                    for row in rows:
                        children = row.find_all(["th", "td"])
                        for i in range(0, len(children), 2):
                            key_tag = children[i]
                            value_tag = children[i + 1] if i + 1 < len(children) else None
                            key = key_tag.get_text(strip=True) if key_tag else f"Unknown{i}"
                            value = value_tag.get_text(strip=True) if value_tag else ""
                            spec_dict[key] = value

            car_data = {
                "Listing_ID": listing_id if listing_id else "N/A",
                "Title": title,
                "Price": price,
                "URL": details_url
            }
            car_data.update(spec_dict)
            cars.append(car_data)

            print(f"Scraped: {title}")

        except Exception as e:
            print(f"Error: {e}")

        time.sleep(1)  

if cars:
    df_new = pd.DataFrame(cars)
    if not df_existing.empty:
        df_final = pd.concat([df_existing, df_new], ignore_index=True)
    else:
        df_final = df_new

    df_final.to_csv(output_file, index=False)
    print(f"Total listings: {len(df_final)} saved to {output_file}")
else:
    print("No new listings found.")


Loaded 6532 existing listings.
Found 19 listings on page 1
✅ Scraped: Mercedes-Benz S 550 Mercedes S550 AMG Бяла Перла, Headup, Distronic
✅ Scraped: BMW 530 3.0D M57 * ОБДУХВАНЕ* * КОЖА* * ШИБЕДАХ* * ПОДГРЕВ
✅ Scraped: Audi Tt 2.0 TFSI DSG 270кс
✅ Scraped: VW Golf
✅ Scraped: Audi A4
✅ Scraped: Skoda Octavia vrs
✅ Scraped: BMW 330 Xdrive E90
✅ Scraped: VW Golf
✅ Scraped: Audi S5 3.0 TFSI Quattro / Technik / Bang & Olufsen / PANO
✅ Scraped: Toyota Auris
✅ Scraped: Mercedes-Benz C 180 ELEGANCE
✅ Scraped: Audi A4
✅ Scraped: BMW X5
✅ Scraped: Renault Zoe R110 52kWh
✅ Scraped: Audi A6 3.0BiTDi/НОЩНО ВИЖДАНЕ/ПЕЧКА/KEYLESS/HEADUP/360CAM
✅ Scraped: Opel Astra
✅ Scraped: Audi A3 8l
✅ Scraped: VW Passat 2, 0TDI-170k.c/4x4/DSG/LED/NAVI/КАМЕРА/ТОП!!!
✅ Scraped: Peugeot 308 1, 6i-150k.c/111000kм/АВТОМАТИК/НАВИГАЦИЯ/КОЖА/ТОП
✅ Done! Total listings: 7326 saved to auto_bg_cars.csv
