In [37]:
pip install selenium
import csv
import random
import re
import time
from urllib.parse import urlparse, parse_qs

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [38]:
"""
Fuad Ibrahimli
Turbo.az web scraping
"""

# --------------------------------------------------------------------- #
def safe_get(driver, url, timeout=20):
    """Navigate *driver* to *url*, reporting success as a boolean.

    The page-load timeout of *driver* is set to *timeout* before the
    navigation is attempted. Any exception raised while configuring the
    timeout or loading the page is caught, a short notice containing the
    URL is printed, and ``False`` is returned; otherwise the result is
    ``True``.
    """
    try:
        driver.set_page_load_timeout(timeout)
        driver.get(url)
    except Exception:
        # Best-effort helper: the caller only needs to know whether to skip.
        print("⏱  Timeout or error on:", url)
        loaded = False
    else:
        loaded = True
    return loaded


# --------------------------------------------------------------------- #
CSV_PATH = "data.csv"
CSV_HEADER = [
    "Brand", "Model", "Price", "Currency", "Year",
    "Engine Size", "Horsepower", "Fuel Type", "Distance",
    "Color", "Body Type", "ProdYear", "Transmission",
    "Drive Type", "New", "Seats", "Owners",
    "Condition", "Market", "City",
]

# Property-table labels, in the order their values follow "Fuel Type" in a row.
_PROPERTY_COLUMNS = (
    "Yürüş",
    "Rəng",
    "Ban növü",
    "Buraxılış ili",
    "Sürətlər qutusu",
    "Ötürücü",
    "Yeni",
    "Yerlərin sayı",
    "Sahiblər",
    "Vəziyyəti",
    "Hansı bazar üçün yığılıb",
    "Şəhər",
)

# "<size> L / <hp> a.g. / <fuel>" as found under the "Mühərrik" property.
_ENGINE_RE = re.compile(r"([\d.]+ L)\s*/\s*(\d+)\s*a\.g\.\s*/\s*(.+)")


def _max_page_from_hrefs(hrefs, default=1):
    """Return the largest numeric ``page`` query value among *hrefs*.

    Missing hrefs and links without a numeric ``page`` parameter are
    ignored; *default* is returned when no link carries a usable value.
    Looking at every link (not just the last one) keeps the result correct
    when the final pagination link is a "next" arrow rather than the last
    page.
    """
    pages = []
    for href in hrefs:
        if not href:
            continue
        value = parse_qs(urlparse(href).query).get("page", [""])[0]
        if value.isdigit():
            pages.append(int(value))
    return max(pages, default=default)


def _split_title(title):
    """Split a listing title into ``(brand, model)``.

    Everything after the first comma is dropped, then the first
    space-separated word is taken as the brand and the rest as the model.
    NOTE(review): multi-word brands (e.g. "Land Rover") are therefore split
    after their first word.
    """
    brand_model = title.split(",", 1)[0]
    brand, _, model = brand_model.partition(" ")
    return brand, model


def _parse_price(price_raw):
    """Return ``(amount, currency)`` parsed from the price element's text.

    *amount* is the concatenation of all purely numeric tokens; *currency*
    is "USD" or "EUR" when the matching code/symbol appears as its own
    token, and "AZN" otherwise.
    """
    # str.split() without arguments splits on any Unicode whitespace, which
    # covers the narrow no-break space (U+202F) used as thousands separator.
    tokens = price_raw.split()
    amount = "".join(tok for tok in tokens if tok.isdigit())
    if "USD" in tokens or "$" in tokens:
        currency = "USD"
    elif "EUR" in tokens or "€" in tokens:
        currency = "EUR"
    else:
        currency = "AZN"
    return amount, currency


def _parse_engine(engine_raw):
    """Return ``(engine_size, horsepower, fuel_type)`` from the engine text.

    When the text does not match the expected pattern, the raw text is
    returned in all three positions.
    """
    match = _ENGINE_RE.match(engine_raw)
    if match:
        return match.groups()
    return engine_raw, engine_raw, engine_raw


def _discover_max_pages(driver, default=5):
    """Open the listing root and return the number of pages to iterate.

    Falls back to *default* (with a printed warning) when the page content
    cannot be confirmed within 15 seconds or no pagination links exist.
    """
    driver.get("https://turbo.az/autos")
    try:
        # wait until either products grid OR pagination appears
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, ".products, .pagination a.page-link")
            )
        )
    except WebDriverException:
        print(f"⚠️  Could not verify pagination – defaulting to {default} pages.")
        return default

    hrefs = [
        el.get_attribute("href")
        for el in driver.find_elements(By.CSS_SELECTOR, ".pagination a.page-link")
    ]
    if not hrefs:
        print(f"⚠️  Pagination not found – defaulting to {default} pages.")
        return default
    return _max_page_from_hrefs(hrefs)


def _collect_car_links(driver, list_url):
    """Load *list_url* and return the non-empty hrefs of its car links.

    Raises ``WebDriverException`` (including its timeout subclass) when the
    page cannot be loaded or no car link appears within 20 seconds.
    """
    driver.get(list_url)
    # Scroll to the bottom once (intended to trigger lazy-loaded content).
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "a.products-i__link"))
    )
    hrefs = (
        el.get_attribute("href")
        for el in driver.find_elements(By.CSS_SELECTOR, "a.products-i__link")
    )
    return [href for href in hrefs if href]


def _extract_row(detail_driver):
    """Build one CSV row from the detail page currently loaded in the driver.

    Waits up to 20 seconds for the property table, then reads the title,
    the price and the name/value property pairs. The returned list follows
    the column order of ``CSV_HEADER``.
    """
    WebDriverWait(detail_driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "product-properties"))
    )

    title = detail_driver.find_element(By.CLASS_NAME, "product-title").text.strip()
    brand, model = _split_title(title)

    price_raw = detail_driver.find_element(By.CLASS_NAME, "product-price__i").text
    price, currency = _parse_price(price_raw)

    # Names and values are paired positionally, in document order.
    names = detail_driver.find_elements(By.CLASS_NAME, "product-properties__i-name")
    values = detail_driver.find_elements(By.CLASS_NAME, "product-properties__i-value")
    props = {k.text.strip(): v.text.strip() for k, v in zip(names, values)}

    engine_size, horsepower, fuel_type = _parse_engine(props.get("Mühərrik", ""))

    # NOTE(review): "Year" and "ProdYear" are both filled from the same
    # "Buraxılış ili" property; kept as-is so the CSV layout is unchanged.
    return [
        brand, model, price, currency,
        props.get("Buraxılış ili", ""),
        engine_size, horsepower, fuel_type,
        *(props.get(label, "") for label in _PROPERTY_COLUMNS),
    ]


# --------------------------------------------------------------------- #
options = Options()
# The ``headless`` property of Options was deprecated in Selenium 4.8 and
# removed in later releases, where assigning it has no effect. Passing the
# browser's own command-line flag works regardless of Selenium version.
options.add_argument("-headless")

# Both drivers start as None so that ``finally`` can clean up whichever one
# was actually started, even if launching the second browser fails.
driver = None
detail_driver = None

try:
    driver = webdriver.Firefox(options=options)          # listing pages
    detail_driver = webdriver.Firefox(options=options)   # car detail pages

    # The file is opened once for the whole run; every row is flushed right
    # after it is written so partial results survive an interrupted run.
    with open(CSV_PATH, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        f.flush()

        # -------------------------------------------- last-page discover #
        max_pages = _discover_max_pages(driver)
        print(f"→ Will iterate through {max_pages} pages.")

        # -------------------------------------------- outer loop (pages) #
        for page in range(1, max_pages + 1):
            list_url = f"https://turbo.az/autos?page={page}"
            print(f"\n📄 Page {page}/{max_pages} • {list_url}")

            try:
                car_links = _collect_car_links(driver, list_url)
            except WebDriverException as e:
                # One bad listing page must not abort the whole run.
                print(f"   ✖ Skipping page {page}: {e}")
                car_links = []
            else:
                print(f"   → Found {len(car_links)} car links.")

            # ---------------------------------------- inner loop (cars) #
            for link in car_links:
                if not safe_get(detail_driver, link):
                    continue
                try:
                    row = _extract_row(detail_driver)
                    writer.writerow(row)
                    f.flush()
                    print(f"      ✔ {row[0]} {row[1]} – {row[2]} {row[3]}")
                except Exception as e:
                    # Per-car boundary: log the failure and move on.
                    print(f"      ✖ Error scraping {link}: {e}")

                # gentle delay to avoid hammering the site
                time.sleep(random.uniform(0.5, 1.2))

            # extra pause between listing pages
            time.sleep(random.uniform(1.5, 3.0))

    # Reported only when the run finished without an unhandled error.
    print(f"\n✅ Scraping complete. Data saved to {CSV_PATH}")

finally:
    # Close each browser independently so one failing quit() cannot leave
    # the other process running.
    for _drv in (driver, detail_driver):
        if _drv is None:
            continue
        try:
            _drv.quit()
        except WebDriverException as e:
            print(f"⚠️  Failed to close browser: {e}")


⚠️  Pagination not found – defaulting to 5 pages.
→ Will iterate through 5 pages.

📄 Page 1/5 • https://turbo.az/autos?page=1
   → Found 36 car links.
      ✔ GAC Empow – 35900 AZN
      ✔ Nissan Sunny – 32900 AZN
      ✔ JETOUR T2 – 24500 USD
      ✔ Changan Qiyuan Q05 – 28800 AZN
      ✔ Toyota Corolla Cross – 33200 AZN
      ✔ Chevrolet Trax – 19700 AZN
      ✔ Toyota Land Cruiser Prado – 41500 AZN
      ✔ Kia Sorento – 13600 AZN
      ✔ Land Rover Range Rover – 26500 AZN
      ✔ Ford Fusion (North America) – 17800 AZN
      ✔ Lexus GX 470 – 19500 USD
      ✔ Toyota Camry – 28000 USD
      ✔ Ford Transit – 32500 AZN
      ✔ Ford Fusion (North America) – 20500 AZN
      ✔ Changan Qiyuan Q07 – 42000 AZN
      ✔ Hyundai Elantra – 21500 AZN
      ✔ Kia Forte – 26500 AZN
      ✔ Chevrolet Cruze – 14660 AZN
      ✔ Chevrolet Trax – 17500 AZN
      ✔ Hyundai Santa Fe – 32900 AZN
      ✔ Ford Fusion (North America) – 21300 AZN
      ✔ Changan Uni-V – 28500 AZN
      ✔ Opel Insignia – 29900 