In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Initialize an empty list to store car results
car_results = []
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
} # It's good practice to include headers to mimic a browser

for page_number in range(1, 10): # Looping through 300 pages
    url = f'https://turbo.az/autos?page={page_number}'
    print(f"Scraping page: {page_number}")
    try:
        response = requests.get(url, headers=headers, timeout=10) # Added timeout for robustness
        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Request failed for page {page_number}: {e}")
        continue # Skip to the next page if request fails

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all car listing elements
    listings = soup.find_all('div', {'class': 'products-i__bottom'})

    if not listings:
        print(f"No listings found on page {page_number}. Stopping.")
        break

    for el in listings:
        name = None
        price = None
        currency = None
        year = None
        engine_power = None
        km = None

        mileage = None
        city = None
        brand = None
        model = None
        body_type = None
        transmission = None

        drive_type = None
        number_of_seats = None
        owners = None
        condition = None
        body_type = None
        market_adapted_for = None

        # Safely extract 'name'
        name_tag = el.find('div', {'class': 'products-i__name products-i__bottom-text'})
        if name_tag:
            name = name_tag.text.strip()

        # Safely extract 'price' and 'currency'
        price_tag = el.find('div', {'class': 'products-i__price products-i__bottom-text'})
        if price_tag:
            full_price_text = price_tag.text.replace(' ', '').strip()
            # Check if the text is not empty before slicing
            if full_price_text:
                currency = full_price_text[-1]
                price = full_price_text[:-1]

        # Safely extract 'year', 'engine_power', 'km'
        attributes_tag = el.find('div', {'class': 'products-i__attributes products-i__bottom-text'})
        if attributes_tag:
            attributes_text = attributes_tag.text.strip()
            parts = attributes_text.split(',')
            if len(parts) > 0:
                year = parts[0].strip()
            if len(parts) > 1:
                engine_power = parts[1].strip()
            if len(parts) > 2:
                km = parts[2].strip()




        car_results.append({
            'name': name,
            'price': price,
            'currency': currency,
            'year': year,
            'engine_power': engine_power,
            'km': km
        })

# Create DataFrame from the collected results
cars = pd.DataFrame(car_results)

# Display the first few rows of the DataFrame
print("\nDataFrame created successfully. First 5 rows:")
print(cars.head())

# Optional: Save to CSV

Scraping page: 1
Request failed for page 1: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=1
Scraping page: 2
Request failed for page 2: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=2
Scraping page: 3
Request failed for page 3: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=3
Scraping page: 4
Request failed for page 4: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=4
Scraping page: 5
Request failed for page 5: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=5
Scraping page: 6
Request failed for page 6: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=6
Scraping page: 7
Request failed for page 7: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=7
Scraping page: 8
Request failed for page 8: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=8
Scraping page: 9
Request failed for page 9: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=9



In [None]:
cars.to_csv('turbo_az_cars.csv', index=False)

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Initialize an empty list to store car results
car_results = []
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

for page_number in range(1, 10):
    url = f'https://turbo.az/autos?page={page_number}'
    print(f"Scraping page: {page_number}")
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Request failed for page {page_number}: {e}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all car listing elements
    listings = soup.find_all('div', {'class': 'products-i__bottom'})

    if not listings:
        print(f"No listings found on page {page_number}. Stopping.")
        break

    for el in listings:
        # Initialize all variables
        name = None
        price = None
        currency = None
        year = None
        engine_power = None
        km = None
        mileage = None
        city = None
        brand = None
        model = None
        body_type = None
        transmission = None
        drive_type = None
        number_of_seats = None
        owners = None
        condition = None
        market_adapted_for = None

        # Safely extract 'name'
        name_tag = el.find('div', {'class': 'products-i__name products-i__bottom-text'})
        if name_tag:
            name = name_tag.text.strip()
            # Extract brand and model from name
            if name:
                name_parts = name.split()
                if len(name_parts) >= 2:
                    brand = name_parts[0]
                    model = ' '.join(name_parts[1:])

        # Safely extract 'price' and 'currency'
        price_tag = el.find('div', {'class': 'products-i__price products-i__bottom-text'})
        if price_tag:
            full_price_text = price_tag.text.replace(' ', '').strip()
            if full_price_text:
                currency = full_price_text[-1]
                price = full_price_text[:-1]

        # Safely extract 'year', 'engine_power', 'km'
        attributes_tag = el.find('div', {'class': 'products-i__attributes products-i__bottom-text'})
        if attributes_tag:
            attributes_text = attributes_tag.text.strip()
            parts = attributes_text.split(',')
            if len(parts) > 0:
                year = parts[0].strip()
            if len(parts) > 1:
                engine_power = parts[1].strip()
            if len(parts) > 2:
                km = parts[2].strip()
                mileage = km  # km and mileage are the same

        # Extract city (location)
        city_tag = el.find('div', {'class': 'products-i__city products-i__bottom-text'})
        if city_tag:
            city = city_tag.text.strip()

        # Look for additional details in description or other elements
        # This might be in a different container - you may need to adjust selectors
        details_container = el.find_parent().find('div', class_='products-i')

        if details_container:
            # Look for additional attributes that might be in data attributes or other elements
            # Extract body type, transmission, etc. from additional info
            info_elements = details_container.find_all('span', class_='products-i__info')

            for info in info_elements:
                info_text = info.text.lower().strip()

                # Body type detection
                if any(body in info_text for body in ['sedan', 'suv', 'hatchback', 'coupe', 'wagon', 'pickup']):
                    body_type = info.text.strip()

                # Transmission detection
                elif any(trans in info_text for trans in ['automatic', 'manual', 'variator', 'robot']):
                    transmission = info.text.strip()

                # Drive type detection
                elif any(drive in info_text for drive in ['front', 'rear', 'all', '4wd', 'awd']):
                    drive_type = info.text.strip()

                # Number of seats
                elif 'seat' in info_text or info_text.isdigit():
                    if info_text.isdigit() and int(info_text) <= 9:
                        number_of_seats = info.text.strip()

                # Condition
                elif any(cond in info_text for cond in ['new', 'used', 'damaged', 'excellent']):
                    condition = info.text.strip()

        # Try to extract additional info from link or data attributes
        link_element = el.find_parent().find('a')
        if link_element:
            # Sometimes additional data is stored in data attributes
            data_attrs = link_element.attrs
            for attr, value in data_attrs.items():
                if 'body' in attr.lower():
                    body_type = value
                elif 'transmission' in attr.lower():
                    transmission = value
                elif 'drive' in attr.lower():
                    drive_type = value

        # Look for owner count in the listing
        owner_pattern = re.search(r'(\d+)\s*owner', el.get_text(), re.IGNORECASE)
        if owner_pattern:
            owners = owner_pattern.group(1)

        # Market adaptation (usually mentioned in description)
        if 'local' in el.get_text().lower():
            market_adapted_for = 'Local'
        elif 'import' in el.get_text().lower():
            market_adapted_for = 'Import'

        car_results.append({
            'name': name,
            'brand': brand,
            'model': model,
            'price': price,
            'currency': currency,
            'year': year,
            'engine_power': engine_power,
            'km': km,
            'mileage': mileage,
            'city': city,
            'body_type': body_type,
            'transmission': transmission,
            'drive_type': drive_type,
            'number_of_seats': number_of_seats,
            'owners': owners,
            'condition': condition,
            'market_adapted_for': market_adapted_for
        })

# Create DataFrame from the collected results
cars = pd.DataFrame(car_results)

# Display the first few rows of the DataFrame
print("\nDataFrame created successfully. First 5 rows:")
print(cars.head())

# Display column info
print("\nDataFrame info:")
print(cars.info())

# Save to CSV
cars.to_csv('turbo_az_cars.csv', index=False, encoding='utf-8')
print("\nData saved to 'turbo_az_cars.csv'")

# Display summary statistics
print("\nSummary of extracted data:")
print(f"Total cars scraped: {len(cars)}")
print(f"Unique brands: {cars['brand'].nunique()}")
print(f"Unique cities: {cars['city'].nunique()}")
print(f"Price range: {cars['price'].min()} - {cars['price'].max()}")

Scraping page: 1
Scraping page: 2
Scraping page: 3
Scraping page: 4
Scraping page: 5
Scraping page: 6
Scraping page: 7
Scraping page: 8
Scraping page: 9

DataFrame created successfully. First 5 rows:
                      name     brand           model    price currency  year  \
0              Kia Sorento       Kia         Sorento  82900AZ        N  2024   
1  Mercedes GLS 450 4MATIC  Mercedes  GLS 450 4MATIC   142000        $  2023   
2            Nissan Patrol    Nissan          Patrol   124900        $  2025   
3      Jeep Grand Cherokee      Jeep  Grand Cherokee  28500AZ        N  2013   
4           Hyundai Accent   Hyundai          Accent  11200AZ        N  2008   

  engine_power          km     mileage  city body_type transmission  \
0        2.2 L        0 km        0 km  None      None         None   
1        3.0 L        0 km        0 km  None      None         None   
2        3.5 L        0 km        0 km  None      None         None   
3        3.6 L  156 000 km  156 000

In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Initialize an empty list to store car results
car_results = []
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

for page_number in range(1, 5):
    url = f'https://turbo.az/autos?page={page_number}'
    print(f"Scraping page: {page_number}")
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Request failed for page {page_number}: {e}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all car listing elements
    listings = soup.find_all('div', {'class': 'products-i__bottom'})

    if not listings:
        print(f"No listings found on page {page_number}. Stopping.")
        break

    for el in listings:
        # Initialize all variables
        name = None
        price = None
        currency = None
        year = None
        engine_power = None
        km = None
        mileage = None
        city = None
        brand = None
        model = None
        body_type = None
        transmission = None
        drive_type = None
        number_of_seats = None
        owners = None
        condition = None
        market_adapted_for = None

        # Safely extract 'name'
        name_tag = el.find('div', {'class': 'products-i__name products-i__bottom-text'})
        if name_tag:
            name = name_tag.text.strip()
            # Extract brand and model from name
            if name:
                name_parts = name.split()
                if len(name_parts) >= 2:
                    brand = name_parts[0]
                    model = ' '.join(name_parts[1:])

        # Safely extract 'price' and 'currency'
        price_tag = el.find('div', {'class': 'products-i__price products-i__bottom-text'})
        if price_tag:
            full_price_text = price_tag.text.replace(' ', '').replace(',', '').strip()
            if full_price_text:
                # Check if price contains currency symbol at the end
                if full_price_text[-1] in ['₼', '$', '€']:
                    currency = full_price_text[-1]
                    price = full_price_text[:-1]
                # If no currency symbol, check if it's all digits (likely AZN)
                elif full_price_text.replace('.', '').isdigit():
                    currency = 'AZN'  # Default to AZN when no currency symbol
                    price = full_price_text
                else:
                    # Try to extract numbers and assume AZN
                    price_numbers = re.findall(r'\d+', full_price_text)
                    if price_numbers:
                        price = ''.join(price_numbers)
                        currency = 'AZN'

        # Safely extract 'year', 'engine_power', 'km'
        attributes_tag = el.find('div', {'class': 'products-i__attributes products-i__bottom-text'})
        if attributes_tag:
            attributes_text = attributes_tag.text.strip()
            parts = attributes_text.split(',')
            if len(parts) > 0:
                year = parts[0].strip()
            if len(parts) > 1:
                engine_power = parts[1].strip()
            if len(parts) > 2:
                km = parts[2].strip()
                mileage = km  # km and mileage are the same

        # Extract city (location) - try multiple possible selectors
        city_selectors = [
            'div.products-i__city',
            'div.products-i__location',
            'span.products-i__city',
            'div[class*="city"]',
            'div[class*="location"]'
        ]

        for selector in city_selectors:
            city_tag = el.find_next(selector.split('.')[0], {'class': selector.split('.')[1] if '.' in selector else None})
            if city_tag:
                city = city_tag.text.strip()
                break

        # If city not found in current element, try parent container
        if not city:
            parent_container = el.find_parent('div', class_='products-i')
            if parent_container:
                for selector in city_selectors:
                    city_tag = parent_container.find(selector.split('.')[0], {'class': selector.split('.')[1] if '.' in selector else None})
                    if city_tag:
                        city = city_tag.text.strip()
                        break

        # Look for additional car details in the full listing container
        # Get the parent container that has all the car information
        full_listing = el.find_parent('div', class_='products-i') or el.find_parent('a')

        if full_listing:
            full_text = full_listing.get_text().lower()

            # Extract additional details from any text content
            detail_patterns = {
                'body_type': ['sedan', 'suv', 'hatchback', 'coupe', 'wagon', 'pickup', 'crossover', 'minivan', 'roadster', 'universal'],
                'transmission': ['avtomatik', 'mexaniki', 'variator', 'robot', 'automatic', 'manual'],
                'drive_type': ['ön', 'arxa', 'tam', 'front', 'rear', 'all', '4wd', 'awd', 'fwd', 'rwd'],
                'condition': ['yeni', 'işlənmiş', 'zədəli', 'new', 'used', 'damaged', 'excellent', 'normal'],
                'market_adapted_for': ['yerli', 'xarici', 'local', 'import', 'imported']
            }

            for field, patterns in detail_patterns.items():
                for pattern in patterns:
                    if pattern in full_text:
                        if field == 'body_type':
                            body_type = pattern.title()
                        elif field == 'transmission':
                            transmission = pattern.title()
                        elif field == 'drive_type':
                            drive_type = pattern.title()
                        elif field == 'condition':
                            condition = pattern.title()
                        elif field == 'market_adapted_for':
                            market_adapted_for = 'Local' if pattern in ['yerli', 'local'] else 'Import'
                        break

            # Extract number of seats (look for patterns like "5 yerlik", "7 seats")
            seat_pattern = re.search(r'(\d+)\s*(yerlik|seats?|oturacaq)', full_text)
            if seat_pattern:
                number_of_seats = seat_pattern.group(1)

            # Extract number of owners
            owner_patterns = [
                r'(\d+)\s*(sahibli|owner|sahibi)',
                r'sahibli[:\s]*(\d+)',
                r'(\d+)\s*-ci\s*sahibi'
            ]
            for pattern in owner_patterns:
                owner_match = re.search(pattern, full_text)
                if owner_match:
                    owners = owner_match.group(1)
                    break

        # Additional extraction from link attributes or title
        link_element = el.find_parent('a') or el.find('a')
        if link_element:
            title_attr = link_element.get('title', '')
            if title_attr:
                title_text = title_attr.lower()
                # Extract additional info from title attribute
                if not city and any(location in title_text for location in ['baku', 'ganja', 'sumgait', 'mingachevir']):
                    for loc in ['baku', 'ganja', 'sumgait', 'mingachevir']:
                        if loc in title_text:
                            city = loc.title()
                            break

        car_results.append({
            'name': name,
            'brand': brand,
            'model': model,
            'price': price,
            'currency': currency,
            'year': year,
            'engine_power': engine_power,
            'km': km,
            'mileage': mileage,
            'city': city,
            'body_type': body_type,
            'transmission': transmission,
            'drive_type': drive_type,
            'number_of_seats': number_of_seats,
            'owners': owners,
            'condition': condition,
            'market_adapted_for': market_adapted_for
        })

# Create DataFrame from the collected results
cars = pd.DataFrame(car_results)

# Display the first few rows of the DataFrame
print("\nDataFrame created successfully. First 5 rows:")
print(cars.head())

# Display column info
print("\nDataFrame info:")
print(cars.info())

# Show non-empty data for missing fields
print("\nNon-empty data summary:")
for col in ['city', 'body_type', 'transmission', 'drive_type', 'number_of_seats', 'owners', 'condition', 'market_adapted_for']:
    non_empty = cars[col].dropna()
    print(f"{col}: {len(non_empty)} non-empty values")
    if len(non_empty) > 0:
        print(f"  Sample values: {non_empty.unique()[:5]}")

# Save to CSV
cars.to_csv('turbo_az_cars.csv', index=False, encoding='utf-8')
print("\nData saved to 'turbo_az_cars.csv'")

# Display summary statistics
print("\nSummary of extracted data:")
print(f"Total cars scraped: {len(cars)}")
print(f"Unique brands: {cars['brand'].nunique()}")
print(f"Unique cities: {cars['city'].nunique()}")
if cars['price'].notna().any():
    try:
        # Convert price to numeric for statistics
        numeric_prices = pd.to_numeric(cars['price'], errors='coerce')
        print(f"Price range: {numeric_prices.min():.0f} - {numeric_prices.max():.0f}")
    except:
        print("Price statistics unavailable (non-numeric values)")
else:
    print("No price data available")

Scraping page: 1
Request failed for page 1: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=1
Scraping page: 2
Request failed for page 2: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=2
Scraping page: 3
Request failed for page 3: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=3
Scraping page: 4
Request failed for page 4: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=4

DataFrame created successfully. First 5 rows:
Empty DataFrame
Columns: []
Index: []

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Empty DataFrame
None

Non-empty data summary:


KeyError: 'city'

In [15]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Initialize an empty list to store car results
car_results = []

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Define a dictionary of common attribute labels and their target variables (Azerbaijani to English)
attribute_labels = {
    'Ban növü': 'body_type',
    'Sürətlər qutusu': 'transmission',
    'Ötürücü': 'drive_type',
    'Oturacaq sayı': 'number_of_seats',
    'Sahiblər': 'owners',
    'Vəziyyəti': 'condition',
    'Bazara uyğunlaşma': 'market_adapted_for'
}

# Mapping of Azerbaijani/common values to standardized English terms
value_mapping = {
    'sedan': 'Sedan', 'suv': 'SUV', 'hetçbek': 'Hatchback', 'kupe': 'Coupe',
    'universal': 'Wagon', 'pikap': 'Pickup', 'krossover': 'Crossover',
    'minivan': 'Minivan', 'rodster': 'Roadster', 'limuzin': 'Limousine', # Added more body types
    'avtomatik': 'Automatic', 'mexaniki': 'Manual', 'variator': 'Variator', 'robot': 'Robot',
    'ön': 'Front-wheel drive', 'arxa': 'Rear-wheel drive', 'tam': 'All-wheel drive',
    'new': 'New', 'yeni': 'New', 'used': 'Used', 'işlənmiş': 'Used', 'normal': 'Used',
    'damaged': 'Damaged', 'zədəli': 'Damaged', 'əla': 'Used (Excellent)',
    'yerli': 'Local Market', 'xarici': 'Imported Market', 'yerli bazar': 'Local Market',
    'automatic': 'Automatic', 'manual': 'Manual', 'front': 'Front-wheel drive',
    'rear': 'Rear-wheel drive', 'all': 'All-wheel drive', '4wd': 'All-wheel drive',
    'awd': 'All-wheel drive', 'fwd': 'Front-wheel drive', 'rwd': 'Rear-wheel drive',
    'excellent': 'Used (Excellent)'
}


for page_number in range(1, 5): # Looping through pages 1 to 4 as before
    url = f'https://turbo.az/autos?page={page_number}'
    print(f"Scraping page: {page_number}")

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Request failed for page {page_number}: {e}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all car listing elements. Each 'products-i' div represents a full car listing.
    listings = soup.find_all('div', {'class': 'products-i'})

    if not listings:
        print(f"No listings found on page {page_number}. Stopping.")
        break

    for listing_el in listings: # 'listing_el' now represents the entire product item div
        # Initialize all variables for the current car
        name = None
        price = None
        currency = None
        year = None
        engine_power = None
        km = None
        mileage = None
        city = None
        brand = None
        model = None
        body_type = None
        transmission = None
        drive_type = None
        number_of_seats = None
        owners = None
        condition = None
        market_adapted_for = None

        # The 'products-i__bottom' div usually contains name, price, year, engine, km
        bottom_el = listing_el.find('div', {'class': 'products-i__bottom'})

        if bottom_el:
            # Safely extract 'name'
            name_tag = bottom_el.find('div', {'class': 'products-i__name products-i__bottom-text'})
            if name_tag:
                name = name_tag.text.strip()
                # Extract brand and model from name (e.g., "BMW X5" -> Brand: BMW, Model: X5)
                if name:
                    name_parts = name.split(maxsplit=1) # Split only on the first space
                    if len(name_parts) >= 1:
                        brand = name_parts[0]
                        if len(name_parts) > 1:
                            model = name_parts[1]

            # Safely extract 'price' and 'currency'
            price_tag = bottom_el.find('div', {'class': 'products-i__price products-i__bottom-text'})
            if price_tag:
                full_price_text = price_tag.text.replace(' ', '').replace(',', '').strip()
                if full_price_text:
                    # Check if price contains currency symbol at the end
                    if full_price_text and full_price_text[-1] in ['₼', '$', '€']:
                        currency = full_price_text[-1]
                        price = full_price_text[:-1]
                    # If no currency symbol, check if it's all digits (likely AZN)
                    elif full_price_text.replace('.', '').isdigit():
                        currency = 'AZN'  # Default to AZN when no explicit symbol
                        price = full_price_text
                    else:
                        # Try to extract numbers and assume AZN if other symbols are present
                        price_numbers = re.findall(r'\d+', full_price_text)
                        if price_numbers:
                            price = ''.join(price_numbers)
                            currency = 'AZN' # Fallback to AZN if numbers are found without explicit currency

            # Safely extract 'year', 'engine_power', 'km' from the attributes line
            attributes_tag = bottom_el.find('div', {'class': 'products-i__attributes products-i__bottom-text'})
            if attributes_tag:
                attributes_text = attributes_tag.text.strip()
                parts = attributes_text.split(',')
                if len(parts) > 0:
                    year = parts[0].strip()
                if len(parts) > 1:
                    engine_power = parts[1].strip() # Example: "3.0 L" or "250 hp"
                if len(parts) > 2:
                    km = parts[2].strip()
                    mileage = km  # km and mileage are essentially the same value here

        # --- Enhanced City Extraction ---
        # City is typically found in a div with class 'products-i__city' within the main listing_el
        city_tag = listing_el.find('div', {'class': 'products-i__city'})
        if city_tag:
            city = city_tag.text.strip()
        else:
            # Fallback: attempt to find common Azerbaijani city names within the full listing's text
            city_pattern = r'\b(Bakı|Gəncə|Sumqayıt|Mingəçevir|Naxçıvan|Lənkəran|Şəki|Quba|Qazax|Tovuz|Astara|Bərdə|Kürdəmir|Masallı|Qəbələ|Salyan|Şamaxı|Yevlax)\b'
            city_match = re.search(city_pattern, listing_el.get_text(), re.IGNORECASE)
            if city_match:
                city = city_match.group(1).title() # Capitalize the first letter of the found city name

        # --- Enhanced Extraction for other detailed attributes (body_type, transmission, etc.) ---
        # Look for a specific section that contains detailed attributes. This could be in a 'products-i__info' div
        # or other elements within the main 'listing_el'.

        # Prioritize the 'products-i__info' div for finding these details, fall back to the full listing_el
        main_info_area = listing_el.find('div', {'class': 'products-i__info'}) or listing_el

        if main_info_area:
            # Search for various HTML elements that might contain attribute labels and their values.
            # Using find_all with multiple tags to cover different structuring possibilities.
            detail_elements = main_info_area.find_all(['div', 'span', 'li', 'p', 'h4', 'h5', 'td'])

            for elem in detail_elements:
                # Use separator=' ' to handle cases where value is in a nested tag
                text_content = elem.get_text(separator=' ', strip=True)

                for label_az, var_name in attribute_labels.items():
                    # Create a regex pattern to find "Label: Value" or "Label Value"
                    # re.escape() handles special characters in labels
                    # (.+) captures the value after the label, non-greedily
                    pattern = re.compile(rf'{re.escape(label_az)}\s*[:]?\s*(.+?)(?:\s*,|\s*$)', re.IGNORECASE)
                    match = pattern.search(text_content)

                    if match:
                        value_raw = match.group(1).strip()
                        # Standardize the extracted value using the value_mapping
                        standardized_value = value_mapping.get(value_raw.lower(), value_raw)

                        # Assign the standardized value to the correct variable
                        if var_name == 'body_type':
                            body_type = standardized_value
                        elif var_name == 'transmission':
                            transmission = standardized_value
                        elif var_name == 'drive_type':
                            drive_type = standardized_value
                        elif var_name == 'number_of_seats':
                            # Extract numbers only for seats
                            num_match = re.search(r'\d+', value_raw)
                            if num_match:
                                number_of_seats = num_match.group(0)
                        elif var_name == 'owners':
                            # Extract numbers only for owners
                            num_match = re.search(r'\d+', value_raw)
                            if num_match:
                                owners = num_match.group(0)
                        elif var_name == 'condition':
                            condition = standardized_value
                        elif var_name == 'market_adapted_for':
                            market_adapted_for = standardized_value
                        # Once a match is found for a specific label, break to avoid reprocessing
                        # and ensure the first found value is kept.
                        break

        # Append the collected data for the current car to the results list
        car_results.append({
            'name': name,
            'brand': brand,
            'model': model,
            'price': price,
            'currency': currency,
            'year': year,
            'engine_power': engine_power,
            'km': km,
            'mileage': mileage,
            'city': city,
            'body_type': body_type,
            'transmission': transmission,
            'drive_type': drive_type,
            'number_of_seats': number_of_seats,
            'owners': owners,
            'condition': condition,
            'market_adapted_for': market_adapted_for
        })

# Create DataFrame from the collected results
cars = pd.DataFrame(car_results)

# Display the first few rows of the DataFrame
print("\nDataFrame created successfully. First 5 rows:")
print(cars.head())

# Display column info
print("\nDataFrame info:")
print(cars.info())

# Display summary statistics for currency issue debugging
print("\nCurrency distribution:")
print(cars['currency'].value_counts())
print("\nSample of price and currency data:")
print(cars[['name', 'price', 'currency']].head(10))

# Show non-empty data for missing fields for verification
print("\nNon-empty data summary:")
for col in ['city', 'body_type', 'transmission', 'drive_type', 'number_of_seats', 'owners', 'condition', 'market_adapted_for']:
    non_empty = cars[col].dropna()
    print(f"{col}: {len(non_empty)} non-empty values")
    if len(non_empty) > 0:
        print(f"  Sample values: {non_empty.unique()[:5]}")

# Save the DataFrame to a CSV file
cars.to_csv('turbo_az_cars.csv', index=False, encoding='utf-8')
print("\nData saved to 'turbo_az_cars.csv'")

# Display overall summary statistics
print("\nSummary of extracted data:")
print(f"Total cars scraped: {len(cars)}")
print(f"Unique brands: {cars['brand'].nunique()}")
print(f"Unique cities: {cars['city'].nunique()}")
if cars['price'].notna().any():
    try:
        # Convert price to numeric for statistics, coercing errors to NaN
        numeric_prices = pd.to_numeric(cars['price'], errors='coerce')
        print(f"Price range: {numeric_prices.min():.0f} - {numeric_prices.max():.0f}")
    except Exception as e:
        print(f"Price statistics unavailable (error converting price: {e})")
else:
    print("No price data available")


Scraping page: 1
Request failed for page 1: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=1
Scraping page: 2
Request failed for page 2: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=2
Scraping page: 3
Request failed for page 3: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=3
Scraping page: 4
Request failed for page 4: 403 Client Error: Forbidden for url: https://turbo.az/autos?page=4

DataFrame created successfully. First 5 rows:
Empty DataFrame
Columns: []
Index: []

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Empty DataFrame
None

Currency distribution:


KeyError: 'currency'