In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import concurrent.futures
import warnings

# Silence every Python warning raised during the run so that the console
# output only contains the script's own messages.
warnings.filterwarnings("ignore")

# HTTP headers sent with every request issued by this script
# (both the results-page fetches and the listing-page fetches).
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5'
}

# Site root; used as the prefix of the results-page URL and of listing links.
baseurl = "https://turbo.az"

# Fixed multipliers applied to a listing price to express it in AZN.
# NOTE(review): the rates are hard-coded — confirm they are still current.
conversion_rates = {
    'USD': 1.70,  # USD to AZN
    'EUR': 1.88   # EUR to AZN
}

def fetch_car_links(page_num):
    """Return absolute URLs of the car listings found on one results page.

    Args:
        page_num: Value sent as the ``page`` query parameter of the search
            (the make filter ``q[make][]=4`` is fixed in the URL).

    Returns:
        list[str]: One absolute URL per ``div.products-i`` card that contains
        an ``<a href=...>``. Cards without such a link are skipped. The list
        is empty when the page contains no cards.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    # A timeout stops a stalled connection from blocking a worker forever.
    r = requests.get(
        f"{baseurl}/autos?page={page_num}&q%5Bmake%5D%5B%5D=4",
        headers=headers,
        timeout=30,
    )
    soup = BeautifulSoup(r.content, 'html.parser')

    links = []
    for item in soup.find_all("div", class_='products-i'):
        anchor = item.find("a", href=True)
        if anchor is None:
            # Card without a link (e.g. a placeholder): nothing to collect.
            continue
        # urljoin handles root-relative, relative and absolute hrefs alike.
        links.append(urljoin(baseurl, anchor["href"]))
    return links

def _digits_to_int(text):
    """Return the decimal digits found in *text* as an int, or None if none."""
    digits = ''.join(filter(str.isdecimal, text or ''))
    return int(digits) if digits else None


def _parse_engine(value):
    """Split a '/'-separated engine string into (litres, horsepower, fuel)."""
    parts = [part.strip() for part in value.split('/')]

    try:
        displacement = float(parts[0].replace(" L", ""))
    except ValueError:
        # Empty or unexpected first field: leave the displacement unknown.
        displacement = None

    horsepower = None
    if len(parts) > 1:
        match = re.search(r'\d+', parts[1])
        horsepower = int(match.group()) if match else None

    # Stripped above, so a " / " separator no longer leaks spaces into the value.
    fuel_type = (parts[2] or None) if len(parts) > 2 else None
    return displacement, horsepower, fuel_type


def fetch_car_details(url):
    """Download one listing page and return its details as a flat dict.

    Every field that is missing from the page, or whose text cannot be
    parsed, is returned as ``None`` instead of raising.

    Args:
        url: Absolute URL of the listing page.

    Returns:
        dict: Keys ``'ID'``, ``'Marka'``, ``'Model'``, ``'Buraxılış ili'``,
        ``'Şəhər'``, ``'Qiymət (AZN)'``, ``'Rəng'``, ``'Yürüş (km)'``,
        ``'Mühərrik (L)'``, ``'At gücü (a.g)'``, ``'Benzin tipi'``,
        ``'Sürətlər qutusu'``, ``'Yeni'``, ``'Vəziyyəti'`` and
        ``'Baxışların sayı'``. The price is converted with
        ``conversion_rates`` when the price text contains ``USD`` or ``EUR``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout stops a stalled connection from blocking a worker forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Listing ID; None if the element is absent or holds no digits.
    id_tag = soup.find('div', class_='product-actions__id')
    car_id = _digits_to_int(id_tag.text) if id_tag is not None else None

    # Price: keep the digits, then convert to AZN if a foreign currency
    # code appears in the text.
    price_value = None
    price_tag = soup.find('div', class_="product-price__i")
    if price_tag is not None:
        price_text = price_tag.text.strip()
        price_value = _digits_to_int(price_text)
        if price_value is not None:
            if "USD" in price_text:
                price_value = round(price_value * conversion_rates['USD'])
            elif "EUR" in price_text:
                price_value = round(price_value * conversion_rates['EUR'])

    # View counter; None if no statistics entry mentions it.
    number_of_views = None
    for span in soup.find_all('span', class_="product-statistics__i-text"):
        if "Baxışların sayı" in span.text:
            number_of_views = _digits_to_int(span.text)
            break

    # Car properties, pre-filled with None so absent rows stay None.
    properties = {
        'Marka': None,
        'Model': None,
        'Buraxılış ili': None,
        'Şəhər': None,
        'Rəng': None,
        'Yürüş (km)': None,
        'Mühərrik': None,
        'At gücü': None,
        'Benzin tipi': None,
        'Sürətlər qutusu': None,
        'Yeni': None,
        'Vəziyyəti': None
    }

    for el in soup.find_all('div', {'class': 'product-properties__i'}):
        label_tag = el.find('label')
        value_tag = el.find('span', class_='product-properties__i-value')
        if label_tag is None or value_tag is None:
            # Row without a label or a value: nothing usable to record.
            continue
        label = label_tag.text.strip()
        value = value_tag.text.strip()

        if label == 'Buraxılış ili':
            properties[label] = _digits_to_int(value)
        elif label == 'Yürüş':
            properties['Yürüş (km)'] = _digits_to_int(value)
        elif label == 'Mühərrik':
            (
                properties['Mühərrik'],
                properties['At gücü'],
                properties['Benzin tipi'],
            ) = _parse_engine(value)
        else:
            properties[label] = value if value else None

    # Combine all details into a single dictionary
    return {
        'ID': car_id,
        'Marka': properties['Marka'],
        'Model': properties['Model'],
        'Buraxılış ili': properties['Buraxılış ili'],
        'Şəhər': properties['Şəhər'],
        'Qiymət (AZN)': price_value,
        'Rəng': properties['Rəng'],
        'Yürüş (km)': properties['Yürüş (km)'],
        'Mühərrik (L)': properties['Mühərrik'],
        'At gücü (a.g)': properties['At gücü'],
        'Benzin tipi': properties['Benzin tipi'],
        'Sürətlər qutusu': properties['Sürətlər qutusu'],
        'Yeni': properties['Yeni'],
        'Vəziyyəti': properties['Vəziyyəti'],
        'Baxışların sayı': number_of_views
    }

# Main script: collect listing links from the results pages, fetch every
# listing in parallel, and save the rows to a CSV file.
if __name__ == "__main__":
    max_workers = 10  # Number of threads for parallel processing
    last_page = 315   # Adjust for the number of result pages you want to scrape

    # Step 1: fetch the listing links of every results page in parallel.
    productlinks = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_page = {
            executor.submit(fetch_car_links, page_num): page_num
            for page_num in range(1, last_page + 1)
        }
        for future in concurrent.futures.as_completed(future_to_page):
            # One failed page must not discard the links already collected,
            # so failures are reported and skipped at this top-level boundary.
            try:
                productlinks.extend(future.result())
            except Exception as exc:
                print(f"Skipping page {future_to_page[future]}: {exc!r}")

    # The same listing may be linked from more than one results page; drop
    # repeats (keeping first-seen order) so it is fetched and saved only once.
    productlinks = list(dict.fromkeys(productlinks))

    # Step 2: fetch the details of every listing in parallel.
    car_results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {executor.submit(fetch_car_details, url): url for url in productlinks}
        for future in concurrent.futures.as_completed(future_to_url):
            # Same policy as above: skip the failing listing, keep the rest.
            try:
                car_results.append(future.result())
            except Exception as exc:
                print(f"Skipping {future_to_url[future]}: {exc!r}")

    # Convert the collected data into a DataFrame
    cars_df = pd.DataFrame(car_results)

    # Write the data to a CSV file
    cars_df.to_csv("scraped_mercedes_data.csv", index=False, encoding='utf-8')

    print("Data has been written to scraped_mercedes_data.csv file")

Data has been written to scraped_mercedes_data.csv file
