In [None]:
import os
import time
import requests
import pandas as pd

from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
def fetch_urls_response(function, urls, max_workers=None):
    """Call ``function`` on every URL concurrently and collect the successful results.

    Parameters
    ----------
    function : callable
        Invoked once per URL with the URL as its only argument. Returning the
        literal ``False`` marks that URL as failed; raising an exception does
        the same. Failed URLs are reported with ``print`` and skipped.
    urls : iterable
        Values passed one by one to ``function``.
    max_workers : int, optional
        Size of the thread pool. Defaults to ``os.cpu_count()``.

    Returns
    -------
    list
        Results of the successful calls, in completion order (which is not
        necessarily the order of ``urls``).
    """
    if max_workers is None:
        max_workers = os.cpu_count()

    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its URL so failures can be reported by URL.
        futures = {executor.submit(function, url): url for url in urls}

        for future in as_completed(futures):
            url = futures[future]

            # Only the worker call itself is guarded. A failure must not shut
            # the pool down or pause collection: every task has already been
            # submitted, so doing so would only slow down gathering results.
            try:
                result = future.result()
            except Exception as e:
                print(f"Exception occurred for URL: {url}, {e}")
                continue

            if result is False:
                print(f"Processing failed for URL: {url}")
                continue

            print(f"Thread completed for URL: {url}")
            results.append(result)

    return results

In [None]:
# Download the first ten search-result pages (50 listings per page).
SEARCH_PAGE_URL_TEMPLATE = "https://www.arabam.com/ikinci-el/otomobil/fiat?take=50&page={}"
# Without a timeout, a single stalled connection would block its worker thread
# forever and keep the whole thread pool from finishing.
SEARCH_REQUEST_TIMEOUT_SECONDS = 30

search_pages_urls = [SEARCH_PAGE_URL_TEMPLATE.format(number) for number in range(1, 11)]
search_pages_response = fetch_urls_response(
    lambda url: requests.get(url, timeout=SEARCH_REQUEST_TIMEOUT_SECONDS),
    search_pages_urls,
)
# Pages whose request failed are already dropped by fetch_urls_response.
search_pages_text = [page.text for page in search_pages_response]

In [None]:
# Collect the absolute URL of every advertisement linked from the search pages.
base_url = "https://www.arabam.com"
advertisement_links = []

for page_text in search_pages_text:
    soup = BeautifulSoup(page_text, "html.parser")
    advertisement_cards = soup.find_all("td", class_="horizontal-half-padder-minus pr")

    for card in advertisement_cards:
        # Skip cards that have no link instead of aborting the whole cell with
        # a TypeError/KeyError on a missing anchor or href.
        anchor = card.find("a", href=True)
        if anchor is None:
            continue
        # NOTE(review): assumes hrefs are site-relative paths — confirm.
        advertisement_links.append(base_url + anchor["href"])

In [None]:
# Download every advertisement page. The 30-second timeout keeps one stalled
# connection from blocking a worker thread (and therefore the pool) forever.
advertisement_pages_response = fetch_urls_response(
    lambda url: requests.get(url, timeout=30),
    advertisement_links,
)

In [None]:
# Turn each advertisement page into a {property name: value, ..., "Price": ...} dict.
advertisements_details = []

# Text that is filtered out of the property lines. The correctly decoded
# spelling is accepted alongside the mis-decoded one, so the filter works
# whichever way the response text was decoded.
# NOTE(review): presumably the label of a copy-to-clipboard control — confirm.
copied_labels = {"Kopyalandı", "KopyalandÄ±"}

for response in advertisement_pages_response:
    soup = BeautifulSoup(response.text, "html.parser")

    price_tag = soup.find("div", {"data-testid": "desktop-information-price"})
    properties_tag = soup.find("div", class_="product-properties-details linear-gradient")
    # Pages missing either element are skipped.
    if price_tag is None or properties_tag is None:
        continue

    price = price_tag.text.strip()

    stripped_lines = (line.strip() for line in properties_tag.text.split("\n"))
    cleaned_lines = [line for line in stripped_lines if line and line not in copied_labels]

    # Remaining lines alternate name, value, name, value...; a trailing
    # unpaired line is dropped.
    # NOTE(review): a property with an empty value would shift every later
    # pair — verify against the page markup.
    detail = dict(zip(cleaned_lines[::2], cleaned_lines[1::2]))
    detail["Price"] = price

    advertisements_details.append(detail)

In [None]:
# Write the scraped advertisements to an Excel workbook in the working
# directory, one row per advertisement.
pd.DataFrame(data=advertisements_details).to_excel(excel_writer="fiat_cars.xlsx")