In [1]:
import asyncio
import sys

# On Windows, install asyncio's Proactor event loop policy; other platforms keep their default.
# NOTE(review): no asyncio usage is visible in the cells shown here — confirm this cell is still needed.
if sys.platform.startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

In [2]:
import time
import random
import re

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [3]:
# Search results page that the notebook scrapes. All search filters are encoded in the query string.
# NOTE(review): the meaning of the individual parameters (ml, fr, st, pw, sr, dam, emc, ...) is not
# documented here — confirm against the site's search form before changing them.
SEARCH_URL = "https://www.mobile.de/es/veh%C3%ADculos/buscar.html?isSearchRequest=true&s=Car&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&ref=dsp"

# HTTP headers passed to requests.get by fetch_html: a desktop Chrome User-Agent string
# and an Accept-Language value that prefers Spanish, then English.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    "Accept-Language": "es-ES,es;q=0.9,en;q=0.8",
}

In [4]:
import time
import random
import requests

def fetch_html(url):
    """Download ``url`` and return the response body as text.

    The GET request is sent with the module-level ``HEADERS`` and a
    30-second timeout. ``raise_for_status()`` is called on the response, so
    an HTTP error status raises instead of returning a body. After a
    successful response the function sleeps for a random 1-2 seconds before
    returning (presumably to space out consecutive requests).

    Args:
        url: Address of the page to download.

    Returns:
        The response body (``Response.text``).
    """
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()

    # The pause happens only on success: an error status has already raised above.
    pause_seconds = random.uniform(1, 2)
    time.sleep(pause_seconds)

    return response.text

In [5]:
# Download the search results page once and keep the raw HTML for the following cells.
html = fetch_html(SEARCH_URL)

# Quick sanity check of the response: its type, total length, and the first 500 characters.
print(type(html))
print("len:", len(html))
print(html[:500])

<class 'str'>
len: 1224734
<!DOCTYPE html><html data-grid="8" data-theme="" lang="es"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="stylesheet" href="https://static.classistatic.de/consumer-webapp-next/_next/static/css/5a1b9bf0a3b9fa0d.css" crossorigin="anonymous" data-precedence="next"/><link rel="stylesheet" href="https://static.classistatic.de/consumer-webapp-next/_next/static/css/bc8ea11ede0825c5.css" crossorigin="anonymous" data-precedence="next"/><link r


In [6]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def extract_detail_links(search_html, base_url="https://www.mobile.de"):
    """Collect the absolute URLs of listing detail pages found in a search page.

    Every ``<a href=...>`` in ``search_html`` is inspected. An href is kept
    when it contains ``details.html`` or ``detalles.html`` and, once resolved
    against ``base_url``, the absolute URL contains both ``/veh`` and
    ``mobile.de``.

    Args:
        search_html: HTML text of a search results page.
        base_url: Base used to resolve relative hrefs.

    Returns:
        A sorted list of unique absolute URLs.
    """
    document = BeautifulSoup(search_html, "html.parser")
    found = set()

    for anchor in document.select("a[href]"):
        target = anchor.get("href", "")

        # Listings usually link to a "details" page; skip every other href.
        if "details.html" not in target and "detalles.html" not in target:
            continue

        absolute = urljoin(base_url, target)

        # Keep only links that look like vehicle pages on the site.
        if "/veh" in absolute and "mobile.de" in absolute:
            found.add(absolute)

    return sorted(found)

# Extract the detail-page links from the downloaded search page,
# report how many were found, and preview the first ten.
detail_links = extract_detail_links(html)
print("links encontrados:", len(detail_links))
detail_links[:10]

links encontrados: 24


['https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=423842448&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=424335114&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=429024928&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=433378695&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c7

In [7]:
import re

def clean_vehicle_links(links):
    """Keep only the links that point to a single vehicle's detail page.

    A link is kept when it contains ``detalles.html`` and carries a numeric
    ``id`` query parameter. The ``id`` must be a parameter of its own, i.e.
    directly preceded by ``?`` or ``&``, so that other parameters whose names
    merely end in ``id`` (for example ``refid=99``) are not mistaken for the
    vehicle id.

    Args:
        links: Iterable of URL strings.

    Returns:
        A new list with the accepted links, in their original order.
    """
    return [
        link
        for link in links
        # Anchor on the parameter separator so only a real "id" parameter matches.
        if "detalles.html" in link and re.search(r"[?&]id=\d+", link)
    ]

# Narrow the extracted links down to vehicle detail pages,
# report how many remain, and preview the first five.
vehicle_links = clean_vehicle_links(detail_links)

print("vehículos válidos:", len(vehicle_links))
vehicle_links[:5]

vehículos válidos: 24


['https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=423842448&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=424335114&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=429024928&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=433378695&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c7

In [9]:
# Show the first vehicle link in full (raises IndexError if vehicle_links is empty).
vehicle_links[0]

'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=423842448&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f'

In [14]:
# Preview the first ten vehicle links.
vehicle_links[:10]

['https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=423842448&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=424335114&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=429024928&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f',
 'https://www.mobile.de/es/veh%C3%ADculos/detalles.html?id=433378695&sb=rel&od=up&vc=Car&cn=DE&ml=%3A175000&fr=2013&st=DEALER&pw=74&sr=4&dam=0&emc=EURO6&s=Car&searchId=0f1d3c77-c4c3-cc19-9b73-e53d57f96a2f&ref=srp&refId=0f1d3c7