In [25]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import random

def random_sleep(min_seconds=1, max_seconds=3):
    """Block for a randomly chosen duration.

    The duration is drawn with ``random.uniform(min_seconds, max_seconds)``
    and passed to ``time.sleep``; the irregular pause is meant to look less
    like an automated client.

    Args:
        min_seconds: First bound handed to ``random.uniform``.
        max_seconds: Second bound handed to ``random.uniform``.
    """
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)

def extract_detailed_info(soup):
    """Collect the known expose fields from a parsed page.

    Builds the mapping of output field name -> lookup string and hands it to
    ``scrape_info``. ``'title'`` maps to a tag name (``h1``); every other
    entry maps to a CSS class string.

    Args:
        soup: Parsed page object, forwarded unchanged to ``scrape_info``.

    Returns:
        Whatever ``scrape_info`` returns for this mapping.
    """
    # Kept as ordered pairs so the resulting dict preserves this field order.
    field_selectors = (
        ('title', 'h1'),
        ('price', 'is24qa-kaufpreis'),
        ('sizeliving', 'is24qa-wohnflaeche-ca'),
        ('propertysize', 'is24qa-grundstueck-ca'),
        ('sizetotal', 'is24qa-nutzflaeche-ca'),
        ('rooms', 'is24qa-zimmer'),
        ('construction_year', 'is24qa-baujahr'),
        ('condition', 'is24qa-objektzustand'),
        ('heating_type', 'is24qa-heizungsart'),
        ('floor', 'is24qa-etage'),
        ('total_floors', 'is24qa-etagenzahl'),
        ('balcony', 'is24qa-balkon-terrasse-label'),
        ('rented', 'is24qa-vermietet-label'),
        ('garden', 'is24qa-garten-mitbenutzung-label'),
        ('basement', 'is24qa-keller-label'),
        ('elevator', 'is24qa-personenaufzug-label'),
        ('parking', 'is24qa-garage-stellplatz'),
        ('available_from', 'is24qa-bezugsfrei-ab'),
        ('monthly_costs', 'is24qa-monatliche-kosten'),
        ('kitchen', 'is24qa-einbaukueche-label'),
        ('commission', 'is24qa-provisionsfrei-fuer-kaufende-label'),
        ('provision', 'is24qa-provision'),
        ('furnished', 'is24qa-mobiliar'),
        ('energy_certificate', 'is24qa-energieausweis'),
        ('energy_class', 'is24qa-energieeffizienzklasse'),
        ('stairlessentrance', 'is24qa-stufenloser-zugang-label'),
        ('energyusage', 'is24qa-wesentliche-energietraeger'),
        ('guest_toilet', 'is24qa-gaeste-wc-label'),
        ('number_of_bathrooms', 'is24qa-badezimmer'),
        ('number_of_bedrooms', 'is24qa-schlafzimmer'),
        ('property_type', 'is24qa-typ'),
        ('modernized_year', 'is24qa-letztemodernisierung'),
        # Multi-class string rather than a single is24qa-* class.
        ('internet_speed_value',
         'grid-item desk-seven-tenths lap-five-eighths palm-six-eighths'),
    )

    return scrape_info(soup, dict(field_selectors))

def scrape_info(soup, determinants):
    """Look up each requested field in the parsed page.

    Dispatch is by field name:

    * ``'title'`` - the lookup string is passed to ``soup.find`` as a tag name.
    * the label fields listed in ``span_fields`` - searched as ``<span>``
      elements carrying the given class.
    * ``'energy_class'`` - a ``<dd>`` with the given class is located, then a
      nested ``span.energy-efficiency-class`` and its ``<img>``; the image's
      ``alt`` attribute is the value.
    * anything else - the first element with the given class, any tag.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find`` method.
        determinants: Mapping of output field name -> lookup string.

    Returns:
        A dict with one entry per key of ``determinants`` (same order). Values
        are stripped element text (or the ``alt`` text for ``energy_class``),
        and ``None`` wherever a lookup came back empty.
    """
    span_fields = (
        'balcony', 'elevator', 'guest_toilet', 'commission', 'kitchen',
        'basement', 'rented', 'garden', 'stairlessentrance',
    )

    def stripped_text(node):
        # Missing nodes are reported as None instead of raising.
        return node.get_text(strip=True) if node else None

    def energy_label(css_class):
        # Walk dd -> span -> img, giving up with None at the first gap.
        cell = soup.find('dd', class_=css_class)
        badge = cell.find('span', class_='energy-efficiency-class') if cell else None
        image = badge.find('img') if badge else None
        return image.get('alt') if image else None

    collected = {}
    for field, selector in determinants.items():
        if field == 'title':
            collected[field] = stripped_text(soup.find(selector))
        elif field in span_fields:
            collected[field] = stripped_text(soup.find('span', class_=selector))
        elif field == 'energy_class':
            collected[field] = energy_label(selector)
        else:
            collected[field] = stripped_text(soup.find(class_=selector))

    return collected

def scrape_single_property(url):
    """Open one expose page in Chrome and scrape its details.

    Starts a Chrome session, loads ``url``, waits up to 30 seconds for the
    element with class ``is24qa-kaufpreis`` to be present, scrolls to the
    bottom of the page, then parses ``driver.page_source`` and runs
    ``extract_detailed_info`` on it. The browser is always closed afterwards.

    Args:
        url: Address of the expose page to load.

    Returns:
        The dict produced by ``extract_detailed_info`` (it is also printed),
        or ``None`` when any step inside the scraping sequence raised; in that
        case the error is printed instead.
    """
    # Browser flags and a fixed desktop user agent, intended to make the
    # session look less like an automated client.
    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--disable-infobars")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    # Created outside the try block: if the browser cannot start there is
    # no driver to quit.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

    try:
        driver.get(url)

        # Block until the price element exists, i.e. the JavaScript-rendered
        # content has arrived (raises after 30 seconds otherwise).
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'is24qa-kaufpreis'))
        )

        # NOTE: cookie/consent overlays are not dismissed here; nothing on the
        # page is clicked, only page_source is read.

        # Scroll to the bottom to trigger any lazily rendered sections, then
        # give them a moment to appear.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        random_sleep(2, 4)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        property_details = extract_detailed_info(soup)

        print(property_details)
        # Hand the data back as well, so callers can use it programmatically.
        return property_details

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and keep going
        # rather than crashing the caller.
        print(f"Error scraping the page: {e}")
        return None

    finally:
        driver.quit()

if __name__ == "__main__":
    # Script entry point: scrape one hard-coded expose page.
    scrape_single_property(url="https://www.immobilienscout24.de/expose/158278824")


{'title': 'Mehrfamilienhaus mit 15 Wohneinheiten und ca. 869 m² Wohnfläche in Kiel zu verkaufen', 'price': '1.895.000 €', 'sizeliving': '869  m²', 'propertysize': '1.848  m²', 'sizetotal': None, 'rooms': '16', 'construction_year': '1962', 'condition': None, 'heating_type': 'Zentralheizung', 'floor': None, 'total_floors': None, 'balcony': None, 'rented': None, 'garden': None, 'basement': None, 'elevator': None, 'parking': None, 'available_from': None, 'monthly_costs': None, 'kitchen': None, 'commission': None, 'provision': '5,95 % inkl. MwSt.', 'furnished': None, 'energy_certificate': 'liegt vor', 'energy_class': 'D', 'stairlessentrance': None, 'energyusage': 'Öl', 'guest_toilet': None, 'number_of_bathrooms': None, 'number_of_bedrooms': None, 'property_type': 'Mehrfamilienhaus', 'modernized_year': None, 'internet_speed_value': 'Internet GeschwindigkeitDie Geschwindigkeitsangabe ist eine unverbindliche Angabe auf Basis der Standortadresse. Verbindliche Verfügbarkeitsprüfungen werden erst