In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd
import copy

# Setup options
options = Options()
# options.add_argument("--headless")  # Uncomment after testing
options.add_argument("--window-size=1920,1080")

# Start the browser
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

driver.get("https://www.diaconsulting.de/de/16/Zertifizierungsverzeichnis")
time.sleep(2)

response=[]

In [2]:
# Accept cookie banner
try:
    cookie = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//button[@id='btn_all']"))
    )
    driver.execute_script("arguments[0].click();", cookie)
    time.sleep(1)
except:
    print("✅ Cookie banner not found or already accepted.")

In [3]:
def extract_results():
    try:
        expert_blocks = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'list-row')]"))
        )
        for block in expert_blocks:
                try:

                    print("result scrape")
                    block.click()
                    # Name
                    name = block.find_element(By.XPATH, ".//strong[contains(@class, 'item_name')]").text.strip()

                    # Address (even if display:none)
                    try:
                        address_elem = block.find_element(By.XPATH, ".//div[contains(@class, '6u$')]/p[1]")
                        address = address_elem.text.strip()
                    except:
                        address = ""

                    # Certifications (joining multiple cert paragraphs)
                    cert_parts = block.find_elements(By.XPATH, ".//p[contains(@class, 'item-certs')]")
                    cert = " | ".join([c.get_attribute("innerText").strip() for c in cert_parts]) if cert_parts else ""

                    # Phone: collect all phone numbers labeled 'Telefon' or 'Mobil'
                    phone_numbers = []
                    phone_ps = block.find_elements(By.XPATH, ".//p[span[contains(@class, 'label-phone')]]")
                    for p in phone_ps:
                        label = p.find_element(By.XPATH, "./span[contains(@class, 'label-phone')]").text.strip().rstrip(":")
                        # get full text of <p>, remove label text, and clean
                        full_text = p.text.strip()
                        number = full_text.replace(label + ":", "").strip()
                        if number:
                            phone_numbers.append(f"{label}: {number}")
                    phone = " | ".join(phone_numbers)

                    # Email
                    email_elem = block.find_elements(By.XPATH, ".//span[contains(@class, 'label-email')]/following-sibling::a[1]")
                    email = email_elem[0].text.strip() if email_elem else ""

                    # Website
                    website_elem = block.find_elements(By.XPATH, ".//span[contains(@class, 'label-web')]/following-sibling::a[1]")
                    website = website_elem[0].text.strip() if website_elem else ""

                    response.append({
                        "Full Name": name,
                        "Address": address,
                        "Phone Number": phone,
                        "Email Address": email,
                        "Website": website,
                        "Certification Type / Details": cert,
                        "Source Directory Name": "DIA_ZERT"
                    })

                except Exception as block_err:
                    print(f"❗ Error extracting a block: {str(block_err)}")

    except TimeoutException:
        print("⏱️ Timeout: No search results found; check selectors or form submission")
    except Exception as e:
        print(f"❗ Unexpected error extracting results: {str(e)}")


In [None]:
for i in range(1, 30):  # range is exclusive at the end
    extract_results()
    try:
        xpath = f'//a[@data-no="{i}"]'
        next_button = driver.find_element(By.XPATH, xpath)
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(3)
    except Exception as e:
        print(f"🔁 Could not click page {i}: {e}")
        break

In [5]:
print(response)

[{'Full Name': 'Adamczyk, Maik', 'Address': 'Korbflechterstraße 31\n48369 Saerbeck', 'Phone Number': 'Telefon: 02574/9397412 | Mobil: 0151/17881852', 'Email Address': 'info@ma-immowert.de', 'Website': 'http://www.ma-immowert.de', 'Certification Type / Details': 'Zertifizierter Sachverständiger DIAZert (LS)\nZertifiziert für die Marktwertermittlung von Standardimmobilien | Zertifizierung gültig bis: 17.05.2028', 'Source Directory Name': 'DIA_ZERT'}, {'Full Name': 'Alekozai, Dr. M. Emal', 'Address': 'Heidelberger Str.7\n69207 Sandhausen', 'Phone Number': 'Mobil: 01781359400 | Mobil: 0178 1359400', 'Email Address': 'alganabi.alekozai@gmail.com', 'Website': '', 'Certification Type / Details': 'Zertifizierter Immobiliengutachter DIAZert (LS)\nZertifiziert für die Marktwertermittlung von Standardimmobilien | Zertifizierung gültig bis: 19.08.2026', 'Source Directory Name': 'DIA_ZERT'}, {'Full Name': 'Altrichter, Julia', 'Address': 'Puscherstr. 7\n90411 Nürnberg', 'Phone Number': 'Telefon: 091

In [7]:
import copy
# Make a backup before filtering out entries without addresses
backup_response = copy.deepcopy(response)

In [8]:
print(f"🔎 Original: {len(backup_response)} entries")
print(f"✅ After cleaning: {len(response)} entries")


🔎 Original: 720 entries
✅ After cleaning: 720 entries


In [13]:
df = pd.DataFrame(response)
df.drop_duplicates(inplace=True)
df.to_csv("DIA_ZERT_results.csv", index=False)
df.to_excel("DIA_ZERT_results.xlsx", index=False)

In [14]:
df.head(10)

Unnamed: 0,Full Name,Address,Phone Number,Email Address,Website,Certification Type / Details,Source Directory Name
0,"Adamczyk, Maik",Korbflechterstraße 31\n48369 Saerbeck,Telefon: 02574/9397412 | Mobil: 0151/17881852,info@ma-immowert.de,http://www.ma-immowert.de,Zertifizierter Sachverständiger DIAZert (LS)\n...,DIA_ZERT
1,"Alekozai, Dr. M. Emal",Heidelberger Str.7\n69207 Sandhausen,Mobil: 01781359400 | Mobil: 0178 1359400,alganabi.alekozai@gmail.com,,Zertifizierter Immobiliengutachter DIAZert (LS...,DIA_ZERT
2,"Altrichter, Julia",Puscherstr. 7\n90411 Nürnberg,Telefon: 0911 23966723,j.altrichter@rls-immobilien.com,https://www.svb-altrichter.com,Zertifizierte Immobiliengutachterin DIAZert (L...,DIA_ZERT
3,"Ancker, Michael",Brodschrangen 4\n20457 Hamburg,Telefon: 040 35743740 | Mobil: 0170 5369373 | ...,info@ma-ic.de,https://www.ma-ic.de,Zertifizierter Sachverständige DIAZert(LF)\nZe...,DIA_ZERT
4,"Andrissek, Joachim",Heumadener Straße 6\n73760 Ostfildern,Telefon: 0711 96891489 | Mobil: 0177 3613108 |...,j.andrissek@andrissek-isvb.de,,Zertifizierter Immobiliengutachter DIAZert (LS...,DIA_ZERT
5,"Arampatzis, Georgios",Erich-Bloch-Weg 6\n78467 Konstanz,Mobil: 015254233082 | Mobil: 0152 54233082,info@arampatzis.immobilien,https://www.arampatzis.immobilien,Zertifizierter Immobiliengutachter DIAZert (S)...,DIA_ZERT
6,"Artl, Simone",Speidelweg 47\n89077 Ulm,Telefon: 0176 56701051,simone.artl1987@gmail.com,,Zertifizierte Immobiliengutachterin DIAZert (L...,DIA_ZERT
7,"Bachert, Manuel",Heinrich-von-Stephan-Str. 8a\n79100 Freiburg i...,Telefon: 0761 7058115 | Mobil: 0151 18848778,bachert.manuel@gxb.immo,https://www.gxb-immobilienbewertung.de/,Zertifizierter Immobiliengutachter DIAZert (LS...,DIA_ZERT
8,"Bachler, Hendrik",Steinbruchweg 17\n33605 Bielefeld,Telefon: 0521 5212793 | Mobil: 0177 5212793,info@immogutachter-bielefeld.de,https://www.immobiliengutachter-bielefeld.de,Zertifizierter Immobiliengutachter DIAZert (LS...,DIA_ZERT
9,"Baiz, Stefanie",Herzmanns 10\n87448 Waltenhofen,Telefon: 08379 2348 136 | Mobil: 0151 18401400,stefanie.baiz@geigergruppe.de,https://www.geigergruppe.de,Zertifizierte Sachverständige DIAZert (LF)\nZe...,DIA_ZERT
