In [30]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait, Select
import time
import pandas as pd
import copy

# Setup options
options = Options()
# options.add_argument("--headless")  # Uncomment after testing
options.add_argument("--window-size=1920,1080")

# Start the browser
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

search_inputs = [
    {"plz": "01067"},{"plz": "04109"},{"plz": "06108"},{"plz": "10115"},{"plz": "14467"},
    {"plz": "19053"},{"plz": "20095"},{"plz": "23552"},{"plz": "24103"},{"plz": "26122"},
    {"plz": "28195"},{"plz": "30159"},{"plz": "33602"},{"plz": "34117"},{"plz": "37073"},
    {"plz": "39104"},{"plz": "44787"},{"plz": "47051"},{"plz": "50667"},{"plz": "53111"},
    {"plz": "56068"},{"plz": "60311"},{"plz": "66111"},{"plz": "68159"},{"plz": "70173"},
    {"plz": "72070"},{"plz": "74072"},{"plz": "80331"},{"plz": "86150"},{"plz": "89073"},
    {"plz": "90403"},{"plz": "93047"},{"plz": "94032"},{"plz": "96450"},{"plz": "99084"}
]


response=[]

In [31]:
def extract_results():
    try:
        expert_blocks = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, "//div[@class='treffer-container-div flex-50 ng-star-inserted']"))
        )
        for block in expert_blocks:
            try:
                name = block.find_element(By.XPATH, ".//h2[contains(@class, 'lux-card-title')]").text.strip() \
                    if block.find_elements(By.XPATH, ".//h2[contains(@class, 'lux-card-title')]") else ""

                cert = block.find_element(By.XPATH, ".//div[contains(@class, 'lux-card-subtitle')]").text.strip() \
                    if block.find_elements(By.XPATH, ".//div[contains(@class, 'lux-card-subtitle')]") else ""

                # Address: first line
                address_line1 = block.find_element(By.XPATH, ".//span[@id='detail-row-value-Hauptsitz-0']").text.strip() \
                    if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-Hauptsitz-0']") else ""

                # Address: second line (postal code + city)
                address_line2 = block.find_element(By.XPATH, ".//span[contains(@id,'detail-row-value') and contains(text(), 'Dresden')]").text.strip() \
                    if block.find_elements(By.XPATH, ".//span[contains(@id,'detail-row-value') and contains(text(), 'Dresden')]") else ""

                address = f"{address_line1}, {address_line2}".strip(", ")

                # Phone
                phone = block.find_element(By.XPATH, ".//span[@id='detail-row-value-Telefon-0']").text.strip() \
                    if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-Telefon-0']") else ""

                # Email
                email = block.find_element(By.XPATH, ".//span[@id='detail-row-value-E-Mail-0']").text.strip() \
                    if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-E-Mail-0']") else ""

                response.append({
                    "Full Name": name,
                    "Address": address,
                    "Phone Number": phone,
                    "Email Address": email,
                    "Website": "",
                    "Certification Type / Details": cert,
                    "Source Directory Name": "IHK"
                })

            except Exception as block_err:
                print(f"❗ Error extracting a block: {str(block_err)}")


    except TimeoutException:
            print("⏱️ Timeout: No search results found; check selectors or form submission")
    except Exception as e:
        print(f"❗ Unexpected error extracting results: {str(e)}")


In [32]:
# Start search loop
for search in search_inputs:
    driver.get("https://svv.ihk.de/svw-suche/4931566/suche-extern")
    time.sleep(2)

    wait = WebDriverWait(driver, 15)
    try:   
        plz_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@tabindex='2']")))
        plz_input.clear()
        plz_input.send_keys(search["plz"])

        umkreis_select = wait.until(EC.presence_of_element_located((By.XPATH, "//mat-select[@role='combobox']")))
        umkreis_select.click()
        umkreis_option=wait.until(EC.presence_of_element_located((By.XPATH, "//mat-option[@id='mat-option-5']")))
        umkreis_option.click()

        search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Finden']")))
        driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", search_button)
        time.sleep(5)
        search_button.click()
        
        print("✅ Form submitted successfully.")       
        while True:
            extract_results()
            try:
                next_button = driver.find_element(By.XPATH, '//button[@class="mat-mdc-tooltip-trigger mat-mdc-paginator-navigation-next mdc-icon-button mat-mdc-icon-button mat-unthemed mat-mdc-button-base"]')
                driver.execute_script("arguments[0].click();", next_button)
                time.sleep(3)
            except Exception as e:
                print(f"🔁 All the Data Extract for the search")
                break
            

    except Exception as e:
        print(f"❌ Error with search: ({search['plz']}) — {type(e).__name__}: {e}")
        driver.save_screenshot(f"error_{search['plz']}_{search['city']}.png")
        continue

driver.quit()



print("✅ IHK scraping with All Serach Inputs complete.")


✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted succe

In [33]:
print(response)

[{'Full Name': 'Setzer, Prof. Dr. Frank', 'Address': 'Oberhermsdorfer Str. 33 a', 'Phone Number': '(0351) 6555259', 'Email Address': 'setzer@tss-forstplanung.de', 'Website': '', 'Certification Type / Details': '', 'Source Directory Name': 'IHK'}, {'Full Name': 'Kipper, Dr.-Ing. René, M. Sc.', 'Address': '01219 Dresden', 'Phone Number': '', 'Email Address': '', 'Website': '', 'Certification Type / Details': '', 'Source Directory Name': 'IHK'}, {'Full Name': 'Schneider, Prof. Dr. rer. nat. Petra', 'Address': '', 'Phone Number': '', 'Email Address': '', 'Website': '', 'Certification Type / Details': '', 'Source Directory Name': 'IHK'}, {'Full Name': 'Nitsche, Dr.-Ing. Claus', 'Address': '01219 Dresden', 'Phone Number': '', 'Email Address': '', 'Website': '', 'Certification Type / Details': '', 'Source Directory Name': 'IHK'}, {'Full Name': 'Umlauft, Carmen', 'Address': '', 'Phone Number': '', 'Email Address': '', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing. (FH)', 'Source Di

In [None]:
# Make a backup before filtering out entries without addresses
backup_response = copy.deepcopy(response)
r

In [35]:
response = [entry for entry in response if entry.get("Address")]

In [36]:
print(f"🔎 Original: {len(backup_response)} entries")
print(f"✅ After cleaning: {len(response)} entries")


🔎 Original: 2473 entries
✅ After cleaning: 300 entries


In [None]:
# Save results
df = pd.DataFrame(response)
df.drop_duplicates(inplace=True)
df.to_csv("IHK_results.csv", index=False)
df.to_excel("IHK_results.xlsx", index=False)

In [6]:
driver.get("https://svv.ihk.de/svw-suche/4931566/suche-extern")
time.sleep(2)


In [28]:

plz_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@tabindex='2']")))
plz_input.clear()
plz_input.send_keys("01067")



In [11]:
umkreis_select = wait.until(EC.presence_of_element_located((By.XPATH, "//mat-select[@role='combobox']")))
umkreis_select.click()
umkreis_option=wait.until(EC.presence_of_element_located((By.XPATH, "//mat-option[@id='mat-option-5']")))
umkreis_option.click()

In [13]:
search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Finden']")))
driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", search_button)
time.sleep(5)
search_button.click()

In [None]:
expert_blocks = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, "//div[@class='treffer-container-div flex-50 ng-star-inserted']"))
        )
print(len(expert_blocks))

10


In [16]:
block = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[@class='treffer-container-div flex-50 ng-star-inserted']"))
        )

In [None]:
try:
    name = block.find_element(By.XPATH, ".//h2[contains(@class, 'lux-card-title')]").text.strip() \
        if block.find_elements(By.XPATH, ".//h2[contains(@class, 'lux-card-title')]") else ""

    cert = block.find_element(By.XPATH, ".//div[contains(@class, 'lux-card-subtitle')]").text.strip() \
        if block.find_elements(By.XPATH, ".//div[contains(@class, 'lux-card-subtitle')]") else ""

    # Address: first line
    address_line1 = block.find_element(By.XPATH, ".//span[@id='detail-row-value-Hauptsitz-0']").text.strip() \
        if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-Hauptsitz-0']") else ""

    # Address: second line (postal code + city)
    address_line2 = block.find_element(By.XPATH, ".//span[contains(@id,'detail-row-value') and contains(text(), 'Dresden')]").text.strip() \
        if block.find_elements(By.XPATH, ".//span[contains(@id,'detail-row-value') and contains(text(), 'Dresden')]") else ""

    address = f"{address_line1}, {address_line2}".strip(", ")

    # Phone
    phone = block.find_element(By.XPATH, ".//span[@id='detail-row-value-Telefon-0']").text.strip() \
        if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-Telefon-0']") else ""

    # Email
    email = block.find_element(By.XPATH, ".//span[@id='detail-row-value-E-Mail-0']").text.strip() \
        if block.find_elements(By.XPATH, ".//span[@id='detail-row-value-E-Mail-0']") else ""

    response.append({
        "Full Name": name,
        "Address": address,
        "Phone Number": phone,
        "Email Address": email,
        "Website": "",
        "Certification Type / Details": cert,
        "Source Directory Name": "IHK"
    })

except Exception as block_err:
    print(f"❗ Error extracting a block: {str(block_err)}")


In [18]:
print(response)

[{'Full Name': 'Helff, Anett', 'Address': 'Weintraubenstr. 4,, 01099 Dresden', 'Phone Number': '(0351) 16068671', 'Email Address': 'info@immobilienbewertung-helff.de', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing.', 'Source Directory Name': 'HypZert'}]


In [None]:
while True:
    # extract_results()
    try:
        next_button = driver.find_element(By.XPATH, '//button[@class="mat-mdc-tooltip-trigger mat-mdc-paginator-navigation-next mdc-icon-button mat-mdc-icon-button mat-unthemed mat-mdc-button-base"]')
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(3)
    except Exception as e:
        print(f"🔁 All the Data Extract for the search")
        break

KeyboardInterrupt: 