In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait, Select
import time
import pandas as pd
import copy

# Setup options
options = Options()
# options.add_argument("--headless")  # Uncomment after testing
options.add_argument("--window-size=1920,1080")

# Start the browser
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

search_inputs = [
    {"plz": "01067"},{"plz": "04109"},{"plz": "06108"},{"plz": "10115"},{"plz": "14467"},
    {"plz": "19053"},{"plz": "20095"},{"plz": "23552"},{"plz": "24103"},{"plz": "26122"},
    {"plz": "28195"},{"plz": "30159"},{"plz": "33602"},{"plz": "34117"},{"plz": "37073"},
    {"plz": "39104"},{"plz": "44787"},{"plz": "47051"},{"plz": "50667"},{"plz": "53111"},
    {"plz": "56068"},{"plz": "60311"},{"plz": "66111"},{"plz": "68159"},{"plz": "70173"},
    {"plz": "72070"},{"plz": "74072"},{"plz": "80331"},{"plz": "86150"},{"plz": "89073"},
    {"plz": "90403"},{"plz": "93047"},{"plz": "94032"},{"plz": "96450"},{"plz": "99084"}
]


response=[]

In [None]:
def extract_results():
    try:
        expert_blocks = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@class='treffer-container-div flex-50 ng-star-inserted']"))
        )
        
        for block in expert_blocks:
            try:
                name = block.find_element(By.XPATH, ".//h2[contains(@class, 'lux-card-title')]").text.strip()

                cert = block.find_element(By.XPATH, ".//div[contains(@class, 'lux-card-subtitle')]").text.strip()

                # Address lines
                address_line1_elem = block.find_elements(By.XPATH, ".//span[starts-with(@id, 'detail-row-value-Hauptsitz')]")
                address_line2_elem = block.find_elements(By.XPATH, ".//span[starts-with(@id, 'detail-row-value-') and not(contains(@id, 'Hauptsitz')) and not(contains(@id, 'E-Mail')) and not(contains(@id, 'Telefon')) and not(contains(@id, 'bestellt'))]")

                address_line1 = address_line1_elem[0].text.strip() if address_line1_elem else ""
                address_line2 = address_line2_elem[0].text.strip() if address_line2_elem else ""
                address = f"{address_line1}, {address_line2}".strip(", ")

                # Phone
                phone_elem = block.find_elements(By.XPATH, ".//span[starts-with(@id, 'detail-row-value-Telefon')]")
                phone = phone_elem[0].text.strip() if phone_elem else ""

                # Email
                email_elem = block.find_elements(By.XPATH, ".//span[starts-with(@id, 'detail-row-value-E-Mail')]")
                email = email_elem[0].text.strip() if email_elem else ""

                response.append({
                    "Full Name": name,
                    "Address": address,
                    "Phone Number": phone,
                    "Email Address": email,
                    "Website": "",
                    "Certification Type / Details": cert,
                    "Source Directory Name": "IHK"
                })

            except Exception as block_err:
                print(f"❗ Error extracting a block: {str(block_err)}")

    except TimeoutException:
        print("⏱️ Timeout: No search results found; check selectors or form submission")
    except Exception as e:
        print(f"❗ Unexpected error extracting results: {str(e)}")


In [None]:
# Start search loop
for search in search_inputs:
    driver.get("https://svv.ihk.de/svw-suche/4931566/suche-extern")
    time.sleep(2)

    wait = WebDriverWait(driver, 15)
    try:

        search_input = wait.until(EC.presence_of_element_located((
        By.XPATH, "//label[normalize-space()='Suchbegriff(e)']/following::input[1]"
         )))
        search_input.clear()
        search_input.send_keys("Immobilienbewertung")

        plz_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@tabindex='2']")))
        plz_input.clear()
        plz_input.send_keys(search["plz"])

        

        umkreis_select = wait.until(EC.presence_of_element_located((By.XPATH, "//mat-select[@role='combobox']")))
        umkreis_select.click()
        umkreis_option=wait.until(EC.presence_of_element_located((By.XPATH, "//mat-option[@id='mat-option-5']")))
        umkreis_option.click()

        search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Finden']")))
        driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", search_button)
        time.sleep(5)
        search_button.click()
        
        print("✅ Form submitted successfully.")       
        while True:
            extract_results()
            try:
                next_button = driver.find_element(By.XPATH, '//button[@class="mat-mdc-tooltip-trigger mat-mdc-paginator-navigation-next mdc-icon-button mat-mdc-icon-button mat-unthemed mat-mdc-button-base"]')
                driver.execute_script("arguments[0].click();", next_button)
                time.sleep(3)
            except Exception as e:
                print(f"🔁 All the Data Extract for the search")
                break
            

    except Exception as e:
        print(f"❌ Error with search: ({search['plz']}) — {type(e).__name__}: {e}")
        driver.save_screenshot(f"error_{search['plz']}_{search['city']}.png")
        continue

driver.quit()



print("✅ IHK scraping with All Serach Inputs complete.")


✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
🔁 All the Data Extract for the search
✅ Form submitted successfully.
⏱️ Timeout: No search results found; check selectors or form submission
🔁 All the Data Extract for the search
✅ Form submitted succe

In [17]:
print(response)

[{'Full Name': 'Nieke, Norbert', 'Address': 'Altpestitz 7,, 01217 Dresden', 'Phone Number': '(0351) 4033958', 'Email Address': 'info@holzschutz-nieke.de', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing.', 'Source Directory Name': 'IHK'}, {'Full Name': 'Klein, Michael', 'Address': 'Ostra-Allee 9', 'Phone Number': '(0351) 4331163', 'Email Address': '', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing.', 'Source Directory Name': 'IHK'}, {'Full Name': 'Dietze, Ronald', 'Address': 'Weisbachstr. 6,, 09599 Freiberg', 'Phone Number': '(03731) 26010', 'Email Address': 'R.Dietze@biug-geotechnik.de', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing.', 'Source Directory Name': 'IHK'}, {'Full Name': 'Beutlich, Tobias', 'Address': 'Wilhelm-Franke-Str. 68,, 01219 Dresden', 'Phone Number': '(0351) 4725 - 999', 'Email Address': 'buero@ibhaufe.de', 'Website': '', 'Certification Type / Details': 'Dipl.-Ing. (FH)', 'Source Directory Name': 'IHK'}, {'Full Name': 'Wehnert-Kohlen

In [18]:
# Make a backup before filtering out entries without addresses
backup_response = copy.deepcopy(response)


In [9]:
print(backup_response)

NameError: name 'backup_response' is not defined

In [19]:
response = [entry for entry in response if entry.get("Address")]

In [20]:
print(f"🔎 Original: {len(backup_response)} entries")
print(f"✅ After cleaning: {len(response)} entries")


🔎 Original: 2472 entries
✅ After cleaning: 2472 entries


In [22]:
# Save results
df = pd.DataFrame(response)
df.drop_duplicates(inplace=True)
df.to_csv("Final_IHK_results.csv", index=False)
df.to_excel("Final_IHK_results.xlsx", index=False)

In [23]:
df.head(40)

Unnamed: 0,Full Name,Address,Phone Number,Email Address,Website,Certification Type / Details,Source Directory Name
0,"Nieke, Norbert","Altpestitz 7,, 01217 Dresden",(0351) 4033958,info@holzschutz-nieke.de,,Dipl.-Ing.,IHK
1,"Klein, Michael",Ostra-Allee 9,(0351) 4331163,,,Dipl.-Ing.,IHK
2,"Dietze, Ronald","Weisbachstr. 6,, 09599 Freiberg",(03731) 26010,R.Dietze@biug-geotechnik.de,,Dipl.-Ing.,IHK
3,"Beutlich, Tobias","Wilhelm-Franke-Str. 68,, 01219 Dresden",(0351) 4725 - 999,buero@ibhaufe.de,,Dipl.-Ing. (FH),IHK
4,"Wehnert-Kohlenbrenner, Dr. Michael","Sachsenallee 24,, 01723 Kesselsdorf",035204 60536,michael.wehnert@ogf.de,,,IHK
5,"Nitsche, Dr.-Ing. Claus","Tiergartenstr. 48,, 01219 Dresden",,c.nitsche@bgd-ecosax.de,,,IHK
6,"Vetter, Dietmar","Schloßstr. 1,, 01067 Dresden",(0351) 4976200,post@immobilienbewertung-vetter.de,,Dipl.-Ing. (FH) Bauingenieur,IHK
7,"Coenen, Boris, M. A.",Eduard-Bilz-Str. 55 a,0351 82618697,b.coenen@on-wert.de,,Dipl.-Ing.,IHK
8,"Schneider, Prof. Dr. rer. nat. Petra","Halsbrücker Str. 34,, 09599 Freiberg",(03731) 365255,freiberg@dbi-ewi.de,,,IHK
9,"Ridzewski, Prof. Dr.-Ing. Jens","Hugo-Hickmann-Str. 14,, 01465 Dresden/Langebrück",(035201) 813322,ridzewski@composite-consulting.de,,,IHK
