In [1]:
# ======================================
# 🔧 Import libraries
# ======================================
import csv
import re
import time
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# ======================================
# 🧠 Create Chrome driver
# ======================================
def create_driver():
    """Build and return a new Chrome WebDriver instance.

    The browser is started with a fixed set of command-line flags and two
    experimental options (the "enable-automation" switch is excluded and
    "useAutomationExtension" is set to False). The value returned by
    ``ChromeDriverManager().install()`` is passed to ``Service``.
    """
    chrome_options = Options()

    # Command-line switches handed to Chrome at start-up.
    for flag in (
        "--start-maximized",
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        chrome_options.add_argument(flag)

    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# ======================================
# ⏳ Helper: Wait for element
# ======================================
def wait_for_element(driver, by, selector, timeout=5):
    """Wait until an element is present in the DOM and return it.

    Args:
        driver: WebDriver instance to query.
        by: Locator strategy (e.g. ``By.CSS_SELECTOR``).
        selector: Locator value matching ``by``.
        timeout: Maximum number of seconds to wait.

    Returns:
        The located element, or ``None`` if it did not appear within
        ``timeout`` seconds.

    Only the wait's timeout is treated as "element missing". Other errors
    (and ``KeyboardInterrupt``/``SystemExit``, which a bare ``except`` would
    swallow) propagate to the caller.
    """
    try:
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((by, selector))
        )
    except TimeoutException:
        return None

# ======================================
# 🌍 Helper: Extract country from address
# ======================================
def extract_country(address):
    """Return the last non-empty segment of an address.

    The address is split on commas and line breaks; surrounding whitespace
    is stripped from each segment and empty segments are discarded. The
    last remaining segment is returned as the country.

    Args:
        address: Address text, possibly multi-line. May be empty or None.

    Returns:
        The last non-empty segment, or "" when there is none.
    """
    if not address:
        return ""
    # NOTE: the pattern must be r"[,\r\n]". Writing r"[,\\n]" in a raw string
    # matches a comma, a backslash or the letter "n", which chops names such
    # as "Sweden" into "Swede".
    parts = [p.strip() for p in re.split(r"[,\r\n]", address) if p.strip()]
    return parts[-1] if parts else ""

# ======================================
# 🔍 STEP 1: Loop through multiple keywords
# ======================================

keywords = ["CNC", "Forging", "Pump", "Pneumatic", "bar", "Steel", "Bright", "stainless steel"]
all_data = []

# CSS selector for the exhibitor links in the search-result table.
EXHIBITOR_LINK_SELECTOR = "table.table tbody tr td a.link.link--primary.link--medium"
# Number of attempts per exhibitor page before it is skipped.
MAX_ATTEMPTS = 3
# Chrome is restarted on every Nth exhibitor
# (presumably to keep a long session from degrading; confirm with the author).
DRIVER_RESTART_EVERY = 20


def _first_text(driver, selector):
    """Return the stripped text of the first element matching selector, or ""."""
    # find_elements returns an empty list when nothing matches, so no
    # exception handling is needed for optional page elements.
    matches = driver.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text.strip() if matches else ""


for keyword in keywords:
    print("\n===============================")
    print(f"🔍 Searching for: {keyword}")
    print("===============================")

    exhibitor_links = []
    driver = create_driver()
    try:
        # Percent-encode the keyword so spaces and reserved characters
        # (e.g. "&") cannot corrupt the query string.
        search_url = f"https://www.elmia.se/en/subcontractor/exhibitors-and-products/search?keyword={quote(keyword, safe='')}&opt=agency%2Cproduct%2Cexhibitor%2Ccatalog%2Ccity%2Ccountry"
        driver.get(search_url)
        time.sleep(2)

        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, EXHIBITOR_LINK_SELECTOR))
        )

        for link in driver.find_elements(By.CSS_SELECTOR, EXHIBITOR_LINK_SELECTOR):
            name = link.text.strip()
            href = link.get_attribute("href")
            # Keep only links that have a target and whose text looks like a
            # name: more than two characters and at least one letter.
            if href and any(char.isalpha() for char in name) and len(name) > 2:
                exhibitor_links.append((name, href))

        print(f"✅ Found {len(exhibitor_links)} exhibitors for {keyword}")

    except Exception as e:
        print(f"❌ Could not load exhibitors for {keyword}: {e}")
        exhibitor_links = []
    finally:
        # Always release the browser, even if the search failed.
        driver.quit()

    # No exhibitors for this keyword: do not start a second browser.
    if not exhibitor_links:
        continue

    # ======================================
    # 🔍 STEP 2: Scrape details for each exhibitor
    # ======================================
    driver = create_driver()
    try:
        for i, (company_name_from_list, url) in enumerate(exhibitor_links, start=1):
            if i % DRIVER_RESTART_EVERY == 0:
                driver.quit()
                driver = create_driver()

            for attempt in range(1, MAX_ATTEMPTS + 1):
                try:
                    driver.get(url)
                    time.sleep(1)

                    # Fall back to the name from the result list when the
                    # detail page has no title element.
                    company_name_el = wait_for_element(driver, By.CSS_SELECTOR, "h2.exhibitor__title")
                    company_name = company_name_el.text.strip() if company_name_el else company_name_from_list

                    phone_el = wait_for_element(driver, By.CSS_SELECTOR, ".text-icon .text-icon__text")
                    phone = phone_el.text.strip() if phone_el else ""

                    # Optional field: looked up without waiting.
                    email = _first_text(driver, "a[href^='mailto:'] .link__text")

                    website_el = wait_for_element(driver, By.CSS_SELECTOR, ".exhibitor__facts_item a[href^='http'] .link__text")
                    website = website_el.text.strip() if website_el else ""

                    # Optional field: looked up without waiting.
                    address = _first_text(driver, ".exhibitor__facts_adress")

                    # The row is appended only after every field was read,
                    # so a failed attempt never leaves a partial record.
                    all_data.append({
                        "Product": keyword,  # the search keyword is used as the product
                        "Company Name": company_name,
                        "Phone": phone,
                        "Email": email,
                        "Website": website,
                        "Address": address,
                        "Country": extract_country(address),
                        "URL": url,
                    })
                    break  # success
                except Exception as e:
                    print(f"⚠️ Retry {attempt} for {company_name_from_list}: {e}")
                    time.sleep(2)
            else:
                # Runs only when every attempt failed (no break).
                print(f"❌ Skipping {company_name_from_list}")
    finally:
        # Release whichever browser instance is current, even on error.
        driver.quit()

# ======================================
# 🧹 Clean and Export
# ======================================
# Keep only rows that have a non-blank website and a company name
# containing at least one letter.
cleaned_data = []
for row in all_data:
    if not row["Website"].strip():
        continue
    if not any(ch.isalpha() for ch in row["Company Name"]):
        continue
    cleaned_data.append(row)

print(f"\n✅ Total real exhibitors across all keywords: {len(cleaned_data)}\n")

# Save to CSV; the column order follows the keys of the first row.
if not cleaned_data:
    print("⚠️ No valid data found to save.")
else:
    column_names = cleaned_data[0].keys()
    with open("elmia_exhibitors_all_keywords123.csv", "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
        csv_writer.writeheader()
        for row in cleaned_data:
            csv_writer.writerow(row)

    print("💾 Data saved to elmia_exhibitors_all_keywords123.csv")



🔍 Searching for: CNC
✅ Found 76 exhibitors for CNC

🔍 Searching for: Forging
✅ Found 29 exhibitors for Forging

🔍 Searching for: Pump
✅ Found 15 exhibitors for Pump

🔍 Searching for: Pneumatic
✅ Found 13 exhibitors for Pneumatic

🔍 Searching for: bar
✅ Found 152 exhibitors for bar

🔍 Searching for: Steel
✅ Found 99 exhibitors for Steel

🔍 Searching for: Bright
✅ Found 4 exhibitors for Bright

🔍 Searching for: stainless steel
✅ Found 48 exhibitors for stainless steel

✅ Total real exhibitors across all keywords: 342

💾 Data saved to elmia_exhibitors_all_keywords123.csv
