In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set up the WebDriver (Chrome in this case). `driver` is created at module
# level; the scraping code below uses it and calls driver.quit() when done.
options = webdriver.ChromeOptions()
# Headless mode is currently disabled, so the browser window is visible.
# Uncomment the next line to run Chrome without a window:
# options.add_argument("--headless")

# Start the Chrome session with the options configured above.
driver = webdriver.Chrome(options=options)

def scroll_down(driver, pause: float = 2) -> None:
    """Scroll the browser window to the bottom of the current page.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver).
        pause: Seconds to sleep after scrolling so that lazily loaded
            content has time to appear. Defaults to 2, the delay this
            helper previously hard-coded, so existing calls are unaffected.
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(pause)

# Seconds to wait for an element or a new tab before giving up.
PAGE_LOAD_TIMEOUT = 10
# Seconds to pause after each scroll so newly loaded exhibitors can render.
SCROLL_PAUSE_SECONDS = 8
# Target of the "EXHIBITOR LIST" link on the GITEX Africa homepage.
EXHIBITOR_LIST_URL = "https://exhibitors-dwtc.exhibitoronlinemanual.com/gitex-africa-2024/Exhibitor"

try:
    # Navigate to the GITEX Africa homepage
    driver.get("https://www.gitexafrica.com/home")

    wait = WebDriverWait(driver, PAGE_LOAD_TIMEOUT)

    # Wait for the exhibitor-list link itself; <body> being present does not
    # guarantee that the link has been rendered yet.
    exhibitor_list_link = wait.until(
        EC.presence_of_element_located(
            (By.XPATH, f"//a[@href='{EXHIBITOR_LIST_URL}']")
        )
    )

    # Remember the open windows so the newly opened tab can be identified.
    handles_before_click = driver.window_handles

    # Use JavaScript to click the link
    driver.execute_script("arguments[0].click();", exhibitor_list_link)

    # Wait until the new tab actually exists before switching to it; indexing
    # window_handles straight after the click races with the browser.
    wait.until(EC.new_window_is_opened(handles_before_click))
    new_handle = next(
        handle
        for handle in driver.window_handles
        if handle not in handles_before_click
    )
    driver.switch_to.window(new_handle)

    # Now wait for the exhibitor page (in the new tab) to load
    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

    # Extract company information
    companies_data = []
    # (name, stand info) pairs already recorded. Every pass re-reads all
    # elements currently in the DOM, so this prevents duplicate rows.
    seen_companies = set()

    # Initial scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Extract company names and other details
        for company in driver.find_elements(By.CLASS_NAME, "item_heading"):
            try:
                name = company.find_element(By.CLASS_NAME, "heading").text.strip()
                stand_info = company.find_element(
                    By.CSS_SELECTOR, ".head_discription .web p"
                ).text.strip()
                # NOTE: description extraction (".list-group-item-text span")
                # is currently disabled.

                # Extract skills if they exist (empty list when none match)
                skills = [
                    skill.text.strip()
                    for skill in company.find_elements(
                        By.CSS_SELECTOR, ".sector_block_outer .sector_block li"
                    )
                ]
            except Exception as e:
                # Best effort: report the broken entry and keep scraping.
                print(f"Error extracting company details: {e}")
                continue

            company_key = (name, stand_info)
            if company_key in seen_companies:
                continue
            seen_companies.add(company_key)

            companies_data.append({
                "Company Name": name,
                "Stand Info": stand_info,
                "Skills": ", ".join(skills),  # Comma-separated string
            })

        # Scroll down to load more companies
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_SECONDS)
        new_height = driver.execute_script("return document.body.scrollHeight")

        # An unchanged page height means no further companies were loaded
        if new_height == last_height:
            break

        last_height = new_height

    # Collect the scraped rows into a pandas DataFrame
    df = pd.DataFrame(companies_data)

finally:
    # Always close the browser, even if scraping failed part-way
    driver.quit()
