In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time


In [5]:
def _text_or_empty(parent, css_selector):
    """Return the stripped text of the first `css_selector` match under `parent`, or "" if none."""
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except NoSuchElementException:
        return ""


def _name_and_link(card):
    """Return `(name, href)` from the card's `.company-header h3 a` anchor, "" for anything missing."""
    try:
        anchor = card.find_element(By.CSS_SELECTOR, ".company-header h3 a")
    except NoSuchElementException:
        return "", ""
    # get_attribute() returns None when the anchor has no href; fall back to ""
    # so a single link-less card cannot abort the whole scrape.
    href = anchor.get_attribute("href") or ""
    return anchor.text.strip(), href.strip()


def scrape_companies(page_url, output_csv="companies.csv", wait_seconds=3):
    """
    Scrape every element with class="company-info" on `page_url` into a CSV file.

    For each company block the following fields are extracted (a field that
    cannot be found is stored as an empty string):
      1. Company Name     (text of .company-header h3 a)
      2. Job Reviews URL  (href of .company-header h3 a)
      3. Location         (text of .meta-entry.location .meta-entry-value)
      4. Industry         (text of .meta-entry.industry .meta-entry-value)
      5. Employees        (text of .meta-entry.employees .meta-entry-value)

    Parameters:
      page_url     -- URL loaded in a new Edge WebDriver session.
      output_csv   -- path of the CSV file to write (without the index column).
      wait_seconds -- fixed pause after loading the page, giving dynamically
                      rendered content time to appear; 0 or None skips the pause.

    Returns None. Prints a one-line summary after the CSV has been written.
    The browser session is always closed, even if scraping raises.
    """
    columns = ["Company Name", "Job Reviews URL", "Location", "Industry", "Employees"]

    driver = webdriver.Edge()
    try:
        driver.get(page_url)
        if wait_seconds and wait_seconds > 0:
            time.sleep(wait_seconds)

        # One record (dict) per company block.
        records = []
        for card in driver.find_elements(By.CLASS_NAME, "company-info"):
            company_name, company_link = _name_and_link(card)
            records.append({
                "Company Name": company_name,
                "Job Reviews URL": company_link,
                "Location": _text_or_empty(card, ".meta-entry.location .meta-entry-value"),
                "Industry": _text_or_empty(card, ".meta-entry.industry .meta-entry-value"),
                "Employees": _text_or_empty(card, ".meta-entry.employees .meta-entry-value"),
            })

        # Explicit `columns` keeps the header row (and its order) even when
        # no company blocks were found.
        df = pd.DataFrame(records, columns=columns)

        df.to_csv(output_csv, index=False)
        print(f"Scraped {len(df)} companies. Data saved to '{output_csv}'.")
    finally:
        driver.quit()

In [6]:
# Scrape the national listing page and write the results to companies.csv.
scrape_companies(
    page_url="https://www.canadastop100.com/national/",
    output_csv="companies.csv",
)

Scraped 100 companies. Data saved to 'companies.csv'.
