In [12]:
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _text_or_na(card, by, selector):
    """Return the text of the first element under ``card`` matching the locator, or "N/A".

    Any ``Exception`` raised by the lookup is treated as "field not available".
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed, so
    a running scrape can still be interrupted.
    """
    try:
        return card.find_element(by, selector).text
    except Exception:
        return "N/A"


def scrape_jobs(base_url, file_name, max_posts=10):
    """
    Scrape job postings page by page and save them to an Excel workbook.

    The function drives the module-level Selenium ``driver`` (it must already
    exist in the global namespace when this is called) and calls
    ``driver.quit()`` before returning, so the same driver cannot be reused
    for a second call.

    Args:
        base_url (str): URL prefix of the results page. The result offset
            (0, 10, 20, ...) is appended to it to request successive pages.
        file_name (str): Name of the Excel file to save. A ``.xlsx`` extension
            is appended only when the name does not already end with one.
        max_posts (int): Maximum number of job postings to collect.
    Returns:
        list: One dict per posting with the keys 'Job title', 'Company name',
            'Location' and 'Salary'. Optional fields that cannot be read are
            set to "N/A". The list is empty when nothing could be scraped.
    """
    job_data = []
    page_number = 0

    try:
        while len(job_data) < max_posts:
            # Successive result pages are requested with an offset that grows by 10.
            url = base_url + str(page_number * 10)
            print(f"Extraction des données de la page {page_number + 1} : {url}")
            driver.get(url)

            # Fixed pause to give the page time to finish loading.
            time.sleep(3)

            # Every job card on the page is looked up by this class name.
            job_cards = driver.find_elements(By.CLASS_NAME, 'resultContent')

            for job in job_cards:
                if len(job_data) >= max_posts:
                    break  # Requested number of postings reached.
                try:
                    # The title is mandatory: if it cannot be read, the card is
                    # reported and skipped. The other fields fall back to "N/A".
                    title = job.find_element(By.CLASS_NAME, 'jobTitle').text
                    job_data.append({
                        'Job title': title,
                        'Company name': _text_or_na(job, By.CSS_SELECTOR, '[data-testid="company-name"]'),
                        'Location': _text_or_na(job, By.CSS_SELECTOR, '[data-testid="text-location"]'),
                        'Salary': _text_or_na(job, By.CLASS_NAME, 'css-19j1a75'),
                    })
                except Exception as e:
                    print(f"Error extracting job data: {e}")

            print(f"Total jobs extracted so far : {len(job_data)}")

            page_number += 1

            # A page without any job card ends the scrape.
            if not job_cards:
                print("No data in this page.")
                break
    except Exception as e:
        # Best effort: whatever was collected so far is still saved and returned.
        print(f"Unexpected erreur : {e}")

    if job_data:
        # Avoid producing "name.xlsx.xlsx" when the caller already supplied
        # the extension; names without it keep getting ".xlsx" appended.
        if str(file_name).lower().endswith('.xlsx'):
            output_path = str(file_name)
        else:
            output_path = f'{file_name}.xlsx'
        try:
            df = pd.DataFrame(job_data)
            df.to_excel(output_path, index=False, engine='openpyxl')
            # Report the path that was actually written.
            print(f"Excel file created successfully: {output_path}")
        except Exception as e:
            print(f"Error creating Excel file: {e}")
    else:
        print("No data extracted.")

    # Close the browser; a failure here is reported but never raised.
    try:
        driver.quit()
    except Exception as e:
        print(f"Error closing the browser: {e}")
    return job_data

In [13]:


if __name__ == "__main__":

    # Path to the Edge WebDriver executable. Set the EDGE_DRIVER_PATH
    # environment variable to override it; otherwise the original default
    # location is used.
    DRIVER_PATH = os.environ.get("EDGE_DRIVER_PATH", r"C:\windriver\msedgedriver.exe")

    # Initialize the Selenium Edge driver.
    # NOTE: scrape_jobs reads this object from the global namespace, so the
    # variable must keep the name `driver`.
    service = Service(DRIVER_PATH)
    driver = webdriver.Edge(service=service)

    # Base URL for job postings; scrape_jobs appends the result offset to it.
    BASE_URL = 'https://uk.indeed.com/jobs?q=quant&start='

    file_name = 'uk_quantitative_roles.xlsx'
    max_posts = 10
    try:
        # Scrape job data and write the Excel file.
        job_data = scrape_jobs(BASE_URL, file_name, max_posts)

    except Exception as e:
        print(f"Unexpected error: {e}")
    finally:
        # scrape_jobs also quits the driver itself; this second call is a
        # safety net, and any error it raises is reported rather than propagated.
        try:
            driver.quit()
        except Exception as e:
            print(f"Error closing the browser: {e}")

Extraction des données de la page 1 : https://uk.indeed.com/jobs?q=quant&start=0
Total jobs extracted so far : 10
Excel file created successfully: uk_quantitative_roles.xlsx
