In [7]:
import requests  # For making HTTP requests (simulated scraping)
from bs4 import BeautifulSoup  # For parsing HTML (simulated scraping)
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

class ScrapingFactory:
    """
    Route scraping requests to the scraper that handles the target website.
    """

    # Domains the factory can scrape; any subdomain of an entry is accepted too.
    domain = [
        "francetravail.fr",
    ]

    def __init__(self):
        pass

    def _matchingDomain(self, url):
        """
        Find the supported domain that hosts the given URL.

        The comparison is made on the parsed hostname rather than on the raw
        string, so a supported domain that merely appears in the path or query
        string (or as the tail of another host name) does not count.

        :param url: The URL to inspect.
        :return: The matching entry of ``domain``, or None if the URL is not a
                 string, cannot be parsed, or is hosted elsewhere.
        """
        from urllib.parse import urlparse

        if not isinstance(url, str):
            return None

        candidate = url.strip()
        try:
            parsed = urlparse(candidate)
            if not parsed.netloc:
                # Scheme-less input such as "francetravail.fr/offres" is parsed
                # as a bare path; prefixing "//" makes urlparse read the host.
                parsed = urlparse("//" + candidate)
            host = (parsed.hostname or "").lower()
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. unbalanced brackets).
            return None

        for domain in self.domain:
            if host == domain or host.endswith("." + domain):
                return domain
        return None

    def urlCheckInDomain(self, url):
        """
        Check if the given URL belongs to one of the supported domains.

        :param url: The URL to check.
        :return: True if the URL's host is a supported domain or one of its
                 subdomains, False otherwise.
        """
        return self._matchingDomain(url) is not None

    def scrapOne(self, url):
        """
        Scrape data from a single URL.

        :param url: The URL to scrape.
        :return: Scraped data or None if the URL is not supported.
        """
        matched = self._matchingDomain(url)
        if matched is None:
            print(f"URL {url} is not supported.")
            return None

        if matched == "francetravail.fr":
            scraper = ScrapingFrancetravail()
        else:
            # The domain is listed in ``domain`` but has no scraper wired up.
            return None

        return scraper.scrape(url)

    def scrapMany(self, keyWord):
        """
        Scrape multiple job listings based on a keyword.

        :param keyWord: The keyword to search for.
        :return: A list of scraped job listings (empty if no keyword is given).
        """
        if not keyWord:
            print("Keyword is required.")
            return []

        # Use the France Travail scraper for this example
        scraper = ScrapingFrancetravail()
        return scraper.scrapMany(keyWord)



class ScrapingFrancetravail:
    """
    Scraper for job offers published on France Travail.
    """

    # Headers that mimic a real browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    # Seconds to wait for the server; requests applies no timeout by default,
    # so an unresponsive host would otherwise block the call indefinitely.
    timeout = 10

    def __init__(self):
        self.base_url = "https://www.francetravail.fr"  # Base URL for France Travail

    @staticmethod
    def _textOf(soup, tag, default, **filters):
        """
        Return the stripped text of the first matching element.

        :param soup: Parsed document (or element) to search in.
        :param tag: Tag name to look for.
        :param default: Value returned when no element matches.
        :param filters: Extra keyword filters forwarded to ``find`` (e.g. ``class_``).
        :return: The element's stripped text, or ``default``.
        """
        node = soup.find(tag, **filters)
        if node is None:
            return default
        return node.text.strip()

    def scrape(self, url):
        """
        Scrape data from a single France Travail URL.

        :param url: The URL to scrape.
        :return: Scraped data as a dictionary or None if the request fails.
        """
        try:
            # Make a request to the URL
            print(f"Scraping France Travail URL: {url}")
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()  # Raise an error for bad status codes

            # Parse the HTML content
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract relevant data, falling back to a placeholder per field
            return {
                "title": self._textOf(soup, "h1", "No Title"),
                "company": self._textOf(soup, "span", "No Company", class_="company-name"),
                "location": self._textOf(soup, "span", "No Location", class_="job-location"),
                "description": self._textOf(soup, "div", "No Description", class_="description"),
                "url": url
            }
        except requests.RequestException as e:
            print(f"Error scraping {url}: {e}")
            return None

    def scrapMany(self, keyWord, max_results=10):
        """
        Scrape multiple job listings based on a keyword.

        :param keyWord: The keyword to search for.
        :param max_results: The maximum number of results to return.
        :return: A list of scraped job listings (empty if the search request fails).
        """
        try:
            # The keyword goes through ``params`` so that spaces and reserved
            # characters (&, #, ...) are percent-encoded instead of corrupting
            # the query string.
            search_url = f"{self.base_url}/recherche"
            print(f"Searching France Travail for keyword: {keyWord}")
            response = requests.get(
                search_url,
                params={"query": keyWord},
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()

            # Parse the search results
            soup = BeautifulSoup(response.text, "html.parser")
            job_listings = []

            # Find job listing elements
            for job in soup.find_all("div", class_="job-listing", limit=max_results):
                # A listing without a usable link cannot be followed; skip it
                # rather than let a TypeError/KeyError abort the whole search.
                anchor = job.find("a", href=True)
                if anchor is None:
                    continue
                full_job_url = urljoin(self.base_url, anchor["href"])

                # Scrape individual job details; failed pages are left out
                job_data = self.scrape(full_job_url)
                if job_data:
                    job_listings.append(job_data)

            return job_listings
        except requests.RequestException as e:
            print(f"Error searching for jobs: {e}")
            return []

In [8]:
# Inputs for the demo: one offer detail page and one search keyword
single_url = "https://candidat.francetravail.fr/offres/recherche/detail/189KKQR"
keyword = "developer"

# The factory picks the site-specific scraper for each request
factory = ScrapingFactory()

# Scrape the single offer and show the resulting record (None on failure)
single_result = factory.scrapOne(single_url)
print("Single URL Result:", single_result)

# Run the keyword search, then list every scraped offer on its own line
multiple_results = factory.scrapMany(keyword)
print("Multiple URL Results:")
for result in multiple_results:
    print(result)

Scraping France Travail URL: https://candidat.francetravail.fr/offres/recherche/detail/189KKQR
Single URL Result: {'title': 'Offre n° 189KKQRDéveloppeur Full Stack .Net  (H/F)', 'company': 'No Company', 'location': 'No Location', 'description': "Rejoignez une équipe sympathique et dynamique, dans un groupe en forte croissance !\n\nPOSTE et MISSION\n\nAu sein du pôle «Développement et R&D», vous serez intégré-e à l'équipe chargée du développement de nos Suites logicielles de gestion des risques Qualité, Sécurité, Environnement. A ce titre, vous travaillerez sur la conception et le développement de nos logiciels (algorithmes, IHM, API, persistance, Front End, Back End, etc) :\n\n- Intégrez une petite équipe d'ingénieurs, stable, qui travaille en étroite collaboration avec les autres services (notamment métier), selon la méthode Agile (Scrum), sur des projets durables,\n\n- A partir de spécifications fonctionnelles, vous concevez et développez les évolutions et nouvelles fonctionnalités d