In [1]:
import requests
from bs4 import BeautifulSoup
import csv

In [2]:
# Entry point for the scrape: Careerjet search results filtered with l=Oman.
url = "https://www.careerjet.com.om/jobs?l=Oman&lid=119810&radius=15"

In [3]:
def get_jobs_links(url, timeout=10):
    """Collect the job-ad URLs listed on one search-results page.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled server.

    Returns:
        A list of absolute job-ad URLs, one per ``<article class="job clicky">``
        element that carries a ``data-url`` attribute. The list is empty when
        the request fails or the server answers with a non-200 status.
    """
    # Local import so this cell stays runnable without touching the import cell.
    from urllib.parse import urljoin

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat network failures like HTTP failures: report and return nothing,
        # so a caller looping over pages does not crash mid-scrape.
        print(f"Request to {url} failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Erreur HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    job_articles = soup.find_all("article", class_="job clicky")

    links = []
    for job in job_articles:
        job_url = job.get("data-url")
        if job_url:
            # urljoin resolves relative paths against the listing page and
            # leaves already-absolute URLs untouched, unlike plain concatenation.
            links.append(urljoin(url, job_url))

    return links


In [4]:
job_links = get_jobs_links(url)

# Show the extracted links, one per line (nothing is printed when none were found)
if job_links:
    print("\n".join(job_links))

https://www.careerjet.com.om/jobad/omfae28e8b9d995ca987118d01f5bdf894
https://www.careerjet.com.om/jobad/om31d63e5432320ef7d2e269b677181135
https://www.careerjet.com.om/jobad/omb61ba6eefc0d4db107ce778aaa3d3081
https://www.careerjet.com.om/jobad/om53267c74cd1379aeb78cab497d7083ae
https://www.careerjet.com.om/jobad/omdba1863cf1ca658b0465db6c24519c16
https://www.careerjet.com.om/jobad/om97051432c1c4c88dab3049b5bce2421a
https://www.careerjet.com.om/jobad/om996733a691638606792b532f37e3dfe7
https://www.careerjet.com.om/jobad/omf39d44cc042f8d3c5bd4794129cd31da
https://www.careerjet.com.om/jobad/om4af55f97c01d9ac149ae9c41fe93e28e
https://www.careerjet.com.om/jobad/om9b00c58f73e40416cddbf11089b41ded
https://www.careerjet.com.om/jobad/om8f5879dcf94bbb930f5261bbec7e6a2c
https://www.careerjet.com.om/jobad/omfe66e16d428e30a40e6731a018fbd62b
https://www.careerjet.com.om/jobad/omd8c0378f6f023d5f6c58e82fee7b7364
https://www.careerjet.com.om/jobad/om54580e76b02b84f6f309dfc1fd6a93f9
https://www.careerje

In [5]:
def get_job_details(link, timeout=10):
    """Download one job-ad page and extract its main fields.

    Args:
        link: URL of the job-ad page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled server.

    Returns:
        A dict with the keys ``Title``, ``Company``, ``Location``,
        ``Description``, ``Date Posted`` and ``Link``. Any field whose element
        is missing from the page is set to ``"N/A"``; ``Link`` is always the
        ``link`` argument.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (via ``raise_for_status``).
    """
    response = requests.get(link, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    def text_or_na(tag):
        # Each element is looked up once and reused, instead of searching the
        # document twice (once for the test, once for the value).
        return tag.text.strip() if tag is not None else "N/A"

    # The location is the <li> inside <ul class="details"> whose icon
    # references "#icon-location".
    location_li = None
    details_ul = soup.find("ul", class_="details")
    if details_ul is not None:
        for li in details_ul.find_all("li"):
            icon = li.find("use")
            if icon is None:
                continue
            # Accept both the legacy xlink:href attribute and plain href.
            href = icon.get("xlink:href") or icon.get("href") or ""
            if "#icon-location" in href:
                location_li = li
                break

    return {
        "Title": text_or_na(soup.find("h1")),
        "Company": text_or_na(soup.find("p", class_="company")),
        "Location": text_or_na(location_li),
        # Description
        "Description": text_or_na(soup.find("section", class_="content")),
        # Publication date
        "Date Posted": text_or_na(soup.find("span", class_="badge badge-r badge-s")),
        # Source link
        "Link": link,
    }


In [6]:
def scrape_optioncarriere(base_url, max_pages=2):
    """Scrape job details from successive listing pages.

    For each page, the job links are collected with ``get_jobs_links`` and every
    link is passed to ``get_job_details``. A job that fails to scrape is
    reported and skipped. The next page is found through the ``data-value``
    attribute of the listing's "next" button.

    Args:
        base_url: URL of the first listing page.
        max_pages: Maximum number of listing pages to process (default 2,
            the limit that was previously hard-coded).

    Returns:
        A list of job-detail dicts, in the order they were scraped.
    """
    all_jobs = []
    current_page = 1

    while current_page <= max_pages:
        job_links = get_jobs_links(base_url)

        if not job_links:
            print("No more job offers found. Scraping complete.")
            break

        for link in job_links:
            try:
                job_details = get_job_details(link)
                all_jobs.append(job_details)
            except Exception as e:
                # Best effort: one broken ad must not abort the whole run.
                print(f"Failed to scrape job details from {link}: {e}")

        # The page cap is reached: skip the extra request that would only be
        # used to locate a page we are not going to visit.
        if current_page >= max_pages:
            break

        try:
            page = requests.get(base_url, timeout=10)
        except requests.RequestException as e:
            # Keep what has been scraped so far instead of losing it to a
            # network error during pagination.
            print(f"Failed to load pagination from {base_url}: {e}")
            break

        soup = BeautifulSoup(page.text, "html.parser")
        next_button = soup.find("button", {"class": "ves-control ves-add btn btn-r btn-primary-inverted next"})

        if next_button and "data-value" in next_button.attrs:
            next_page_value = next_button["data-value"]
            # NOTE(review): the next-page URL uses fixed search parameters and
            # does not carry over the query string of base_url — confirm this
            # is intended.
            base_url = f"https://www.careerjet.com.om/jobs?s=&l=Oman&radius=15&p={next_page_value}"
            current_page += 1
        else:
            print("No more pages found. Ending scraping.")
            break

    return all_jobs


In [7]:
jobs = scrape_optioncarriere(url)

# Preview the first scraped offer only, with its fields in display order.
preview_fields = ("Title", "Company", "Location", "Date Posted", "Description", "Link")
for job in jobs[:1]:
    formatted_job = {field: job[field] for field in preview_fields}
    print(formatted_job)


{'Title': 'Digital Sales Executive', 'Company': 'Top Notch Consultancy', 'Location': 'Muscat', 'Date Posted': '5 days ago', 'Description': "Job Overview:We are seeking a highly motivated Digital Sales Executive to join our client's team in Muscat, Oman. In this role, you will be responsible for driving digital advertising sales, building strong client relationships, and delivering innovative marketing solutions. You will work with brands, agencies, and partners to promote our digital advertising products and achieve revenue targets.Key Responsibilities:  Identify and develop new business opportunities in the digital advertising space. Manage and grow a portfolio of clients, including brands, agencies, and advertisers. Promote and sell digital programmatic advertising and performance marketing. Understand client needs and propose tailored advertising strategies to achieve their marketing goals. Negotiate pricing, and campaign terms to close deals. Collaborate with internal teams (ad ope

In [8]:
# Export every scraped offer to a CSV file: a header row, then one row per job.
csv_file = "jobs_careerjet.csv"
csv_columns = ["Title", "Company", "Location", "Date Posted", "Description", "Link"]
with open(csv_file, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(jobs)