In [65]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from time import sleep
import csv

In [66]:
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
driver = webdriver.Chrome(options=options)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": "Object.defineProperty(navigator, 'webdriver', { get: () => undefined })"
})

{'identifier': '2'}

In [67]:
def get_job_description(link):
    try:
        driver.get(link)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "article.job-description"))
        )
        sleep(2)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        article = soup.find("article", class_="job-description")
        if not article:
            return "N/A"

        paragraphs = [p.get_text(strip=True) for p in article.find_all("p") if p.get_text(strip=True)]
        lists = [
            "- " + li.get_text(strip=True)
            for ul in article.find_all("ul")
            for li in ul.find_all("li") if li.get_text(strip=True)
        ]

        full_description = "\n".join(paragraphs + lists)
        return full_description.strip() or "N/A"

    except Exception as e:
        print(f"Erreur lors de l'extraction de la description sur {link} : {e}")
        return "N/A"


In [68]:
def extract_job_data(card):
    date_tag = card.find("span", class_="time")
    date_posted = date_tag.get_text(strip=True) if date_tag else "N/A"
    if "30+ days ago" in date_posted:
        return None

    title = card.find("p", class_="designation-title")
    company = card.find("a", class_="info-org")
    location_tag = card.select_one("li.info-loc")
    experience_tag = card.find("li", class_="info-exp")
    description_tag = card.find("p", class_="description")

    return {
        "Title": title.get_text(strip=True) if title else "N/A",
        "Company": company.get_text(strip=True) if company else "N/A",
        "Location": location_tag.find_all("span")[-1].get_text(strip=True) if location_tag else "N/A",
        "Experience": experience_tag.find("span").get_text(strip=True) if experience_tag else "N/A",
        "Short Description": description_tag.get_text(strip=True) if description_tag else "N/A",
        "Date Posted": date_posted
    }


In [69]:
def scrape_all_pages(base_url):
    all_jobs = []
    current_page = 1
    base_pagination_url = "https://www.naukrigulf.com/jobs-in-oman"
    query_params = "?industryType=25&xz=1_2_5"
    visited_urls = set()

    while True:
        current_url = f"{base_pagination_url}{query_params}" if current_page == 1 \
            else f"{base_pagination_url}-{current_page}{query_params}"

        if current_url in visited_urls:
            print("URL déjà visitée - éviter la boucle.")
            break
        visited_urls.add(current_url)

        print(f"Scraping page {current_page}: {current_url}")
        driver.get(current_url)

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.ng-box.srp-tuple"))
            )
        except:
            print("Timeout ou fin des offres.")
            break

        soup = BeautifulSoup(driver.page_source, "html.parser")
        job_cards = soup.find_all("div", class_="ng-box srp-tuple")
        if not job_cards:
            print("Aucune offre trouvée sur cette page. Fin du scraping.")
            break

        for card in job_cards:
            job_data = extract_job_data(card)
            if not job_data:
                continue

            link_tag = card.find("a", class_="info-position")
            job_link = link_tag["href"] if link_tag and link_tag.has_attr("href") else "N/A"
            if job_link != "N/A" and not job_link.startswith("http"):
                job_link = "https://www.naukrigulf.com" + job_link

            job_data["Link"] = job_link
            job_data["Description"] = get_job_description(job_link) if job_link != "N/A" else "N/A"
            all_jobs.append(job_data)
            sleep(1)

        current_page += 1
        sleep(2)

    return all_jobs


In [70]:
url = "https://www.naukrigulf.com/jobs-in-oman?industryType=25&locale=en&xz=1_2_5"
jobs = scrape_all_pages(url)

for job in jobs[:3]:
    print({
        "Title": job["Title"],
        "Company": job["Company"],
        "Location": job["Location"],
        "Date Posted": job["Date Posted"],
        "Description": job["Description"][:150] + "...",
        "Link": job["Link"],
    })


Scraping page 1: https://www.naukrigulf.com/jobs-in-oman?industryType=25&xz=1_2_5
Scraping page 2: https://www.naukrigulf.com/jobs-in-oman-2?industryType=25&xz=1_2_5
Scraping page 3: https://www.naukrigulf.com/jobs-in-oman-3?industryType=25&xz=1_2_5
Scraping page 4: https://www.naukrigulf.com/jobs-in-oman-4?industryType=25&xz=1_2_5
Scraping page 5: https://www.naukrigulf.com/jobs-in-oman-5?industryType=25&xz=1_2_5
Scraping page 6: https://www.naukrigulf.com/jobs-in-oman-6?industryType=25&xz=1_2_5
Scraping page 7: https://www.naukrigulf.com/jobs-in-oman-7?industryType=25&xz=1_2_5
Scraping page 8: https://www.naukrigulf.com/jobs-in-oman-8?industryType=25&xz=1_2_5
Timeout ou fin des offres.
{'Title': 'PRO Admin OR HR Admin', 'Company': 'wipro', 'Location': 'Muscat - Oman', 'Date Posted': '2 Jun', 'Description': 'Job Description\nRoles & Responsibilities\nKey Responsibilities and Skills:\nExample Job Responsibilities:\n- Administrative Support:Managing      schedul...', 'Link': 'https://ww

In [None]:
csv_file = "jobs_naukrigulf.csv"
keys = ["Title", "Company", "Location", "Date Posted", "Description", "Link"]

with open(csv_file, mode='w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=keys)
    writer.writeheader()

    for job in jobs:
        filtered_job = {key: job.get(key, "N/A") for key in keys}
        writer.writerow(filtered_job)



Les données ont été exportées vers jobs_naukrigulf.csv


In [72]:
driver.quit()