In [3]:
import requests
from bs4 import BeautifulSoup
from time import sleep
import csv

In [4]:
url = "https://www.gulftalent.com/oman/jobs"

In [5]:
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0 Safari/537.36"
}

In [6]:
def get_job_description(link):
    try:
        response = requests.get(link, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        desc_container = soup.find("div", class_="panel-body content-visibility-auto job-description")
        if desc_container:
            paragraphs = desc_container.find_all("p")
            full_description = "\n".join(
                [p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)]
            )
            return full_description
        else:
            return "N/A"
    except Exception as e:
        print(f"Erreur lors de l'extraction de la description sur {link} : {e}")
        return "N/A"


In [7]:
def get_jobs_links(soup):
    job_links = []
    for row in soup.find_all("tr", class_="content-visibility-auto"):
        link_tag = row.find("a", class_="ga-job-impression")
        if link_tag and link_tag.has_attr('href'):
            job_links.append("https://www.gulftalent.com" + link_tag['href'])
    return job_links


In [8]:
def extract_job_data(row):
    title_tag = row.find("a", class_="ga-job-impression")
    company_tag = row.find("a", class_="text-base text-muted text-secondary-hover")
    location_tag = row.find("a", class_="text-base text-regular text-secondary-hover")
    date_tag = row.find("td", class_="col-sm-4")

    return {
        "Title": title_tag.get_text(strip=True) if title_tag else "N/A",
        "Company": company_tag.get_text(strip=True) if company_tag else "N/A",
        "Location": location_tag.get_text(strip=True) if location_tag else "N/A",
        "Date Posted": date_tag.get_text(strip=True) if date_tag else "N/A"
    }



In [9]:
def scrape_all_pages(base_url):
    all_jobs = []
    current_page = 1
    visited_urls = set()

    while True:
        print(f"Scraping Page {current_page}: {base_url}")
        response = requests.get(base_url, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        job_rows = soup.find_all("tr", class_="content-visibility-auto")
        if not job_rows:
            print("Aucune offre trouvée. Fin du scraping.")
            break

        for row in job_rows:
            job_data = extract_job_data(row)
            job_link_tag = row.find("a", class_="ga-job-impression")
            job_data["Link"] = (
                "https://www.gulftalent.com" + job_link_tag['href']
                if job_link_tag and job_link_tag.has_attr('href') else "N/A"
            )
            job_data["Description"] = (
                get_job_description(job_data["Link"]) if job_data["Link"] != "N/A" else "N/A"
            )
            all_jobs.append(job_data)
            sleep(1)

        pagination = soup.find("ul", class_="pagination")
        next_link = pagination.find("li", class_="jumper") and pagination.find("a", string="Next")
        if next_link and next_link.has_attr('href'):
            next_page_url = "https://www.gulftalent.com" + next_link['href']
            if next_page_url in visited_urls:
                print("Page déjà visitée. Arrêt du scraping.")
                break
            visited_urls.add(next_page_url)
            base_url = next_page_url
            current_page += 1
        else:
            print("Fin de la pagination.")
            break

    return all_jobs


In [10]:
url = "https://www.gulftalent.com/oman/jobs"
jobs = scrape_all_pages(url)


for job in jobs[:1]:
    formatted_job = {
        "Title": job["Title"],
        "Company": job["Company"],
        "Location": job["Location"],
        "Date Posted": job["Date Posted"],
        "Description": job["Description"],
        "Link": job["Link"],
    }
    print(formatted_job)


Scraping Page 1: https://www.gulftalent.com/oman/jobs
Scraping Page 2: https://www.gulftalent.com/oman/jobs/2
Scraping Page 3: https://www.gulftalent.com/oman/jobs/3
Fin de la pagination.
{'Title': 'Digital Product - UI/UX Specialist', 'Company': 'Renna Mobile', 'Location': 'Muscat', 'Date Posted': '4 Jun', 'Description': 'We are seeking a talented Digital Product - UI/UX Specialist to join our dynamic telecom team in Muscat, Oman. In this role, you will craft intuitive and engaging user interfaces, ensuring an exceptional digital experience for our customers. You will collaborate closely with product managers, developers, and stakeholders to design innovative solutions that drive customer satisfaction and business growth.Responsibilities:- Design and develop user interface mockups and prototypes that clearly illustrate how sites function and look.- Conduct user research and evaluate user feedback to improve product usability.- Collaborate with cross-functional teams including product 

In [13]:
csv_file = "jobs_expat.csv"
keys = ["Title", "Company", "Location", "Date Posted", "Description", "Link"]

with open(csv_file, mode='w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=keys)
    writer.writeheader()
    for job in jobs:
        filtered_job = {key: job.get(key, "N/A") for key in keys}
        writer.writerow(filtered_job)

