In [1]:
import requests
import json
import time
import pandas as pd
from bs4 import BeautifulSoup as bs
import re

In [2]:
# Load the job listings CSV (path is relative to the notebook's working directory).
# A later cell passes this frame's `id` column to all_jobs_details.
df = pd.read_csv("wuzzuf job listings output 12-oct-2025.csv")

In [3]:
df.head() 

Unnamed: 0,search_category,search_location,id
0,Accounting-Finance,Cairo,3831f42a-fe69-40b9-a9f7-a2d725a18ae4
1,Accounting-Finance,Cairo,df8a8659-85d6-4e30-9666-d5f27156e911
2,Accounting-Finance,Cairo,2550c42d-cec8-4beb-93d8-79513c56baa0
3,Accounting-Finance,Cairo,0e06538b-941b-40a2-b6c4-a473939da9a6
4,Accounting-Finance,Cairo,b7d17a61-61e3-4253-9694-5b2096ce71e8


In [4]:
# Helper function for safe nested key access
def deep_get(d, *keys):
    """Walk ``d`` through ``keys`` one level at a time and return what is found.

    Returns ``None`` as soon as a non-dict value is reached while keys remain,
    and also when a key is absent (``dict.get`` semantics). When called with
    no keys, ``d`` itself is returned unchanged.
    """
    node = d
    for key in keys:
        # Guard clause: we can only descend into dictionaries.
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node

In [5]:
# Extract company name from Wuzzuf job slug
def company_name(slug, title, country, city):
    """Extract the company name embedded in a Wuzzuf job slug.

    The parsing treats the slug as ``<prefix>-<title>-<company>-<city>-<country>``:
    the location suffix is cut off first, then whatever follows the slugified
    title is taken as the company name.

    Args:
        slug: Job slug string (lower-case, hyphen-separated).
        title: Human-readable job title; it is slugified before matching.
        country: Country name, or ``None``/empty when unknown.
        city: City name, or ``None``/empty when unknown.

    Returns:
        The company name in title case, or ``"Confidential Company"`` when the
        slug or title is missing or no company segment can be located.
    """
    fallback = "Confidential Company"
    if not slug or not title:
        return fallback

    title_slug = title.lower().replace(' - ', '-').replace(' ', '-').replace('/', '')

    # Build the location from whichever parts are present; a missing city or
    # country must not abort the extraction.
    location = ' '.join(part for part in (city, country) if part)
    if location:
        loc = location.lower().replace(' - ', '-').replace(' ', '-')
        # Literal split (not a regex): names may contain characters such as
        # '(', '+' or '.' that would be misread as regex syntax.
        slug = slug.split(f'-{loc}')[0]

    # Literal split for the same reason: a title like "C++ Developer" is an
    # invalid regex, and parentheses would create a capture group whose text
    # would be mistaken for the company name.
    parts = slug.split(f'-{title_slug}')
    if len(parts) > 1 and parts[1]:
        return parts[1].replace('-', ' ').lstrip(' ').title()
    return fallback

In [6]:
# Keywords & Roles Functions
def _collect_field(attr, list_key, name_or_id):
    """Return ``name_or_id`` from every dict entry in the list at ``attr[list_key]``.

    A missing key and a key explicitly set to ``None`` both yield an empty
    list. Entries that are not dicts are skipped.
    """
    # `or []` rather than a .get default: the default only applies when the
    # key is absent, not when it is present with a null value.
    entries = attr.get(list_key) or []
    return [entry.get(name_or_id) for entry in entries if isinstance(entry, dict)]


def work_roles(attr, name_or_id):
    """Return the ``name_or_id`` field of each entry under ``attr['workRoles']``."""
    return _collect_field(attr, 'workRoles', name_or_id)


def work_types(attr, name_or_id):
    """Return the ``name_or_id`` field of each entry under ``attr['workTypes']``."""
    return _collect_field(attr, 'workTypes', name_or_id)


def keywords(attr, name_or_id):
    """Return the ``name_or_id`` field of each entry under ``attr['keywords']``."""
    return _collect_field(attr, 'keywords', name_or_id)

In [38]:
def _build_job_record(data):
    """Flatten one job payload (the ``data`` object of an API response) into a dict."""
    # `or {}`: the key may be present with a null value, which .get's default
    # would not cover.
    attr = data.get('attributes') or {}

    # Requirements arrive as HTML; keep only the text of each <li> item.
    requirements_html = deep_get(attr, 'requirements') or ""
    requirements_soup = bs(requirements_html, 'html.parser')
    requirements_list = [li.get_text(strip=True) for li in requirements_soup.find_all('li')]

    slug = attr.get("slug")
    title = attr.get("title")
    country = deep_get(attr, 'location', 'country', 'name')
    city = deep_get(attr, 'location', 'city', 'name')

    return {
        # Basic Info
        "job_id": data.get("id"),
        "title": title,
        "jobType": attr.get("jobType"),
        "applicationType": attr.get("applicationType"),
        "status": attr.get("status"),
        "postedAt": attr.get("postedAt"),
        "expireAt": attr.get("expireAt"),
        "views": attr.get("views"),
        "hotScore": attr.get("hotScore"),
        "link": deep_get(data, 'links', 'self') or attr.get("canonicalLink"),
        "uri": attr.get("uri"),

        # Company
        "company_id": deep_get(data, 'relationships', 'company', 'data', 'id'),
        # The name is derived from the slug; without a slug or title there is
        # nothing to parse, and a missing location is passed as an empty string
        # so it cannot abort the whole record.
        "company_name": (
            company_name(slug, title, country or "", city or "")
            if slug and title else "Confidential Company"
        ),

        # Requirements
        "requirements": requirements_list,

        # Salary
        "salary_min": deep_get(attr, 'salary', 'min'),
        "salary_max": deep_get(attr, 'salary', 'max'),
        "salary_currency": deep_get(attr, 'salary', 'currency'),
        "salary_period": deep_get(attr, 'salary', 'period'),

        # Experience
        "exp_min": deep_get(attr, 'workExperienceYears', 'min'),
        "exp_max": deep_get(attr, 'workExperienceYears', 'max'),

        # Location
        "country": country,
        "city": city,
        "country_id": deep_get(attr, 'location', 'country', 'id'),
        "city_id": deep_get(attr, 'location', 'city', 'id'),
        "area": deep_get(attr, 'location', 'area', 'name'),

        # Keywords & Roles
        "keywords": keywords(attr, 'name') or [],
        "work_types": work_types(attr, 'name') or [],
        "work_types_id": work_types(attr, 'id') or [],
        "work_roles": work_roles(attr, 'name') or [],
        "work_roles_id": work_roles(attr, 'id') or [],
    }


def all_jobs_details(job_ids, delay=0.5, timeout=10):
    """
    Fetch detailed job data from Wuzzuf API for given job IDs.

    Args:
        job_ids: Iterable of job id strings.
        delay: Seconds to sleep after every request, successful or not.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(jobs, errors_list)``: a pandas DataFrame with one row per
        successfully fetched job, and the list of job ids that failed.
    """
    all_jobs = []
    errors_list = []

    # A single session is shared so requests do not each set up from scratch.
    with requests.Session() as session:
        for i, job_id in enumerate(job_ids, start=1):
            if i % 1000 == 0:
                print(f"Processed {i} jobs")
                print(f"{len(errors_list)} Failed")

            try:
                url = f"https://wuzzuf.net/api/job/{job_id}"
                # Without a timeout a single stalled connection blocks the run forever.
                response = session.get(url, timeout=timeout)
                response.raise_for_status()
                data = response.json().get('data') or {}
                all_jobs.append(_build_job_record(data))
            except Exception as e:
                # Best-effort scrape: remember the id and keep going, but say
                # why it failed so the errors can be diagnosed afterwards.
                errors_list.append(job_id)
                print(f"Failed to fetch job {job_id}: {e!r}")
            finally:
                # Throttle after failures too, so errors are not followed by
                # an immediate next request.
                time.sleep(delay)

    return pd.DataFrame(all_jobs), errors_list

In [None]:
# Fetch details for every listing id; returns (DataFrame of jobs, list of failed ids).
# NOTE(review): this issues one HTTP request per id with a 0.5 s default delay, so the
# cell is long-running — consider caching the result before re-running the notebook.
result = all_jobs_details(df['id'])

In [None]:
# Unpack the fetch result: `jobs` is the DataFrame of job details,
# `errors_list` holds the job ids whose fetch or parse failed.
jobs,errors_list = result

In [26]:
jobs

Unnamed: 0,job_id,title,jobType,applicationType,status,postedAt,expireAt,views,hotScore,link,...,country,city,country_id,city_id,area,keywords,work_types,work_types_id,work_roles,work_roles_id
0,3831f42a-fe69-40b9-a9f7-a2d725a18ae4,Accounting Operations - Senior Manager - New C...,job,internal,active,09/25/2025 17:02:47,11/24/2025 17:02:47,779,4,https://wuzzuf.net/api/job/3831f42a-fe69-40b9-...,...,Egypt,Cairo,56,1545,New Cairo,"[Finance, Accounts Payable, Accounting, Invoic...",[full_time],[1],"[Accounting/Finance, Operations/Management]","[2, 19]"
1,df8a8659-85d6-4e30-9666-d5f27156e911,Accounting Manager,job,internal,active,10/06/2025 12:40:13,12/05/2025 12:40:13,376,7,https://wuzzuf.net/api/job/df8a8659-85d6-4e30-...,...,Egypt,Cairo,56,1545,New Cairo,"[Finance, Accounting, Odoo, Reporting, Pivot T...",[full_time],[1],[Accounting/Finance],[2]
2,2550c42d-cec8-4beb-93d8-79513c56baa0,Accounting Manager – Real Estate Development S...,job,internal,active,10/04/2025 13:26:48,12/03/2025 13:26:48,459,5,https://wuzzuf.net/api/job/2550c42d-cec8-4beb-...,...,Egypt,Cairo,56,1545,New Cairo,"[Real Estate, Accounting, Operations Managemen...",[full_time],[1],"[Accounting/Finance, Sales/Retail]","[2, 27]"
3,0e06538b-941b-40a2-b6c4-a473939da9a6,Head OF Accounting,job,internal,active,10/07/2025 14:39:24,12/06/2025 14:39:24,431,8,https://wuzzuf.net/api/job/0e06538b-941b-40a2-...,...,Egypt,Cairo,56,1545,Maadi,"[Finance, Accounting]",[full_time],[1],[Accounting/Finance],[2]
4,b7d17a61-61e3-4253-9694-5b2096ce71e8,Head OF Accounting,job,internal,active,10/12/2025 11:51:52,12/11/2025 11:51:52,358,30,https://wuzzuf.net/api/job/b7d17a61-61e3-4253-...,...,Egypt,Cairo,56,1545,Downtown,"[Accounting, ERP, Finance, Financial Analysis,...",[full_time],[1],[Accounting/Finance],[2]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12725,d01a8fbf-7cd1-4a8e-98b0-638809a6cbaf,Business Development and Admin/Diary management,job,internal,active,09/19/2025 18:33:32,11/18/2025 18:33:32,357,1,https://wuzzuf.net/api/job/d01a8fbf-7cd1-4a8e-...,...,United Kingdom,London,197,5512,,"[Administration, Business Development, secreta...",[full_time],[1],"[Administration, Business Development, Tourism...","[3, 7, 32]"
12726,b50ec3c0-8003-427e-a5d9-c85cf8db2e23,Airline Ticket Reservation Staff,job,internal,active,09/14/2025 13:36:01,11/13/2025 13:36:01,95,0,https://wuzzuf.net/api/job/b50ec3c0-8003-427e-...,...,Libya,Tripoli,104,3345,,"[Aviation ticketing, GDS (Amadeus, Galileo, Sa...",[full_time],[1],[Tourism/Travel],[32]
12727,58a909d1-73f2-4032-8f30-c4cfc654451f,Arabic Proofreader,job,internal,active,09/16/2025 16:33:36,11/15/2025 16:33:36,844,1,https://wuzzuf.net/api/job/58a909d1-73f2-4032-...,...,Egypt,Cairo,56,1545,Downtown,"[Arabic, Arabic proofreading, Education, Editi...",[full_time],[1],"[Writing/Editorial, Other]","[10, 23]"
12728,69df21aa-78c4-406b-81b7-fb520ade8fe5,كاتب/ـة محتوى ثنائي اللغة (عربي – إنجليزي),job,internal,active,09/29/2025 21:58:40,11/28/2025 21:58:40,361,1,https://wuzzuf.net/api/job/69df21aa-78c4-406b-...,...,United Arab Emirates,Dubai,196,5433,,"[Content Creation, Content Writing, Content Ma...",[full_time],[1],[Writing/Editorial],[10]


In [27]:
errors_list

['3b232bd9-14a7-4c8f-b011-5a5fc4b8482f',
 '705f216c-b898-4d15-ac6f-940fa5ca54a8',
 'd6e2a4d1-e98f-4dff-b162-e264c90aee93',
 '0349f61e-b1fc-4494-903e-b8d7da56d623',
 '754b9867-534c-46bd-b8c7-71dafe9a4ff7',
 '10ccf857-60f8-44e2-8c16-49186718d648',
 '12624f63-1002-4b23-a4b8-b3a879cb814f',
 'ca543e19-ed85-451c-9b6e-3a60b4a93237',
 'a8b82996-2f82-49cb-b019-c4c3180417e3',
 '8ceebd1b-2b46-40b2-8fe6-96385b4212bd',
 'ef666fb6-56be-474e-bac7-219475c5779a',
 '5123a5d5-c484-4908-84db-ad645ded4312',
 '8f74ba8b-0512-48f7-bddc-7b2c21294c2f']

In [28]:
# Persist the job details to CSV, omitting the DataFrame index.
# NOTE(review): the list-valued columns (requirements, keywords, work_types, work_roles
# and their *_id variants) will presumably be written in their string form — confirm
# that whatever reads this file parses them back as intended.
jobs.to_csv('Wuzzuf Job Listings and Details October 2025.csv', index=False)