In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Fetch the first results page once so the markup can be explored interactively.
url = "https://wuzzuf.net/search/jobs/?q=&start=0"
# Without a timeout, requests waits indefinitely if the server never answers.
request = requests.get(url, timeout=30)
content = request.content
soup = BeautifulSoup(content, "html.parser")
# Select the <div> elements with this class string; the extract_* functions
# defined in the following cells treat each of them as one job card.
jobs = soup.find_all("div", class_="css-1gatmva e1v1l3u10")

In [3]:
def extract_job_names(jobs):
    """Collect the job title of every job card.

    Args:
        jobs: Iterable of job-card elements. Each must offer
            ``find_all(name, class_=...)`` returning a list of elements that
            expose a ``.text`` attribute (e.g. BeautifulSoup tags).

    Returns:
        pandas.DataFrame with a single ``'Job Names'`` column holding exactly
        one row per card, in input order. A card without an
        ``<h2 class="css-m604qf">`` element yields ``None``.
    """
    jobs_names = []

    for job in jobs:
        elements = job.find_all("h2", class_="css-m604qf")
        # Keep a placeholder for cards missing a title: skipping them would
        # make this column shorter and shift its rows against the other
        # per-card columns when the frames are concatenated side by side.
        jobs_names.append(elements[0].text if elements else None)

    return pd.DataFrame({'Job Names': jobs_names})

In [4]:
def extract_company_name(jobs):
    """Collect the company name of every job card.

    The name is taken as the text of the first ``<a class="css-17s97q8">``
    element, cut at the first ``"-"``.

    Args:
        jobs: Iterable of job-card elements. Each must offer
            ``find_all(name, class_=...)`` returning a list of elements that
            expose a ``.text`` attribute (e.g. BeautifulSoup tags).

    Returns:
        pandas.DataFrame with a single ``'Company Name'`` column holding
        exactly one row per card, in input order. A card without the anchor
        element yields ``None``.
    """
    company_names = []

    for job in jobs:
        elements = job.find_all('a', class_='css-17s97q8')
        if elements:
            # Everything from the first "-" onwards is discarded.
            # NOTE(review): a company name that itself contains "-" is also
            # cut at its first hyphen - confirm this is acceptable.
            company_names.append(elements[0].text.split("-")[0])
        else:
            # Placeholder keeps this column row-aligned with the other
            # per-card columns when the frames are concatenated side by side.
            company_names.append(None)

    return pd.DataFrame({'Company Name': company_names})

In [5]:
def extract_post_time(jobs):
    """Collect the posting-time text of every job card.

    Two ``<div>`` classes are tried in order, ``css-4c4ojb`` first and then
    ``css-do6t5g``; the text of the first match found is used.

    Args:
        jobs: Iterable of job-card elements offering
            ``find_all(name, class_=...)``.

    Returns:
        pandas.DataFrame with a single ``'Post Time'`` column, one row per
        card in input order; ``None`` where neither class is present.
    """
    candidate_classes = ("css-4c4ojb", "css-do6t5g")

    def first_post_time(card):
        # Return the text of the first matching <div>, honouring class order.
        for css_class in candidate_classes:
            matches = card.find_all('div', class_=css_class)
            if matches:
                return matches[0].text
        return None  # no post time found on this card

    return pd.DataFrame({'Post Time': [first_post_time(card) for card in jobs]})

In [6]:
def extract_location(jobs):
    """Collect the location text of every job card.

    Args:
        jobs: Iterable of job-card elements. Each must offer
            ``find_all(name, class_=...)`` returning a list of elements that
            expose a ``.text`` attribute (e.g. BeautifulSoup tags).

    Returns:
        pandas.DataFrame with a single ``'Location'`` column holding exactly
        one row per card, in input order. A card without a
        ``<span class="css-5wys0k">`` element yields ``None``.
    """
    locations = []

    for job in jobs:
        elements = job.find_all('span', class_="css-5wys0k")
        # Keep a placeholder for cards missing a location: skipping them would
        # shift this column's rows against the other per-card columns when the
        # frames are concatenated side by side.
        locations.append(elements[0].text if elements else None)

    return pd.DataFrame({'Location': locations})

In [7]:
def extract_job_types(jobs):
    """Collect the job-type text of every job card.

    Args:
        jobs: Iterable of job-card elements. Each must offer
            ``find_all(name, class_=...)`` returning a list of elements that
            expose a ``.text`` attribute (e.g. BeautifulSoup tags).

    Returns:
        pandas.DataFrame with a single ``'Job Type'`` column holding exactly
        one row per card, in input order. A card without a
        ``<div class="css-1lh32fc">`` element yields ``None``.
    """
    jobs_types = []

    for job in jobs:
        elements = job.find_all("div", class_="css-1lh32fc")
        # NOTE(review): values such as "Part TimeFreelance / ProjectWork From
        # Home" appear in the scraped output, so several type labels seem to be
        # run together by `.text` - consider extracting them with a separator.
        # Placeholder for missing boxes keeps this column row-aligned with the
        # other per-card columns when the frames are concatenated side by side.
        jobs_types.append(elements[0].text if elements else None)

    return pd.DataFrame({'Job Type': jobs_types})

In [8]:
def extract_job_info(jobs):
    """Parse experience level, years of experience and skills per job card.

    The details are read from the node that directly follows the first
    ``<div class="css-1lh32fc">`` of a card. Its text is split on ``" · "``:
    the first part becomes ``'Exp Level'``, the second ``'Years Of Exp'`` and
    the remaining parts are joined with single spaces into ``'Job Skills'``.

    Args:
        jobs: Iterable of job-card elements. Each must offer
            ``find_all(name, class_=...)``; the returned elements must expose
            ``next_sibling`` (e.g. BeautifulSoup tags).

    Returns:
        pandas.DataFrame with the columns ``'Exp Level'``, ``'Years Of Exp'``
        and ``'Job Skills'`` and exactly one row per card, in input order.
        Cards whose details cannot be located yield a row of ``None``; a
        details line with a single part leaves ``'Years Of Exp'`` as ``None``.
    """
    columns = ['Exp Level', 'Years Of Exp', 'Job Skills']
    job_details = []

    for job in jobs:
        exp_level = years_of_exp = job_skills = None

        # Look the box up once instead of repeating the search for each field.
        type_boxes = job.find_all("div", class_="css-1lh32fc")
        details_node = type_boxes[0].next_sibling if type_boxes else None
        # next_sibling may be None (or lack .text); treat that as "no details"
        # rather than raising and aborting the whole scrape.
        details_text = getattr(details_node, "text", None)

        if details_text:
            parts = details_text.split(" · ")
            exp_level = parts[0]
            # NOTE(review): parsing is purely positional. The scraped output
            # contains rows whose 'Years Of Exp' holds a category such as
            # "IT/Software Development", so some cards seem to omit the years
            # part - confirm and handle if that column must be clean.
            if len(parts) > 1:
                years_of_exp = parts[1]
            job_skills = " ".join(parts[2:])

        # Always append, so rows stay aligned with the other per-card columns.
        job_details.append({'Exp Level': exp_level, 'Years Of Exp': years_of_exp, 'Job Skills': job_skills})

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(job_details, columns=columns)

In [9]:
def concatenate_dataframes(dfs):
    """Join the given DataFrames side by side into a single frame.

    Args:
        dfs: Sequence of pandas DataFrames to place next to each other.

    Returns:
        The column-wise ``pd.concat`` of ``dfs`` (rows are matched on index).
    """
    return pd.concat(dfs, axis="columns")

In [10]:
# Scrape settings (same values the loop previously had hard-coded inline).
N_PAGES = 696          # number of result pages requested: start=0 .. N_PAGES - 1
MAX_ROWS = 10440       # upper bound on the rows kept in the final table
REQUEST_TIMEOUT = 30   # seconds to wait for each page before giving up

# Initialize an empty list to store one DataFrame per results page
dfs_list = []

for i in range(N_PAGES):
    # Without a timeout a single stalled connection would hang the whole run.
    request = requests.get(
        f"https://wuzzuf.net/search/jobs/?q=&start={i}",
        timeout=REQUEST_TIMEOUT,
    )
    content = request.content
    soup = BeautifulSoup(content, "html.parser")
    jobs = soup.find_all("div", class_="css-1gatmva e1v1l3u10")

    df0 = extract_job_names(jobs)
    df1 = extract_company_name(jobs)
    df2 = extract_post_time(jobs)
    df3 = extract_location(jobs)
    df4 = extract_job_types(jobs)
    df5 = extract_job_info(jobs)

    # Place the per-field columns of this page side by side
    df = concatenate_dataframes([df0, df1, df2, df3, df4, df5])

    # Append the page's DataFrame to the list
    dfs_list.append(df)

    if i % 50 == 0:  # Print a message every 50 iterations
        print(f"Iteration {i} done.")

# Stack all pages after the loop; ignore_index gives a fresh 0..n-1 index
final_df = pd.concat(dfs_list, axis=0, ignore_index=True)

# Keep at most MAX_ROWS rows (all columns)
final_df = final_df.iloc[:MAX_ROWS, :]

Iteration 0 done.
Iteration 50 done.
Iteration 100 done.
Iteration 150 done.
Iteration 200 done.
Iteration 250 done.
Iteration 300 done.
Iteration 350 done.
Iteration 400 done.
Iteration 450 done.
Iteration 500 done.
Iteration 550 done.
Iteration 600 done.
Iteration 650 done.


In [11]:
# Display the scraped table; the notebook renders a truncated preview of it.
final_df

Unnamed: 0,Job Names,Company Name,Post Time,Location,Job Type,Exp Level,Years Of Exp,Job Skills
0,High-School English Teacher,Delta American School,1 hour ago,"Mansoura, Dakahlia, Egypt",Full Time,Experienced,2+ Yrs of Exp,Education/Teaching Teacher Teaching Education ...
1,Java Instructor,MICA,41 minutes ago,"Haram, Giza, Egypt",Full Time,Entry Level,IT/Software Development,Engineering - Telecom/Technology Training/Inst...
2,Senior Estimator façade Engineer,AluNile,2 hours ago,"Mohandessin, Giza, Egypt",Full Time,Experienced,3 - 7 Yrs of Exp,Engineering - Construction/Civil/Architecture ...
3,Sales Lead,Hands of Hope Physical Therapy & Wellness,2 hours ago,"Cairo, Egypt",Full Time,Manager,6 - 8 Yrs of Exp,Customer Service/Support Operations/Management...
4,Payroll Supervisor - October City,GEMS Education,2 hours ago,"6th of October, Giza, Egypt",Full Time,Manager,4 - 6 Yrs of Exp,Human Resources Operations/Management Compensa...
...,...,...,...,...,...,...,...,...
10320,Flutter Developer,Techno Management,2 months ago,"Cairo, Egypt",Part TimeFreelance / ProjectWork From Home,Experienced,3 - 7 Yrs of Exp,IT/Software Development Mobile flutter Develop...
10321,Digital Marketing Executive,euro trust capital,2 months ago,"Maadi, Cairo, Egypt",Full Time,Experienced,3 - 9 Yrs of Exp,IT/Software Development Operations/Management ...
10322,UI/UX Developer,Techno Management,2 months ago,"Cairo, Egypt",Full TimeFreelance / ProjectWork From Home,Experienced,3 - 10 Yrs of Exp,IT/Software Development UI/UX Developer Bootst...
10323,Maintenance Manager,Confidential,2 months ago,"Badr City, Cairo, Egypt",Full Time,Manager,10 - 15 Yrs of Exp,Installation/Maintenance/Repair Operations/Man...


In [12]:
# Summarize the columns of the scraped table: non-null counts and dtypes.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10325 entries, 0 to 10324
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Job Names     10325 non-null  object
 1   Company Name  10325 non-null  object
 2   Post Time     10325 non-null  object
 3   Location      10325 non-null  object
 4   Job Type      10325 non-null  object
 5   Exp Level     10325 non-null  object
 6   Years Of Exp  10325 non-null  object
 7   Job Skills    10325 non-null  object
dtypes: object(8)
memory usage: 645.4+ KB


In [14]:
# Persist the scraped table as a CSV file (path is relative to the working directory).
# NOTE(review): to_csv also writes the index as an unnamed first column by
# default; pass index=False if downstream readers should not see that column.
final_df.to_csv("Wuzzuf_jobs_data.csv")