In [1]:
pip install selenium


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException

In [None]:

driver = webdriver.Chrome()
driver.get("https://www.rozee.pk/")

In [4]:
search_box = driver.find_element(By.XPATH, '//*[@id="search"]')
search_box.send_keys("Software Engineer")
search_box.send_keys(Keys.RETURN)
time.sleep(5)

In [None]:
csv_filename = "rozee_jobs.csv"
unique_jobs = set()
max_pages = 5
for page in range(max_pages):
    print(f"Scraping Page {page + 1}...")
    job_listings = driver.find_element(By.ID, "jobs")
    job_divs = job_listings.find_elements(By.CLASS_NAME, "job")
    for job in job_divs:
        try:
            job_anchor = job.find_element(By.TAG_NAME, "a")
            job_url = job_anchor.get_attribute("href")
            job_title = job_anchor.find_element(By.TAG_NAME, "bdi").text.strip()
            company_elements = job.find_elements(By.CSS_SELECTOR, ".cname a")
            company_name = company_elements[0].text.strip() if company_elements else "N/A"
            location = company_elements[1].text.strip() if len(company_elements) > 1 else "N/A"
            job_entry = (job_title, job_url, company_name, location)
            if job_entry not in unique_jobs:
                unique_jobs.add(job_entry)
        except Exception as e:
            print(f"Skipping job due to error: {e}")
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.END)
    time.sleep(5)
    df = pd.DataFrame(list(unique_jobs), columns=["Job Title", "Job URL", "Company Name", "Location"])
    df.to_csv(csv_filename, index=False)
    print(f"Saved {len(unique_jobs)} jobs.")
    try:
        next_button = driver.find_element(By.XPATH, '//*[@id="jobs"]/div[28]/div/ul/li[3]/a')
        next_button.click()
        time.sleep(5)
    except NoSuchElementException:
        print("Next page button not found. Stopping.")
        break

print(f"Final CSV saved: {csv_filename} with {len(unique_jobs)} unique jobs.")

Scraping Page 1...
Saved 20 jobs.
Scraping Page 2...
Saved 35 jobs.
Next page button not found. Stopping.
Final CSV saved: rozee_jobs.csv with 35 unique jobs.


In [19]:
data=pd.read_csv("rozee_jobs.csv")
data

Unnamed: 0,Job Title,Job URL,Company Name,Location
0,Software Engineer - SQL Database,https://www.rozee.pk/the-shams-group-software-...,"The Shams Group,",", Pakistan"
1,Software Engineer,https://www.rozee.pk/digi-soft-software-engine...,"Digi Soft,",Karachi
2,Software Quality Assurance Engineer,https://www.rozee.pk/e2e-worx-software-quality...,"E2E Worx,",Lahore
3,Software Quality Assurance Engineer,https://www.rozee.pk/broadstone-technologies-s...,"Broadstone Technologies,",Lahore
4,Software Quality Assurance Engineer,https://www.rozee.pk/musketeers-tech-software-...,"Musketeers Tech,",Lahore
5,Software Quality Assurance Engineer,https://www.rozee.pk/modisoft-software-quality...,"Modisoft,",Karachi
6,Software Engineer,https://www.rozee.pk/st-michael-llc-software-e...,"St. Michael Llc,",Karachi
7,IT Support Engineer,https://www.rozee.pk/mmba-chartered-certified-...,"MMBA Chartered Certified Accountants,",Islamabad
8,Mechanical Engineer,https://www.rozee.pk/lockersmiths-pvt-ltd-mech...,"Lockersmiths (Pvt) Ltd,",Gujranwala
9,Software Engineer,https://www.rozee.pk/croem-inc-software-engine...,"Croem Inc,",Islamabad


In [None]:
input_csv = "rozee_jobs.csv"
output_csv = "rozee_jobs_details.csv"
df = pd.read_csv(input_csv)
driver = webdriver.Chrome()
job_details = []

for index, row in df.iterrows():
    job_url = row["Job URL"]
    company_name = row["Company Name"]
    job_title = row["Job Title"]
    print(f"Scraping details for: {job_title} ({company_name})")
    driver.get(job_url)
    time.sleep(5) 
    try:
        skills_elements = driver.find_elements(By.XPATH, '//*[@id="jbDetail"]/div[2]/div//a')
        skills = [skill.text.strip() for skill in skills_elements if skill.text.strip()]
        requirements_elements = driver.find_elements(By.XPATH, '//*[@id="jbDetail"]/div[1]/div/ul[2]/li')
        requirements = [req.text.strip() for req in requirements_elements if req.text.strip()]
        try:
            job_type = driver.find_element(By.XPATH, '//*[@id="jbDetail"]/div[3]/div/div[5]/div[2]').text.strip()
        except NoSuchElementException:
            job_type = "N/A"
        job_details.append({
            "Job Title": job_title,
            "Company Name": company_name,
            "Job URL": job_url,
            "Skills": ", ".join(skills),
            "Requirements": "; ".join(requirements),
            "Job Type": job_type
        })

    except Exception as e:
        print(f"Skipping due to error: {e}")
df_details = pd.DataFrame(job_details)
df_details.to_csv(output_csv, index=False)

print(f"Job details saved in {output_csv}")

Scraping details for: Software Engineer - SQL Database (The Shams Group,)
Scraping details for: Software Engineer (Digi Soft,)
Scraping details for: Software Quality Assurance Engineer (E2E Worx,)
Scraping details for: Software Quality Assurance Engineer (Broadstone Technologies,)
Scraping details for: Software Quality Assurance Engineer (Musketeers Tech,)
Scraping details for: Software Quality Assurance Engineer (Modisoft,)
Scraping details for: Software Engineer (St. Michael Llc,)
Scraping details for: IT Support Engineer (MMBA Chartered Certified Accountants,)
Scraping details for: Mechanical Engineer (Lockersmiths (Pvt) Ltd,)
Scraping details for: Software Engineer (Croem Inc,)
Scraping details for: Civil Engineer (Manpoy UK,)
Scraping details for: Software Engineer (MATZ Solutions Pvt Ltd,)
Scraping details for: Software Support Engineer (The Shams Group,)
Scraping details for: Mechanical Engineer (Nateeq Enterprise,)
Scraping details for: Software Engineer (Softo Tech,)
Scraping 

In [21]:
data=pd.read_csv("rozee_jobs_details.csv")
data

Unnamed: 0,Job Title,Company Name,Job URL,Skills,Requirements,Job Type
0,Software Engineer - SQL Database,"The Shams Group,",https://www.rozee.pk/the-shams-group-software-...,"End to End Sales, Microsoft SQL Database, Soft...",Bachelors degree in Computer Science or releva...,Full Time/Permanent
1,Software Engineer,"Digi Soft,",https://www.rozee.pk/digi-soft-software-engine...,"AngularJS, MVC, RESTful APIs, ASP.net",,Full Time/Permanent
2,Software Quality Assurance Engineer,"E2E Worx,",https://www.rozee.pk/e2e-worx-software-quality...,"Software Quality Checking, Software Quality As...",,Full Time/Permanent
3,Software Quality Assurance Engineer,"Broadstone Technologies,",https://www.rozee.pk/broadstone-technologies-s...,"Software Quality Review, Agile Programming, So...",Strong knowledge of Software QA tools and proc...,Full Time/Permanent
4,Software Quality Assurance Engineer,"Musketeers Tech,",https://www.rozee.pk/musketeers-tech-software-...,"Test Automation, Selenium, Jira Configuration,...",,2 Years
5,Software Quality Assurance Engineer,"Modisoft,",https://www.rozee.pk/modisoft-software-quality...,Performance Testing,,2 Years
6,Software Engineer,"St. Michael Llc,",https://www.rozee.pk/st-michael-llc-software-e...,"Java, C++, JavaScript",,No Preference
7,IT Support Engineer,"MMBA Chartered Certified Accountants,",https://www.rozee.pk/mmba-chartered-certified-...,"Microsoft Online, IT Infrastructure Management...",,Full Time/Permanent
8,Mechanical Engineer,"Lockersmiths (Pvt) Ltd,",https://www.rozee.pk/lockersmiths-pvt-ltd-mech...,"Team Building, MS Excel, Production Coordinati...",,Full Time/Permanent
9,Software Engineer,"Croem Inc,",https://www.rozee.pk/croem-inc-software-engine...,DotNet,,5 Years


# ----------------------------Further Analysis------------------------

In [23]:
def most_common(items):
    counts = {}
    for item in items:
        counts[item] = counts.get(item, 0) + 1
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)[:10]

jobs_df = pd.read_csv("rozee_jobs.csv")
print(most_common(jobs_df["Job Title"]))

job_details_df = pd.read_csv("rozee_jobs_details.csv")
all_skills = []

for skills in job_details_df["Skills"].dropna():
    all_skills.extend(skills.split(","))

print(most_common(all_skills))


[('Software Engineer', 8), ('Software Quality Assurance Engineer', 7), ('IT Support Engineer', 2), ('Mechanical Engineer', 2), ('Software Engineer - SQL Database', 1), ('Civil Engineer', 1), ('Software Support Engineer', 1), ('Principal Software Engineer', 1), ('ASP.NET Developer / Software Engineer', 1), ('Senior Software Engineer', 1)]
[(' JavaScript', 5), (' HTML', 4), (' ASP.net', 4), (' Software Quality Review', 4), (' Communication Skills', 4), (' AutoCAD Design Software', 4), (' C', 4), (' Software Life Cycle Testing', 3), (' Software Development', 2), ('AngularJS', 2)]
