# Libraries

In [3]:
import sqlite3
import pandas as pd
from W_db import store_jobs_in_db
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from urllib.parse import urlencode
import time
from datetime import datetime, timedelta
import re

# Driver

In [6]:
def setup_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver

# URL

In [9]:
def generate_url(query):
    params = {'q': query}
    return f"https://wuzzuf.net/search/jobs/?{urlencode(params)}"

# formatting

In [12]:
def get_posted_date(time_posted):
    match = re.search(r'(\d+)', time_posted)
    if match:
        number = int(match.group(1))
        
        if "day" in time_posted:
            return (datetime.now() - timedelta(days=number)).strftime('%d-%m-%Y')
        elif "hour" in time_posted:
            return (datetime.now() - timedelta(hours=number)).strftime('%d-%m-%Y')
        elif "minute" in time_posted:
            return (datetime.now() - timedelta(minutes=number)).strftime('%d-%m-%Y')
        elif "month" in time_posted:
            return (datetime.now() - timedelta(days=number*30)).strftime('%d-%m-%Y')
        else:
            return "Invalid time format"
    else:
        return "Time not found"

In [14]:
def scrape_jobs(job_titles, max_pages=1):
    driver = setup_driver()
    wait = WebDriverWait(driver, 10)
    jobs_data = []
    try:
        for job_title in job_titles:
            print(f"Scraping jobs for: {job_title}")
            
            url = generate_url(job_title)
            driver.get(url)

            current_page = 1

            while current_page <= max_pages:
                print(f"Processing page {current_page}: {driver.current_url}")
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1gatmva.e1v1l3u10")))
                
                job_cards = driver.find_elements(By.CSS_SELECTOR, "div.css-1gatmva.e1v1l3u10")
                
                if not job_cards:
                    print(f"No jobs found on page {current_page} for {job_title}")
                    break

                for card in job_cards:
                    try:
                        job_link = card.find_element(By.CSS_SELECTOR, "a.css-o171kl").get_attribute("href")
                        job_id = job_link.split('/')[-1]

                        try:
                            time_posted = card.find_element(By.CSS_SELECTOR, "div.css-d7j1kk div.css-4c4ojb").text
                        except:
                            try:
                                time_posted = card.find_element(By.CSS_SELECTOR, "div.css-d7j1kk div.css-do6t5g").text
                            except:
                                time_posted = "Time not found"

                        posted_date = get_posted_date(time_posted)
                    
                        job_info = {
                            "ID": job_id,
                            "Title": card.find_element(By.CSS_SELECTOR, "a.css-o171kl").text,
                            "Company": card.find_element(By.CSS_SELECTOR, "a.css-17s97q8").text,
                            "Location": card.find_element(By.CSS_SELECTOR, "span.css-5wys0k").text,
                            "Posted_Date": posted_date,
                            "Job_Type": card.find_element(By.CSS_SELECTOR, "span.css-1ve4b75.eoyjyou0").text,
                            "Search_Query": job_title,
                            "Page": current_page,
                            "Job_Link": job_link}

                        driver.execute_script("window.open('');")
                        driver.switch_to.window(driver.window_handles[1])
                        driver.get(job_link)

                        try:
                            full_description = wait.until(
                                EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-1uobp1k")))
                            job_info["Description"] = full_description.text
                            
                            try:
                                skills_elements = wait.until(
                                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.css-158icaa")))
                                job_info["Skills"] = ", ".join([skill.text for skill in skills_elements])
                            except:
                                job_info["Skills"] = "Not specified"

                            try:
                                job_details = wait.until(
                                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.css-rcl8e5")))
                                
                                for detail in job_details:
                                    if "Experience Needed:" in detail.text:
                                        exp_text = detail.text
                                        job_info["Experience_Needed"] = exp_text.split("Experience Needed:")[1].strip()
                                        break
                                else:
                                    job_info["Experience_Needed"] = "Not specified"
                            except:
                                job_info["Experience_Needed"] = "Not specified"
                        except:
                            job_info["Description"] = "Description not found"
                            job_info["Skills"] = "Not specified"
                            job_info["Experience_Needed"] = "Not specified"
                        
                        jobs_data.append(job_info)

                        driver.close()
                        driver.switch_to.window(driver.window_handles[0])

                    except Exception as e:
                        print(f"Error extracting job details: {e}")
                        if len(driver.window_handles) > 1:
                            driver.close()
                            driver.switch_to.window(driver.window_handles[0])
                        continue

                try:
                    next_button = wait.until(EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, "button.css-zye1os.ezfki8j0")))
                    driver.execute_script("arguments[0].click();", next_button)
                    current_page += 1
                    time.sleep(2)
                except:
                    print(f"No next button found on page {current_page}.")
                    break

    finally:
        driver.quit()

    jobs_df = pd.DataFrame(jobs_data)
    store_jobs_in_db(jobs_df)
    return jobs_df

# Jops you want to add

In [17]:
job_titles = ["Python","sowftware Engineer"]

In [19]:
jobs_df = scrape_jobs(job_titles,1)

Scraping jobs for: Python
Processing page 1: https://wuzzuf.net/search/jobs/?q=Python
Scraping jobs for: sowftware Engineer
Processing page 1: https://wuzzuf.net/search/jobs/?q=sowftware+Engineer
                                              job_id  \
0  kx4QTsoTSbKX-Senior-Python-Developer-Softxpert...   
1  y9FsWhj1fMll-Python-Team-Lead---AI-Python-Engi...   
2  ayajJH3Afl3R-Python-Automation-Engineer-Luxoft...   
3  kJNETgKZ7v9r-Senior-Python-Automation-Engineer...   
4  bwni7B3ow3qc-Python-Backend-Developer-%E2%80%9...   

                                        title  \
0                     Senior Python Developer   
1  Python Team Lead - AI & Python Engineering   
2                  Python Automation Engineer   
3           Senior Python Automation Engineer   
4    Python Backend Developer – AI & Chatbots   

                               company                   location posted_date  \
0            Softxpert Incorporation -  Bolkly, Alexandria, Egypt  09-04-2025   
1         

In [21]:
jobs_df.head()

Unnamed: 0,ID,Title,Company,Location,Posted_Date,Job_Type,Search_Query,Page,Job_Link,Description,Skills,Experience_Needed
0,kx4QTsoTSbKX-Senior-Python-Developer-Softxpert...,Senior Python Developer,Softxpert Incorporation -,"Bolkly, Alexandria, Egypt",09-04-2025,Full Time,Python,1,https://wuzzuf.net/jobs/p/kx4QTsoTSbKX-Senior-...,"Job Description:\nDevelop, test, and maintain ...","Business Development, IT/Software Development,...",More Than 3 Years
1,y9FsWhj1fMll-Python-Team-Lead---AI-Python-Engi...,Python Team Lead - AI & Python Engineering,Softec Technologies -,"Cairo, Egypt",08-04-2025,Full Time,Python,1,https://wuzzuf.net/jobs/p/y9FsWhj1fMll-Python-...,Position Overview:\nWe are seeking a highly sk...,"IT/Software Development, Quality, Engineering ...",More Than 6 Years
2,ayajJH3Afl3R-Python-Automation-Engineer-Luxoft...,Python Automation Engineer,Luxoft (A DXC Technology Company) -,"Cairo, Egypt",28-03-2025,Full Time,Python,1,https://wuzzuf.net/jobs/p/ayajJH3Afl3R-Python-...,Requirement analysis\nConcept development for ...,"IT/Software Development, Engineering - Telecom...",More Than 3 Years
3,kJNETgKZ7v9r-Senior-Python-Automation-Engineer...,Senior Python Automation Engineer,Luxoft (A DXC Technology Company) -,"Cairo, Egypt",28-03-2025,Full Time,Python,1,https://wuzzuf.net/jobs/p/kJNETgKZ7v9r-Senior-...,Requirement analysis\nConcept development for ...,"IT/Software Development, Engineering - Telecom...",More Than 5 Years
4,bwni7B3ow3qc-Python-Backend-Developer-%E2%80%9...,Python Backend Developer – AI & Chatbots,OTIC -,"New Cairo, Cairo, Egypt",26-02-2025,Full Time,Python,1,https://wuzzuf.net/jobs/p/bwni7B3ow3qc-Python-...,Python Backend Developer\nAre you a skilled Py...,"IT/Software Development, Python Backend Develo...",2 To 4 Years


# start to Summarize the description model

In [24]:
#pip install transformers
#pip install torch

In [26]:
import sqlite3

conn = sqlite3.connect("job.db")
query = "SELECT description FROM jobs"

db_data = pd.read_sql_query(query, conn)

conn.close()


In [28]:
db_data

Unnamed: 0,description
0,"Job Description:\nDevelop, test, and maintain ..."
1,Position Overview:\nWe are seeking a highly sk...
2,Requirement analysis\nConcept development for ...
3,Requirement analysis\nConcept development for ...
4,Python Backend Developer\nAre you a skilled Py...
5,We are looking for a Data Analyst Lead / Manag...
6,Location: Remote\nType: Contract/Full-Time\nRo...
7,Engineer Needed – Programming & Control with P...
8,1. Team Mission & Culture: \nThe Customer & Te...
9,We are seeking a skilled Odoo Developer to joi...


In [72]:
# i want to know what is the length of the 29 lines 
for line in range( len(db_data)) :
    print(f' line number {line}   has length : { len(db_data['description'][line]) } ')

 line number 0   has length : 675 
 line number 1   has length : 1753 
 line number 2   has length : 487 
 line number 3   has length : 503 
 line number 4   has length : 874 
 line number 5   has length : 2116 
 line number 6   has length : 732 
 line number 7   has length : 651 
 line number 8   has length : 4221 
 line number 9   has length : 822 
 line number 10   has length : 298 
 line number 11   has length : 1598 
 line number 12   has length : 816 
 line number 13   has length : 2206 
 line number 14   has length : 779 
 line number 15   has length : 1240 
 line number 16   has length : 857 
 line number 17   has length : 2443 
 line number 18   has length : 1495 
 line number 19   has length : 515 
 line number 20   has length : 1674 
 line number 21   has length : 1288 
 line number 22   has length : 356 
 line number 23   has length : 1626 
 line number 24   has length : 213 
 line number 25   has length : 2889 
 line number 26   has length : 1041 
 line number 27   has len

In [80]:
from transformers import pipeline
summarizer = pipeline('summarization', model="google/pegasus-xsum", framework="pt")

summaries = []


for line in db_data["description"].tolist():
    i=0
    if len(line) > 512:
        line = line[:512]  

    
    try:
        summary = summarizer(line, max_length=60, min_length=10, do_sample=False)
        summaries.append(summary[0]['summary_text'])
        print(f' t {i}')
        print(summaries)
    except Exception as e:
        summaries.append("")  
        print(f' e {i}')
        print(summaries)

    i=i+1


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.']
 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.']
 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation a

Your max_length is set to 60, but your input_length is only 56. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution of automated tests Test analysis and test evaluation Defect logging Reporting Development of automated procedures for test execution, test result analysis, traces repository, and automated', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution

Your max_length is set to 60, but your input_length is only 56. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=28)


 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution of automated tests Test analysis and test evaluation Defect logging Reporting Development of automated procedures for test execution, test result analysis, traces repository, and automated', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution

Your max_length is set to 60, but your input_length is only 37. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)


 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution of automated tests Test analysis and test evaluation Defect logging Reporting Development of automated procedures for test execution, test result analysis, traces repository, and automated', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution

Your max_length is set to 60, but your input_length is only 38. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=19)


 t 0
['The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework.', 'The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives.', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution of automated tests Test analysis and test evaluation Defect logging Reporting Development of automated procedures for test execution, test result analysis, traces repository, and automated', 'Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution

In [82]:
len(summaries)

29

In [95]:
for i in range (len(summaries)) :
    print (f'Summ {i} is {summaries[i]} \n' )

Summ 0 is The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework. 

Summ 1 is The ideal candidate will have a strong technical background in Python, combined with leadership skills to manage and inspire a team, ensuring timely delivery and alignment with product objectives. 

Summ 2 is Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verification of test cases Execution of automated tests Test analysis and test evaluation Defect logging Reporting Development of automated procedures for test execution, test result analysis, traces repository, and automated 

Summ 3 is Requirement analysis Concept development for test automation Design, implementation and verification of Robot based Automation Framework Implementation of automated tests Creation, implementation and verifi

In [93]:
for i in range (len(summaries)) :
    print (f'Descr {i} is {db_data['description'][i]} \n' )

Summ 0 is Job Description:
Develop, test, and maintain web applications using Python and the Django framework.
Design and implement RESTful APIs for seamless integration with front-end applications.
Collaborate with product managers and designers to deliver user-friendly solutions.
Optimize application performance and scalability.
Write clean, efficient, and well-documented code.
Troubleshoot and debug application issues.
Stay updated on emerging trends and best practices in web development.
Implement and manage database models and migrations using Django ORM.
Perform reliable code reviews to ensure system quality.
Taking the initiative for system improvement in all aspects.
  

Summ 1 is Position Overview:
We are seeking a highly skilled and motivated Python Team Lead to spearhead the AI and Python engineering efforts in our product suite. This role is pivotal in driving the development of cutting-edge AI-driven data security solutions, leading a team of Python developers and AI engin

In [105]:
# ad to DB

con=sqlite3.connect('job.db')
cursor=con.cursor()
cursor.execute('ALTER TABLE jobs ADD COLUMN summary TEXT')
conn.commit()


In [111]:
conn = sqlite3.connect('job.db')
cursor = conn.cursor()

for i, summary in enumerate(summaries):
    cursor.execute('''
        UPDATE jobs
        SET summary = ?
        WHERE rowid = ?''', (summary, i + 1))  


conn.commit()


In [121]:
conn = sqlite3.connect('job.db')
cursor = conn.cursor()


cursor.execute("SELECT rowid, description, summary FROM jobs LIMIT 5")
rows = cursor.fetchall()


for row in rows:
    print(f'Row ID: {row[0]} \n , Description: {row[1]} , \n Summary: {row[2]} \n ')

conn.close()

Row ID: 1 
 , Description: Job Description:
Develop, test, and maintain web applications using Python and the Django framework.
Design and implement RESTful APIs for seamless integration with front-end applications.
Collaborate with product managers and designers to deliver user-friendly solutions.
Optimize application performance and scalability.
Write clean, efficient, and well-documented code.
Troubleshoot and debug application issues.
Stay updated on emerging trends and best practices in web development.
Implement and manage database models and migrations using Django ORM.
Perform reliable code reviews to ensure system quality.
Taking the initiative for system improvement in all aspects.
  , 
 Summary: The Web Developer will be responsible for developing and maintaining web applications using Python and the Django framework. 
 
Row ID: 2 
 , Description: Position Overview:
We are seeking a highly skilled and motivated Python Team Lead to spearhead the AI and Python engineering effo