In [114]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import random
import pandas as pd


### Imported the necessary libraries for scraping LinkedIn job postings.
### - `requests` for making HTTP requests to fetch web pages.
### - `BeautifulSoup` for parsing HTML content.
### - `random` for generating random values (intended for user-agent rotation; not yet used in the cells below).
### - `pandas` for handling and manipulating the scraped data.

In [115]:
# Prompt the user for the job title and location to search for.
# Surrounding whitespace is stripped so stray spaces typed at the prompt do not end up in the query.
desired_job_title = input("Enter the job title you are searching for: ").strip()
desired_location = input("Enter the location you are looking in: ").strip()

In [116]:
# Offset of the first result to request; it is sent as the `start` query parameter of the search URL
pagination_start = 0


In [117]:
# Query the LinkedIn guest job-search endpoint for one page of results.
# The search terms are passed through `params` so that requests URL-encodes them; titles or
# locations containing spaces, "&" or non-ASCII characters can no longer corrupt the query string.
search_endpoint = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
search_params = {
    "keywords": desired_job_title,
    "location": desired_location,
    "start": pagination_start,
}
# A timeout keeps the cell from hanging indefinitely if the server never answers
response = requests.get(search_endpoint, params=search_params, timeout=30)
# Keep the fully encoded URL reported by the response for later inspection
job_search_url = response.url

In [118]:
# Show the response object to check the HTTP status of the search request
response

<Response [200]>

In [119]:
# Preview only the beginning of the returned HTML; displaying the whole document floods the output
response.text[:500]

'<!DOCTYPE html>\n\n      <li>\n        \n    \n\n    \n    \n    \n      <div class="base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card" data-entity-urn="urn:li:jobPosting:3912829462" data-impression-id="jobs-search-result-0" data-reference-id="F/ypxWWcHJY/FJUNyegRAw==" data-tracking-id="NmW/Mx7lUDfsMWCdS+fXig==" data-column="1" data-row="1">\n        \n\n        <a class="base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]" href="https://eg.linkedin.com/jobs/view/junior-frontend-developer-at-vodafone-3912829462?position=1&amp;pageNum=0&amp;refId=F%2FypxWWcHJY%2FFJUNyegRAw%3D%3D&amp;trackingId=NmW%2FMx7lUDfsMWCdS%2BfXig%3D%3D&amp;trk=public_jobs_jserp-result_search-card" data-tracking-control-name="public_jobs_jserp-result_search-card" data-tracking-client-ingraph data-tracking-will-navigate>\n          \n          <span class="sr-only">\n              \n        \n        Junior Fron

In [120]:
# Parse the search-result HTML and collect every <li> element
# (each list item is expected to hold one job posting)
list_data = response.text
list_soup = BeautifulSoup(markup=list_data, features="html.parser")
page_jobs = list_soup.find_all(name="li")

In [121]:
# Parse the search-result HTML and collect every <li> element as a job posting.
# NOTE(review): this repeats the parse done for `page_jobs`, only under different variable names.
job_search_html = response.text
job_search_soup = BeautifulSoup(markup=job_search_html, features="html.parser")
job_postings = job_search_soup.find_all(name="li")

In [122]:
# Create an empty list to collect the job posting IDs (filled by the loop over page_jobs)
id_list = []

In [123]:
# Initialize an empty list to collect the job posting IDs (filled by the loop over job_postings)
job_posting_ids = []

In [124]:
# Iterate through the job postings and collect their job IDs.
# The list is rebuilt from scratch so that re-running this cell does not append duplicates.
id_list = []
for job in page_jobs:
    base_card_div = job.find("div", {"class": "base-card"})
    # find() returns None for list items without a job card; skip them instead of crashing
    if base_card_div is None:
        continue
    entity_urn = base_card_div.get("data-entity-urn")
    if not entity_urn:
        continue
    # The URN has the form "urn:li:jobPosting:<id>"; the ID is the fourth segment
    urn_parts = entity_urn.split(":")
    if len(urn_parts) < 4:
        continue
    job_id = urn_parts[3]
    print(job_id)
    id_list.append(job_id)

3912829462
3891685472
3879059525
3888118287
3849934900
3907380177
3834812789
3917125987
3891674091
3870144051


In [125]:
# Iterate through the job postings and collect their job IDs.
# The list is rebuilt from scratch so that re-running this cell does not append duplicates.
job_posting_ids = []
for job_posting in job_postings:
    base_card_div = job_posting.find("div", {"class": "base-card"})
    # find() returns None for list items without a job card; skip them instead of crashing
    if base_card_div is None:
        continue
    entity_urn = base_card_div.get("data-entity-urn")
    if not entity_urn:
        continue
    # The URN has the form "urn:li:jobPosting:<id>"; the ID is the fourth segment
    urn_parts = entity_urn.split(":")
    if len(urn_parts) < 4:
        continue
    job_id = urn_parts[3]
    print(job_id)
    job_posting_ids.append(job_id)

3912829462
3891685472
3879059525
3888118287
3849934900
3907380177
3834812789
3917125987
3891674091
3870144051


In [126]:
# Fetch the detail page of every collected job ID and extract the headline fields.
job_list = []

# Seconds to wait for a single detail request before giving up
request_timeout = 30

# Output key -> (HTML tag, class attribute string passed to find()) for each extracted field
job_post_selectors = {
    "job_title": ("h2", "top-card-layout__title font-sans text-lg papabear:text-xl font-bold leading-open text-color-text mb-0 topcard__title"),
    "company_name": ("a", "topcard__org-name-link topcard__flavor--black-link"),
    "time_posted": ("span", "posted-time-ago__text topcard__flavor--metadata"),
    "num_applicants": ("span", "num-applicants__caption topcard__flavor--metadata topcard__flavor--bullet"),
}

# Loop through the list of job IDs and request each posting
for job_id in id_list:
    # Construct the URL for each job using the job ID
    job_url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}"

    # Send a GET request to the job URL and parse the response
    job_response = requests.get(job_url, timeout=request_timeout)
    print(job_response.status_code)
    job_soup = BeautifulSoup(job_response.text, "html.parser")

    # Collect the job details; a field whose element is not found is stored as None
    job_post = {}
    for field_name, (tag_name, class_attr) in job_post_selectors.items():
        element = job_soup.find(tag_name, {"class": class_attr})
        # An explicit None check replaces the former bare `except:`, which hid every kind of error
        job_post[field_name] = element.text.strip() if element is not None else None

    # Append the job details to the job_list
    job_list.append(job_post)

200
200
200
200
200
200
200
200
200
200


In [127]:
# Fetch the detail page of every collected job ID and extract the headline fields.
job_postings_list = []

# Seconds to wait for a single detail request before giving up
detail_request_timeout = 30

# Output key -> (HTML tag, class attribute string passed to find()) for each extracted field.
# NOTE(review): the recorded output shows 'applicants_count' as None for every posting, so that
# selector may not match the returned markup — confirm against a real response.
job_detail_selectors = {
    "title": ("h2", "top-card-layout__title font-sans text-lg papabear:text-xl font-bold leading-open text-color-text mb-0 topcard__title"),
    "company": ("a", "topcard__org-name-link topcard__flavor--black-link"),
    "posted_time": ("span", "posted-time-ago__text topcard__flavor--metadata"),
    "applicants_count": ("span", "num-applicants__caption topcard__flavor--metadata topcard__flavor--bullet"),
}

# Loop through the list of job IDs and retrieve each job's details
for job_id in job_posting_ids:
    # Construct the URL for each job using the job ID
    job_url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}"

    # Send a GET request to the job URL and parse the response
    job_response = requests.get(job_url, timeout=detail_request_timeout)
    print(job_response.status_code)
    job_soup = BeautifulSoup(job_response.text, "html.parser")

    # Collect the job details; a field whose element is not found is stored as None
    job_details = {}
    for field_name, (tag_name, class_attr) in job_detail_selectors.items():
        element = job_soup.find(tag_name, {"class": class_attr})
        # An explicit None check replaces the former bare `except:`, which hid every kind of error
        job_details[field_name] = element.text.strip() if element is not None else None

    # Append the job details to the job_postings_list
    job_postings_list.append(job_details)

200
200
200
200
200
200
200
200
200
200


In [128]:
# Display the list of job-detail dictionaries (title, company, posted_time, applicants_count)
job_postings_list


[{'title': 'Junior Frontend Developer',
  'company': 'Vodafone',
  'posted_time': '1 week ago',
  'applicants_count': None},
 {'title': 'Front-end Developer',
  'company': 'Sarmad',
  'posted_time': '4 weeks ago',
  'applicants_count': None},
 {'title': 'Frontend Developer - Jeddah',
  'company': 'Lucidya | لوسيديا',
  'posted_time': '2 weeks ago',
  'applicants_count': None},
 {'title': 'Frontend Developer - Remotely',
  'company': 'Lucidya | لوسيديا',
  'posted_time': '2 weeks ago',
  'applicants_count': None},
 {'title': 'Frontend Engineer',
  'company': 'Bosta',
  'posted_time': '1 month ago',
  'applicants_count': None},
 {'title': 'Front-End Web Developer',
  'company': 'Fabulous',
  'posted_time': '1 month ago',
  'applicants_count': None},
 {'title': 'Junior/Fresh-graduate SW Support Engineer',
  'company': 'Bosta',
  'posted_time': '2 months ago',
  'applicants_count': None},
 {'title': 'Frontend Engineer',
  'company': 'Cabrio',
  'posted_time': '4 days ago',
  'applicants_co

In [129]:
# Keep only the postings whose title was successfully extracted
filtered_job_postings = list(
    filter(lambda posting: posting['title'] is not None, job_postings_list)
)

# Print the postings that survived the filter
print(filtered_job_postings)

[{'title': 'Junior Frontend Developer', 'company': 'Vodafone', 'posted_time': '1 week ago', 'applicants_count': None}, {'title': 'Front-end Developer', 'company': 'Sarmad', 'posted_time': '4 weeks ago', 'applicants_count': None}, {'title': 'Frontend Developer - Jeddah', 'company': 'Lucidya | لوسيديا', 'posted_time': '2 weeks ago', 'applicants_count': None}, {'title': 'Frontend Developer - Remotely', 'company': 'Lucidya | لوسيديا', 'posted_time': '2 weeks ago', 'applicants_count': None}, {'title': 'Frontend Engineer', 'company': 'Bosta', 'posted_time': '1 month ago', 'applicants_count': None}, {'title': 'Front-End Web Developer', 'company': 'Fabulous', 'posted_time': '1 month ago', 'applicants_count': None}, {'title': 'Junior/Fresh-graduate SW Support Engineer', 'company': 'Bosta', 'posted_time': '2 months ago', 'applicants_count': None}, {'title': 'Frontend Engineer', 'company': 'Cabrio', 'posted_time': '4 days ago', 'applicants_count': None}, {'title': 'Frontend Developer', 'company':

In [130]:
# Build display-ready copies of the filtered postings, replacing missing values with a
# user-friendly placeholder.

# Fields whose None value is replaced by the placeholder string
placeholder_fields = ("company", "posted_time", "applicants_count")

modified_job_postings = []
for job in filtered_job_postings:
    # Entries without a title are skipped
    if job['title'] is None:
        continue
    # Work on a copy: the dictionaries are shared with filtered_job_postings and
    # job_postings_list, and mutating them in place would silently change those lists too
    cleaned_job = dict(job)
    for field_name in placeholder_fields:
        if cleaned_job[field_name] is None:
            cleaned_job[field_name] = "Not specified"
    modified_job_postings.append(cleaned_job)

# Display the modified job postings list
print(modified_job_postings)

[{'title': 'Junior Frontend Developer', 'company': 'Vodafone', 'posted_time': '1 week ago', 'applicants_count': 'Not specified'}, {'title': 'Front-end Developer', 'company': 'Sarmad', 'posted_time': '4 weeks ago', 'applicants_count': 'Not specified'}, {'title': 'Frontend Developer - Jeddah', 'company': 'Lucidya | لوسيديا', 'posted_time': '2 weeks ago', 'applicants_count': 'Not specified'}, {'title': 'Frontend Developer - Remotely', 'company': 'Lucidya | لوسيديا', 'posted_time': '2 weeks ago', 'applicants_count': 'Not specified'}, {'title': 'Frontend Engineer', 'company': 'Bosta', 'posted_time': '1 month ago', 'applicants_count': 'Not specified'}, {'title': 'Front-End Web Developer', 'company': 'Fabulous', 'posted_time': '1 month ago', 'applicants_count': 'Not specified'}, {'title': 'Junior/Fresh-graduate SW Support Engineer', 'company': 'Bosta', 'posted_time': '2 months ago', 'applicants_count': 'Not specified'}, {'title': 'Frontend Engineer', 'company': 'Cabrio', 'posted_time': '4 day

In [131]:
# Turn the cleaned list of posting dictionaries into a table (one row per posting)
jobs_dataframe = pd.DataFrame(data=modified_job_postings)

# Leave the DataFrame as the last expression so the notebook renders it
jobs_dataframe

Unnamed: 0,title,company,posted_time,applicants_count
0,Junior Frontend Developer,Vodafone,1 week ago,Not specified
1,Front-end Developer,Sarmad,4 weeks ago,Not specified
2,Frontend Developer - Jeddah,Lucidya | لوسيديا,2 weeks ago,Not specified
3,Frontend Developer - Remotely,Lucidya | لوسيديا,2 weeks ago,Not specified
4,Frontend Engineer,Bosta,1 month ago,Not specified
5,Front-End Web Developer,Fabulous,1 month ago,Not specified
6,Junior/Fresh-graduate SW Support Engineer,Bosta,2 months ago,Not specified
7,Frontend Engineer,Cabrio,4 days ago,Not specified
8,Frontend Developer,Dsquares,1 month ago,Not specified
9,Front Office Agent - Egyptians Only,Kempinski Hotels,1 month ago,Not specified


In [132]:
# Write the job postings table to a CSV file, leaving out the row index
jobs_dataframe.to_csv(path_or_buf='linkedin_job_postings.csv', index=False)

In [133]:
import tkinter as tk
import pandas as pd
import os

def scrape_job_postings():
    """Button callback: write job postings to a CSV file and open that file.

    Reads the job title and location from the module-level Tk entry widgets.
    The scraping step is still a placeholder, so the CSV currently contains
    hard-coded demonstration rows rather than live search results.
    """
    # Imported locally so this callback is self-contained
    import subprocess
    import sys

    # Get the job title and location entered by the user
    # (not used yet; reserved for the real scraping logic)
    job_title = job_title_entry.get()
    location = location_entry.get()

    # Add your web scraping logic here using the job title and location
    # For demonstration purposes, create a DataFrame with dummy data
    job_data = {
        'Job Title': ['Software Engineer', 'Data Analyst', 'Product Manager'],
        'Company': ['Company A', 'Company B', 'Company C'],
        'Location': ['Location A', 'Location B', 'Location C']
    }
    jobs_df = pd.DataFrame(job_data)

    # Save the job data to a CSV file
    # NOTE(review): this is the same file name the notebook uses for the real scrape results,
    # so running the GUI overwrites them with the dummy rows — confirm this is intended.
    csv_file_path = 'linkedin_job_postings.csv'
    jobs_df.to_csv(csv_file_path, index=False)

    # Open the CSV with the platform's default application. Passing the bare file name to
    # os.system() only opens it on Windows; other shells try to execute the file as a command.
    try:
        if sys.platform.startswith("win"):
            os.startfile(csv_file_path)
        elif sys.platform == "darwin":
            subprocess.run(["open", csv_file_path], check=False)
        else:
            subprocess.run(["xdg-open", csv_file_path], check=False)
    except OSError as error:
        # Failing to launch a viewer must not break the GUI; the file has already been written
        print(f"Saved {csv_file_path}, but could not open it automatically: {error}")

# Build the main application window
root = tk.Tk()
root.title("Job Search")

# Spacing shared by the label/entry rows
field_padding = {"padx": 10, "pady": 5}

# Row 0: job title prompt and its text box
job_title_label = tk.Label(master=root, text="Job Title:")
job_title_label.grid(row=0, column=0, **field_padding)
job_title_entry = tk.Entry(master=root)
job_title_entry.grid(row=0, column=1, **field_padding)

# Row 1: location prompt and its text box
location_label = tk.Label(master=root, text="Location:")
location_label.grid(row=1, column=0, **field_padding)
location_entry = tk.Entry(master=root)
location_entry.grid(row=1, column=1, **field_padding)

# Row 2: button that runs scrape_job_postings when clicked, spanning both columns
scrape_button = tk.Button(
    master=root,
    text="Scrape Job Postings",
    command=scrape_job_postings,
)
scrape_button.grid(row=2, columnspan=2, padx=10, pady=10)

# Hand control to Tk; this call blocks until the window is closed
root.mainloop()
