## Importing Libraries

In [142]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

## Setting Up Driver

In [146]:
driver = webdriver.Chrome()

In [147]:
driver.get("https://www.linkedin.com/jobs/search/?currentJobId=4022699911&distance=25.0&geoId=101022442&keywords=jobs&origin=HISTORY")

In [148]:
driver.maximize_window()

Now, checking if a popup appears. If the popup does not appear, you need not run this code

In [149]:
try:
    wait = WebDriverWait(driver, 10)  # Adjust the timeout if necessary
    close_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="base-contextual-sign-in-modal"]/div/section/button')))
    close_button.click()
    print("Popup closed successfully.")
except Exception as e:
    print(f"An error occurred: {e}")

Popup closed successfully.


## Scraping Code

In [150]:
# Store job data in a list of dictionaries
jobs_data = []

# Function to scrape jobs from the current page
def scrape_jobs():
    ul_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="main-content"]/section[2]/ul')))
    job_listings = ul_element.find_elements(By.TAG_NAME, 'li')
    
    for job in job_listings:
        job_data = {}

        try:
            job_data['Job Title'] = job.find_element(By.XPATH, './/h3').text 
        except:
            job_data['Job Title'] = ""

        try:
            job_data['Company'] = job.find_element(By.XPATH, './/h4').text
        except:
            job_data['Company'] = ""

        try:
            job_data['Location'] = job.find_element(By.XPATH, './/span[contains(@class, "location")]').text
        except:
            job_data['Location'] = ""

        try:
            job_data['Job Status'] = job.find_element(By.XPATH, './/div/div[2]/div/div/span').text
        except:
            job_data['Job Status'] = ""

        try:
            job_data['Posting Date'] = job.find_element(By.XPATH, './/div/div[2]/div/time').get_attribute('datetime')
        except:
            job_data['Posting Date'] = ""

        # Add the job data to the list
        jobs_data.append(job_data)

# Load all jobs by scrolling down
try:
    last_job_count = 0  # Initialize last job count
    while True:
        # Scrape jobs on the current page
        scrape_jobs()

        # Log total jobs
        current_job_count = len(jobs_data)
        print(f"Total jobs scraped: {current_job_count}")

        # Check if total jobs reach the limit or if new jobs are being loaded
        if current_job_count >= 3184:
            print("Reached job limit of 3184. Exiting...")
            break  # Exit the loop if the job limit is reached

        if current_job_count == last_job_count:
            print("No new jobs found. Exiting...")
            break  # Exit the loop if no new jobs are found

        last_job_count = current_job_count  # Update the last job count

        # Scroll down by a small amount
        driver.execute_script("window.scrollBy(0, 500);")  # Scroll down 500 pixels
        time.sleep(2)  # Wait for new jobs to load
except Exception as e:
    print(f"An error occurred: {e}")
    
df = pd.DataFrame(jobs_data)

Total jobs scraped: 60
Total jobs scraped: 120
Total jobs scraped: 180
Total jobs scraped: 240
Total jobs scraped: 300
Total jobs scraped: 360
Total jobs scraped: 420
Total jobs scraped: 480
Total jobs scraped: 540
Total jobs scraped: 600
Total jobs scraped: 660
Total jobs scraped: 720
Total jobs scraped: 780
Total jobs scraped: 840
Total jobs scraped: 900
Total jobs scraped: 960
Total jobs scraped: 1020
Total jobs scraped: 1090
Total jobs scraped: 1160
Total jobs scraped: 1230
Total jobs scraped: 1310
Total jobs scraped: 1390
Total jobs scraped: 1470
Total jobs scraped: 1560
Total jobs scraped: 1650
Total jobs scraped: 1740
Total jobs scraped: 1840
Total jobs scraped: 1940
Total jobs scraped: 2040
Total jobs scraped: 2140
Total jobs scraped: 2250
Total jobs scraped: 2360
Total jobs scraped: 2470
Total jobs scraped: 2580
Total jobs scraped: 2690
Total jobs scraped: 2800
Total jobs scraped: 2910
Total jobs scraped: 3020
Total jobs scraped: 3130
Total jobs scraped: 3240
Reached job limit

In [None]:
driver.quit()

## Storing into a csv file

In [153]:
df.to_csv('job_listings.csv', index=False)

Job data saved to 'job_listings.csv'. Total jobs scraped: 3240
