In [None]:
!pip install requests
!pip install beautifulsoup4
!pip install pandas
!pip install selenium

In [None]:
import requests
from bs4 import BeautifulSoup

# Understanding the LinkedIn Jobs URL Structure

https://www.linkedin.com/jobs/search/?keywords=python%20developer&location=San%20Francisco%20Bay%20Area&geoId=90000080&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0


This URL contains several parameters that we can use to customize our search, such as:

keywords: The job title or skills you're searching for
location: The geographic location where you want to search for jobs
geoId: The unique identifier for the specified location
position: The position of the job posting in the search results
pageNum: The page number of the search results

By modifying these parameters, we can scrape job postings for different job titles, locations, and pages.

# Handling Infinite Scrolling with Selenium
One of the challenges when scraping LinkedIn job postings is that the website uses infinite scrolling. This means that as you scroll down the page, more job postings are loaded dynamically without changing the URL. To handle this, we'll be using Selenium, a web automation tool that allows us to control a web browser programmatically.

Import the necessary modules

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

Initialize the WebDriver

In [None]:
# Start a Chrome browser session for Selenium to drive; the resulting
# `driver` object is reused by the navigation, scrolling and lookup cells below.
driver = webdriver.Chrome()

# Navigating to the LinkedIn Jobs page: 
Use the driver.get() method to navigate to the LinkedIn Jobs page with your desired search parameters:

In [None]:
# Open the job search results page in the Selenium-controlled browser.
# The URL is written as adjacent string literals (one query parameter per
# line); Python joins them into a single string at compile time.
driver.get(
    "https://www.linkedin.com/jobs/search/"
    "?currentJobId=3882755006"
    "&geoId=115884833"
    "&keywords=python%20developer"
    "&location=Gurugram%2C%20Haryana%2C%20India"
    "&origin=JOB_SEARCH_PAGE_LOCATION_AUTOCOMPLETE"
    "&refresh=true"
)

# Scrolling to load more job postings:
Use a loop to scroll down the page and load more job postings:

In [None]:
# JavaScript snippets executed in the page: one reports the document height,
# the other scrolls the window to the bottom of the document.
read_page_height = "return document.body.scrollHeight"
jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

# Keep scrolling for as long as each scroll makes the document taller.
last_height = driver.execute_script(read_page_height)
page_grew = True
while page_grew:
    driver.execute_script(jump_to_bottom)
    time.sleep(2)  # fixed pause before re-measuring the page
    new_height = driver.execute_script(read_page_height)
    # An unchanged height ends the loop.
    page_grew = new_height != last_height
    last_height = new_height

This code scrolls down the page until no more job postings are loaded.

# Extracting the job data 

Extracting Job Details from the Sidebar

The job data can then be extracted using BeautifulSoup or other parsing techniques.
When you click on a job posting on LinkedIn, additional details about the job are displayed in a sidebar. To extract this information, we will need to click on each job posting and scrape the data from the sidebar.

Find all job posting elements:
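Use Selenium's find_elements() method with a CSS selector (find_elements_by_css_selector() in Selenium releases before 4.3, where that helper still exists) to find all the job posting elements on the page: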



In [None]:
# `find_elements_by_css_selector` was removed in Selenium 4.3; the supported
# call is `find_elements` with an explicit `By` locator strategy.
from selenium.webdriver.common.by import By

# Collect every element matching the job-card selector as a list of
# WebElements (an empty list if nothing matches).
job_listings = driver.find_elements(By.CSS_SELECTOR, "div.job-card-container")

Iterating through each job posting: Loop through each job posting element and click on it to open the sidebar:

In [None]:
# Visit each job card found by Selenium, one at a time.
for job_listing in job_listings:
    # Clicking the card opens its details in the sidebar.
    job_listing.click()
    time.sleep(2)  # Fixed pause to give the sidebar time to load
    # Placeholder: extract job details from the sidebar here

# Extract job details:
 Within the loop, we can use BeautifulSoup or other parsing techniques to extract the desired job details from the sidebar, such as job title, company name, location, salary range, and job description.

# Implementing the LinkedIn Job Scraper
Now that we've covered the necessary setup and concepts, let's dive into the implementation of our LinkedIn job scraper. We'll be using the `requests` library to send HTTP requests and the `BeautifulSoup` library to parse the HTML responses.

# Import the required libraries:
import requests
from bs4 import BeautifulSoup

Both were already imported above, so there is no need to import them again.

Define the base URL: We'll start by defining the base URL for the LinkedIn Jobs page, which includes the search parameters we're interested in:

In [None]:
# Search URL used by the requests-based cells. It is written as adjacent
# string literals (one query parameter per line) that Python concatenates
# into a single string.
base_url = (
    "https://www.linkedin.com/jobs/search/"
    "?currentJobId=3882755006"
    "&geoId=115884833"
    "&keywords=python%20developer"
    "&location=Gurugram%2C%20Haryana%2C%20India"
    "&origin=JOB_SEARCH_PAGE_LOCATION_AUTOCOMPLETE"
    "&refresh=true"
)

# Send the initial request:
 We'll send an initial request to the base URL to retrieve the first page of job postings:

In [None]:
# Fetch the first page of results. requests applies no timeout by default,
# so an unresponsive server would block this cell forever; cap the wait.
response = requests.get(base_url, timeout=10)
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(response.content, "html.parser")

# Extract job data: 
Use BeautifulSoup to find and extract the relevant job data from the HTML response. You can use CSS selectors or other techniques to locate the desired elements:

In [None]:
def _text_of(card, tag, css_class):
    """Return the stripped text of the first matching child of *card*, or None.

    ``find`` returns None when no element matches, so reading ``.text`` from
    its result directly raises AttributeError for a card that lacks the field.
    """
    node = card.find(tag, class_=css_class)
    return node.text.strip() if node is not None else None


# Every job card in the parsed page.
job_listings = soup.find_all("div", class_="job-card-container")
for job_listing in job_listings:
    # Each field is None when the card does not contain the expected element.
    job_title = _text_of(job_listing, "h3", "job-card-container__title")
    company_name = _text_of(job_listing, "h4", "job-card-container__company-name")
    location = _text_of(job_listing, "span", "job-card-container__location")
    # Extract other job details as needed

# Paginate through the results:
To scrape job postings from multiple pages, you'll need to modify the pageNum parameter in the URL and send additional requests:

In [None]:
# Walk the result pages one by one until a page contains no job cards.
page_num = 0
while True:
    url = f"{base_url}&pageNum={page_num}"
    # requests has no default timeout; cap the wait so a stalled request
    # cannot hang the loop indefinitely.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")
    job_listings = soup.find_all("div", class_="job-card-container")
    if not job_listings:
        break  # No more job postings
    for job_listing in job_listings:
        # Extract job data
        # ...
        pass
    # Advance exactly once per page. Incrementing inside the inner loop would
    # bump the counter once per listing and skip pages.
    page_num += 1


This code sends a request to the next page by incrementing the pageNum parameter until no more job postings are found.

# Running the Scraper and Saving Data

After implementing the scraper, you can run it and save the scraped data to a file or database for further analysis or processing.

# Create a list or dictionary to store the scraped data

In [None]:
# Accumulator for scraped postings: the cell below appends one dict per job
# listing, and the export cell turns the list into a DataFrame.
job_data = []

# Append the scraped data to the list or dictionary:

In [None]:
def _text_of(card, tag, css_class):
    """Return the stripped text of the first matching child of *card*, or None.

    ``find`` returns None when no element matches, so reading ``.text`` from
    its result directly raises AttributeError for a card that lacks the field.
    """
    node = card.find(tag, class_=css_class)
    return node.text.strip() if node is not None else None


# NOTE(review): `job_listings` is whatever the most recently executed cell
# assigned. The pagination loop only exits once it is an empty list, so
# confirm this cell runs against the page(s) you intend to record.
for job_listing in job_listings:
    # One record per job card; a field is None when the card lacks that element.
    job_data.append({
        "job_title": _text_of(job_listing, "h3", "job-card-container__title"),
        "company_name": _text_of(job_listing, "h4", "job-card-container__company-name"),
        "location": _text_of(job_listing, "span", "job-card-container__location"),
        # Add other job details as needed
    })

# Saving the data to a file or database: 
You can use libraries like pandas or csv to save the data to a CSV file, or use a database library like sqlite3 or pymongo to store the data in a database.

In [None]:
import pandas as pd

# Turn the list of per-job dicts into a table (one row per job, one column
# per dict key) and write it to disk without the DataFrame's index column.
df = pd.DataFrame(data=job_data)
df.to_csv(path_or_buf="linkedin_jobs.csv", index=False)