# Scrape LinkedIn's Job Postings

In [186]:
import sys
import os
import re
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pyautogui
import time

### Load username and password from your .env file

In [187]:
# Load environment variables
load_dotenv()
linkedin_user=os.environ['LINKEDIN_USER']
linkedin_pass=os.environ['LINKEDIN_PASS']

In [188]:
# Open browser
browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
browser.get("https://www.linkedin.com")



Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Driver [/Users/WonderWolff/.wdm/drivers/chromedriver/mac64/99.0.4844.51/chromedriver] found in cache


### Login

In [189]:
# Log into LinkedIn.com
username = browser.find_element(By.ID, "session_key")
username.send_keys(linkedin_user)
password = browser.find_element(By.ID, "session_password")
password.send_keys(linkedin_pass)


In [190]:
# Login button
login_button = browser.find_element(By.CLASS_NAME, "sign-in-form__submit-button")
login_button.click()

### Begin looking for jobs

In [191]:
# Set search criteria
position = "data%20scientist"
location = "united%20states"

In [192]:
# Navigate to /jobs/
browser.get(f"https://www.linkedin.com/jobs/search/?keywords={position}&location={location}")

In [193]:
# Get number of job results
num_jobs_word = browser.find_element(By.CSS_SELECTOR, 'div>small').get_attribute('innerText')
num_jobs = int(num_jobs_word.strip(" results").replace(",",""))
num_jobs

145585

In [194]:
%%time
# Scroll through all job search results
i = 1
while i < 25:
    element = browser.find_element(By.CLASS_NAME, "global-footer-compact")
    browser.execute_script("arguments[0].scrollIntoView();", element)
    time.sleep(0.1)
    
    job_lists = browser.find_element(By.CLASS_NAME, "jobs-search-results__list")
    jobs = job_lists.find_elements(By.CLASS_NAME, 'job-card-list__title')
    every_other_5_list = jobs[::i]
    for element in every_other_5_list:
        browser.execute_script("arguments[0].scrollIntoView();", element)
        time.sleep(0.1)
    
    i += 4


CPU times: user 71 ms, sys: 11.4 ms, total: 82.4 ms
Wall time: 4.02 s


In [195]:
# Get company name
company_lists = browser.find_element(By.CLASS_NAME, "jobs-search-results__list")
companies = company_lists.find_elements(By.CLASS_NAME, 'job-card-container__company-name')
company_names=[]
for i in companies:
    company_names.append(i.text)
print(company_names, "\n")
print(len(company_names))

['Roblox', 'Western Governors University', 'Live Nation Entertainment', 'Carvana', 'Microsoft', 'Live Nation Entertainment', 'The Walt Disney Company', 'Criteria Corp', 'ManTech', 'Niantic, Inc.', 'Meta', 'Deloitte', 'Deloitte', 'Microsoft', 'Zoom', 'Autodesk', 'Ropes & Gray LLP', 'Apple', 'Medtronic', 'Deloitte', 'Southern California Edison (SCE)', 'Khan Academy', 'National Research Group', 'Paysafe Group', 'Google'] 

25


In [196]:
# Get job titles
job_lists = browser.find_element(By.CLASS_NAME, "jobs-search-results__list")
jobs = job_lists.find_elements(By.CLASS_NAME, 'job-card-list__title')
job_title=[]
for i in jobs:
    job_title.append(i.text)
print(job_title, "\n")
print(len(job_title))

['Senior Data Scientist - Developer Monetization', 'Data Scientist-2', 'Data Scientist - Marketing', 'Senior Data Scientist, NLP / Conversational AI', 'Data Scientist', 'Data Scientist - Recommendations', 'Senior Data Scientist', 'Data Scientist', 'Senior Data Scientist - Cloud', 'Data Scientist, Game Analytics', 'Data Scientist, Product Analytics - VR Devices (FRL)', 'NLP Data Scientist - Python / R - Top Secret', 'AI Data Scientist - TS Required', 'Data & Applied Scientist Manager', 'Senior Data Scientist, Product Intelligence', 'Senior Data Scientist / Machine Learning Engineer, eCommerce', 'Data Scientist', 'Sr. Data Scientist for Product Marketing and Customer Analytics', 'Senior Data Scientist', 'NLP Data Scientist - Python / R - Top Secret', 'Data Scientist Advisor [HYBRID]', 'Senior Data Scientist/Analyst, District Success', 'Data Scientist', 'Data Scientist, Operations', 'Data Scientist, Revenue Acceleration, Google Cloud'] 

25


In [197]:
# Get location
location_lists = browser.find_element(By.CLASS_NAME, "jobs-search-results__list")
locations = location_lists.find_elements(By.CLASS_NAME, 'artdeco-entity-lockup__caption')
location=[]
for i in locations:
    location.append(element.text)
print(location, "\n")
print(len(location))

['Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst, District Success', 'Senior Data Scientist/Analyst

In [198]:
# Get job description
description_lists = browser.find_element(By.CLASS_NAME, "jobs-search-results__list")
job_descriptions = description_lists.find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')
description=[]
for i in job_descriptions:
    i.click()
    element = browser.find_element(By.CLASS_NAME, 'jobs-description__content')
    description.append(element.text)
print(description, "\n")
print(len(description))


25


In [199]:
# # Repeat for all 40 pages
# page = 2
# for i in range(2, 5):
#     page = i
#     browser.get(f'https://www.linkedin.com/jobs/search/?keywords={position}&location={location}&start={page}')
#     time.sleep(1)