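Things you'll need: 
1) Jupyter Notebook  
2) Chrome Browser  
3) ChromeDriver (the Chrome WebDriver executable matching your Chrome version)  
4) Selenium Package  
5) Pandas Package  
6) openpyxl Package (used by Pandas to write the .xlsx report)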

In [3]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [5]:
# Preparation (done by hand, outside this notebook):
#   1. Open LinkedIn in a private/incognito Chrome window so the results are not tailored to your account.
#   2. Run the job search you care about and copy the resulting address into `url` below.
# The search used here is:
#   "Data+analyst NOT Software NOT Scientist NOT Senior NOT Sr NOT Engineer"
# i.e. Data Analyst postings, with the NOT terms excluding titles from unrelated fields (Software, Scientist,
# Engineer) and senior-level roles (Senior, Sr).

# Start a Chrome session driven by the chromedriver executable at this path (edit it for your machine).
# NOTE(review): passing the driver path positionally is the Selenium 3 calling style; newer Selenium 4
# releases expect a Service object instead - confirm against the Selenium version you have installed.
chromedriver_path = "C:/filepathtochromedriver/chromedriver.exe"
wd = webdriver.Chrome(chromedriver_path)

# Point the freshly opened Chrome window at the search results page.
url = 'https://www.linkedin.com/jobs/search/?f_E=2%2C3&f_JT=F&f_SB2=3&f_TPR=r2592000&f_WRA=true&geoId=103644278&keywords=data%2Banalyst%20NOT%20software%20NOT%20scientist%20NOT%20senior%20NOT%20Sr%20NOT%20engineer&location=United%20States&locationId=&sortBy=R&position=1&pageNum=0'
wd.get(url)

In [6]:
# Read how many jobs the search reports; the scrolling cell uses this number to decide how far to scroll.
# The count is taken from the <span> directly inside the page's <h1> heading.
# find_element(By.CSS_SELECTOR, ...) is used because the find_element_by_* helpers no longer exist in
# current Selenium releases, while this form works on old and new versions alike.
count_text = wd.find_element(By.CSS_SELECTOR, 'h1>span').get_attribute('innerText')

# The text may carry a thousands separator and a trailing plus sign (e.g. "1,000+"); drop both so the
# value can be converted to an integer.
no_of_jobs = int(str(count_text).replace('+', '').replace(',', ''))

# Last expression of the cell: the notebook displays the parsed count.
no_of_jobs

99

In [7]:
# Scroll to the bottom of the results page repeatedly so that every result is loaded before scraping.
# One scroll is performed per 25 reported jobs (presumably the number of results LinkedIn loads per
# batch - adjust if that changes).
# LinkedIn sometimes shows a button that must be clicked before more results load, so after each scroll
# we try to click it in case it has appeared.
# Depending on the search this can take from a few minutes up to an hour: each pass waits 5 seconds.
load_more_xpath = '/html/body/div[1]/div/main/section[2]/button'

for _ in range(no_of_jobs // 25):
    wd.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    try:
        wd.find_element(By.XPATH, load_more_xpath).click()
    except WebDriverException:
        # Button missing or not clickable on this pass - that is expected, keep scrolling.
        # Only Selenium errors are ignored, so Ctrl+C / "interrupt kernel" and genuine coding
        # mistakes are no longer silently swallowed.
        pass
    # Give the page time to load the next batch of results before scrolling again.
    time.sleep(5)

In [8]:
# Collect the job cards that are actually present on the page after scrolling.
# This can differ from the count reported in the heading, so the scrape works from this real list.
# By-based locators are used because the find_element(s)_by_* helpers no longer exist in current
# Selenium releases, while this form works on old and new versions alike.
job_lists = wd.find_element(By.CLASS_NAME, 'jobs-search__results-list')
jobs = job_lists.find_elements(By.TAG_NAME, 'li')

# Last expression of the cell: the notebook displays how many job cards were found.
len(jobs)

97

In [9]:
# Walk through every job card and pull out the fields we want in the report.
# More could be scraped from each card, but these six fields are enough for now.


def _job_attr(card, css, attribute):
    """Return `attribute` of the first element inside `card` that matches the CSS selector `css`.

    Uses find_element(By.CSS_SELECTOR, ...) because the find_element_by_* helpers no longer exist
    in current Selenium releases, while this form works on old and new versions alike.
    """
    return card.find_element(By.CSS_SELECTOR, css).get_attribute(attribute)


# One list per report column; position k in every list belongs to the k-th job card.
job_id = []
job_title = []
company_name = []
location = []
date = []
job_link = []

for job in jobs:
    # The id is an attribute of the card (<li>) itself; everything else is read from child elements.
    job_id.append(job.get_attribute('data-id'))
    job_title.append(_job_attr(job, 'h3', 'innerText'))
    company_name.append(_job_attr(job, 'h4', 'innerText'))
    location.append(_job_attr(job, 'div>div>span', 'innerText'))
    # The <time> element's `datetime` attribute is stored rather than its visible text.
    date.append(_job_attr(job, 'div>div>time', 'datetime'))
    job_link.append(_job_attr(job, 'a', 'href'))

In [10]:
# With scraping finished, gather the per-field lists into a single pandas DataFrame.
# The dictionary keys become the report's column headers, in this order.
report_columns = {
    'ID': job_id,
    'Date': date,
    'Company': company_name,
    'Title': job_title,
    'Location': location,
    'Link': job_link,
}
job_data = pd.DataFrame(report_columns)

In [11]:
# Drop postings whose titles show they are not relevant to us. This works even if no filtering was
# done in the original LinkedIn search.
#
# Titles are dropped when they START with one of these words:
#   'Senior' / 'Sr' - senior positions are not a fit when just starting out in the field
#   'Software'      - software roles are outside the kind of job being searched for
for prefix in ('Senior', 'Sr', 'Software'):
    job_data = job_data[~job_data['Title'].str.startswith(prefix)]

# Data Scientist roles need more advanced statistics, so drop any title that contains 'Scientist'
# anywhere, not just at the start.
job_data = job_data[~job_data['Title'].str.contains('Scientist')]

# Other columns (location, company, date, ...) can be filtered with the same pattern.

In [None]:
# Put the most recent postings first: applying early means being among the first applicants and
# looking at jobs that are less likely to be filled already.
job_data = job_data.sort_values('Date', ascending=False)

In [None]:
# Sanity check: display the first five rows to confirm the table looks right.
job_data.head(n=5)

In [None]:
# Write the report as an Excel workbook, without the DataFrame index column.
# The path is relative, so the file is created in the current working directory
# (normally the folder that holds this python/notebook file).
report_path = 'LinkedIn Jobs Report.xlsx'
job_data.to_excel(report_path, index=False)