In [1]:
import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup

In [2]:
# set up a controllable Firefox instance
# in headless mode
service = Service()
options = webdriver.FirefoxOptions()
options.add_argument("--headless=new")
driver = webdriver.Firefox(
    service=service,
    options=options
)

# define query, location and number of pages to search for
query = '("data scientist" OR "data engineer" OR "data analyst")'
location = ''
pages = 1

# dataframe in which the data will be stored
cols = ['role', 'company_name', 'company_location', 'company_rating', 'salary', 'job_type']
df = pd.DataFrame(columns=cols)

In [3]:
for page in range(0, pages):
    url = f'https://www.indeed.com/jobs?q={query}&l={location}&start={page*10}'
    driver.get(url)

    jobs = driver.find_elements(By.CSS_SELECTOR, ".cardOutline")

    for job in jobs:

        # click on job and wait until it loads its information on the right side panel
        job.click()
        try:
            title_element = WebDriverWait(driver, 5) \
                .until(EC.presence_of_element_located((By.CSS_SELECTOR, ".jobsearch-HeaderContainer")))
        except NoSuchElementException:
            continue
        
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        role = soup.find('h2', {'class': 'jobsearch-JobInfoHeader-title'}).find('span').text
        company_info = soup.find('div', {'data-testid': 'jobsearch-CompanyInfoContainer'})
        company_name = company_info.find('div', {'data-testid': 'inlineHeader-companyName'}).find('a').text
        company_location = company_info.find('div', {'data-testid': 'inlineHeader-companyLocation'}).find('div').text
        try:
            company_rating = company_info.find('div', {'id': 'companyRatings'})['aria-label']
        except:
            company_rating = ''

        salary_and_type = soup.find('div', {'id': 'salaryInfoAndJobType'})
        if salary_and_type is not None:
            salary_and_type = salary_and_type.find_all('span')
            if len(salary_and_type) == 2:
                salary = salary_and_type[0].text
                job_type = salary_and_type[1].text
            else:
                if '$' in salary_and_type[0].text:
                    salary = salary_and_type[0].text
                    job_type = ''
                else:
                    salary = ''
                    job_type = salary_and_type[0].text      
        else:
            salary = ''
            job_type = ''

        # append to dataframe
        df.loc[len(df)] = (role, company_name, company_location, company_rating, salary, job_type)

driver.quit()
df

Unnamed: 0,role,company_name,company_location,company_rating,salary,job_type
0,Healthcare Data Analyst - REMOTE - job post,"Perficient, Inc",United States•Remote,3.4 out of 5 stars,,Full-time
1,(5) Data Analyst / Data Scientist - job post,TRA'BIAN ENTERPRISES,Remote,,,Contract
2,Data Engineer - job post,VSP Global,Remote,3.7 out of 5 stars,"$56,000 - $97,000 a year",- Full-time
3,Data Engineer (Remote) - job post,CamoAg,"Palatine, IL•Remote",,,Full-time
4,Machine Learning Operations Engineer / Data Sc...,National Security Agency,"Fort Meade, MD",4.1 out of 5 stars,"$81,233 - $183,500 a year",- Full-time
5,Data Engineer - job post,PHARMALOGIC HOLDINGS,United States•Remote,,,Full-time
6,"Sr. Data Scientist, Data Platform - job post",Pinterest,"San Francisco, CA•Remote",3.8 out of 5 stars,,
7,Health Data Scientist - job post,Booz Allen Hamilton,"Bethesda, MD•Remote",3.9 out of 5 stars,"$93,300 - $212,000 a year",
8,Data Analyst - Carbon and Renewable Energy - j...,Apple,"Cupertino, CA",4.1 out of 5 stars,,Full-time
9,2024 Business Intelligence Analyst & Data Scie...,Applied Materials,"9700 E US 290 HWY SVRD WB, Austin, TX 78724",3.9 out of 5 stars,"$76,000 - $104,500 a year",- Full-time
