# Indeed Web Scraper

This Notebook is for educational purposes. Indeed's Terms of Service does not allow "use of any automated system or software, whether operated by a third party or otherwise, to extract data from the Site (such as screen scraping or crawling)"

Use of this notebook would be a TOS violation

In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import copy

In [2]:
def get_url(job_position):
    """Function to search a position on the Indeed webpage"""
    template = 'https://www.indeed.com/jobs?q={}&l'
    url = template.format(job_position)

    return url

In [3]:
def response_func(url):
    """Function returns a get request for html of an Indeed webpage"""
    response = requests.get(url, headers={'User-Agent': 'test'})

    return response

In [4]:
def get_cards(response):
    """ Indeed search results are divided up by cards.
        This function returns the cards get_job_cards for each job title'
    """
    soup = BeautifulSoup(response.text, 'html.parser')
    cards = soup.find_all('td', {'class': 'resultContent'})

    return cards

In [5]:
def get_job_title(card):
    """Function returns the job title"""
    job_title = card.find('h2', {'class': 'jobTitle'}).text
    if 'new' in job_title:
        job_title = job_title.replace('new', "")

    return job_title

In [6]:
def get_job_url(card):
    """Function returns the job posting url"""
    try:
        job_url = 'https://www.indeed.com' + card.h2.a.get('href')
        return job_url
    except AttributeError:
        return 'https://www.indeed.com/404'

In [7]:
def get_company_name(card):
    """Function returns the company name who posted the job"""
    company = card.find('span', 'companyName').text
    return company

In [8]:
def get_location(card):
    """Function returns the job location"""
    location = card.find('div', 'companyLocation').text

    return location

In [9]:
def get_job_description(job_url):
    response = requests.get(job_url, headers={'User-Agent': 'test'})
    soup = BeautifulSoup(response.text, 'html.parser')
    try:
        description = soup.find('div', 'jobsearch-jobDescriptionText').text.strip()
        return description
    except AttributeError:
        return "No job description"

In [10]:
def get_job_records(job_position):
    """
    Function returns an array of the following
    * Company Name
    * Job Title
    * Location
    * Descriptions
    * Job Url
    """

    # Retrieves the data on the first page of the search results
    records = []
    url = get_url(job_position)
    response = response_func(url)
    while True:
        cards = get_cards(response)
        for card in cards:
          job_title = get_job_title(card)
          company_name = get_company_name(card)
          job_location = get_location(card)
          job_url = get_job_url(card)
          job_description = get_job_description(job_url)
          today = datetime.today().strftime('%Y-%m-%d')
          records.append([today,
                          job_title,
                          company_name,
                          job_location,
                          job_description,
                          job_url])
    # Goes through each page of the search results
        try:
          response = requests.get(url)
          soup = BeautifulSoup(response.text, 'html.parser')
          url = 'https://www.indeed.com' + soup.find(
                            'a',
                            {'aria=label': 'Next'}
                            ).get('href')
        except AttributeError:
          break
    return records

In [11]:
search_terms = [
    "data scientist",
    "machine learning engineer",
    "data engineer",
    "web developer",
    "frontend developer",
    "backend developer",
    "devops",
    "software engineer",
]

In [12]:
for position in search_terms:
  try:
    scraped_list = get_job_records(position)
    indeed_jobs = pd.DataFrame(scraped_list,
                              columns=[ 'DateOfScrape',
                                        'JobTitle',
                                        'Company',
                                        'Location',
                                        'Description',
                                        'JobUrl'])
    saving_jobs = copy.deepcopy(indeed_jobs)
    saving_jobs = saving_jobs.drop_duplicates(['Description']).reset_index(drop=True)
    saving_jobs = saving_jobs[saving_jobs['Description'].notna()]
    saving_jobs['Description'] = saving_jobs['Description'].apply(lambda x: x.replace('\n', ' '))
    if len(saving_jobs)==1:
      continue

    saving_jobs.set_index(['DateOfScrape'], inplace=True)
    position = position.replace(' ', '_')
    display(saving_jobs)
    saving_jobs.to_csv(f'{position}_jobs.csv', encoding='utf-8', index=False)
    print(f'Successfully saved {position} jobs!")')
  except AttributeError:
    print(f'Failed to save {position} jobs check errlogs')
    continue

Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,Junior Data Scientist,Kadence International,Remote,Overview Title: Junior Data Scientist Salary: ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Data Scientist,Crescent Bank,"Carrollton, TX 75006",About the Position: Crescent Bank is looking ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Jr. Data Scientist,Talentheed Inc,Remote,Responsibilities: Data mining or extricating...,https://www.indeed.com/company/Talentheed-Inc/...
2022-04-25,Junior Data Scientist,Conestoga Energy Partners LLC,"Southlake, TX 76092",This position is in-office The Junior Data Sc...,https://www.indeed.com/rc/clk?jk=10e4b2da6bc00...
2022-04-25,"Director, Data Science",Hulu,Remote in Oregon,"At Disney Streaming, we are experiencing explo...",https://www.indeed.com/rc/clk?jk=8e3e08454a725...
2022-04-25,Data Scientist,intellipro,Remote,Responsibilities Building tools to automate d...,https://www.indeed.com/company/Intellipro-Grou...
2022-04-25,Data Scientist,The Burchell Group Inc.,"Dallas, TX",Job Title: Data Scientist Type: Direct Hire ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Data Scientist,Best Egg,Remote,is not designed to cover or contain a comprehe...,https://www.indeed.com/company/Best-Egg/jobs/D...
2022-04-25,Data Scientist,WorkCog,"California City, CA",Job Title: Data Scientist Responsibilities: RE...,https://www.indeed.com/company/WorkCog/jobs/Da...
2022-04-25,Junior Data Scientist,KesarWeb,"Texas City, TX",We are looking for a Data Scientist to analyze...,https://www.indeed.com/company/KesarWeb/jobs/J...


Successfully saved data_scientist jobs!")


Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,Junior Web Developer,Scorpion Internet Marketing,Remote,"As a Junior Web Developer, you are responsible...",https://www.indeed.com/company/Scorpion-Intern...
2022-04-25,Front-End Developer (Flutter),MotionMobs,Remote,Seeking a Flutter developer to work with our i...,https://www.indeed.com/company/MotionMobs/jobs...
2022-04-25,Remote Web Developer,Piper Companies,+1 locationRemote,Piper Companies is seeking a Web Developer for...,https://www.indeed.com/rc/clk?jk=f93d500423425...
2022-04-25,Jr UI Developer,Xtivia Inc,Remote,Description: XTIVIA - Jr UI Developer How...,https://www.indeed.com/rc/clk?jk=b4296f4c31188...
2022-04-25,Tableau Developer,HealthPlan Data Solutions,"Columbus, OH 43215 (Downtown area)","Tableau Developer HealthPlan Data Solutions, I...",https://www.indeed.com/company/HealthPlan-Data...
2022-04-25,Front End (Flutter) Developer - 100% Remote,TEEMA Solutions Group Inc,Remote,Front End (Flutter) Developer - 100% Remote P...,https://www.indeed.com/company/TEEMA-Solutions...
2022-04-25,Web Developer,Magneti,Remote,Job Overview Magneti is looking for a marketin...,https://www.indeed.com/rc/clk?jk=f0acdabe5f28d...
2022-04-25,E-Commerce Developer,Green Body Brand,Remote,GBB is focused on helping with the elimination...,https://www.indeed.com/company/Plantarion-LLC/...
2022-04-25,Trainee - Software Developer,LeewayHertz,Remote,This is a remote position. Basic Information...,https://www.indeed.com/rc/clk?jk=df9a96b7b6e68...
2022-04-25,Web Developer (remote),Diib Inc,Remote,Ready to be part of an awesome team? Diib is a...,https://www.indeed.com/company/Diib/jobs/Web-D...


Successfully saved web_developer jobs!")


Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,Senior Frontend React JS Developer,Stellar Software,Remote,Are you looking for a Stellar Software solutio...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Front End Developer (Entry level),Revature,"Kissimmee, FL+126 locations",One day someone is going to ask you where you ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Front-End Developer (Flutter),MotionMobs,Remote,Seeking a Flutter developer to work with our i...,https://www.indeed.com/company/MotionMobs/jobs...
2022-04-25,Front End (Flutter) Developer - 100% Remote,TEEMA Solutions Group Inc,Remote,Front End (Flutter) Developer - 100% Remote P...,https://www.indeed.com/company/TEEMA-Solutions...
2022-04-25,Front End Developer,Softnice Inc.,Remote,Proven work experience as a Front-end develope...,https://www.indeed.com/company/Softnice-Inc./j...
2022-04-25,Desarrollador Frontend BcB3590,Nisum,+1 locationRemote,Location: Remote Latin American Team: Customer...,https://www.indeed.com/rc/clk?jk=67d3a0ac4f5e5...
2022-04-25,FULLY REMOTE - Frontend Developer,CyberCoders,"Remote in Washington, DC 20001+126 locations",FULLY REMOTE - Frontend Developer If yo...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,"Senior React Frontend Developer (100% Remote, ...",Cloud Catalogs LLC,Remote,We are looking for a Senior React Front-end De...,https://www.indeed.com/company/Cloud-Catalogs-...
2022-04-25,UI Developer,workcog,+2 locationsRemote,We are looking for a qualified Front-end devel...,https://www.indeed.com/company/WorkCog/jobs/Ui...
2022-04-25,Web3 Frontend Developer,MegaFans,Remote,Web3 Frontend Developer About MegaFans: MegaF...,https://www.indeed.com/company/Megafans/jobs/F...


Successfully saved frontend_developer jobs!")


Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,Backend Developer,Trap Door Attractions,Remote,Trap Door Escape and Madness Distillery have b...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Software Developer,Zillow,Remote,About the team The Zillow Premier Agent team i...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Java Backend Developer,Zachary Piper Solutions,+1 locationRemote,Zachary Piper Solutions is seeking a Java Back...,https://www.indeed.com/rc/clk?jk=1453e2cf69e44...
2022-04-25,Backend Software Developer,BOTG LLC,Remote,We are looking for a Backend Software Develope...,https://www.indeed.com/company/BOTG-LLC/jobs/B...
2022-04-25,Senior Backend Developer,Strategic Employment Partners,Remote,A very well-known and established US-based com...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Voice Assistance (Chat Bot) Developer (Remote),Practivest,"Remote in Washington, DC+1 location",Are you an experienced Front-End developer? We...,https://www.indeed.com/company/Practivest/jobs...
2022-04-25,PHP Backend Developer,"DTPM, Inc.",Remote,Job Description: We are looking for fulltime P...,"https://www.indeed.com/company/DTPM,-Inc./jobs..."
2022-04-25,Backend Developer (Go),Averity,+1 locationRemote,Are you a Senior Software Engineer with Golang...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Python/Django Developer,Delta,Remote,Discuss screen designs with UI designerDevelop...,https://www.indeed.com/company/Access-To-Futur...
2022-04-25,Java Backend Developer,LGL Technologies,Remote,We are looking for Java Backend Developer for ...,https://www.indeed.com/company/LGL-Technologie...


Successfully saved backend_developer jobs!")


Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,DevOps Engineer,Resourznet Consulting,Remote,AWS DevOps Engineer with EKS Experienceclient ...,https://www.indeed.com/company/Resourznet-Cons...
2022-04-25,Entry Level Unix/Linux DEVOPs Admin,Commonwealth of Kentucky,"Frankfort, KY 40601","Entry Level Unix/Linux DevOps Admin Frankfort,...",https://www.indeed.com/company/Harvey-Nash/job...
2022-04-25,Devops Engineer – VIRTUAL/REMOTE,Publix,"Remote in Lakeland, FL 33801+2 locations",Publix is able to offer virtual/remote employm...,https://www.indeed.com/rc/clk?jk=5cb0fe359ab61...
2022-04-25,DevOps Engineer,SailGoldenGate Consulting,Remote,DevOps Engineer (Advanced Level) This is an op...,https://www.indeed.com/company/SailGoldenGate-...
2022-04-25,DevOps Engineer,workcog,+5 locationsRemote,We are looking for a DevOps Engineer to help u...,https://www.indeed.com/company/WorkCog/jobs/De...
2022-04-25,DevOps Engineer,Initializ,"Atlanta, GA",About us Initializ is a small business in Atla...,https://www.indeed.com/company/Initializ/jobs/...
2022-04-25,DevOps Engineer (100% Remote) (Full time perma...,Aptivacorp,+1 locationRemote,Position: Site Reliability EngineerType: Full ...,https://www.indeed.com/company/aptivacorp/jobs...
2022-04-25,DevOps Engineer,Infologitech,Remote,Apply Only US Citizens & GC Holders Skill Requ...,https://www.indeed.com/company/Infologitech/jo...
2022-04-25,"DevOps/AWS/Terraform, Spinnaker, EKS",The Getch,+2 locationsRemote,"Role : Job: Dev Ops/AWS/JPMC/Terraform, Spinna...",https://www.indeed.com/company/The-Getch/jobs/...
2022-04-25,DevOps Engineer,Relex Solutions,"Remote in Atlanta, GA",DevOps Engineer What you'll be doing: Deve...,https://www.indeed.com/rc/clk?jk=6b9a6209f9be7...


Successfully saved devops jobs!")


Unnamed: 0_level_0,JobTitle,Company,Location,Description,JobUrl
DateOfScrape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-25,Senior Software Engineer,Amazon,+126 locationsRemote,Amazon’s eCommerce Foundation organization pro...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Software Engineer (Early Career),Apple,"Austin, TX+126 locations","Summary Posted: Mar 24, 2022 Weekly Hours: 40 ...",https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Software Engineer I - Enterprise,Indeed,+126 locationsRemote,No job description,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Computer Engineer,US Defense Contract Management Agency,"San Diego, CA",Help This job is open to Military spous...,https://www.indeed.com/rc/clk?jk=58528687c180b...
2022-04-25,Software Engineer - Automation,Meta,+126 locationsRemote,Reality Labs brings together a world-class tea...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2022-04-25,Software engineer,Sticker Mule,Remote,About Sticker Mule Sticker Mule is the I...,https://www.indeed.com/rc/clk?jk=1756e7b0298d0...
2022-04-25,Jr. Software Engineer,NBCUniversal,Remote,69366BR Technology & Engineering Operations & ...,https://www.indeed.com/rc/clk?jk=589e7df855f13...
2022-04-25,Software Engineer – multiple openings,The Walt Disney Company (Corporate),"Orlando, FL",Enterprise Technology is an organization withi...,https://www.indeed.com/rc/clk?jk=91ff01de01fff...
2022-04-25,Entry Level Software Engineer,Avant,Remote,Avant is dedicated to building premier digital...,https://www.indeed.com/company/Avant/jobs/Entr...
2022-04-25,Frontend Software Engineer,Tilt,Remote,Frontend Software Engineer Team: Product & Eng...,https://www.indeed.com/rc/clk?jk=33861facb0d2e...


Successfully saved software_engineer jobs!")
