In [1]:
import requests
import bs4
from bs4 import BeautifulSoup

import pandas as pd
import time
import copy
import csv

In [2]:
def get_url(position):
  '''Function to search a position on the Indeed webpage'''
  template = 'https://www.indeed.com/jobs?q={}&l'
  url = template.format(position)
  
  return url

In [3]:
def response_func(url):
  '''Function returns a get request for html of an indeed webpage'''
  response = requests.get(url, headers={'User-Agent': 'test'})

  return response

In [4]:
def get_cards(response):
  ''' Indeed search results are divided up by cards.
      This function returns the cards get_job_cards for each job title'
  '''
  soup = BeautifulSoup(response.text, 'html.parser')
  cards = soup.find_all('td', {'class': 'resultContent'})

  return cards

In [5]:
def get_job_title(card):
  '''Function returns the job title'''
  job_title = card.find_all('span')[1].get_text()
  
  return job_title

In [6]:
def get_job_url(card):
  '''Function returns the job posting url'''
  try:
    job_url = 'https://www.indeed.com' + card.h2.a.get('href')
    return job_url
  except AttributeError:
    return 'https://www.indeed.com/404'

In [7]:
def get_company_name(card):
  '''Function returns the company name who posted the job'''
  company = card.find('span', 'companyName').text

  return company

In [8]:
def get_location(card):
  '''Function returns the job location'''
  location = card.find('div', 'companyLocation').text

  return location

In [9]:
def get_job_description(job_url):
  response = requests.get(job_url, headers={'User-Agent': 'test'})
  soup = BeautifulSoup(response.text, 'html.parser')
  try:
    description = soup.find('div', 'jobsearch-jobDescriptionText').text.strip()
    return description
  except AttributeError:
    return "No job description"

In [10]:
def get_job_records(position):
  '''
  Function returns an array of the following
  * Company Name
  * Job Title
  * Location
  * Descriptions
  * Job Url
  '''
  # Retrieves the data on the first page of the search results

  records = []
  url = get_url(position)
  response = response_func(url)
  while True:
    cards = get_cards(response)
    for card in cards:
      job_title = get_job_title(card)
      company_name = get_company_name(card)
      job_location = get_location(card)
      job_url = get_job_url(card)
      job_description = get_job_description(job_url)

      records.append([job_title,
                      company_name,
                      job_location,
                      job_description,
                      job_url])
    # Goes through each page of the search results
    try:
      response = requests.get(url)
      soup = BeautifulSoup(response.text, 'html.parser')
      url = 'https://www.indeed.com' + soup.find(
                        'a',
                        {'aria=label': 'Next'}
                        ).get('href')
    except AttributeError:
      break
  return records


In [11]:
search_terms = [
    "data%20scientist",
    "machine%20learning%20engineer",
    "data%20engineer",
    "web%20developer",
    "frontend%20developer",
    "backend%20developer",
    "devops",
    "software%20engineer",
]

In [12]:
from random import uniform
for position in search_terms:
  time.sleep(uniform(1.1,2.1))
  try:
    scraped_list = get_job_records(position)
    indeed_jobs = pd.DataFrame(scraped_list,
                              columns=[
                                        'Company',
                                        'JobTitle',
                                        'Location',
                                        'Description',
                                        'JobUrl'])
    saving_jobs = copy.deepcopy(indeed_jobs)
    saving_jobs = saving_jobs.drop_duplicates(['Description']).reset_index(drop=True)
    saving_jobs = saving_jobs[saving_jobs['Description'].notna()]
    if len(saving_jobs)==1:
      continue
    display(saving_jobs)

    position = position.replace('%20', '_')

    saving_jobs.to_csv(f'{position}_jobs.csv', encoding='utf-8', index=False)
    print(f'Successfully saved {position} jobs!")')
  except AttributeError:
    print(f'Failed to save {position} jobs check errlogs')
    continue

Unnamed: 0,Company,JobTitle,Location,Description,JobUrl
0,Zillow,Zillow,Remote,About the team About the team \n We are the ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,Talentheed Inc,Talentheed Inc,Remote,Responsibilities: \n\nData mining or extricati...,https://www.indeed.com/company/Talentheed-Inc/...
2,"Director, Data Science",Hulu,Remote in Oregon,"At Disney Streaming, we are experiencing explo...",https://www.indeed.com/rc/clk?jk=8e3e08454a725...
3,Data Scientist,Columbia Sportswear Company,"Portland, OR 97229",OUTGROWN YOUR OWN BACKYARD? COME PLAY IN OURS....,https://www.indeed.com/rc/clk?jk=74eaf0a8d1ddd...
4,Data Scientist (Entry-Level),GreatSchools,"Remote in Oakland, CA 94612",About GreatSchools.org: GreatSchools is the le...,https://www.indeed.com/company/GreatSchools/jo...
5,Data Scientist,Best Egg,Remote,is not designed to cover or contain a comprehe...,https://www.indeed.com/company/Best-Egg/jobs/D...
6,Madison Square Garden Entertainment,Madison Square Garden Entertainment,"Remote in Tarrytown, NY 10591",Who are we hiring?\n\n As a Basketball Data Sc...,https://www.indeed.com/rc/clk?jk=41d2ea80ddeb8...
7,Data Scientist,Comcentric,"Remote in Salem, OR+34 locations",We are currently seeking a Senior Data Scienti...,https://www.indeed.com/rc/clk?jk=987c988973978...
8,Lark Health,Lark Health,"Remote in Mountain View, CA",About Lark\n\n\n Lark is the world's largest...,https://www.indeed.com/rc/clk?jk=6345333550b73...
9,Wealthfront,Wealthfront,"Remote in Palo Alto, CA 94301+1 location",Data and its proper use is crucial to that vis...,https://www.indeed.com/rc/clk?jk=f436958b48feb...


Successfully saved data_scientist jobs!")


Unnamed: 0,Company,JobTitle,Location,Description,JobUrl
0,Apple,Apple,"Seattle, WA+5 locations","Summary\nPosted: Jan 27, 2022\nWeekly Hours: 4...",https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,Nike,Nike,"Beaverton, OR+10 locations","Become a Part of the NIKE, Inc. Team\n NIKE, I...",https://www.indeed.com/rc/clk?jk=04d4f381c9a12...
2,Data Engineer,workcog,+2 locationsRemote,Role: Data Engineer\nLocation: Remote\nDuratio...,https://www.indeed.com/company/WorkCog/jobs/Da...
3,FlexIT Inc,FlexIT Inc,"Beaverton, OR 97005 (Central Beaverton area)",We are looking for strong experience in Python...,https://www.indeed.com/rc/clk?jk=7ed33d932ab92...
4,FlexIT Inc,FlexIT Inc,"Beaverton, OR 97005 (Central Beaverton area)",APLA is building capabilities around the compa...,https://www.indeed.com/rc/clk?jk=c534685069671...
5,Tableau Data Engineer (100% Remote),Piper Companies,Remote,Piper Companies is currently searching for a r...,https://www.indeed.com/rc/clk?jk=417f94465f37b...
6,Converse,Converse,"Remote in Boston, MA 02116",Become part of the Converse Team\n Converse is...,https://www.indeed.com/rc/clk?jk=f59aef6085566...
7,Twitch,Twitch,"San Francisco, CA+1 location","About Us\n\nLaunched in 2011, Twitch is a glob...",https://www.indeed.com/rc/clk?jk=489dd7f31c6d7...
8,Data Integration Engineer,The Walt Disney Company (Corporate),"Orlando, FL","At Disney, we‘re storytellers. We make the imp...",https://www.indeed.com/rc/clk?jk=f52cdaeedae91...
9,Data Engineer- Chemical Insights,McKinsey & Company,"Remote in San Jose, CA+11 locations",Who You'll Work With\nYou’ll work with McKinse...,https://www.indeed.com/rc/clk?jk=3c8a220bd32bc...


Successfully saved data_engineer jobs!")


Unnamed: 0,Company,JobTitle,Location,Description,JobUrl
0,Dennis Uniform,Dennis Uniform,"Portland, OR 97214 (Buckman area)",COMPANY OVERVIEW\nDENNIS Uniform Founded in 19...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,Web Developer,Magneti,Remote,Job Overview\nMagneti is looking for a marketi...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2,CityCore,CityCore,"West Slope, OR",Looking for a PHP/HTML Developer who has exten...,https://www.indeed.com/rc/clk?jk=a712b65452679...
3,Junior Web Developer,Scorpion Internet Marketing,Remote,"As a Junior Web Developer, you are responsible...",https://www.indeed.com/company/Scorpion-Intern...
4,1Starr,1Starr,Remote,1Starr Enterprises looking for Web Designers w...,https://www.indeed.com/company/1Starr/jobs/Web...
5,USC,USC,"Los Angeles, CA",The Laboratory of Neuro Imaging (www.loni.usc....,https://www.indeed.com/rc/clk?jk=dbd51e1593e90...
6,Web Developer (remote),Diib Inc,Remote,Ready to be part of an awesome team? Diib is a...,https://www.indeed.com/company/Diib/jobs/Web-D...
7,Jr UI Developer,Xtivia Inc,Remote,Description: \n XTIVIA - Jr UI Developer\n H...,https://www.indeed.com/rc/clk?jk=b4296f4c31188...
8,More Seconds,More Seconds,Remote,About Our Company - More Seconds\nAt More Seco...,https://www.indeed.com/company/More-Seconds/jo...
9,Crunchapps,Crunchapps,"New York, NY 10014 (West Village area)",Join an Innovative Deep Learning team with the...,https://www.indeed.com/rc/clk?jk=9e76e485b48fe...


Successfully saved web_developer jobs!")


Unnamed: 0,Company,JobTitle,Location,Description,JobUrl
0,Trap Door Attractions,Trap Door Attractions,Remote,Trap Door Escape and Madness Distillery have b...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,Infosys,Infosys,Remote,Infosys is seeking a JAVA BACKEND DEVELOPER. T...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2,Party City,Party City,Remote,"Company Overview: \nParty City Holdings, Inc. ...",https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
3,"ViQi, Inc","ViQi, Inc","Remote in Santa Barbara, CA 93117","COMPANY: ViQi, Inc.\nPOSITION: Backend Develop...","https://www.indeed.com/company/ViQi,-Inc/jobs/..."
4,Averity,Averity,+1 locationRemote,Are you a Senior Software Engineer with Golang...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
5,Java developer (Backend),CA-One Tech Cloud Inc,"Sunnyvale, CA 94043+1 location",Position Title: Java developer\nLocation: Bay ...,https://www.indeed.com/company/CA--One-Tech-Cl...
6,"PWCC Marketplace, LLC","PWCC Marketplace, LLC","Remote in Portland, OR 97224",Company Overview: \nWe are PWCC. We're based i...,https://www.indeed.com/company/PWCC-Marketplac...
7,Backend Developer,Actalent,"San Carlos, CA 94070+2 locations",Equivalent Experience \n \n\nDescription:\n We...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
8,Backend Developer,Pepperdata,"Santa Clara, CA",Job Description\n\n\nBackend Developer\n Peppe...,https://www.indeed.com/rc/clk?jk=de697975ea931...
9,Backend Developer,Five Pack Creative,Remote,Back-End Developer needed for full-time contra...,https://www.indeed.com/company/Five-Pack-Creat...


Successfully saved backend_developer jobs!")


Unnamed: 0,Company,JobTitle,Location,Description,JobUrl
0,DemandJump,DemandJump,Remote+1 location,DevOps Engineer\nWhat Does DemandJump Solve?\n...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,Trovata,Trovata,Remote,Company Overview: Trovata is the first modern ...,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
2,DevOps Engineer,Verizon,"Portland, OR 97209 (Goose Hollow area)+45 loca...",When you join Verizon\n\nVerizon is one of the...,https://www.indeed.com/rc/clk?jk=443b0e2e59e76...
3,DevOps Engineer (remote),DNA325,Remote,We are a team that offers new perspectives to ...,https://www.indeed.com/company/DNA325/jobs/Dev...
4,Dev Ops Engineer - RDNA - CW,Wellstone Technologies,Remote,[Notes before you apply: \nOur Policy is to ta...,https://www.indeed.com/company/Wellstone-Techn...
5,DevOps Engineer,RIIM LLC,"New York, NY",We have an excellent opportunity to discuss. B...,https://www.indeed.com/company/RIIM-LLC/jobs/D...
6,Apple,Apple,"Austin, TX+22 locations","Summary\nPosted: Oct 20, 2021\nWeekly Hours: 4...",https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
7,Noodle,Noodle,"Remote in Portland, OR 97218+17 locations",DevOps Engineer\n Online education is no longe...,https://www.indeed.com/rc/clk?jk=6da69b4b6f787...
8,DevOps Engineer,Infologitech,Remote,Apply Only US Citizens & GC Holders\nSkill Req...,https://www.indeed.com/company/Infologitech/jo...
9,DevOps Engineer,Resourznet Consulting,Remote,AWS DevOps Engineer with EKS Experienceclient ...,https://www.indeed.com/company/Resourznet-Cons...


Successfully saved devops jobs!")
