# Indeed Job Scraper

Explore the potential of Indeed with more detailed filters than Indeed's basic ones.

In [1]:
#import libraries
import csv
from datetime import datetime
from random import randint
from time import sleep
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup

In [2]:
# Browser-like request headers sent with every search-page request.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # Brotli ('br') is deliberately not advertised: requests only decodes it
    # when an optional Brotli package is installed, and an undecoded body
    # would make the HTML parser find no job cards at all.
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'fr-FR,fr;q=0.9',
    'cache-control': 'max-age=0',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47',
}

# For Indeed France; to target another country, copy/paste that country's Indeed URL instead.
# The URL is built from two variables, "position" and "location"; "fromage=3" restricts the search to the last 3 days.
def get_url(position, location):
    """Build the Indeed France search URL for a job query.

    Both values are URL-encoded, so spaces, accented characters and reserved
    characters such as ``&`` or ``#`` cannot corrupt the query string.
    Pass raw text, not pre-encoded values.

    Args:
        position: Job keywords to search for (the ``q`` parameter).
        location: City or area to search in (the ``l`` parameter).

    Returns:
        The search URL as a string. ``fromage=3`` is always appended.
    """
    template = 'https://fr.indeed.com/jobs?q={}&l={}&fromage=3'
    # str() keeps non-string arguments working as they did with plain format().
    return template.format(quote_plus(str(position)), quote_plus(str(location)))

def get_record(card):
    """Extract job data from a single search-result card.

    Args:
        card: One parsed result card. It must expose an ``h2`` attribute and
            a ``find(name, css_class)`` method, as the parsed tags passed in
            by ``main`` do.

    Returns:
        A tuple ``(job_title, company, location, job_summary, salary,
        post_date, extract_date, job_url)``. A field that is missing from
        the card is returned as an empty string. ``extract_date`` is today's
        date formatted as ``YYYY-MM-DD``.
    """
    def text_of(tag_name, css_class):
        # Stripped text of the first matching child, or '' when it is absent.
        node = card.find(tag_name, css_class)
        return node.text.strip() if node is not None else ''

    # The title link carries both the job title and the relative job URL.
    # A card without <h2><a> yields empty fields instead of raising and
    # aborting the caller's loop.
    heading = card.h2
    atag = heading.a if heading is not None else None

    job_title = (atag.get('title') or '') if atag is not None else ''
    href = atag.get('href') if atag is not None else None

    company = text_of('span', 'company')

    # The location is stored in an attribute, not in the element text.
    loc_node = card.find('div', 'recJobLoc')
    location = (loc_node.get('data-rc-loc') or '') if loc_node is not None else ''

    job_summary = text_of('div', 'summary')
    post_date = text_of('span', 'date')
    salary = text_of('span', 'salarytext')

    extract_date = datetime.today().strftime('%Y-%m-%d')
    # Only build an absolute URL when the link actually has an href.
    job_url = 'https://fr.indeed.com' + href if href else ''

    return (job_title, company, location, job_summary, salary, post_date, extract_date, job_url)

def main(position, location):
    """Scrape every result page of a search and save the jobs to ``Indeed.csv``.

    Starting from the URL built by ``get_url``, each page is downloaded and
    parsed, one record per job card is collected, and the "Suivant" (next)
    link is followed until there is none. The collected records are then
    written to ``Indeed.csv`` (UTF-8), overwriting any previous file.

    If a request fails or returns an HTTP error status, pagination stops and
    the records gathered so far are still written.

    Args:
        position: Job keywords to search for.
        location: City or area to search in.
    """
    records = []  # one tuple per job card, in the order returned by get_record
    url = get_url(position, location)

    while True:
        print(url)
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as err:
            # Stop paginating but keep what was already collected.
            print('Request failed, stopping pagination: {}'.format(err))
            break
        if not response.ok:
            print('HTTP {} received, stopping pagination'.format(response.status_code))
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        cards = soup.find_all('div', 'jobsearch-SerpJobCard')
        records.extend(get_record(card) for card in cards)

        # 'aria-label': 'Suivant' is the French label; change it to Next for
        # English, or look it up in the Indeed page source for other countries.
        next_link = soup.find('a', {'aria-label': 'Suivant'})
        next_href = next_link.get('href') if next_link is not None else None
        if not next_href:
            break  # last page reached
        url = 'https://fr.indeed.com' + next_href

        # Short random pause between pages; the upper bound of 1 second keeps
        # the query fast.
        sleep(randint(0, 1))

    with open('Indeed.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Column order must match the tuple returned by get_record:
        # (title, company, location, summary, salary, post date, extract date, url).
        writer.writerow(['Job Title', 'Company', 'Location', 'Summary', 'Salary', 'Posting Date', 'Extract Date', 'Job Url'])
        writer.writerows(records)

In [3]:
# Pick the job keywords and the city to search for
main(position='finance', location='Paris')

#Advice: analyse the resulting CSV with a BI tool such as Power BI, QlikView or Tableau.
#In my case I want to exclude consulting firms; since this filtering is a repetitive task in every analysis, it is better handled in a BI tool.

https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=10
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=20
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=30
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=40
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=50
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=60
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=70
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=80
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=90
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=100
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=110
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=120
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=130
https://fr.indeed.com/jobs?q=finance&l=Paris&fromage=3&start=140
https://fr.indeed.com/jobs?q=finance&l=Paris