In [1]:
import csv
from datetime import datetime
import requests
from bs4 import BeautifulSoup 
import pandas as pd
import numpy as np
import seaborn as sns 
from matplotlib import pyplot as plt
%matplotlib inline

#### This function takes the position and location of a job as input parameters and builds a search URL in the appropriate format.

In [2]:
def get_url(position, location):
    """Build the Indeed (ie.indeed.com) search URL for a position and location.

    Both values are encoded for use in a query string: spaces become ``+``
    and reserved characters such as ``+``, ``&`` or ``#`` are percent-escaped,
    so a search like ``"C++ developer"`` or ``"R&D"`` is not misread.

    Args:
        position: Job title or keywords to search for, e.g. ``"data analyst"``.
        location: Place to search in, e.g. ``"dublin"``.

    Returns:
        The search URL as a string.
    """
    # Imported locally so this cell works without touching the imports cell.
    from urllib.parse import quote_plus

    template = "https://ie.indeed.com/jobs?q={}&l={}"
    return template.format(quote_plus(position), quote_plus(location))

#### This function's job is to scrape the information of each job card and save it in a tuple.

In [3]:
def get_records(card):
    """Extract the fields of one job card into a tuple.

    Args:
        card: A parsed job-card element. It must expose ``.h2`` and
            ``.find(name, css_class)``, as BeautifulSoup tags do.

    Returns:
        A tuple ``(job_title, company, location, summary, post_date, job_url)``.
        Any field whose element or attribute is absent from the card is an
        empty string instead of raising ``AttributeError``, so one incomplete
        card does not abort the whole scrape.
    """

    def _text(node):
        # Text of an element, or '' when the lookup found nothing.
        return node.text if node is not None else ''

    def _attr(node, key):
        # Attribute value of an element, or '' when either is missing.
        if node is None:
            return ''
        return node.get(key) or ''

    heading = card.h2
    atag = heading.a if heading is not None else None

    job_title = _attr(atag, 'title')
    href = _attr(atag, 'href')
    # Only build an absolute URL when the card actually carries a link.
    job_url = 'https://ie.indeed.com' + href if href else ''

    company = _text(card.find('span', 'company')).strip()
    location = _attr(card.find('div', 'recJobLoc'), 'data-rc-loc')
    summary = _text(card.find('div', 'summary')).strip().replace("\n", ' ')
    post_date = _text(card.find('span', 'date'))

    return (job_title, company, location, summary, post_date, job_url)

#### This is the main function where the URL is accessed and parsed with the help of BeautifulSoup. 
#### Once the records on a page are scraped, the function checks whether a next page is available. As long as one exists, scraping continues with that page.
#### Finally, a CSV file is opened and all the records are written into it.

In [4]:
def main(position, location):
    """Scrape every result page for a search and save the jobs to a CSV file.

    Starting from the URL built by ``get_url``, each page is downloaded and
    parsed, every job card is converted with ``get_records``, and the "Next"
    pagination link is followed until there is none. All collected records
    are then written once to ``job_postings.csv`` in the working directory,
    replacing any existing file.

    Args:
        position: Job title or keywords to search for.
        location: Place to search in.
    """
    base_url = 'https://ie.indeed.com'
    records = []
    url = get_url(position, location)

    while True:
        # A timeout keeps one stalled connection from hanging the notebook.
        r = requests.get(url, timeout=30)
        soup = BeautifulSoup(r.text, 'html.parser')
        for card in soup.find_all('div', 'jobsearch-SerpJobCard'):
            records.append(get_records(card))

        # Follow the pagination link; stop when the page has none.
        next_link = soup.find('a', {'aria-label': 'Next'})
        next_href = next_link.get('href') if next_link is not None else None
        if not next_href:
            break
        url = base_url + next_href

    # Write once, after the loop, so the last page's records are included
    # and a single-page search still produces a file.
    with open('job_postings.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['JobTitle', 'Company', 'Location', 'Summary', 'Post Date', 'Job URL'])
        writer.writerows(records)

In [5]:
# Run the search for data-analyst jobs in Dublin.
main(position='data analyst', location='dublin')