In [129]:
import csv
import time
from datetime import datetime
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup

In [130]:

def get_url(position, location):
    """Build the Glassdoor search URL for a job keyword.

    Args:
        position: Free-text job keyword (e.g. ``"data engineer"``). It is
            URL-encoded, so characters such as ``+``, ``&`` or ``#`` are
            preserved as part of the keyword instead of corrupting the query.
        location: Accepted for interface compatibility.
            NOTE(review): this value is not used -- the template has a single
            placeholder (the keyword) and a fixed ``locId``, so every search
            targets the same location regardless of this argument.

    Returns:
        The search URL as a string.
    """
    template = "https://www.glassdoor.com/Job/jobs.htm?sc.keyword={}&locT=C&locId=1139970&JobType=all&fromAge=1"
    # quote_plus encodes spaces as "+" (as before) and percent-escapes
    # everything else that is unsafe inside a query-string value.
    return template.format(quote_plus(position))

In [131]:
def _find_text(card, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class`` in ``card``, or "" if absent."""
    node = card.find(tag, {"class": css_class})
    return node.text.strip() if node is not None else ""


def get_record(card):
    """Extract job data from a single job card.

    Every field falls back to an empty string when the corresponding element
    is missing, so one incomplete card never aborts the whole scrape.

    Args:
        card: A parsed job-listing element exposing ``find`` and ``find_all``
            (a BeautifulSoup tag in ``main``).

    Returns:
        An 8-tuple ``(job_title, company, job_location, post_date,
        extract_date, summary, salary, job_url)`` where ``extract_date`` is
        today's date formatted as ``YYYY-MM-DD``.
    """
    atags = card.find_all("a")
    # The first anchor is read as the job title and the second as the company.
    job_title = atags[0].text.strip() if len(atags) > 0 else ""
    company = atags[1].text.strip() if len(atags) > 1 else ""

    job_location = _find_text(card, "span", "jobLocation")
    post_date = _find_text(card, "span", "jobAge")
    summary = _find_text(card, "div", "jobDescriptionContent")
    salary = _find_text(card, "span", "salaryText")

    try:
        # urljoin copes with hrefs that are already absolute or that lack a
        # leading slash; plain concatenation produced malformed URLs for both.
        job_url = urljoin("https://www.glassdoor.com", atags[0]["href"])
    except (IndexError, KeyError, TypeError):
        # KeyError: the first anchor exists but carries no href attribute.
        job_url = ""

    today = datetime.today().strftime("%Y-%m-%d")
    return (job_title, company, job_location, post_date, today, summary, salary, job_url)

In [132]:

def main(position, location, output_path="jobs.csv", timeout=30):
    """Scrape one page of job listings and append them to a CSV file.

    Args:
        position: Job keyword, passed to ``get_url``.
        location: Location string, passed to ``get_url``.
        output_path: CSV file to append to. The header row is written only
            when the file is new or empty, so repeated runs do not inject
            duplicate header lines into the data.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        None. Progress and errors are reported via ``print``; any failure is
        caught and reported rather than raised (best-effort behaviour).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    columns = [
        "JobTitle",
        "Company",
        "JobLocation",
        "PostDate",
        "ExtractDate",
        "Summary",
        "Salary",
        "JobUrl",
    ]
    url = get_url(position, location)
    records = []
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        # Surface 4xx/5xx responses instead of parsing an error page and
        # reporting a "successful" scrape of zero postings.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("li", {"class": "react-job-listing"})
        records = [get_record(card) for card in cards]
        with open(output_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # In append mode the stream starts at end-of-file, so position 0
            # means the file is new or empty and still needs its header.
            if f.tell() == 0:
                writer.writerow(columns)
            writer.writerows(records)
    except Exception as e:
        # Deliberately broad: the notebook should report and continue.
        print(e)
        print("Error scraping job postings")
        return None
    print(f"Successfully scraped {len(records)} job postings")
    return None


In [133]:
# Run the scraper for the "developer" keyword; results are appended to jobs.csv.
main(position="developer", location="texas")

Successfully scraped 30 job postings
