In [7]:
from dotenv import load_dotenv
import os

# Pull the settings stored in the local .env file into the process environment
load_dotenv()

# Database connection string taken from the environment (None when it is not set)
DATABASE_URL = os.environ.get('DATABASE_URL')


In [8]:
import psycopg2

def connect_db():
    """Open and return a new psycopg2 connection built from DATABASE_URL."""
    dsn = DATABASE_URL
    return psycopg2.connect(dsn)


In [9]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime

def _element_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or None if absent."""
    element = parent.find(tag, class_=css_class)
    return element.text.strip() if element is not None else None


def scrape_glassdoor(timeout=30):
    """Scrape the Glassdoor job search page and return the parsed listings.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts with the keys ``job_title``, ``company_name``,
        ``location``, ``date_posted`` (a ``datetime.date``),
        ``job_description``, ``salary_range`` ("N/A" when no salary element
        is present) and ``url``. Listings that lack a required element or
        whose date does not match ``%m/%d/%Y`` are skipped.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    search_url = "https://www.glassdoor.com/Job/jobs.htm"
    headers = {"User-Agent": "Mozilla/5.0"}
    # Without a timeout, requests may block indefinitely on a stalled server.
    response = requests.get(search_url, headers=headers, timeout=timeout)
    # Surface blocked/error responses instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    jobs = soup.find_all("li", class_="jl")  # Replace with the actual HTML structure

    job_listings = []
    for job in jobs:
        job_title = _element_text(job, "div", "jobTitle")
        company_name = _element_text(job, "div", "jobEmpolyerName")
        location = _element_text(job, "span", "subtle loc")
        date_text = _element_text(job, "span", "date")
        job_description = _element_text(job, "div", "job-snippet")
        salary_text = _element_text(job, "span", "salaryText")
        link = job.find("a", class_="jobLink")
        href = link.get("href") if link is not None else None

        required = (job_title, company_name, location, date_text, job_description, href)
        if any(value is None for value in required):
            # Incomplete card: skip it rather than abort the whole scrape.
            continue

        try:
            date_posted = datetime.strptime(date_text, "%m/%d/%Y").date()  # Example date format
        except ValueError:
            # Date is not in the expected format; skip this listing.
            continue

        job_listings.append({
            "job_title": job_title,
            "company_name": company_name,
            "location": location,
            "date_posted": date_posted,
            "job_description": job_description,
            "salary_range": salary_text if salary_text is not None else "N/A",
            "url": "https://www.glassdoor.com" + href,
        })

    return job_listings

# Run the scraper once and keep the results in `job_listings` for later cells
job_listings = scrape_glassdoor()
job_listings[:3]  # Preview the first 3 results (bare expression; presumably shown as the notebook cell output)


[]

In [10]:
def insert_job_listing(connection, job_data):
    """Insert one job listing into the ``job_listings`` table.

    Rows that conflict on (job_title, company_name, location, date_posted)
    are ignored by the ``ON CONFLICT ... DO NOTHING`` clause.

    Args:
        connection: An open DB-API connection (psycopg2 in this file).
        job_data: Mapping with the keys ``job_title``, ``company_name``,
            ``location``, ``date_posted``, ``job_description``,
            ``salary_range`` and ``url``.

    Errors are reported with ``print`` and not re-raised, so a caller looping
    over many listings keeps going after a bad row.
    """
    cursor = connection.cursor()
    try:
        cursor.execute("""
            INSERT INTO job_listings (job_title, company_name, location, date_posted, job_description, salary_range, url)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (job_title, company_name, location, date_posted) DO NOTHING;
        """, (
            job_data['job_title'],
            job_data['company_name'],
            job_data['location'],
            job_data['date_posted'],
            job_data['job_description'],
            job_data['salary_range'],
            job_data['url']
        ))
        connection.commit()
    except Exception as e:
        print(f"Error inserting data: {e}")
        # A failed statement leaves the transaction aborted; without a rollback
        # every later statement on this connection would fail as well.
        try:
            connection.rollback()
        except Exception as rollback_error:
            # Keep the best-effort contract even if the connection is unusable.
            print(f"Error rolling back transaction: {rollback_error}")
    finally:
        cursor.close()

# Insert scraped jobs into the database
connection = connect_db()
try:
    for job in job_listings:
        insert_job_listing(connection, job)
finally:
    # Always release the connection, even if the loop is interrupted or raises.
    connection.close()
