### Task 2

Scrape job listings from https://realpython.github.io/fake-jobs and store them in an SQLite database.

In [16]:
import sqlite3
import requests
from bs4 import BeautifulSoup
import csv

# 1. Create SQLite database and table
def create_db(db_path='job_listing.db'):
    """Create the ``jobs`` table in the SQLite database if it is missing.

    The table has four TEXT columns (job_title, company_name, location,
    application_link) with a composite primary key over the first three.
    Calling this repeatedly is harmless because of ``IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite database file to open (created by
            SQLite if it does not exist yet).
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back on an exception;
        # it does NOT close the connection, hence the explicit finally.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS jobs (
                    job_title TEXT,
                    company_name TEXT,
                    location TEXT,
                    application_link TEXT,
                    PRIMARY KEY (job_title, company_name, location)
                )
            ''')
    finally:
        conn.close()

# 2. Scrape job listings 
def scrape_jobs(url='https://realpython.github.io/fake-jobs/', timeout=10):
    """Download the job board page and extract one tuple per job card.

    Args:
        url: Page to scrape.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(job_title, company_name, location, application_link)``
        tuples. ``application_link`` is ``''`` when no link is found.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero jobs.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, 'html.parser')

    job_listings = []

    for job in soup.find_all('div', class_='card-content'):
        title_tag = job.find('h2')
        company_tag = job.find('h3')
        location_tag = job.find('p', class_='location')

        # These three fields form the table's primary key; a card missing
        # any of them cannot be stored meaningfully, so skip it rather
        # than crash on `None.text`.
        if title_tag is None or company_tag is None or location_tag is None:
            continue

        # Prefer an anchor styled as a button; otherwise fall back to the
        # anchor whose visible label is "Apply".
        # NOTE(review): the live page appears to use class
        # "card-footer-item" for its links, not "button" — confirm.
        link_tag = job.find('a', class_='button')
        if link_tag is None:
            link_tag = next(
                (
                    anchor
                    for anchor in job.find_all('a')
                    if anchor.get_text(strip=True).lower() == 'apply'
                ),
                None,
            )
        # .get avoids a KeyError on an anchor without an href attribute.
        application_link = link_tag.get('href', '') if link_tag is not None else ''

        job_listings.append((
            title_tag.text.strip(),
            company_tag.text.strip(),
            location_tag.text.strip(),
            application_link,
        ))

    return job_listings

# 3. Insert job listings into SQLite database (with incremental load)
def insert_jobs(job_listings, db_path='job_listing.db'):
    """Upsert scraped job rows into the ``jobs`` table.

    Rows are written with ``INSERT OR REPLACE``: a row whose
    (job_title, company_name, location) key already exists is overwritten,
    so re-running the scraper refreshes links without creating duplicates.

    The whole batch runs in one transaction: either every row is written
    or, if any statement fails, none are. No IntegrityError is swallowed;
    with OR REPLACE a key conflict replaces the row instead of raising.

    Args:
        job_listings: Iterable of 4-item sequences
            ``(job_title, company_name, location, application_link)``.
        db_path: Path of the SQLite database file; the ``jobs`` table must
            already exist.
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success / rolls back on error but does
        # not close the connection, hence the explicit finally.
        with conn:
            conn.executemany('''
                INSERT OR REPLACE INTO jobs (job_title, company_name, location, application_link)
                VALUES (?, ?, ?, ?)
            ''', job_listings)
    finally:
        conn.close()


# 4. Filter jobs by location and/or company name
def filter_jobs(location=None, company_name=None, db_path='job_listing.db'):
    """Return stored jobs, optionally restricted by exact-match filters.

    Args:
        location: If truthy, keep only rows whose location equals it.
        company_name: If truthy, keep only rows whose company_name equals it.
            When both filters are given, a row must satisfy both.
        db_path: Path of the SQLite database file to read.

    Returns:
        A list of ``(job_title, company_name, location, application_link)``
        tuples; every stored row when no filter is given.
    """
    # Name the columns explicitly so the tuple layout does not depend on
    # how the table happens to be defined.
    query = 'SELECT job_title, company_name, location, application_link FROM jobs'

    conditions = []
    params = []
    if location:
        conditions.append('location = ?')
        params.append(location)
    if company_name:
        conditions.append('company_name = ?')
        params.append(company_name)
    if conditions:
        query += ' WHERE ' + ' AND '.join(conditions)

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(query, params).fetchall()
    finally:
        # Close even if the query fails (e.g. the table does not exist).
        conn.close()


# 5. Export to CSV
def export_to_csv(jobs, filename='filtered_jobs.csv'):
    """Write job rows to a UTF-8 CSV file under a fixed header row.

    Args:
        jobs: Iterable of row sequences, written in iteration order.
        filename: Destination path; an existing file is overwritten.
    """
    header = ['Job Title', 'Company Name', 'Location', 'Application Link']

    # newline='' leaves line-ending handling to the csv module.
    with open(filename, mode='w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        for row in jobs:
            csv_out.writerow(row)


# Main function
def main():
    """Run the pipeline: create the table, scrape, store, query, export.

    Prints how many rows were exported, or a notice when the query
    returned nothing (in which case no CSV is written).
    """
    # Make sure the table exists before anything is written to it
    create_db()

    # Scrape the listings and upsert them into the database
    insert_jobs(scrape_jobs())

    # No filter arguments are passed, so this fetches every stored job
    matches = filter_jobs()
    if not matches:
        print("No jobs found for the given filter.")
        return

    export_to_csv(matches)
    print(f"{len(matches)} jobs exported to 'filtered_jobs.csv'.")


# Run the pipeline only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()


100 jobs exported to 'filtered_jobs.csv'.
