In [31]:
import time
from urllib.parse import urlencode

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Browser identity for the scraper. Only the "User-Agent" entry is read; it is
# passed to Chrome as a `user-agent=` command-line argument when a driver is
# configured.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}
# Base URL for each supported job site, keyed by the `site` name that
# `extract_site` accepts. Only "indeed" is defined.
URLS = {
    "indeed": "https://ie.indeed.com"
}

def extract_site(site: str, skill_name: str, location="Ireland", num_page=0) -> BeautifulSoup:
    """Load one search-results page in headless Chrome and return it parsed.

    Args:
        site: Key into ``URLS`` naming the job site. Only ``"indeed"`` is
            supported.
        skill_name: Search keywords, sent as the ``q`` query parameter.
        location: Search location, sent as the ``l`` query parameter.
        num_page: Zero-based results page; sent as ``start = num_page * 10``.

    Returns:
        The rendered page source parsed with ``html.parser``.

    Raises:
        ValueError: If ``site`` is not a supported site.
    """
    # Reject unknown sites before paying for a browser launch; previously an
    # empty URL was handed to the driver.
    if site != "indeed":
        raise ValueError(f"Unsupported site: {site!r}")

    # urlencode escapes reserved characters (e.g. "C++", "R&D") and turns
    # spaces into '+', for both the keywords and the location.
    query = urlencode({"q": skill_name, "l": location, "start": num_page * 10})
    url = f"{URLS[site]}/jobs?{query}"

    options = Options()
    options.add_argument('--headless')
    options.add_argument(f"user-agent={HEADERS['User-Agent']}")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(5)  # Let the page load (adjust this time according to your needs)
        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always shut the browser down, even if navigation or parsing fails.
        driver.quit()

def extract_job_description_and_company(driver, job_link):
    """Open a job posting and pull out its description and company name.

    Args:
        driver: Object exposing ``get(url)`` and ``page_source``; it is
            navigated to ``job_link`` and left open for the caller to close.
        job_link: URL of the job posting page.

    Returns:
        A ``(description, company_name)`` tuple. Either element is ``"N/A"``
        when the corresponding element is not found on the page.
    """
    driver.get(job_link)
    time.sleep(5)  # Let the page load
    page = BeautifulSoup(driver.page_source, "html.parser")

    # Start from the fallback and overwrite only when the element exists.
    description = "N/A"
    description_node = page.find("div", {"id": "jobDescriptionText"})
    if description_node:
        description = description_node.text.strip()

    company = "N/A"
    company_link = page.select_one('[data-testid="inlineHeader-companyName"] span a')
    if company_link:
        company = company_link.text

    return description, company

def _create_headless_driver():
    """Start a headless Chrome session that sends the shared User-Agent."""
    options = Options()
    options.add_argument('--headless')
    options.add_argument(f"user-agent={HEADERS['User-Agent']}")
    return webdriver.Chrome(options=options)


def scrape_jobs(skill_name, location, num_pages=1):
    """Collect job postings from the Indeed search results.

    For each results page, every job card carrying a ``data-jk`` id is
    followed to its posting page to fetch the description and company name.

    Args:
        skill_name: Search keywords passed to ``extract_site``.
        location: Search location passed to ``extract_site``.
        num_pages: Number of result pages to walk, starting at page 0.

    Returns:
        A list of dicts with the keys ``'Job ID'``, ``'Job Title'``,
        ``'Company'``, ``'Description'``, ``'Link'`` and ``'Location'``.
        Fields that cannot be found on the page are ``"N/A"``.
    """
    job_data = []
    # One browser is shared by all job-detail pages instead of launching a new
    # Chrome per posting. It is started lazily so nothing is launched when no
    # job cards are found.
    driver = None

    try:
        for page in range(num_pages):
            soup = extract_site(site="indeed", skill_name=skill_name, location=location, num_page=page)
            job_cards_div = soup.find("div", attrs={"id": "mosaic-provider-jobcards"})
            if not job_cards_div:
                print("No job cards found on this page.")
                continue

            # NOTE(review): these CSS class names look auto-generated and may
            # change when the site is redeployed - confirm they still match.
            for job in job_cards_div.find_all("li", class_="css-5lfssm eu4oa1w0"):
                job_link_elem = job.find('a')
                job_id = job_link_elem.get('data-jk') if job_link_elem else None
                if not job_id:
                    # List items without a job id are not job cards; skip them.
                    continue

                job_title_elem = job.find("h2", class_="jobTitle")
                job_title = job_title_elem.text.strip() if job_title_elem else "N/A"
                job_location_elem = job.find("div", class_="companyLocation")
                job_location = job_location_elem.text.strip() if job_location_elem else "N/A"
                job_link = f"{URLS['indeed']}/viewjob?jk={job_id}"

                if driver is None:
                    driver = _create_headless_driver()
                job_description, company_name = extract_job_description_and_company(driver, job_link)

                job_data.append({
                    'Job ID': job_id,
                    'Job Title': job_title,
                    'Company': company_name,
                    'Description': job_description,
                    'Link': job_link,
                    # Previously extracted but dropped; appended last so the
                    # existing columns keep their order.
                    'Location': job_location,
                })
    finally:
        # Close the shared browser even if a page fetch raises part-way through.
        if driver is not None:
            driver.quit()

    return job_data

if __name__ == "__main__":
    # Search parameters for this run.
    skill_name = "Business Analyst"
    location = "Ireland"
    num_pages = 5

    # Scrape the postings; each record is a dict describing one job.
    scraped_data = scrape_jobs(skill_name, location, num_pages)

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(scraped_data)

    # Name the CSV after the skill searched (still 'Business Analyst.csv' for
    # the skill above) so the file and the message below cannot drift apart.
    output_path = f"{skill_name}.csv"
    df.to_csv(output_path, index=False)

    # Report the file actually written; the old message named a different file.
    print(f"Data has been scraped and saved to '{output_path}'")


{'Job ID': '177617785dd8218a', 'Job Title': 'Trainee Designer Menswear Clothing/Graphics', 'Company': 'Penneys', 'Description': 'Trainee Designer Menswear Clothing/Graphics\n\n Better Looking Business\n\n It’s an exciting time to join us as a Trainee Designer on Primark’s global Buying, Merchandising and Design team. Primark is stronger than ever, and our city-centre Head Office is where you can play your part in shaping a better looking business. A business that’s bringing ethical fashion and high-profile brand partnerships together, to share the latest trends with the world.\n \n Are you a Menswear Designer with graphic skills, who wants to be a part of our growing company?\n \n As a Trainee Designer at Primark, you’ll play a key role in supporting the Design and Buying teams in researching and designing commercial ranges, including trend, colour, graphic and shape direction. Working closely with the buyers and designers, this is a fantastic opportunity for you to learn about the ind