In [None]:
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementClickInterceptedException, ElementNotInteractableException, NoSuchElementException, TimeoutException

# Accumulators shared with the parsing step further down the script.
job_list = []
job_info = {}

# Search-results page to scrape (the query string carries fromAge=7).
target_url = "https://www.glassdoor.co.in/Job/india-data-engineer-jobs-SRCH_IL.0,5_IN115_KO6,19.htm?fromAge=7"

# CSS selector matching every <li> inside the job-list <ul>; used to detect
# that a click on "Show More Jobs" actually appended new listings.
JOB_CARD_SELECTOR = "ul.JobsList_jobsList__lqjTr li"

# Set up Selenium WebDriver.
# NOTE(review): the chromedriver path is machine-specific — adjust per host.
PATH = r'C:/chromiumtest/chromedriver-win64/chromedriver.exe'
service = Service(executable_path=PATH)
options = Options()
driver = webdriver.Chrome(service=service, options=options)

# Everything that talks to the browser lives inside the try so that the
# browser is always shut down, even when navigation itself fails.
try:
    # Open the target URL
    driver.get(target_url)
    driver.maximize_window()

    # Click "Show More Jobs" until the button can no longer be clicked
    while True:
        try:
            # Wait until the "Show More Jobs" button is clickable
            load_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "JobsList_buttonWrapper__ticwb"))
            )
            cards_before = len(driver.find_elements(By.CSS_SELECTOR, JOB_CARD_SELECTOR))
            load_more_button.click()

            # The click has taken effect once either the old button has been
            # detached from the DOM (stale) or additional listings appeared.
            # Checking both keeps the loop going when the button element is
            # reused between loads instead of being re-rendered.
            WebDriverWait(driver, 10).until(
                EC.any_of(
                    EC.staleness_of(load_more_button),
                    lambda d: len(d.find_elements(By.CSS_SELECTOR, JOB_CARD_SELECTOR)) > cards_before,
                )
            )
        except (NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException, TimeoutException):
            # Any of these ends pagination: the button is gone, covered,
            # not interactable, or nothing changed within the timeout.
            print("No more 'Show More Jobs' button found or it is not clickable.")
            break

    # Get the page source after all content is loaded
    page_source = driver.page_source

finally:
    driver.quit()

# Parse the fully-loaded page and export one CSV row per <li> in the job list.


def _text_or_none(node):
    """Return the stripped text of *node*, or None when the node is missing."""
    return node.text.strip() if node is not None else None


def _extract_skills(job):
    """Return the text following the last "Skills:" marker in the job snippet.

    Falls back to the placeholder "Skills not found" when the snippet element
    is absent or does not contain the marker.
    """
    snippet = job.find(class_="JobCard_jobDescriptionSnippet__yWW8q")
    if snippet is None:
        return "Skills not found"
    snippet_text = snippet.get_text()
    if "Skills:" not in snippet_text:
        return "Skills not found"
    return snippet_text.split("Skills:")[-1].strip()


def _extract_posted_date(job):
    """Return the listing-age text, or "Date not found" when it is absent."""
    age_node = job.find(class_="JobCard_listingAge__Ny_nG")
    if age_node is None:
        return "Date not found"
    return age_node.get_text().strip()


# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(page_source, 'html.parser')

# Find the container for all job listings
allJobsContainer = soup.find("ul", {"class": "JobsList_jobsList__lqjTr"})
if allJobsContainer is not None:
    # Find all individual job listings
    allJobs = allJobsContainer.find_all("li")

    # Build a fresh record per listing; missing elements become None (or the
    # textual placeholder for skills / posted date) instead of raising.
    for job in allJobs:
        job_list.append({
            "name-of-company": _text_or_none(
                job.find("div", {"class": "EmployerProfile_profileContainer__VjVBX"})
            ),
            "name-of-job": _text_or_none(
                job.find("a", {"class": "JobCard_jobTitle___7I6y"})
            ),
            "location": _text_or_none(
                job.find("div", {"class": "JobCard_location__rCz3x"})
            ),
            "salary": _text_or_none(
                job.find("div", {"class": "JobCard_salaryEstimate__arV5J"})
            ),
            "skills": _extract_skills(job),
            "posted-date": _extract_posted_date(job),
        })

    # Convert the list of dictionaries to a DataFrame and save it as a CSV file
    df = pd.DataFrame(job_list)
    df.to_csv('jobsweek1DE.csv', index=False, encoding='utf-8')

    print("Scraping completed. Data saved to jobsweek1DE.csv")
else:
    print("No job listings found.")
