In [None]:
import json
import os
import re
import time
from datetime import datetime
from getpass import getpass
from urllib.parse import quote, urlencode, urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
#from selenium.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Collect the LinkedIn credentials and the search parameters from the user.
# Surrounding whitespace is stripped so a stray trailing space does not end up
# in the login form, the search URL or the output directory name.
email_id = input("Enter your LinkedIn email: ").strip()
# getpass() reads the password without echoing it, so it is not left visible
# in the terminal scroll-back or in saved notebook output.
password_input = getpass("Enter your LinkedIn password: ")
designation = input("Enter the job designation: ").strip()
location = input("Enter the job location: ").strip()
# int() raises ValueError when the answer is not a whole number.
num_pages = int(input("Enter the number of pages to scrape: "))

# Start a Chrome session and open the LinkedIn landing page.
driver = webdriver.Chrome()
driver.get("https://www.linkedin.com/")
driver.maximize_window()

print("\033[1mOpening LinkedIn and initiating login...\033[0m")

# One explicit wait (20 s timeout) shared by every lookup in the login flow.
login_wait = WebDriverWait(driver, 20)

# Open the sign-in form from the landing page.
sign_in = login_wait.until(
    EC.element_to_be_clickable((By.XPATH, "/html/body/main/section[1]/div/div/a"))
)
sign_in.click()

email_element = login_wait.until(EC.visibility_of_element_located((By.NAME, "session_key")))
password_element = login_wait.until(EC.visibility_of_element_located((By.NAME, "session_password")))

# Clear any pre-filled value before typing the credentials.
email_element.clear()
email_element.send_keys(email_id)
password_element.clear()
password_element.send_keys(password_input)

login = login_wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "btn__primary--large")))
login.click()

# NOTE(review): this is printed as soon as the form is submitted; nothing here
# verifies that LinkedIn actually accepted the credentials.
print("\033[1mLogin successful!\033[0m")

# Navigate to Jobs section
# Fixed pause after submitting the form, presumably to let the post-login page load.
time.sleep(5)
try:
    jobs_link = login_wait.until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[6]/header/div/nav/ul/li[3]/a")))
    jobs_link.click()
except (TimeoutException, ElementClickInterceptedException) as e:
    # The absolute XPath breaks whenever the page layout shifts. The job search
    # that follows loads its URL directly, so failing to click the nav link is
    # reported but does not abort the run.
    print(f"Could not open the Jobs section from the navigation bar: {e}")
else:
    print("\033[1mNavigated to Jobs section.\033[0m")

# Construct the job search URL
base_url = "https://www.linkedin.com/jobs/search/"
# urlencode(..., quote_via=quote) percent-encodes every reserved character
# (&, #, +, ?, /, ...) in the user-supplied values, not only spaces, and still
# writes a space as %20. A designation such as "R&D Engineer" or "C# developer"
# therefore no longer corrupts the query string.
search_query = urlencode({"keywords": designation, "location": location}, quote_via=quote)
search_url = f"{base_url}?{search_query}"
driver.get(search_url)

print(f"\033[1mSearching for '{designation}' jobs in '{location}'...\033[0m")

# List to store job links
job_desc_links = []

for page in range(num_pages):
    print(f"\033[1mScraping Page {page + 1}...\033[0m")

    # Scrape job links from the current page
    try:
        # Wait until the results list container is in the DOM, then parse a
        # snapshot of the page with BeautifulSoup.
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, "scaffold-layout__list-container")))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        list_items = soup.find_all('li', class_="jobs-search-results__list-item")

        for item in list_items:
            # The first anchor with an href inside the list item is taken as the job link.
            link = item.find('a', href=True)
            if link:
                # urljoin resolves relative hrefs against the site root and
                # leaves already-absolute hrefs untouched; plain concatenation
                # would produce "https://www.linkedin.comhttps://...".
                job_desc_links.append(urljoin("https://www.linkedin.com", link['href']))

    except Exception as e:
        # Best-effort scrape: report the problem and keep whatever was collected so far.
        print(f"Error finding job links on page {page + 1}: {e}")
        break

    # The last requested page has been scraped; do not load a further page
    # whose results would never be read.
    if page + 1 >= num_pages:
        break

    # Click on the pagination button for the next page
    try:
        # Pagination buttons are located by their 1-based aria-label; `page`
        # is 0-based, so the next page is page + 2.
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, f"//button[@aria-label='Page {page + 2}']"))
        )
        next_button.click()
        time.sleep(2)  # Small delay to allow the page to load
    except (NoSuchElementException, TimeoutException, ElementClickInterceptedException) as e:
        print(f"Error navigating to the next page: {e}")
        break

print(f"\033[1mTotal job links extracted: {len(job_desc_links)}\033[0m")

# Scraping job details
# Each collected link is opened in the browser, parsed with BeautifulSoup, and
# turned into one dict appended to job_data. A failure on one link is reported
# and the loop carries on with the next link.
job_data = []

try:
    for link in job_desc_links:
        try:
            driver.get(link)
            time.sleep(5)  # Fixed pause before the page source is read
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # Extract job title
            div_element = soup.find('div', class_="t-24 job-details-jobs-unified-top-card__job-title")
            job_title = div_element.get_text(strip=True) if div_element else "Title not provided"

            # Extract company name
            company_div = soup.find('div', class_='job-details-jobs-unified-top-card__company-name')
            company_name = company_div.get_text(strip=True) if company_div else "Company not provided"

            # Extract location and date of posting
            loc_date_div = soup.find('div', class_='job-details-jobs-unified-top-card__primary-description-container')
            loc_date_span = loc_date_div.find_all('span', class_='tvm__text tvm__text--low-emphasis') if loc_date_div else []

            # The first two spans are joined into the location and the rest
            # into the posting date; the middle-dot separator (U+00B7) is removed.
            span_texts = [span.get_text(strip=True).replace("\u00b7", "") for span in loc_date_span]
            job_location = " ".join(span_texts[:2]) or "Location not provided"
            post_date = " ".join(span_texts[2:]) or "Date not provided"

            # Extract work type
            work_type_button = soup.find('button', class_='job-details-preferences-and-skills')
            # The rupee sign (U+20B9) is written out as "INR".
            work_types = work_type_button.get_text(strip=True).replace("\u20b9", "INR") if work_type_button else "Work types not provided"

            job_details = {
                "Job Title": job_title,
                "Company": company_name,
                "Portal Link": "https://www.linkedin.com/",
                "Job Listing Link": link,
                "Location": job_location,
                "Date of Posting": post_date,
                "Work Type": work_types,
            }

            job_data.append(job_details)
            print(f"\033[1mJob details extracted for: {job_title} at {company_name}\033[0m")

        except Exception as e:
            # Best-effort per link: report the failure and continue with the next one.
            print(f"Error scraping job details from {link}: {e}")
finally:
    # The browser is not needed once the detail pages have been read. quit()
    # closes the window and ends the chromedriver process, even when the loop
    # is interrupted, so no orphaned browser is left running.
    driver.quit()

# Save JSON data
# Every scraped job is written to its own JSON file inside a timestamped
# directory under the current working directory.

# Characters that are path separators or are rejected in file names on common
# platforms; each one is replaced by an underscore.
_UNSAFE_PATH_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _file_stem(job):
    """Return the '<Company>_<Location>' file stem for *job*, safe to use as a file name."""
    raw_stem = f"{job['Company']}_{job['Location']}".replace(' ', '_')
    return _UNSAFE_PATH_CHARS.sub('_', raw_stem)


# Sanitize the user-supplied parts so a designation such as "C/C++ developer"
# does not create nested directories or an invalid path.
output_dir_name = _UNSAFE_PATH_CHARS.sub('_', f"{designation}_{location}")
output_dir = os.path.join(os.getcwd(), f"{output_dir_name}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}")
os.makedirs(output_dir, exist_ok=True)

used_file_names = set()  # lower-cased, so case-insensitive file systems are covered too
saved_count = 0

for job in job_data:
    # Several postings can share the same company and location. Later ones get
    # a numeric suffix (_2, _3, ...) instead of overwriting the earlier file.
    stem = _file_stem(job)
    file_name = f"{stem}.json"
    suffix = 2
    while file_name.lower() in used_file_names:
        file_name = f"{stem}_{suffix}.json"
        suffix += 1
    used_file_names.add(file_name.lower())

    try:
        file_path = os.path.join(output_dir, file_name)
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(job, file, indent=4)
    except OSError as e:
        print(f"Error saving job data: {e}")
    else:
        saved_count += 1
        print(f"\033[1mJob details saved for: {job['Job Title']} at {job['Company']}\033[0m")

# Only claim full success when every job was actually written.
if saved_count == len(job_data):
    print("\033[1mAll job details saved successfully!\033[0m")
else:
    print(f"\033[1mSaved {saved_count} of {len(job_data)} job details; see the errors above.\033[0m")
