In [1]:
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

def get_driver():
    """Build and return a Chrome WebDriver instance used for scraping.

    The browser is started with a fixed set of command-line flags and the
    chromedriver binary path is obtained from ``ChromeDriverManager().install()``.

    Returns:
        The ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_flags = (
        "--start-maximized",  # open the window maximized
        "--disable-blink-features=AutomationControlled",  # intended to reduce bot detection
        "--disable-popup-blocking",
        "--disable-notifications",
    )

    chrome_options = Options()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=chrome_options)

_INDEED_BASE_URL = "https://in.indeed.com"
_CSV_COLUMNS = ["Title", "Company", "Job Location", "Salary", "Link"]


def _first_text(card, selector):
    """Return the stripped text of the first element matching *selector* inside *card*, or None."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        return None


def _split_company_location(raw_text):
    """Split the newline-separated company/location text into ``(company, location)``."""
    lines = raw_text.split("\n")
    if len(lines) > 1:
        return lines[0].strip(), lines[1].strip()
    # Only one line is present: it is treated as the location, company unknown.
    return "N/A", lines[0]


def _job_link(card):
    """Return the absolute URL of the first anchor inside *card*, or "N/A" if unavailable."""
    try:
        href = card.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
    except (NoSuchElementException, StaleElementReferenceException):
        return "N/A"
    if not href:
        # get_attribute returns None when the anchor has no href.
        return "N/A"
    return href if href.startswith("http") else f"{_INDEED_BASE_URL}{href}"


def _parse_job_card(card):
    """Extract one CSV row (a dict keyed by the CSV column names) from a job card element."""
    title = _first_text(card, "h2.jobTitle")

    company_location = _first_text(card, "div.company_location")
    if company_location is None:
        company, job_location = "N/A", "N/A"
    else:
        company, job_location = _split_company_location(company_location)

    salary = _first_text(card, "div.metadata.salary-snippet-container")

    return {
        "Title": "N/A" if title is None else title,
        "Company": company,
        "Job Location": job_location,
        # NOTE(review): a missing salary snippet is recorded as "Unpaid", although it
        # probably only means the salary is not listed. Value kept so existing CSV
        # consumers see the same data.
        "Salary": "Unpaid" if salary is None else salary,
        "Link": _job_link(card),
    }


def scrape_indeed_jobs(
    job_title: str,
    search_location: str,
    num_pages: int = 3,
    output_path: str = "indeed_jobs.csv",
) -> None:
    """Scrape Indeed (in.indeed.com) search results and save them to a CSV file.

    Opens a browser via ``get_driver()``, loads ``num_pages`` result pages
    (10 results per ``start`` offset), reads every ``div.job_seen_beacon`` card
    and writes the collected rows to ``output_path``.

    Args:
        job_title: Search query for the job title.
        search_location: Location to search in.
        num_pages: Number of result pages to fetch.
        output_path: Destination CSV path. Defaults to ``"indeed_jobs.csv"``.

    Raises:
        Any exception raised while loading a page propagates to the caller;
        the browser is always closed first and no CSV is written in that case.
    """
    # URL-encode the query so characters such as ",", "&" or "#" cannot break
    # or truncate the query string.
    query = quote_plus(job_title)
    location = quote_plus(search_location)

    job_data = []
    driver = get_driver()
    try:
        for page in range(num_pages):
            print(f"Fetching page {page + 1}...")
            driver.get(f"{_INDEED_BASE_URL}/jobs?q={query}&l={location}&start={page * 10}")

            time.sleep(5)  # Allow page to load

            job_cards = driver.find_elements(By.CSS_SELECTOR, "div.job_seen_beacon")
            job_data.extend(_parse_job_card(card) for card in job_cards)

            print(f"✅ Scraped {len(job_data)} jobs so far.")
    finally:
        # Always release the browser, even when a page load or parse fails.
        driver.quit()

    # Fixed columns keep the CSV header present even when nothing was scraped.
    df = pd.DataFrame(job_data, columns=_CSV_COLUMNS)
    df.to_csv(output_path, index=False)
    print(f"✅ Job data saved to '{output_path}'")

# Example usage: runs when executed directly (script or notebook cell),
# but not when this module is imported from elsewhere.
if __name__ == "__main__":
    scrape_indeed_jobs("Data Scientist", "New Delhi, Delhi", num_pages=6)


Fetching page 1...
✅ Scraped 15 jobs so far.
Fetching page 2...
✅ Scraped 30 jobs so far.
Fetching page 3...
✅ Scraped 45 jobs so far.
Fetching page 4...
✅ Scraped 52 jobs so far.
Fetching page 5...
✅ Scraped 67 jobs so far.
Fetching page 6...
✅ Scraped 82 jobs so far.
✅ Job data saved to 'indeed_jobs_updated.csv'
