In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from csv import writer
import time
def get_dom(url):
    driver.get(url)
    page_content = driver.page_source
    soup = BeautifulSoup(page_content, 'html.parser')
    return soup

def get_job_link(job):
    try:
        return job.select_one('h2 a')['href']
    except Exception:
        return 'Not available'

def get_job_title(job):
    try:
        return job.select_one('h2 a span').get_text()
    except Exception:
        return 'Not available'

def get_company_name(job):
    try:
        return job.select_one('span[data-testid="company-name"]').get_text()
    except Exception:
        return 'Not available'

def get_company_location(job):
    try:
        return job.select_one('div.company_location div[data-testid="text-location"]').get_text()
    except Exception:
        return 'Not available'

def get_salary(job):
    try:
        salary = job.select('span.estimated-salary span')
        if salary:
            return salary[0].get_text()
    except Exception:
        pass
    
    try:
        salary = job.select_one('div.metadata.salary-snippet-container div').get_text()
        return salary
    except Exception:
        return 'Not available'




# Set Chrome options if needed
chrome_options = Options()
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# Base URL for Indeed with placeholders
pagination_url = "https://in.indeed.com/jobs?l={}&sc=0kf%3A{}jt%28{}%29%3B&start={}"

# Parameters for job search
job_search_keyword = ['Software+Engineer']
location_search_keyword = ['Hyderabad']
job_type_keyword = ['internship']  # Job types: internship, new graduates
education_levels = ['attr%28HFDVW%29']  # Include bachelors or exclude (empty string for no bachelors)

# Open CSV file for writing
with open('indeed_jobs.csv', 'w', newline='', encoding='utf-8') as f:
    theWriter = writer(f)
    heading = ['job_link', 'job_title', 'company_name', 'company_location', 'salary']
    theWriter.writerow(heading)

    for job_keyword in job_search_keyword:
        for location_keyword in location_search_keyword:
            for job_type in job_type_keyword:
                for education_level in education_levels:
                    all_jobs = []
                    for page_no in range(0, 10, 10):
                        # Format the URL dynamically
                        url = pagination_url.format(location_keyword, education_level, job_type, page_no)
                        print(f"Scraping page: {url}")
                        
                        page_dom = get_dom(url)
                        jobs = page_dom.select('div.job_seen_beacon')
                        all_jobs += jobs

                    for job in all_jobs:
                        job_link = "https://in.indeed.com" + get_job_link(job)
                        job_title = get_job_title(job)
                        company_name = get_company_name(job)
                        company_location = get_company_location(job)
                        salary = get_salary(job)

                        # Scrape job details
                       

                        # Write job data to CSV
                        record = [job_link, job_title, company_name, company_location, salary]
                        theWriter.writerow(record)
                        print(f"Saved job: {job_title}")

# Close the web browser
driver.quit()



Scraping page: https://in.indeed.com/jobs?l=Hyderabad&sc=0kf%3Aattr%28HFDVW%29jt%28internship%29%3B&start=0
Saved job: Intern
Saved job: Content Writer
Saved job: Interns: Open to Fresh Graduates from Any Discipline
Saved job: Internship For IT Students
Saved job: Civil - Internship
Saved job: Intern-to-Hire
Saved job: Web Designer
Saved job: Audit Assistant
Saved job: Student Social Media Influencer
Saved job: Finance- APAY Intern (2025 Graduates Only)
Saved job: 6 Months or 1 Year Internship in Pharmacovigilance (Paid)
Saved job: Intern (Infrastructure)
Saved job: Intern Interior Designer
Saved job: Digital Marketing Training and Intern-Online-Clickcrazedigital
Saved job: UI/UX Designer
