In [101]:
# Import necessary libraries
import time
from urllib.parse import quote

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Function to generate the search URL
def generate_url(job_title, location, page_number=1):
    """Build the Naukri search-results URL for a job title, location and page.

    Runs of whitespace in ``job_title`` and ``location`` are collapsed into a
    single hyphen (leading/trailing whitespace is dropped), and every character
    that is not URL-safe is percent-encoded. This keeps values such as
    ``"r&d engineer"`` from corrupting the path or injecting extra query
    parameters. Inputs made only of letters, digits, hyphens and single spaces
    produce exactly the same URL as before.

    Args:
        job_title: Search keyword, e.g. ``"data analyst"`` or ``"data-analyst"``.
        location: Place to search in, e.g. ``"mumbai"``.
        page_number: Page number embedded in the URL path (default 1).

    Returns:
        str: The search URL, with the encoded title and location used both in
        the path slug and in the ``k`` / ``l`` query parameters.
    """
    def encode(text):
        # split() with no argument drops leading/trailing whitespace and treats
        # any whitespace run as one separator; safe="" also escapes "/".
        return quote("-".join(text.split()), safe="")

    job_title_encoded = encode(job_title)
    location_encoded = encode(location)
    return f'https://www.naukri.com/{job_title_encoded}-jobs-in-{location_encoded}-{page_number}?k={job_title_encoded}&l={location_encoded}'
# Note: spaces are replaced with hyphens here. Websites differ in how they expect spaces to be encoded in URLs (commonly as hyphens ("-") or plus signs ("+")), so the right choice depends on how the target website processes its URL parameters.

In [None]:
# Function to scrape job listings from the current page
def scrape_jobs(driver, job_data):
    """Extract every job listing on the currently loaded results page.

    Each listing card (elements with class ``srp-jobtuple-wrapper``) becomes one
    dict appended to ``job_data``. A card without a title is skipped. Any other
    field that is absent from the card is stored as ``None``, so a single
    missing element (for example the salary or posted date) no longer discards
    the whole listing.

    Args:
        driver: Selenium WebDriver that has already loaded a results page.
        job_data: List that the extracted dicts are appended to, in place.

    Returns:
        The same ``job_data`` list, with one dict per extracted listing. Keys
        are 'Job Title', 'Company Name', 'Experience Required', 'Salary',
        'Location' and 'Posted'.
    """
    for listing in driver.find_elements(By.CLASS_NAME, 'srp-jobtuple-wrapper'):
        try:
            job_title = _listing_text(listing, 'title')
            if job_title is None:
                # Without a title the row is not useful; report it and move on.
                print("Error extracting job listing: title element not found")
                continue

            # Append the job data to the list
            job_data.append({
                'Job Title': job_title,
                'Company Name': _listing_text(listing, 'comp-name'),
                'Experience Required': _listing_text(listing, 'exp-wrap'),
                'Salary': _listing_text(listing, 'sal-wrap'),
                'Location': _listing_text(listing, 'loc-wrap'),
                'Posted': _listing_text(listing, 'job-post-day')
            })
        except Exception as e:
            # Best-effort scraping: an error on one card must not abort the
            # rest of the page, so report it and continue with the next card.
            print(f"Error extracting job listing: {e}")

    return job_data


def _listing_text(listing, class_name):
    """Return the stripped text of the first ``class_name`` element inside ``listing``, or ``None`` if there is none."""
    # find_elements returns an empty list instead of raising when nothing matches.
    matches = listing.find_elements(By.CLASS_NAME, class_name)
    return matches[0].text.strip() if matches else None

In [None]:
# Function to scrape job data for given job titles and locations
def scrape_job_data(job_titles, locations, max_pages=100, page_load_delay=5):
    """Scrape job listings for every combination of job title and location.

    Starts a Chrome WebDriver, then for each (job title, location) pair loads
    result pages 1..``max_pages`` in order and collects the listings with
    ``scrape_jobs``. Paging for a pair stops early at the first page that
    contains no listing cards. The browser is always shut down, even if an
    error interrupts the crawl.

    Args:
        job_titles: Iterable of job-title search terms.
        locations: Iterable of location search terms.
        max_pages: Maximum number of result pages to load per pair (default 100).
        page_load_delay: Seconds to wait after each page load before reading
            the page (default 5, the previously hard-coded value).

    Returns:
        list: One dict per scraped listing, across all pairs and pages.
    """
    # Initialize the WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    all_job_data = []

    try:
        for job_title in job_titles:
            for location in locations:
                print(f"Scraping jobs for {job_title} in {location}...")

                for page in range(1, max_pages + 1):
                    url = generate_url(job_title, location, page)
                    print(f"Scraping URL: {url}")
                    driver.get(url)
                    time.sleep(page_load_delay)  # Give the page time to render its listings

                    # If no jobs are found on the page, stop paging for this pair
                    if not driver.find_elements(By.CLASS_NAME, 'srp-jobtuple-wrapper'):
                        print(f"No more job listings found on page {page}. Stopping.")
                        break

                    all_job_data = scrape_jobs(driver, all_job_data)
    finally:
        # Close the WebDriver even when the crawl fails part-way, so no
        # browser process is left running.
        driver.quit()

    return all_job_data

In [None]:
# Search grid: every job title is combined with every location
# (3 titles x 3 locations = 9 searches), each limited to max_pages result pages.
job_titles = ['data-analyst', 'data-scientist', 'data-engineer']
locations = ['mumbai', 'pune', 'bangalore']
max_pages = 5

# Run the crawl; this drives a Chrome browser and returns a list of dicts,
# one per scraped listing.
job_data = scrape_job_data(job_titles, locations, max_pages)

# Convert the list to a pandas DataFrame
df = pd.DataFrame(job_data)

# Optionally save the DataFrame to a CSV file
# (index=False keeps the DataFrame's row index out of the file)
df.to_csv('naukri_job_listings.csv', index=False)

# Display the DataFrame
print(df)

In [100]:
# Load the saved CSV back from disk; as the last expression in the cell,
# the resulting DataFrame is rendered as the cell's output.
pd.read_csv('naukri_job_listings.csv')

Unnamed: 0,Job Title,Company Name,Experience Required,Salary,Location,Posted
0,Data Analyst,Airvin Skills,0 Yrs,Not disclosed,"Hybrid - Mumbai (All Areas), Hyderabad, Delhi ...",6 Days Ago
1,Data Analyst,Outsized,2-5 Yrs,Not disclosed,Mumbai (All Areas),2 Days Ago
2,Data Analyst,Morepen Laboratories,0-2 Yrs,2-5 Lacs PA,Mumbai (All Areas),9 Days Ago
3,Data Analyst,Fedelty Healthcare Pvt Ltd,0-2 Yrs,1-2.25 Lacs PA,Mumbai(Fort),10 Days Ago
4,Data Analyst,Pavan Laxmikant Giri,0-5 Yrs,5.5-12 Lacs PA,"Hybrid - Mumbai (All Areas), Hyderabad, Pune",1 Day Ago
...,...,...,...,...,...,...
295,Big Data Engineer,Hexaware Technologies,6-9 Yrs,Not disclosed,"Mumbai, Kolkata, New Delhi, Hyderabad, Pune, C...",Few Hours Ago
296,Data Engineer: Data Modeling,IBM,2-5 Yrs,Not disclosed,Navi Mumbai,9 Days Ago
297,Data Engineer,Accenture,5-7 Yrs,Not disclosed,Mumbai,11 Days Ago
298,Data Engineer,Accenture,3-5 Yrs,Not disclosed,Mumbai,11 Days Ago
