In [1]:
!pip install selenium pandas openpyxl webdriver-manager




[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
#imports
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


In [3]:
# Initialize Selenium Chrome Driver
def get_driver():
    """Create and return a Chrome WebDriver session.

    The chromedriver binary is resolved through ``ChromeDriverManager().install()``
    and the browser window is resized to 1200x900 before the driver is returned.

    Returns:
        The ``webdriver.Chrome`` instance; the caller owns it.
    """
    chrome_options = Options()
    # Flags are passed straight to Chrome.
    # NOTE(review): presumably the first one hides the "automation controlled"
    # marker and the second one quiets Chrome's console logging — confirm.
    for flag in ("--disable-blink-features=AutomationControlled", "--log-level=3"):
        chrome_options.add_argument(flag)

    chromedriver_path = ChromeDriverManager().install()
    browser = webdriver.Chrome(service=Service(chromedriver_path), options=chrome_options)
    browser.set_window_size(1200, 900)
    return browser

# Start one browser session for the notebook. scrape_jobs() reads this
# module-level `driver`, so this cell has to run before any scraping cell.
# NOTE(review): no driver.quit() is visible in this part of the notebook —
# confirm the session is closed once scraping is done.
driver = get_driver()
print("✅ Chrome driver ready!")


✅ Chrome driver ready!


In [4]:
# Role slugs to scrape, in scraping order. scrape_jobs() interpolates each slug
# into the listing URL and title-cases it to fill the "Role" column.
roles = [
    "data-analyst", "data-scientist", "business-analyst",
    "machine-learning-engineer", "python-developer", "data-engineer",
]

def _text_or_na(card, selector):
    """Return the text of the first element in ``card`` matching ``selector``, or "N/A"."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except WebDriverException:
        # Only Selenium failures (missing element, stale card, ...) fall back
        # to the placeholder; KeyboardInterrupt and real bugs still propagate.
        return "N/A"


def _skills_or_na(card):
    """Return the card's skill tags joined with ", ", or "N/A" when there are none."""
    try:
        tags = [tag.text for tag in card.find_elements(By.CSS_SELECTOR, "ul.tags-gt li")]
    except WebDriverException:
        return "N/A"
    # find_elements returns an empty list instead of raising, so the empty
    # case has to be mapped to the placeholder explicitly.
    return ", ".join(tags) or "N/A"


def scrape_jobs(role, num_pages=3):  # you can increase num_pages later
    """Scrape Naukri listing pages for one role and return the job cards as dicts.

    Uses the module-level ``driver`` to open pages 1..``num_pages`` of
    ``https://www.naukri.com/{role}-jobs-in-india-{page}`` and reads every
    ``div.cust-job-tuple`` card found on each page.

    Args:
        role: Hyphenated role slug used in the URL, e.g. ``"data-analyst"``.
        num_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        A list of dicts with the keys "Role", "Job Title", "Company",
        "Location", "Experience", "Salary", "Skills" and "Posted Date".
        A field Selenium cannot read is stored as "N/A". Pages on which no
        cards appear within the wait are reported and skipped.
    """
    all_jobs = []
    role_label = role.replace("-", " ").title()

    for page in range(1, num_pages + 1):
        url = f"https://www.naukri.com/{role}-jobs-in-india-{page}"
        # Progress line matching the output recorded in the notebook.
        print(f"🔍 Scraping {role} - Page {page} ...")
        driver.get(url)
        # Fixed pause after navigation, kept from the original cell; the
        # explicit wait below is what actually checks that cards are present.
        time.sleep(5)

        try:
            job_cards = WebDriverWait(driver, 15).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.cust-job-tuple"))
            )
        except WebDriverException:
            # Covers the wait timing out as well as other Selenium failures,
            # so one bad page does not abort the rest of the run.
            print(f"⚠️ No jobs found for {role} page {page}")
            continue

        for job in job_cards:
            all_jobs.append({
                "Role": role_label,
                "Job Title": _text_or_na(job, "a.title"),
                "Company": _text_or_na(job, "a.comp-name"),
                "Location": _text_or_na(job, "span.locWdth"),
                "Experience": _text_or_na(job, "span.expwdth"),
                "Salary": _text_or_na(job, "span.salary, span.sal-wrap"),
                "Skills": _skills_or_na(job),
                "Posted Date": _text_or_na(job, "span.job-post-day"),
            })

        print(f"✅ {role} - Page {page} scraped")

    return all_jobs


In [5]:
# Scrape every role in `roles` and pool the per-role records into one list.
# 🔧 Raise num_pages to collect more data per role.
all_jobs_data = []

for role in roles:
    jobs = scrape_jobs(role, num_pages=10)
    all_jobs_data += jobs

# One row per scraped job card.
df = pd.DataFrame(all_jobs_data)
print("✅ Scraping finished. Total records:", df.shape[0])


🔍 Scraping data-analyst - Page 1 ...
✅ data-analyst - Page 1 scraped
🔍 Scraping data-analyst - Page 2 ...
✅ data-analyst - Page 2 scraped
🔍 Scraping data-analyst - Page 3 ...
✅ data-analyst - Page 3 scraped
🔍 Scraping data-analyst - Page 4 ...
✅ data-analyst - Page 4 scraped
🔍 Scraping data-analyst - Page 5 ...
✅ data-analyst - Page 5 scraped
🔍 Scraping data-analyst - Page 6 ...
✅ data-analyst - Page 6 scraped
🔍 Scraping data-analyst - Page 7 ...
✅ data-analyst - Page 7 scraped
🔍 Scraping data-analyst - Page 8 ...
✅ data-analyst - Page 8 scraped
🔍 Scraping data-analyst - Page 9 ...
✅ data-analyst - Page 9 scraped
🔍 Scraping data-analyst - Page 10 ...
✅ data-analyst - Page 10 scraped
🔍 Scraping data-scientist - Page 1 ...
✅ data-scientist - Page 1 scraped
🔍 Scraping data-scientist - Page 2 ...
✅ data-scientist - Page 2 scraped
🔍 Scraping data-scientist - Page 3 ...
✅ data-scientist - Page 3 scraped
🔍 Scraping data-scientist - Page 4 ...
✅ data-scientist - Page 4 scraped
🔍 Scraping data-

In [6]:
# Preview the first 20 rows to sanity-check the scraped columns.
df.head(20)


Unnamed: 0,Role,Job Title,Company,Location,Experience,Salary,Skills,Posted Date
0,Data Analyst,Client Data Analyst,JPMorgan Chase Bank,Bengaluru,1-7 Yrs,,"interviewing, service operations, data analysi...",1 day ago
1,Data Analyst,Client Data Analyst,JPMorgan Chase Bank,Bengaluru,1-7 Yrs,,"data analysis, data analytics, business requir...",1 day ago
2,Data Analyst,Data Analyst,Tata Consultancy Services,Chennai,3-8 Yrs,,"Data Analysis, Sales Report, Power Bi, Advance...",1 week ago
3,Data Analyst,Client Data Analyst,JPMorgan Chase Bank,Bengaluru,1-7 Yrs,,"Operational risk, Compliance, Data collection,...",1 day ago
4,Data Analyst,Data Analyst,Zero Gravity Photography,Chennai,0-2 Yrs,,"Usage, Business reporting, Data Analyst, Manag...",1 day ago
5,Data Analyst,Data Analyst,Convegenius Edu,Jaipur,0-3 Yrs,,"database maintenance, python, data analytics, ...",2 days ago
6,Data Analyst,Data Analyst,Leading Client,Bengaluru,2-5 Yrs,,"python, sql, pandas, tableau, basic sql, chart...",2 days ago
7,Data Analyst,Company Data Analyst,MSCI Services,Mumbai,1-4 Yrs,,"database management, python, data analysis, da...",1 day ago
8,Data Analyst,Opening For data analyst SME / TM @Hyderabad,Cognizant,Hyderabad,3-6 Yrs,,"Data Analytics, Power Bi, SAS, VBA, mis, Snowf...",6 days ago
9,Data Analyst,Data Analyst,Cloudtara Technologies,Bengaluru,1-3 Yrs,,"database maintenance, python, data analytics, ...",1 day ago


In [7]:
# Destination of the raw scrape.
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable project/data directory instead.
raw_csv_path = Path(r"D:\PROJECTS\Naukri_Job_Trends\data\raw\naukri_jobs_raw.csv")

# Create the target folder if it is missing so the export also works on a fresh checkout.
raw_csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(raw_csv_path, index=False)

# Print the real destination so the message cannot drift from the file name.
print(f"📁 Data saved to {raw_csv_path}")


📁 Data saved to data/raw/naukri_jobs.csv
