# Indeed web scraping

In [None]:
# Required Libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time

In [None]:
# Search-results URL (query "data analyst", location "India").
URL = "https://www.indeed.com/jobs?q=data+analyst&l=India"

# Browser-like request headers (a desktop Chrome User-Agent and an English
# Accept-Language); presumably sent so the request is not served as if it
# came from a script -- confirm against the site's current behaviour.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.5"
}

# Send the request. An explicit timeout is passed because requests does not
# time out by default, so a stalled connection would otherwise block this
# cell forever; with the timeout it raises requests.exceptions.Timeout.
webpage = requests.get(URL, headers=HEADERS, timeout=30)
print(webpage)  # <Response [200]> means successful

# Surface a rejected request explicitly instead of relying on the reader
# noticing the status code in the repr above. This is a warning only; the
# response object is still available to the following cells.
if not webpage.ok:
    print(f"Warning: request returned HTTP status {webpage.status_code}")

In [None]:
# Build a parse tree from the raw bytes of the response
soup = BeautifulSoup(webpage.content, "html.parser")


def _text_or_na(card, tag_name, css_class):
    """Return the stripped text of the first matching element inside *card*.

    Looks up the first ``tag_name`` descendant carrying ``css_class`` and
    returns its text with surrounding whitespace removed, or the placeholder
    "N/A" when the card has no such element.
    """
    node = card.find(tag_name, {"class": css_class})
    if node:
        return node.get_text().strip()
    return "N/A"


# Every job posting on the results page sits in one of these containers
job_cards = soup.find_all("div", {"class": "job_seen_beacon"})

# One list per output column, each aligned with the order of job_cards
job_titles = [_text_or_na(card, "h2", "jobTitle") for card in job_cards]
company_names = [_text_or_na(card, "span", "companyName") for card in job_cards]
locations = [_text_or_na(card, "div", "companyLocation") for card in job_cards]
# Salary is optional on a posting, so "N/A" is expected for many rows
salaries = [_text_or_na(card, "div", "salary-snippet") for card in job_cards]
# Short description snippet shown on the card
summaries = [_text_or_na(card, "div", "job-snippet") for card in job_cards]

# Assemble the columns into a table
columns = {
    "Job Title": job_titles,
    "Company": company_names,
    "Location": locations,
    "Salary": salaries,
    "Summary": summaries,
}
df = pd.DataFrame(columns)

# Preview the first five rows
print(df.head())



In [None]:
# Write the scraped table to a CSV file in the working directory,
# leaving out the DataFrame's integer index column
output_file = "indeed_jobs.csv"
df.to_csv(output_file, index=False)
print(f"Data saved to {output_file}")
