In [5]:
!pip install pandas


Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl (11.5 MB)
   ---------------------------------------- 0.0/11.5 MB ? eta -:--:--
   -- ------------------------------------- 0.8/11.5 MB 8.5 MB/s eta 0:00:02
   ------------ --------------------------- 3.7/11.5 MB 12.9 MB/s eta 0:00:01
   ----------------------- ---------------- 6.8/11.5 MB 13.5 MB/s eta 0:00:01
   --------------------------------- ------ 9.7/11.5 MB 13.7 MB/s eta 0:00:01
   ---------------------------------------- 11.5/11.5 MB 13.1 MB/s eta 0:00:00
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas

   ----------------

In [6]:
import time
from datetime import datetime
from urllib.parse import quote, urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Site root used both for the search URL and for resolving job links.
_KARKIDI_ROOT = "https://www.karkidi.com"

# Column order of the returned frame; also gives an empty result a stable schema.
_KARKIDI_COLUMNS = [
    "Title", "Company", "Location", "Experience",
    "Skills", "Summary", "JobURL", "ScrapedAt",
]


def _tag_text(tag):
    """Return the stripped text of a parsed tag, or "" when the tag is missing."""
    return tag.get_text(strip=True) if tag else ""


def _labelled_paragraph(job, label):
    """Return the text of the first <p> after the <span> whose string equals `label`."""
    label_tag = job.find("span", string=label)
    return _tag_text(label_tag.find_next("p")) if label_tag else ""


def _parse_karkidi_job(job):
    """Convert one ``div.ads-details`` block into a row dict keyed like _KARKIDI_COLUMNS."""
    link_tag = job.find("a", href=True)
    return {
        "Title": _tag_text(job.find("h4")),
        "Company": _tag_text(job.find("a", href=lambda x: x and "Employer-Profile" in x)),
        "Location": _tag_text(job.find("p")),
        "Experience": _tag_text(job.find("p", class_="emp-exp")),
        "Skills": _labelled_paragraph(job, "Key Skills"),
        "Summary": _labelled_paragraph(job, "Summary"),
        # urljoin resolves relative hrefs against the site root and leaves
        # already-absolute hrefs intact (plain concatenation would mangle them).
        "JobURL": urljoin(_KARKIDI_ROOT, link_tag["href"]) if link_tag else "",
        "ScrapedAt": datetime.now().isoformat(),
    }


def scrape_karkidi_jobs(keyword="data science", pages=1, delay=1, save_csv=False,
                        output_file="karkidi_jobs.csv", timeout=10):
    """Scrape job listings from karkidi.com search result pages.

    Parameters
    ----------
    keyword : str
        Search phrase; it is fully percent-encoded before being placed in the URL.
    pages : int
        Number of result pages to request, starting at page 1.
    delay : int or float
        Seconds to wait between consecutive page requests.
    save_csv : bool
        When True, write the resulting frame to `output_file` (without the index).
    output_file : str
        Destination path used when `save_csv` is True.
    timeout : int or float
        Seconds passed to ``requests.get`` as its timeout so a stalled
        connection cannot hang the notebook.

    Returns
    -------
    pandas.DataFrame
        One row per parsed job block with the columns Title, Company, Location,
        Experience, Skills, Summary, JobURL and ScrapedAt. The columns are
        present even when nothing was scraped.

    Notes
    -----
    Pages that fail (network error or non-200 status) and job blocks that fail
    to parse are reported with ``print`` and skipped; they do not abort the run.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    base_url = _KARKIDI_ROOT + "/Find-Jobs/{page}/all/India?search={query}"
    # safe="" so that characters such as "&", "+", "#" and "/" cannot alter the query string.
    query = quote(keyword, safe="")
    jobs_list = []

    for page in range(1, pages + 1):
        if page > 1:
            # Be nice to the server: pause before every follow-up request,
            # including the one that follows a failed page.
            time.sleep(delay)

        url = base_url.format(page=page, query=query)
        print(f"Scraping page {page}: {url}")
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Keep the rows collected so far instead of losing them to one bad request.
            print(f"Failed to retrieve page {page}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve page {page}, status code: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        for job in soup.find_all("div", class_="ads-details"):
            try:
                jobs_list.append(_parse_karkidi_job(job))
            except Exception as e:
                # Best effort: one malformed block must not discard the rest of the page.
                print(f"Error parsing job block: {e}")

    df = pd.DataFrame(jobs_list, columns=_KARKIDI_COLUMNS)

    if save_csv:
        df.to_csv(output_file, index=False)
        print(f"Saved to {output_file}")

    return df

# Demo run: fetch two result pages for "data science", write the CSV, then preview the rows.
if __name__ == "__main__":
    df_jobs = scrape_karkidi_jobs(
        keyword="data science",
        pages=2,
        delay=1,
        save_csv=True,
    )
    print(df_jobs.head())


Scraping page 1: https://www.karkidi.com/Find-Jobs/1/all/India?search=data%20science
Scraping page 2: https://www.karkidi.com/Find-Jobs/2/all/India?search=data%20science
Saved to karkidi_jobs.csv
                                               Title      Company  \
0   Principal Product Manager - Growth, Poe (Remote)  Quora, Inc.   
1          Machine Learning Physical Design Engineer       Google   
2  Staff Software Engineer - Monetization, Poe (R...  Quora, Inc.   
3  Staff Backend Engineer - Bot Creator Ecosystem...  Quora, Inc.   
4  Senior Backend Engineer - Bot Creator Ecosyste...  Quora, Inc.   

                      Location Experience  \
0                        India   6-8 year   
1  Bengaluru, Karnataka, India   4-6 year   
2                        India  8-10 year   
3                        India  8-10 year   
4                        India   6-8 year   

                                              Skills  \
0  Aartificial intelligence,Data Analytics,Data s...   
1  Aar