In [None]:
import json
import os
import re
import time
from urllib.request import Request, urlopen

import pandas as pd
import serpapi
from bs4 import BeautifulSoup


# SerpAPI credential. Read from the environment so the secret never lives in
# source control or in a shared notebook. main() refuses to run when this is
# empty. NOTE(review): a real key was previously committed on this line; it
# should be treated as leaked and rotated.
SERP_API_KEY = os.environ.get("SERP_API_KEY", "")


# Companies to search for; each one is combined with every entry in `keywords`.
companies = [
    "Chargebee", "Plum Insurance", "Yellow.ai", "Graphy", "Kala.ai",
    "Razorpay", "BrowserStack", "Meesho", "Upstox", "Darwinbox",
    "Zoho", "Hasura", "Vidyard", "Fyle", "Dunzo",
    "OfBusiness", "Artivatic", "Stanza Living", "Sigtuple", "Locus"
]


# Role keywords inserted into the search query alongside the company name.
keywords = [
    "Product Manager", "Sales Development", "Software Engineer", "UX Designer",
    "Customer Success", "Marketing", "Data Analyst", "Recruiter", "Engineering Lead"
]


# Lowercase job-title fragment -> growth-signal tag. A title may hit several
# fragments; several fragments intentionally share the same tag.
signal_map = {
    "product": "New Product Initiative",
    "software engineer": "Engineering Buildout",
    "sales": "Go-To-Market Expansion",
    "designer": "Design Overhaul",
    "director": "Leadership Upgrade",
    "manager": "Leadership Upgrade",
    "lead": "Leadership Upgrade",
    "vp": "Leadership Upgrade",
    "cxo": "Leadership Upgrade"
}


# Lowercase title keyword -> department label. Entries are tried in insertion
# order and the first match wins, so earlier keys take priority over later ones.
department_map = {
    "software": "Engineering",
    "engineer": "Engineering",
    "developer": "Engineering",
    "product manager": "Product",
    "pm": "Product",
    "design": "Design",
    "ux": "Design",
    "ui": "Design",
    "sales": "Sales",
    "bd": "Sales",
    "business development": "Sales",
    "marketing": "Marketing",
    "customer success": "CSM",
    "support": "Support",
    "data": "Data",
    "analyst": "Data",
    "devops": "DevOps",
    "qa": "QA",
    "hr": "HR",
    "recruit": "Recruiting",
    "finance": "Finance"
}


# Lowercase title keyword -> seniority label, tried in insertion order.
seniority_map = {
    "intern": "Intern",
    "internship": "Intern",
    "associate": "Junior",
    "jr": "Junior",
    "junior": "Junior",
    "mid-level": "Mid",
    "mid": "Mid",
    "senior": "Senior",
    "lead": "Lead",
    "principal": "Lead",
    "director": "Director",
    "vp": "VP",
    "cxo": "CXO"
}


# Keys this short are abbreviations ("pm", "ui", "hr", ...) and must match as
# a whole word; otherwise they fire inside unrelated words such as
# "development" (pm) or "recruiter" (ui).
_ABBREVIATION_MAX_LEN = 3


def _compile_keyword_rules(mapping, *, allow_stems):
    """Turn a keyword->label mapping into ordered (compiled regex, label) pairs.

    Every keyword must start at a word boundary. When ``allow_stems`` is true,
    keywords longer than ``_ABBREVIATION_MAX_LEN`` may be followed by more
    letters (so "recruit" matches "recruiter"); all other keywords must also
    end at a word boundary.
    """
    rules = []
    for key, label in mapping.items():
        is_stem = allow_stems and len(key) > _ABBREVIATION_MAX_LEN
        tail = "" if is_stem else r"\b"
        rules.append((re.compile(r"\b" + re.escape(key.lower()) + tail), label))
    return rules


def _first_matching_label(title, rules, default):
    """Return the label of the first rule matching ``title``, else ``default``."""
    if not isinstance(title, str):
        # Missing or malformed titles are classified with the fallback label
        # instead of raising AttributeError on .lower().
        return default
    lowered = title.lower()
    for pattern, label in rules:
        if pattern.search(lowered):
            return label
    return default


# Department keys are stems ("design" -> "designer", "engineer" -> "engineering").
_DEPARTMENT_RULES = _compile_keyword_rules(department_map, allow_stems=True)
# Seniority keys are complete words; stemming would turn "Internal" into "Intern".
_SENIORITY_RULES = _compile_keyword_rules(seniority_map, allow_stems=False)


def get_department(title):
    """Classify a job title into a department label.

    Args:
        title: Job title text. Non-string values yield the fallback label.

    Returns:
        The label of the first ``department_map`` keyword found in the title
        (case-insensitive, anchored at word boundaries), or "Other" when no
        keyword matches.
    """
    return _first_matching_label(title, _DEPARTMENT_RULES, "Other")


def get_seniority(title):
    """Classify a job title into a seniority label.

    Args:
        title: Job title text. Non-string values yield the fallback label.

    Returns:
        The label of the first ``seniority_map`` keyword that appears in the
        title as a whole word (case-insensitive), or "Mid" when none matches.
    """
    return _first_matching_label(title, _SENIORITY_RULES, "Mid")


# Place names and remote-work phrases, matched case-insensitively as whole words.
_LOCATION_NAME_RE = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(name)
        for name in (
            "Remote", "Bengaluru", "Bangalore", "Hyderabad", "Delhi", "Mumbai",
            "Pune", "Chennai", "Gurgaon", "Gurugram", "Noida", "Ahmedabad",
            "Kolkata", "Jaipur", "Chandigarh", "Coimbatore", "Trivandrum",
            "India", "Work from home", "WFH",
        )
    )
    + r")\b",
    re.IGNORECASE,
)

# Country codes are matched case-sensitively: a case-insensitive "in" would
# match the preposition (and, as a substring, nearly every English clause).
_LOCATION_CODE_RE = re.compile(r"\b(?:IN|IND)\b")

# Characters that separate the clauses of a search-result snippet.
_LOCATION_SEPARATOR_RE = re.compile(r"[·|,\-]")


def extract_location(snippet):
    """Pull a location-looking clause out of a search-result snippet.

    The snippet is split on "·", "|", "," and "-"; the first clause that
    mentions a known place name, a remote-work phrase, or the upper-case
    country codes IN/IND is returned.

    Args:
        snippet: Snippet text from a search result.

    Returns:
        "Unknown" when ``snippet`` is empty or not a string, the matching
        clause (stripped and title-cased) when one is found, otherwise "Other".
    """
    if not snippet or not isinstance(snippet, str):
        return "Unknown"

    for part in _LOCATION_SEPARATOR_RE.split(snippet):
        part = part.strip()
        if _LOCATION_NAME_RE.search(part) or _LOCATION_CODE_RE.search(part):
            return part.title()

    return "Other"

def search_linkedin_jobs(company, keyword):
    """Search Google (via SerpAPI) for LinkedIn job postings and enrich them.

    Args:
        company: Company name placed in the search query.
        keyword: Role keyword placed in the search query.

    Returns:
        A list of dicts, one per organic result whose link contains
        "/jobs/view/", with the keys company_name, job_title, department,
        location, seniority, job_url and growth_signal_tag. An empty list is
        returned when the search raises or yields no organic results.
    """
    params = {
        "engine": "google",
        "q": f'site:linkedin.com/jobs {keyword} {company}',
        "api_key": SERP_API_KEY,
        "num": 7
    }

    try:
        client = serpapi.Client(api_key=SERP_API_KEY)
        results = client.search(params)
    except Exception as e:
        # Deliberately broad: one failed query is reported and skipped so the
        # remaining company/keyword searches still run.
        print(f"    ❌ Error searching for {keyword} at {company}: {e}")
        return []

    jobs = []
    if "organic_results" in results:
        for result in results["organic_results"]:
            # `or ""` also covers fields that are present but null.
            title = result.get("title") or ""
            link = result.get("link") or ""
            snippet = result.get("snippet") or ""

            # Keep individual job postings only; skip search/listing pages.
            if "/jobs/view/" not in link:
                continue

            location = extract_location(snippet)

            # Collect tags in signal_map order, de-duplicated. A list is used
            # instead of a set because set iteration order for strings varies
            # between interpreter runs, which made the joined tag string
            # differ from run to run for identical titles.
            title_lower = title.lower()
            signal_tags = []
            for key, tag in signal_map.items():
                if key.lower() in title_lower and tag not in signal_tags:
                    signal_tags.append(tag)

            dept = get_department(title)
            seniority = get_seniority(title)

            jobs.append({
                "company_name": company,
                "job_title": title,
                "department": dept,
                "location": location,
                "seniority": seniority,
                "job_url": link,
                "growth_signal_tag": ", ".join(signal_tags) if signal_tags else "General Hiring"
            })

    return jobs


def main():
    """Run every company/keyword search, save the results, and print a summary.

    Stops immediately with a message when SERP_API_KEY is empty or still the
    placeholder value. Otherwise collects jobs from search_linkedin_jobs for
    each (company, keyword) pair, pausing 1.5 seconds after each search,
    de-duplicates by job URL, writes linkedin_job_listings_enhanced.csv and
    prints per-company, per-department, per-seniority, per-location and
    growth-signal breakdowns.
    """
    key_missing = not SERP_API_KEY or SERP_API_KEY == "your_serpapi_key_here"
    if key_missing:
        print("❌ Please set your SerpAPI key in the SERP_API_KEY variable")
        return

    collected = []
    planned = len(companies) * len(keywords)
    done = 0

    for firm in companies:
        print(f"\n🔍 Searching for jobs at {firm}")
        for role in keywords:
            done += 1
            print(f"  → Keyword: {role} ({done}/{planned})")

            found = search_linkedin_jobs(firm, role)
            collected.extend(found)

            print(f"    ✅ Found {len(found)} jobs")

            # Pause between requests.
            time.sleep(1.5)

    if not collected:
        print("❌ No jobs found. Please check your API key and search parameters.")
        return

    column_order = [
        "company_name", "job_title", "department", "location",
        "seniority", "job_url", "growth_signal_tag"
    ]
    df = pd.DataFrame(collected)
    # The same posting can surface under several keywords; keep the first hit.
    df = df.drop_duplicates(subset=['job_url'])
    df = df[column_order]

    df.to_csv("linkedin_job_listings_enhanced.csv", index=False)

    unique_total = len(df)
    print(f"\n✅ Scraped and enriched {unique_total} unique job listings.")
    print("📄 Saved to linkedin_job_listings_enhanced.csv")

    print("\n📊 Enhanced Analytics:")
    print(f"Total unique jobs: {unique_total}")

    def show_breakdown(heading, column, top=None):
        """Print `heading`, then one bullet per distinct value of `column`."""
        print(heading)
        tally = df[column].value_counts()
        if top is not None:
            tally = tally.head(top)
        for label, n in tally.items():
            print(f"  • {label}: {n} jobs")

    show_breakdown("\n🏢 Jobs by Company:", 'company_name')
    show_breakdown("\n🏛️ Jobs by Department:", 'department')
    show_breakdown("\n🎯 Jobs by Seniority:", 'seniority')
    show_breakdown("\n📍 Jobs by Location:", 'location', top=10)

    print("\n🚀 Growth Signals Detected:")
    tag_totals = {}
    for tag_field in df['growth_signal_tag']:
        if not tag_field or tag_field == "General Hiring":
            continue
        # A row may carry several comma-separated tags; count each one.
        for tag in tag_field.split(', '):
            tag_totals[tag] = tag_totals.get(tag, 0) + 1

    ranked = sorted(tag_totals.items(), key=lambda pair: pair[1], reverse=True)
    for tag, n in ranked:
        print(f"  • {tag}: {n} jobs")

    print("\n📋 Sample Results:")
    preview = df[['company_name', 'job_title', 'department', 'seniority']].head(3)
    print(preview.to_string(index=False))

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


🔍 Searching for jobs at Chargebee
  → Keyword: Product Manager (1/180)
    ✅ Found 5 jobs
  → Keyword: Sales Development (2/180)
    ✅ Found 5 jobs
  → Keyword: Software Engineer (3/180)
    ✅ Found 4 jobs
  → Keyword: UX Designer (4/180)
    ✅ Found 6 jobs
  → Keyword: Customer Success (5/180)
    ✅ Found 6 jobs
  → Keyword: Marketing (6/180)
    ✅ Found 6 jobs
  → Keyword: Data Analyst (7/180)
    ✅ Found 5 jobs
  → Keyword: Recruiter (8/180)
    ✅ Found 7 jobs
  → Keyword: Engineering Lead (9/180)
    ✅ Found 6 jobs

🔍 Searching for jobs at Plum Insurance
  → Keyword: Product Manager (10/180)
    ✅ Found 5 jobs
  → Keyword: Sales Development (11/180)
    ✅ Found 5 jobs
  → Keyword: Software Engineer (12/180)
    ✅ Found 6 jobs
  → Keyword: UX Designer (13/180)
    ✅ Found 5 jobs
  → Keyword: Customer Success (14/180)
    ✅ Found 5 jobs
  → Keyword: Marketing (15/180)
    ✅ Found 4 jobs
  → Keyword: Data Analyst (16/180)
    ✅ Found 5 jobs
  → Keyword: Recruiter (17/180)
    ✅ Found