In [None]:
%pip install langchain-community chromadb sentence-transformers faiss-cpu requests pillow python-multipart opencv-python

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import re
from datetime import datetime
import time
import PyPDF2
import docx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import warnings
warnings.filterwarnings('ignore')



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/opt/python@3.11/bin/python3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [10]:
class LightweightOllama:
    def __init__(self, model_name="phi3:mini"):
        self.model_name = model_name
        self.base_url = "http://localhost:11434"
    
    def invoke(self, prompt):
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": self.model_name,
                    "prompt": prompt,
                    "stream": False,
                    "options": {
                        "temperature": 0.1,
                        "num_predict": 200,
                        "top_k": 20
                    }
                },
                timeout=30
            )
            
            if response.status_code == 200:
                result = response.json().get("response", "")
                if "@@@@" in result or "####" in result:
                    return "Model response contained invalid characters. Please try a different model."
                return result
            else:
                return f"Error: {response.status_code}"
                
        except Exception as e:
            return f"Connection error: {str(e)}"


In [11]:
def get_best_lightweight_model():
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=10)
        if response.status_code == 200:
            models = response.json().get('models', [])
            available_models = [model['name'] for model in models]
            print("Available models:", available_models)
            preferred_models = ['phi3:mini', 'gemma2:2b', 'qwen2:0.5b', 'phi:latest', 'llama2:latest']
            
            for model in preferred_models:
                if model in available_models:
                    print(f"✅ Using {model} (lightweight)")
                    return model
            
            if available_models:
                print(f"⚠️ Using {available_models[0]} (fallback)")
                return available_models[0]
                
        return None
    except:
        return None


In [12]:
model_name = get_best_lightweight_model()
if model_name:
    llm = LightweightOllama(model_name)
else:
    class SimpleLLM:
        def invoke(self, prompt):
            return "Please install a lightweight model: ollama pull phi3:mini"
    llm = SimpleLLM()

Available models: ['llava:latest', 'qwen2:0.5b', 'gemma2:2b', 'phi3:mini', 'phi:latest', 'llama2:latest']
✅ Using phi3:mini (lightweight)


In [None]:
class ResumeParser:
    def __init__(self):
        self.skills_keywords = [
            'python', 'java', 'javascript', 'sql', 'machine learning', 'deep learning',
            'tensorflow', 'pytorch', 'react', 'node.js', 'aws', 'azure', 'docker',
            'kubernetes', 'git', 'ci/cd', 'data analysis', 'tableau', 'power bi',
            'mongodb', 'postgresql', 'mysql', 'html', 'css', 'rest api', 'graphql',
            'agile', 'scrum', 'project management', 'leadership', 'communication',
            'data science', 'artificial intelligence', 'ai', 'ml', 'nlp', 'computer vision',
            'big data', 'spark', 'hadoop', 'kafka', 'redis', 'elasticsearch',
            'jenkins', 'ansible', 'terraform', 'gcp', 'cloud', 'devops',
            'frontend', 'backend', 'full stack', 'mobile', 'ios', 'android',
            'flutter', 'react native', 'vue', 'angular', 'typescript'
        ]
    
    def extract_text_from_pdf(self, pdf_path):
        text = ""
        try:
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                for page in reader.pages:
                    text += page.extract_text()
            return text
        except Exception as e:
            print(f"Error reading PDF: {e}")
            return ""
    
    def extract_text_from_docx(self, docx_path):
        try:
            doc = docx.Document(docx_path)
            text = ""
            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"
            return text
        except Exception as e:
            print(f"Error reading DOCX: {e}")
            return ""
    
    def parse_resume(self, resume_path):
        print(f"📄 Parsing resume: {resume_path}")
        
        if resume_path.endswith('.pdf'):
            text = self.extract_text_from_pdf(resume_path)
        elif resume_path.endswith('.docx'):
            text = self.extract_text_from_docx(resume_path)
        else:
            try:
                with open(resume_path, 'r', encoding='utf-8') as file:
                    text = file.read()
            except:
                text = resume_path 
        
        if not text.strip():
            return {"error": "Could not extract text from resume"}
       
        prompt = f"""
        Extract the following information from this resume text. Return ONLY a JSON object with these exact keys:
        {{
            "name": "full name",
            "email": "email address", 
            "phone": "phone number if available",
            "summary": "brief professional summary",
            "skills": ["list", "of", "technical", "skills"],
            "experience": "years of experience",
            "education": "highest education level",
            "current_role": "current/most recent job title",
            "industries": ["list", "of", "industries", "worked", "in"]
        }}
        
        RESUME TEXT:
        {text[:3000]}
        
        Return ONLY the JSON object, nothing else.
        """
        
        try:
            response = llm.invoke(prompt)
            response = response.strip()
            if response.startswith('```json'):
                response = response[7:]
            if response.endswith('```'):
                response = response[:-3]
            
            resume_data = json.loads(response)
            resume_data['raw_text'] = text
            return resume_data
            
        except Exception as e:
            print(f"Error parsing with Ollama: {e}")
            return self.simple_parse(text)
    
    def simple_parse(self, text):
        skills_found = []
        for skill in self.skills_keywords:
            if re.search(r'\b' + re.escape(skill) + r'\b', text.lower()):
                skills_found.append(skill)
  
        email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
        email = email_match.group() if email_match else "Not found"
        
        lines = text.split('\n')
        name = lines[0].strip() if lines else "Not found"
        
        return {
            'name': name,
            'email': email,
            'skills': skills_found,
            'summary': "Extracted from resume",
            'raw_text': text
        }

In [None]:
class JobScraper:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
    
    def scrape_indeed(self, job_title, location="", limit=10):
        jobs = []
        try:
            base_url = "https://www.indeed.com/jobs"
            params = {
                'q': job_title,
                'l': location,
                'limit': limit
            }
            
            response = requests.get(base_url, params=params, headers=self.headers, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')
            
            job_cards = soup.find_all('div', class_='job_seen_beacon')
            
            for card in job_cards[:limit]:
                try:
                    title_elem = card.find('h2', class_='jobTitle')
                    company_elem = card.find('span', class_='companyName')
                    location_elem = card.find('div', class_='companyLocation')
                    link_elem = card.find('a', class_='jcs-JobTitle')
                    
                    if title_elem and company_elem:
                        job = {
                            'title': title_elem.text.strip(),
                            'company': company_elem.text.strip(),
                            'location': location_elem.text.strip() if location_elem else "Not specified",
                            'link': "https://www.indeed.com" + link_elem['href'] if link_elem else "",
                            'source': 'Indeed',
                            'description': self.get_job_description("https://www.indeed.com" + link_elem['href']) if link_elem else ""
                        }
                        jobs.append(job)
                except Exception as e:
                    continue
                    
        except Exception as e:
            print(f"Error scraping Indeed: {e}")
            
        return jobs
    
    def scrape_linkedin_simplified(self, job_title, location="", limit=10):
        jobs = []
        try:
            base_url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
            params = {
                'keywords': job_title,
                'location': location,
                'start': 0
            }
            
            response = requests.get(base_url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'html.parser')
                
                job_cards = soup.find_all('li')
                
                for card in job_cards[:limit]:
                    try:
                        title_elem = card.find('h3', class_='base-search-card__title')
                        company_elem = card.find('h4', class_='base-search-card__subtitle')
                        location_elem = card.find('span', class_='job-search-card__location')
                        link_elem = card.find('a', class_='base-card__full-link')
                        
                        if title_elem and company_elem:
                            job = {
                                'title': title_elem.text.strip(),
                                'company': company_elem.text.strip(),
                                'location': location_elem.text.strip() if location_elem else "Not specified",
                                'link': link_elem['href'] if link_elem else "",
                                'source': 'LinkedIn',
                                'description': ""
                            }
                            jobs.append(job)
                    except Exception as e:
                        continue
                    
        except Exception as e:
            print(f"Error scraping LinkedIn: {e}")
            
        return jobs
    
    def get_job_description(self, job_url):
        try:
            response = requests.get(job_url, headers=self.headers, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')
            
            description_selectors = [
                'div#jobDescriptionText',
                'div.job-description',
                'div.description',
                'div.job-details'
            ]
            
            for selector in description_selectors:
                desc_elem = soup.select_one(selector)
                if desc_elem:
                    return desc_elem.get_text(strip=True)[:500]
            
            return "Description not available"
        except:
            return "Description not available"
    
    def search_multiple_sources(self, job_title, location="", jobs_per_source=5):
        all_jobs = []
        
        print("🔍 Searching Indeed...")
        indeed_jobs = self.scrape_indeed(job_title, location, jobs_per_source)
        all_jobs.extend(indeed_jobs)
        
        print("🔍 Searching LinkedIn...")
        linkedin_jobs = self.scrape_linkedin_simplified(job_title, location, jobs_per_source)
        all_jobs.extend(linkedin_jobs)
        
        return all_jobs[:10] 

In [None]:
class JobMatcher:
    def __init__(self):
        self.llm = llm
    
    def calculate_similarity(self, resume_text, job_description):
        try:
            vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
            similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
            return similarity[0][0]
        except:
            return 0
    
    def analyze_job_fit(self, resume_data, job):
        prompt = f"""
        Analyze the match between a candidate and a job opportunity. Return ONLY a JSON response with this structure:
        {{
            "relevance_score": 0-100,
            "skills_match": ["list", "of", "matching", "skills"],
            "missing_skills": ["list", "of", "missing", "important", "skills"],
            "fit_analysis": "brief analysis of why this is a good fit",
            "recommendation": "strong_recommend|recommend|neutral|not_recommend"
        }}
        
        CANDIDATE PROFILE:
        Name: {resume_data.get('name', 'N/A')}
        Skills: {', '.join(resume_data.get('skills', []))}
        Experience: {resume_data.get('experience', 'N/A')}
        Summary: {resume_data.get('summary', 'N/A')}
        
        JOB OPPORTUNITY:
        Title: {job['title']}
        Company: {job['company']}
        Description: {job['description'][:1000]}
        
        Analyze the skills match, experience relevance, and overall fit.
        """
        
        try:
            response = self.llm.invoke(prompt)
            response = response.strip()
            if response.startswith('```json'):
                response = response[7:]
            if response.endswith('```'):
                response = response[:-3]
            
            analysis = json.loads(response)
            
            analysis['text_similarity'] = self.calculate_similarity(
                resume_data.get('raw_text', ''),
                job['description']
            )
            
            return analysis
            
        except Exception as e:
            print(f"Error in job fit analysis: {e}")
            return {
                "relevance_score": 50,
                "skills_match": [],
                "missing_skills": [],
                "fit_analysis": "Analysis unavailable",
                "recommendation": "neutral",
                "text_similarity": 0
            }
    
    def rank_jobs(self, resume_data, jobs):
        ranked_jobs = []
        
        print("🎯 Analyzing job matches...")
        for i, job in enumerate(jobs):
            print(f"  Analyzing job {i+1}/{len(jobs)}...")
            
            analysis = self.analyze_job_fit(resume_data, job)
            
            overall_score = (analysis['relevance_score'] * 0.7 + 
                           analysis['text_similarity'] * 100 * 0.3)
            
            ranked_jobs.append({
                **job,
                'analysis': analysis,
                'overall_score': overall_score
            })
            
          
            time.sleep(1)
        
      
        ranked_jobs.sort(key=lambda x: x['overall_score'], reverse=True)
        return ranked_jobs

In [None]:
def find_relevant_jobs(resume_path, job_title, location="", num_jobs=10):
    """
    Main function to find relevant jobs based on resume
    """
    print("🚀 Starting Job Search Agent...")
    print("=" * 50)
    

    parser = ResumeParser()
    resume_data = parser.parse_resume(resume_path)
    
    if 'error' in resume_data:
        print(f"❌ Error: {resume_data['error']}")
        return []
    
    print(f"✅ Resume parsed for: {resume_data.get('name', 'Unknown')}")
    print(f"📧 Email: {resume_data.get('email', 'Not found')}")
    print(f"🛠️ Skills: {', '.join(resume_data.get('skills', []))}")
    print()
   
    scraper = JobScraper()
    jobs = scraper.search_multiple_sources(job_title, location, num_jobs//2)
    
    if not jobs:
        print("❌ No jobs found. Try different search terms.")
        return []
    
    print(f"✅ Found {len(jobs)} jobs to analyze")
    print()
    matcher = JobMatcher()
    ranked_jobs = matcher.rank_jobs(resume_data, jobs)
    
    return ranked_jobs, resume_data

def display_results(ranked_jobs, resume_data, top_k=10):
    print("\n" + "=" * 80)
    print("🎯 TOP JOB RECOMMENDATIONS")
    print("=" * 80)
    
    for i, job in enumerate(ranked_jobs[:top_k]):
        print(f"\n🏆 #{i+1} | Score: {job['overall_score']:.1f}/100")
        print(f"📌 Title: {job['title']}")
        print(f"🏢 Company: {job['company']}")
        print(f"📍 Location: {job['location']}")
        print(f"🔗 Source: {job['source']}")
        print(f"🔗 Link: {job['link']}")
        
        analysis = job['analysis']
        print(f"✅ Matching Skills: {', '.join(analysis['skills_match'][:5])}")
        if analysis['missing_skills']:
            print(f"⚠️ Missing: {', '.join(analysis['missing_skills'][:3])}")
        print(f"📊 Analysis: {analysis['fit_analysis']}")
        print(f"💡 Recommendation: {analysis['recommendation'].replace('_', ' ').title()}")
        print("-" * 80)

def save_results_to_csv(ranked_jobs, filename="job_recommendations.csv"):
    data = []
    for job in ranked_jobs:
        data.append({
            'Title': job['title'],
            'Company': job['company'],
            'Location': job['location'],
            'Score': f"{job['overall_score']:.1f}",
            'Link': job['link'],
            'Source': job['source'],
            'Matching Skills': ', '.join(job['analysis']['skills_match'][:5]),
            'Missing Skills': ', '.join(job['analysis']['missing_skills'][:3]),
            'Analysis': job['analysis']['fit_analysis']
        })
    
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"💾 Results saved to {filename}")

In [None]:

def test_system():

    sample_resume_text = """
    Rhea
    Senior Data Scientist
    rhea.1@email.com | 1122334455 | linkedin.com/in/rhea
    
    SUMMARY:
    Experienced Data Scientist with 5+ years in machine learning, NLP, and cloud technologies. 
    Strong background in Python, TensorFlow, and AWS.
    
    EXPERIENCE:
    Senior Data Scientist - TechCorp (2020-Present)
    - Led ML projects improving customer recommendations by 30%
    - Developed NLP models for sentiment analysis
    - Managed AWS infrastructure for ML pipelines
    
    Data Scientist - DataWorks (2018-2020)
    - Built predictive models using Python and Scikit-learn
    - Implemented deep learning solutions with TensorFlow
    
    SKILLS:
    Python, Machine Learning, Deep Learning, TensorFlow, PyTorch, NLP, 
    AWS, SQL, Docker, Kubernetes, Git, Data Analysis
    
    EDUCATION:
    MS Computer Science - IIT
    BS Mathematics - VIT
    """

    with open("sample_resume.txt", "w") as f:
        f.write(sample_resume_text)
  
    job_title = "data scientist"
    location = "Bangalore"
    
    ranked_jobs, resume_data = find_relevant_jobs(
        "sample_resume.txt", 
        job_title, 
        location
    )
    
    if ranked_jobs:
        display_results(ranked_jobs, resume_data)
        save_results_to_csv(ranked_jobs)
    else:
        print("❌ No relevant jobs found.")
print("🧪 Testing Job Search System...")
test_system()

🧪 Testing Job Search System...
🚀 Starting Job Search Agent...
📄 Parsing resume: sample_resume.txt
Error parsing with Ollama: Unterminated string starting at: line 43 column 18 (char 519)
✅ Resume parsed for: 
📧 Email: rhea.1@email.com
🛠️ Skills: python, sql, machine learning, deep learning, tensorflow, pytorch, aws, docker, kubernetes, git, data analysis, ml, nlp, cloud

🔍 Searching Indeed...
🔍 Searching LinkedIn...
✅ Found 5 jobs to analyze

🎯 Analyzing job matches...
  Analyzing job 1/5...
  Analyzing job 2/5...
  Analyzing job 3/5...
  Analyzing job 4/5...
  Analyzing job 5/5...

🎯 TOP JOB RECOMMENDATIONS

🏆 #1 | Score: 59.5/100
📌 Title: Data Scientist- Across PAN India
🏢 Company: Capgemini Engineering
📍 Location: Bengaluru, Karnataka, India
🔗 Source: LinkedIn
🔗 Link: https://in.linkedin.com/jobs/view/data-scientist-across-pan-india-at-capgemini-engineering-4296207682?position=1&pageNum=0&refId=C82IrQTox%2BW7TiOJ8%2BITIQ%3D%3D&trackingId=vhu8TAxDAPxrFUxA7%2BlWUA%3D%3D
✅ Matching Ski

In [None]:
def search_jobs_with_your_resume():
    YOUR_RESUME_PATH = "/Users/siddharthshailendra/Resume_Shortlister_using_llm_basic/sample_resume_2.txt"  # Remove extra slash
    JOB_TITLE = "machine learning engineer"  
    LOCATION = "Mumbai" 
    
    print("🔍 Starting job search with your resume...")
    ranked_jobs, resume_data = find_relevant_jobs(
        YOUR_RESUME_PATH,
        JOB_TITLE, 
        LOCATION
    )
    
    if ranked_jobs:
        display_results(ranked_jobs, resume_data)
        save_results_to_csv(ranked_jobs, "my_job_recommendations.csv")
        
        print(f"\n🎉 Found {len(ranked_jobs)} relevant jobs!")
        print("💡 Tips:")
        print("  - Apply to top 3-5 jobs first")
        print("  - Customize your resume for each application")
        print("  - Use the analysis to improve your resume")
    else:
        print("❌ No relevant jobs found. Try:")
        print("  - Different job titles")
        print("  - Broader location search")
        print("  - Check your resume format")

🔍 Starting job search with your resume...
🚀 Starting Job Search Agent...
📄 Parsing resume: /Users/siddharthshailendra/Resume_Shortlister_using_llm_basic/sample_resume_2.txt
Error parsing with Ollama: Expecting value: line 41 column 17 (char 520)
✅ Resume parsed for: Arjun Sharma
📧 Email: arjun.sharma@email.com
🛠️ Skills: python, sql, machine learning, deep learning, tensorflow, pytorch, aws, docker, kubernetes, git, ci/cd, artificial intelligence, ai, ml, nlp, computer vision, spark, kafka, gcp, cloud, mobile

🔍 Searching Indeed...
🔍 Searching LinkedIn...
✅ Found 5 jobs to analyze

🎯 Analyzing job matches...
  Analyzing job 1/5...
Error in job fit analysis: Unterminated string starting at: line 13 column 5 (char 717)
  Analyzing job 2/5...
  Analyzing job 3/5...
  Analyzing job 4/5...
  Analyzing job 5/5...

🎯 TOP JOB RECOMMENDATIONS

🏆 #1 | Score: 59.5/100
📌 Title: Data scientist- Python- AI/ML GEN AI- Across india
🏢 Company: Capgemini Engineering
📍 Location: Mumbai, Maharashtra, Indi

In [None]:
import pandas as pd
from IPython.display import display, Markdown
import textwrap

def display_csv_results_clean(csv_file_path="my_job_recommendations.csv"):
    """
    Clean display of CSV results without truncation
    """
    try:
        df = pd.read_csv(csv_file_path)
        
        print("🎯 JOB SEARCH RESULTS SUMMARY")
        print("=" * 70)
        print(f"📊 Total Jobs Found: {len(df)}")
        print(f"🏆 Average Score: {df['Score'].mean():.1f}/100")
        print(f"💼 Top Companies: {', '.join(df['Company'].head(3).tolist())}")
        print()
    
        for idx, row in df.iterrows():
            print(f"🏅 RANK #{idx+1} | SCORE: {row['Score']}/100")
            print(f"📌 {row['Title']}")
            print(f"🏢 {row['Company']} | 📍 {row['Location']}")
            print(f"🔗 {row['Link']}")
            print(f"📱 Source: {row['Source']}")
            
            matching_skills = str(row['Matching Skills'])
            if len(matching_skills) > 80:
                matching_skills = textwrap.fill(matching_skills, width=80)
            
            print(f"✅ Matching Skills: {matching_skills}")
            
            missing_skills = str(row['Missing Skills'])
            if missing_skills != 'nan' and missing_skills.strip():
                if len(missing_skills) > 80:
                    missing_skills = textwrap.fill(missing_skills, width=80)
                print(f"⚠️  Missing Skills: {missing_skills}")
        
            analysis = str(row['Analysis'])
            if len(analysis) > 100:
                analysis = textwrap.fill(analysis, width=100)
            print(f"📋 Analysis: {analysis}")
            
            print(f"💡 Recommendation: {row['Recommendation']}")
            print("─" * 70)
            print()
            
    except Exception as e:
        print(f"❌ Error displaying results: {e}")

def display_job_search_insights(csv_file_path="my_job_recommendations.csv"):
    """
    Show insights and analytics from the job search
    """
    try:
        df = pd.read_csv(csv_file_path)
        
        print("📈 JOB SEARCH INSIGHTS")
        print("=" * 60)
        
        df['Score_Numeric'] = pd.to_numeric(df['Score'], errors='coerce')
        
        print(f"🎯 Average Match Score: {df['Score_Numeric'].mean():.1f}/100")
        print(f"🏆 Highest Score: {df['Score_Numeric'].max():.1f}/100")
        print(f"📊 Score Range: {df['Score_Numeric'].min():.1f} - {df['Score_Numeric'].max():.1f}")
        print()
        
        excellent = len(df[df['Score_Numeric'] >= 80])
        good = len(df[(df['Score_Numeric'] >= 60) & (df['Score_Numeric'] < 80)])
        average = len(df[(df['Score_Numeric'] >= 40) & (df['Score_Numeric'] < 60)])
        low = len(df[df['Score_Numeric'] < 40])
        
        print("📈 SCORE DISTRIBUTION:")
        print(f"   🎯 Excellent (80-100): {excellent} jobs")
        print(f"   ✅ Good (60-79): {good} jobs")
        print(f"   ⚠️  Average (40-59): {average} jobs")
        print(f"   🔴 Low (0-39): {low} jobs")
        print()
        
        print("🏢 TOP COMPANIES:")
        company_counts = df['Company'].value_counts()
        for company, count in company_counts.head(3).items():
            avg_score = df[df['Company'] == company]['Score_Numeric'].mean()
            print(f"   • {company}: {count} jobs (avg: {avg_score:.1f}/100)")
        print()
        
        all_matching_skills = []
        for skills in df['Matching Skills'].dropna():
            if str(skills) != 'nan':
                all_matching_skills.extend([s.strip() for s in str(skills).split(',')])
        
        if all_matching_skills:
            from collections import Counter
            skill_counts = Counter(all_matching_skills)
            print("🛠️  TOP MATCHING SKILLS:")
            for skill, count in skill_counts.most_common(5):
                print(f"   • {skill}: {count} jobs")
    
        all_missing_skills = []
        for skills in df['Missing Skills'].dropna():
            if str(skills) != 'nan' and skills.strip():
                all_missing_skills.extend([s.strip() for s in str(skills).split(',')])
        
        if all_missing_skills:
            missing_counts = Counter(all_missing_skills)
            print("\n📚 SKILLS TO IMPROVE:")
            for skill, count in missing_counts.most_common(3):
                print(f"   • {skill}: missing in {count} jobs")
        
        print("\n💡 ACTION PLAN:")
        high_score_jobs = df[df['Score_Numeric'] >= 70]
        if len(high_score_jobs) > 0:
            print(f"   🎯 Apply to {len(high_score_jobs)} high-match jobs first")
        else:
            print(f"   🎯 Apply to top {min(3, len(df))} jobs based on score")
        
        if all_missing_skills:
            top_missing = missing_counts.most_common(1)[0][0]
            print(f"   📚 Focus on learning: {top_missing}")
        
        print(f"   ⏰ Apply within 48 hours for best results")
        
    except Exception as e:
        print(f"❌ Error generating insights: {e}")

def run_enhanced_job_search():
    
    YOUR_RESUME_PATH = "/Users/siddharthshailendra/Resume_Shortlister_using_llm_basic/sample_resume_2.txt"
    JOB_TITLE = "machine learning engineer"  
    LOCATION = "Mumbai" 
    
    print("🚀 ENHANCED JOB SEARCH STARTING...")
    print("=" * 60)
    
    ranked_jobs, resume_data = find_relevant_jobs(
        YOUR_RESUME_PATH,
        JOB_TITLE, 
        LOCATION
    )
    
    if ranked_jobs:
        print("✅ Jobs analyzed successfully!")
        save_results_to_csv(ranked_jobs, "my_job_recommendations.csv")
        
        # Display results in clean format
        display_csv_results_clean("my_job_recommendations.csv")
        
        # Show insights
        display_job_search_insights("my_job_recommendations.csv")
        
        print(f"\n🎉 SEARCH COMPLETED! Found {len(ranked_jobs)} relevant jobs!")
        print("\n💼 NEXT STEPS:")
        print("   1. Apply to top 3 jobs today")
        print("   2. Customize cover letters for each application")
        print("   3. Follow up in 5-7 days")
        print("   4. Track applications in a spreadsheet")
        
    else:
        print("❌ No relevant jobs found.")

# Also add this function to view existing CSV files nicely
def view_existing_jobs_clean(csv_file_path="my_job_recommendations.csv"):
    print(f"📁 Viewing: {csv_file_path}")
    print("=" * 70)
    display_csv_results_clean(csv_file_path)
    display_job_search_insights(csv_file_path)
run_enhanced_job_search()


🚀 ENHANCED JOB SEARCH STARTING...
🚀 Starting Job Search Agent...
📄 Parsing resume: /Users/siddharthshailendra/Resume_Shortlister_using_llm_basic/sample_resume_2.txt
Error parsing with Ollama: Expecting value: line 41 column 17 (char 520)
✅ Resume parsed for: Arjun Sharma
📧 Email: arjun.sharma@email.com
🛠️ Skills: python, sql, machine learning, deep learning, tensorflow, pytorch, aws, docker, kubernetes, git, ci/cd, artificial intelligence, ai, ml, nlp, computer vision, spark, kafka, gcp, cloud, mobile

🔍 Searching Indeed...
🔍 Searching LinkedIn...
✅ Found 5 jobs to analyze

🎯 Analyzing job matches...
  Analyzing job 1/5...
  Analyzing job 2/5...
  Analyzing job 3/5...
  Analyzing job 4/5...
  Analyzing job 5/5...
✅ Jobs analyzed successfully!
💾 Results saved to my_job_recommendations.csv
🎯 JOB SEARCH RESULTS SUMMARY
📊 Total Jobs Found: 5
🏆 Average Score: 53.9/100
💼 Top Companies: Capgemini Engineering, Deloitte, Deloitte

🏅 RANK #1 | SCORE: 59.5/100
📌 Data scientist- Python- AI/ML GEN 