In [1]:
import json
import os
import re
import time
import uuid
from typing import Dict, List, Any
from urllib.parse import urljoin, urlparse

import chromadb
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [None]:
# Shared Groq chat model used by every prompt chain below.
# temperature=0 asks for the least random sampling, which suits prompts that
# must return machine-parseable JSON.
llm = ChatGroq(
    temperature=0,
    # Read the key from the environment instead of hard-coding it; the empty
    # fallback preserves the previous value when GROQ_API_KEY is unset.
    groq_api_key=os.environ.get("GROQ_API_KEY", ""),
    model_name="llama-3.3-70b-versatile"
)

In [None]:

class JobAnalyzer:
    """Portfolio-aware job analysis built on an LLM, a vector store and a job API.

    The class loads a portfolio CSV (which must contain a ``Technology``
    column), extracts structured data from job-posting pages, asks the
    module-level ``llm`` for analyses (skill match, tailoring advice, cover
    letter, recommendations) and queries the JSearch API for open positions.
    Every LLM-backed method returns whatever JSON structure the model produced.
    """

    # Seconds to wait for the job-search API before giving up on the request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Open the persistent vector store and read the job-search API key."""
        self.client = chromadb.PersistentClient('vectorstore')
        self.collections = self.client.get_or_create_collection(name="portfolio_app")
        # Empty fallback keeps the previous value when RAPIDAPI_KEY is unset.
        self.api_key = os.environ.get("RAPIDAPI_KEY", "")

    def _run_json_prompt(self, prompt, variables):
        """Pipe ``prompt`` into the module-level ``llm`` and parse the reply as JSON."""
        response = (prompt | llm).invoke(variables)
        return JsonOutputParser().parse(response.content)

    def load_portfolio(self, portfolio_csv):
        """Read the portfolio CSV and seed the vector store if it is empty.

        Args:
            portfolio_csv: Path or file-like object accepted by ``pandas.read_csv``.

        Returns:
            The portfolio as a DataFrame.

        Raises:
            KeyError: If the vector store is empty and the CSV has no
                ``Technology`` column.
        """
        df = pd.read_csv(portfolio_csv)

        # The collection is only populated once; later calls reuse what is stored.
        if not self.collections.count():
            # Empty cells arrive as NaN and cannot be stored as documents.
            documents = df['Technology'].dropna().astype(str).tolist()
            if documents:
                # One batched call instead of one round trip per row.
                self.collections.add(
                    documents=documents,
                    ids=[str(uuid.uuid4()) for _ in documents]
                )
        return df

    def extract_job_posting(self, url):
        """Scrape ``url`` and let the LLM extract the job posting(s) as JSON.

        Returns:
            The parsed JSON reply: a dict for one posting or a list of dicts.

        Raises:
            ValueError: If the loader returns no document for ``url``.
        """
        documents = WebBaseLoader(url).load()
        if not documents:
            raise ValueError(f"No content could be loaded from {url!r}")
        page_data = documents.pop().page_content

        prompt_extract = PromptTemplate.from_template("""
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the
        following keys: `role`, `experience`, `skills`, `description`, `company`, `location`, `requirements`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):
        """)

        return self._run_json_prompt(prompt_extract, {'page_data': page_data})

    def analyze_skills_and_generate_questions(self, job_description):
        """Ask the LLM for a skills-match breakdown and interview questions.

        Args:
            job_description: Any object; its ``str()`` form is placed in the prompt.

        Returns:
            Parsed JSON, requested with the keys ``skills_match`` and
            ``interview_questions``.
        """
        prompt_skills_and_question = PromptTemplate.from_template("""
        ### JOB DESCRIPTION:
        {job_description}

        ### INSTRUCTION:
        You are Mishu Dhar Chando, the CEO of Knowledge Doctor, a YouTube channel specializing in educating individuals on machine learning, deep learning, and natural language processing.
        Your expertise lies in bridging the gap between theoretical knowledge and practical applications through engaging content and innovative problem-solving techniques.
        Your job is to:
        1. Analyze the given job description to identify the required technical skills and match them with the provided skill set to calculate a percentage match.
        2. Generate a list of relevant interview questions based on the job description.
        3. Return the information in JSON format with the following keys:
            - `skills_match`: A dictionary where each key is a skill, and the value is the matching percentage.
            - `interview_questions`: A list of tailored questions related to the job description.

        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):
        """)

        return self._run_json_prompt(
            prompt_skills_and_question,
            {"job_description": str(job_description)}
        )

    def tailor_portfolio(self, portfolio_df, job_data):
        """Ask the LLM how the portfolio should change for one job.

        Args:
            portfolio_df: Portfolio DataFrame; its full text form is sent to the LLM.
            job_data: Dict for one posting; ``role``, ``skills`` and
                ``requirements`` are read, each defaulting to ``''``.

        Returns:
            Parsed JSON with the tailoring suggestions requested in the prompt.
        """
        prompt_tailor = PromptTemplate.from_template("""
        ### CURRENT PORTFOLIO:
        {current_portfolio}
        
        ### JOB REQUIREMENTS:
        {job_requirements}
        
        ### INSTRUCTION:
        You are a career advisor helping tailor a portfolio for a specific job application.
        Analyze the job requirements and current portfolio to suggest improvements.
        
        Return a JSON with:
        - `missing_skills`: List of skills mentioned in job requirements but missing from portfolio
        - `skill_gaps`: Skills that exist but need strengthening  
        - `portfolio_suggestions`: Specific recommendations to improve portfolio alignment
        - `new_portfolio_entries`: Suggested new entries to add to portfolio CSV
        - `emphasis_areas`: Existing skills that should be highlighted more
        
        ### VALID JSON (NO PREAMBLE):
        """)

        current_portfolio = portfolio_df.to_string()
        job_requirements = f"Role: {job_data.get('role', '')}\nSkills: {job_data.get('skills', '')}\nRequirements: {job_data.get('requirements', '')}"

        return self._run_json_prompt(prompt_tailor, {
            "current_portfolio": current_portfolio,
            "job_requirements": job_requirements
        })

    def generate_cover_letter(self, portfolio_df, job_data):
        """Ask the LLM for a cover letter matching the portfolio to one job.

        Args:
            portfolio_df: Portfolio DataFrame; only the ``Technology`` column is used.
            job_data: Dict for one posting; ``company``, ``role``, ``location``,
                ``skills`` and ``description`` are read.

        Returns:
            Parsed JSON, requested with the keys ``cover_letter``,
            ``key_highlights`` and ``customization_notes``.
        """
        prompt_cover_letter = PromptTemplate.from_template("""
        ### PORTFOLIO SKILLS:
        {portfolio_skills}
        
        ### JOB DETAILS:
        {job_details}
        
        ### INSTRUCTION:
        You are a professional career coach. Generate a compelling cover letter that:
        1. Addresses the specific company and role
        2. Highlights relevant skills from the portfolio that match job requirements
        3. Shows enthusiasm and cultural fit
        4. Includes specific examples from the portfolio
        5. Has a professional yet engaging tone
        6. Is concise (3-4 paragraphs)
        
        Return a JSON with:
        - `cover_letter`: The complete cover letter text
        - `key_highlights`: List of portfolio items that were emphasized
        - `customization_notes`: Suggestions for further personalization
        
        ### VALID JSON (NO PREAMBLE):
        """)

        portfolio_skills = portfolio_df['Technology'].tolist()
        job_details = f"""
        Company: {job_data.get('company', 'the company')}
        Role: {job_data.get('role', '')}
        Location: {job_data.get('location', '')}
        Skills Required: {job_data.get('skills', '')}
        Description: {job_data.get('description', '')}
        """

        return self._run_json_prompt(prompt_cover_letter, {
            "portfolio_skills": portfolio_skills,
            "job_details": job_details
        })

    def find_matching_jobs(self, portfolio_df, search_terms=None, location="", num_results=10):
        """Search for jobs, rank them against the portfolio and add advice.

        Args:
            portfolio_df: Portfolio DataFrame with a ``Technology`` column.
            search_terms: Job titles to search for; when empty the LLM derives
                them from the portfolio.
            location: Optional free-text location appended to the query.
            num_results: Maximum number of API results to keep.

        Returns:
            Dict with ``search_terms_used``, ``total_jobs_found``,
            ``top_matches`` (best five, including apply URLs) and
            ``job_recommendations``.
        """
        portfolio_skills = portfolio_df['Technology'].tolist()

        # Generate search terms from portfolio if not provided
        if not search_terms:
            prompt_search_terms = PromptTemplate.from_template("""
            ### PORTFOLIO SKILLS:
            {skills}
            
            ### INSTRUCTION:
            Based on these skills, generate relevant job search terms and keywords.
            Return a JSON with:
            - `job_titles`: List of relevant job titles to search for
            - `key_technologies`: Most important technologies from the portfolio
            - `search_queries`: Optimized search queries for job boards
            
            ### VALID JSON (NO PREAMBLE):
            """)

            search_data = self._run_json_prompt(prompt_search_terms, {"skills": portfolio_skills})
            fallback_titles = ['Software Engineer', 'Developer']
            # The model may answer with something other than the requested
            # object, or with an empty title list; fall back in both cases.
            if isinstance(search_data, dict):
                search_terms = search_data.get('job_titles', fallback_titles) or fallback_titles
            else:
                search_terms = fallback_titles

        jobs_found = self._simulate_job_search(search_terms, location, num_results)
        ranked_jobs = self._rank_jobs_by_portfolio_match(jobs_found, portfolio_skills)

        return {
            "search_terms_used": search_terms,
            "total_jobs_found": len(jobs_found),
            "top_matches": ranked_jobs[:5],
            "job_recommendations": self._generate_job_recommendations(ranked_jobs, portfolio_skills)
        }

    @staticmethod
    def _build_search_query(search_terms, location):
        """Turn search terms (string or iterable) and a location into query text."""
        if isinstance(search_terms, str):
            terms = search_terms.strip()
        else:
            # Join the titles rather than sending the Python list repr.
            # NOTE(review): a comma-separated list is assumed to be acceptable
            # free text for the API - confirm against the JSearch docs.
            cleaned = (str(term).strip() for term in (search_terms or []))
            terms = ", ".join(term for term in cleaned if term)

        place = (location or "").strip()
        if terms and place:
            return f"{terms} in {place}"
        # Avoid a dangling " in " when either half is missing.
        return terms or place

    def _simulate_job_search(self, search_terms, location, num_results=5):
        """Fetch job postings from the JSearch API.

        Despite its historical name this performs a real HTTP request.

        Returns:
            A list of job dicts (``title``, ``company``, ``location``,
            ``description``, ``skills_required``, ``url``, ``salary_range``).
            On any failure a single-element list ``[{"error": message}]`` is
            returned instead of raising.
        """
        url = "https://jsearch.p.rapidapi.com/search"
        headers = {
            "X-RapidAPI-Key": self.api_key,
            "X-RapidAPI-Host": "jsearch.p.rapidapi.com"
        }
        query = {
            "query": self._build_search_query(search_terms, location),
            "page": 1,
            "num_pages": 1
        }

        # Deliberately broad: this method reports every failure as data so the
        # UI can display it instead of crashing.
        try:
            # A timeout keeps a stalled API from hanging the UI indefinitely.
            response = requests.get(
                url, headers=headers, params=query, timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                return [{"error": f"API request failed with status {response.status_code}"}]

            # "data" may be absent or null in the payload.
            jobs = response.json().get("data") or []

            return [
                {
                    "title": job.get("job_title"),
                    "company": job.get("employer_name"),
                    "location": job.get("job_city"),
                    "description": job.get("job_description"),
                    # The key can be present with a null value; normalise to a list.
                    "skills_required": job.get("job_required_skills") or [],
                    "url": job.get("job_apply_link"),
                    "salary_range": (
                        f"{job.get('job_salary_currency', '')} "
                        f"{job.get('job_min_salary', '')} - {job.get('job_max_salary', '')}"
                        if job.get("job_min_salary") else "Not specified"
                    )
                }
                for job in jobs[:num_results]
            ]
        except Exception as e:
            return [{"error": str(e)}]

    def _rank_jobs_by_portfolio_match(self, jobs: List[Dict[str, Any]], portfolio_skills: List[Any]) -> List[Dict[str, Any]]:
        """Score each job against the portfolio and sort best match first.

        A job skill counts as matched when it and some portfolio skill contain
        one another (case-insensitive substring test). Each job dict is
        mutated in place to gain ``match_score`` (percentage of its skills
        that matched, ``0`` when it lists none) and ``matching_skills``.

        Args:
            jobs: Job dicts; ``skills_required`` may be missing or ``None``.
            portfolio_skills: Comma-separated skill strings, one per portfolio
                row; non-string entries (e.g. NaN) are ignored.

        Returns:
            A new list of the same dicts ordered by descending ``match_score``.
        """
        known_skills = []
        for entry in portfolio_skills:
            if not isinstance(entry, str):
                continue  # e.g. NaN from an empty CSV cell
            # Blank fragments must be dropped: "" is a substring of every
            # string and would make every job skill look like a match.
            known_skills.extend(
                part.strip().lower() for part in entry.split(',') if part.strip()
            )

        def is_known(skill):
            if not isinstance(skill, str):
                return False
            needle = skill.strip().lower()
            if not needle:
                return False
            return any(known in needle or needle in known for known in known_skills)

        for job in jobs:
            job_skills = job.get('skills_required') or []
            matching = [skill for skill in job_skills if is_known(skill)]
            job['match_score'] = (len(matching) / len(job_skills)) * 100 if job_skills else 0
            job['matching_skills'] = matching

        return sorted(jobs, key=lambda x: x['match_score'], reverse=True)

    def _generate_job_recommendations(self, ranked_jobs, portfolio_skills):
        """Ask the LLM for application advice based on the three best matches.

        Returns:
            Parsed JSON, requested with the keys ``application_strategy``,
            ``skill_development``, ``networking_suggestions`` and
            ``timeline_recommendations``.
        """
        prompt_recommendations = PromptTemplate.from_template("""
        ### TOP MATCHING JOBS:
        {top_jobs}
        
        ### PORTFOLIO SKILLS:
        {portfolio_skills}
        
        ### INSTRUCTION:
        Based on the job matches and portfolio skills, provide strategic recommendations:
        
        Return JSON with:
        - `application_strategy`: How to approach these job applications
        - `skill_development`: Skills to develop for better matches
        - `networking_suggestions`: How to leverage connections
        - `timeline_recommendations`: Suggested application timeline
        
        ### VALID JSON (NO PREAMBLE):
        """)

        top_jobs_str = json.dumps(ranked_jobs[:3], indent=2)

        return self._run_json_prompt(prompt_recommendations, {
            "top_jobs": top_jobs_str,
            "portfolio_skills": portfolio_skills
        })


# Main processing functions
def process_job_analysis(url, portfolio_csv):
    """Extract a job posting and analyse its skills against interview needs.

    Args:
        url: Address of the job posting page.
        portfolio_csv: Uploaded portfolio CSV (path or file object).

    Returns:
        On success ``{"job_details", "skills_analysis", "status": "success"}``;
        on any failure ``{"error": message, "status": "error"}``. The function
        never raises, so the UI can always render the result.
    """
    try:
        # Validate up front so the user sees an actionable message rather
        # than an opaque library error.
        if portfolio_csv is None:
            raise ValueError("Please upload a portfolio CSV file.")
        if not url or not url.strip():
            raise ValueError("Please enter a job posting URL.")

        analyzer = JobAnalyzer()
        # Called for its side effect of seeding the vector store; the returned
        # frame is not needed for this analysis.
        analyzer.load_portfolio(portfolio_csv)
        job_data = analyzer.extract_job_posting(url.strip())

        # The extractor may return a single posting or a list of postings.
        if isinstance(job_data, dict):
            first_job = job_data
        elif job_data:
            first_job = job_data[0]
        else:
            raise ValueError("No job posting could be extracted from the URL.")

        job_skills = first_job.get('skills', [])
        analysis_result = analyzer.analyze_skills_and_generate_questions(job_skills)

        return {
            "job_details": job_data,
            "skills_analysis": analysis_result,
            "status": "success"
        }
    except Exception as e:
        # UI boundary: report every failure as data.
        return {"error": str(e), "status": "error"}


def process_portfolio_tailoring(url, portfolio_csv):
    """Suggest how to adapt the portfolio to the job posted at ``url``.

    Args:
        url: Address of the job posting page.
        portfolio_csv: Uploaded portfolio CSV (path or file object).

    Returns:
        On success ``{"tailoring_suggestions", "job_details", "status": "success"}``;
        on any failure ``{"error": message, "status": "error"}``. The function
        never raises, so the UI can always render the result.
    """
    try:
        # Validate up front so the user sees an actionable message rather
        # than an opaque library error.
        if portfolio_csv is None:
            raise ValueError("Please upload a portfolio CSV file.")
        if not url or not url.strip():
            raise ValueError("Please enter a job posting URL.")

        analyzer = JobAnalyzer()
        df = analyzer.load_portfolio(portfolio_csv)
        job_data = analyzer.extract_job_posting(url.strip())

        # Use the first posting when the extractor returns several.
        if isinstance(job_data, dict):
            job_info = job_data
        elif job_data:
            job_info = job_data[0]
        else:
            raise ValueError("No job posting could be extracted from the URL.")

        tailoring_result = analyzer.tailor_portfolio(df, job_info)

        return {
            "tailoring_suggestions": tailoring_result,
            "job_details": job_info,
            "status": "success"
        }
    except Exception as e:
        # UI boundary: report every failure as data.
        return {"error": str(e), "status": "error"}


def process_cover_letter_generation(url, portfolio_csv):
    """Generate a cover letter for the job posted at ``url``.

    Args:
        url: Address of the job posting page.
        portfolio_csv: Uploaded portfolio CSV (path or file object).

    Returns:
        On success ``{"cover_letter", "job_details", "status": "success"}``;
        on any failure ``{"error": message, "status": "error"}``. The function
        never raises, so the UI can always render the result.
    """
    try:
        # Validate up front so the user sees an actionable message rather
        # than an opaque library error.
        if portfolio_csv is None:
            raise ValueError("Please upload a portfolio CSV file.")
        if not url or not url.strip():
            raise ValueError("Please enter a job posting URL.")

        analyzer = JobAnalyzer()
        df = analyzer.load_portfolio(portfolio_csv)
        job_data = analyzer.extract_job_posting(url.strip())

        # Use the first posting when the extractor returns several.
        if isinstance(job_data, dict):
            job_info = job_data
        elif job_data:
            job_info = job_data[0]
        else:
            raise ValueError("No job posting could be extracted from the URL.")

        cover_letter_result = analyzer.generate_cover_letter(df, job_info)

        return {
            "cover_letter": cover_letter_result,
            "job_details": job_info,
            "status": "success"
        }
    except Exception as e:
        # UI boundary: report every failure as data.
        return {"error": str(e), "status": "error"}


def process_job_search(portfolio_csv, search_terms, location):
    """Search for jobs that fit the uploaded portfolio.

    Args:
        portfolio_csv: Uploaded portfolio CSV (path or file object).
        search_terms: Comma-separated job titles; when blank the analyzer
            derives search terms from the portfolio instead.
        location: Optional free-text location.

    Returns:
        On success ``{"job_search_results", "status": "success"}``; on any
        failure ``{"error": message, "status": "error"}``. The function never
        raises, so the UI can always render the result.
    """
    try:
        # Validate up front so the user sees an actionable message rather
        # than an opaque library error.
        if portfolio_csv is None:
            raise ValueError("Please upload a portfolio CSV file.")

        analyzer = JobAnalyzer()
        df = analyzer.load_portfolio(portfolio_csv)

        # Drop blank fragments: input such as " " or "a,,b" would otherwise
        # yield empty terms and suppress automatic term generation.
        cleaned_terms = [term.strip() for term in (search_terms or "").split(',')]
        search_terms_list = [term for term in cleaned_terms if term] or None

        job_search_result = analyzer.find_matching_jobs(df, search_terms_list, location or "")

        return {
            "job_search_results": job_search_result,
            "status": "success"
        }
    except Exception as e:
        # UI boundary: report every failure as data.
        return {"error": str(e), "status": "error"}


# Gradio Interface
def create_gradio_app():
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout is one shared portfolio upload, three tabs that each take a
    job-posting URL, a job-search tab, and a collapsed instructions panel.
    """
    # (tab title, button caption, output label, click handler) for the three
    # tabs that share the "URL in, JSON out" layout.
    url_driven_tabs = (
        ("Job Analysis & Interview Questions", "Analyze Job Posting",
         "Analysis Results", process_job_analysis),
        ("Portfolio Tailoring", "Tailor Portfolio",
         "Portfolio Tailoring Suggestions", process_portfolio_tailoring),
        ("Cover Letter Generator", "Generate Cover Letter",
         "Generated Cover Letter", process_cover_letter_generation),
    )

    with gr.Blocks(theme='Respair/Shiki@1.2.1', title="Enhanced Job Analyzer") as app:
        gr.Markdown("# Enhanced Job Analyzer with AI-Powered Features")
        gr.Markdown("Upload your portfolio and analyze jobs, tailor your profile, generate cover letters, and find matching opportunities!")

        # The portfolio upload is shared by every tab.
        with gr.Row():
            portfolio_input = gr.File(label="Upload Portfolio CSV", file_types=[".csv"])

        with gr.Tabs():
            # Tabs 1-3: job-posting URL plus the shared portfolio.
            for tab_title, button_caption, output_label, handler in url_driven_tabs:
                with gr.TabItem(tab_title):
                    with gr.Row():
                        url_box = gr.Textbox(
                            label="Job Posting URL",
                            placeholder="Enter the URL of the job posting"
                        )

                    run_button = gr.Button(button_caption, variant="primary")
                    result_view = gr.JSON(label=output_label)

                    run_button.click(
                        handler,
                        inputs=[url_box, portfolio_input],
                        outputs=result_view
                    )

            # Tab 4: portfolio-driven job search.
            with gr.TabItem("Job Search Agent"):
                with gr.Row():
                    search_terms_input = gr.Textbox(
                        label="Search Terms (comma-separated)",
                        placeholder="e.g., Software Engineer, Data Scientist, ML Engineer",
                        value=""
                    )
                    location_input = gr.Textbox(
                        label="Location",
                        placeholder="e.g., San Francisco, CA",
                        value=""
                    )

                search_button = gr.Button("Search Jobs", variant="primary")
                search_output = gr.JSON(label="Job Search Results")

                search_button.click(
                    process_job_search,
                    inputs=[portfolio_input, search_terms_input, location_input],
                    outputs=search_output
                )

        # Collapsed usage notes.
        with gr.Accordion("Instructions", open=False):
            gr.Markdown("""
            ### How to Use This Enhanced Job Analyzer:
            
            1. **Upload Portfolio CSV**: Your CSV should have a 'Technology' column with comma-separated skills
            2. **Job Analysis**: Paste job URL to get skills match and interview questions
            3. **Portfolio Tailoring**: Get suggestions to improve your portfolio for specific jobs
            4. **Cover Letter**: Generate personalized cover letters based on your portfolio and job requirements
            5. **Job Search**: Find jobs that match your portfolio skills automatically
            
            ### Features:
            - Skills matching and gap analysis
            - AI-generated interview questions  
            - Portfolio optimization suggestions
            - Personalized cover letter generation
            - Intelligent job search and ranking
            - Career development recommendations
            """)

    return app


# Launch the application
if __name__ == "__main__":
    # Build the Blocks UI; the module-level `app` name stays bound so the
    # running app can still be referenced after launch.
    app = create_gradio_app()
    # share=True also serves the app through a temporary public gradio.live
    # link in addition to the local URL.
    # NOTE(review): anyone holding that link can presumably trigger requests
    # that use the configured API keys - confirm this exposure is intended.
    app.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://86ddea520403cf6d08.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
