In [2]:
def date_posted(days) -> str:
    return "r" + str(days * 86400)

date_posted(7)

'r604800'

In [69]:
from pydantic import BaseModel,Field,computed_field
from typing import List
from enum import Enum

In [70]:
class JobType(str,Enum):
    REMOTE = 'remote'
    HYBRID = 'hybrid'
    ONSITE = 'onsite'

class ExperienceLevel(str, Enum):
    Internship = "Internship"
    Entry_level = "Entry_level"
    Associate = "Associate"
    Mid_Senior_level = "Mid_Senior_level"
    Director = "Director"
    Executive = "Executive"

In [None]:
class JobInfo(BaseModel):
    title : str | None = Field(default=None,description= "Primary job title or role to search for.This represents the main occupation or position of interest." ,examples=['AI engineer','Data Scientist','SQL','Java','Software Engineer'])
    location : str | None = Field(default=None,description= "The name of the country where job needs to be find, If any city name is entered then think of the contry in which the city exist",examples=['India','America'])
    days : int | None = Field(default=7,description= "Job posted within the last days ",examples=[1,3,7,14])
    companyName : List[str] | None = Field(default=None,description= "The List of companies which needs to consider first or which is only needs to considered, ordered by priority ",examples=['Google','Microsoft'])
    companyId : List[str] | None = Field(default=None,description= "The List of Ids of the companies which needs to consider first or which is only needs to considered, ordered by priority ",examples=['21345','5567483'])
    skipJobId : List[str] | None = Field(default=None,description= "The List of Ids of the companies which needs to  be skiped or not considered",examples=['21345','5567483'])
    jobType : List[JobType] | None = Field(default=None,description= "This is the list of type of job the user preferred, ordered by priority ",examples=[['remote','hybrid'],['onsite']])
    experience_level : List[ExperienceLevel] | None = Field(default=None,description= "Preferred experience levels for the job, ordered by priority (from most to least preferred).")
    # typeOfContract : List[str] | None = Field(default=None,description= "The type of the ontract preffered by the user rankwise ",examples=[['Part_time','Full_time']])
    limit : int | None = Field(default=3,description= "The number of jobs the user wants to find even if the user will say a big number limit it up to 3",le=3,ge=1,examples=[3,1,2])
    
    @computed_field
    @property
    def datePosted(self) -> str:
        return "r" + str(self.days * 86400)

    @computed_field
    @property
    def remote(self) -> List[int] | None:
        if not self.jobType:
            return None

        mapping = {
            JobType.ONSITE: '1',
            JobType.REMOTE: '2',
            JobType.HYBRID: '3',
        }

        return [mapping[jt] for jt in self.jobType]
    
    @computed_field
    @property
    def experienceLevel(self) -> List[str] | None:
        if not self.experience_level:
            return None

        mapping = {
            ExperienceLevel.Internship: "1",
            ExperienceLevel.Entry_level: "2",
            ExperienceLevel.Associate: "3",
            ExperienceLevel.Mid_Senior_level: "4",
            ExperienceLevel.Director: "5",
            ExperienceLevel.Executive: "6",
        }

        return [mapping[ctype] for ctype in self.experience_level]  
    
    

In [None]:
""""1"	Internship
"2"	Entry level
"3"	Associate
"4"	Mid-Senior level
"5"	Director
"6"	Executive"""

In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

load_dotenv()

llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash-lite')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
llm.invoke("What is the cpaital of India in one word")

AIMessage(content='Delhi', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': [], 'model_provider': 'google_genai'}, id='lc_run--019b53b1-b1a4-7ee1-9579-72d7b701c22d-0', usage_metadata={'input_tokens': 12, 'output_tokens': 1, 'total_tokens': 13, 'input_token_details': {'cache_read': 0}})

In [81]:
user_prompt = "I want to find the jobs for AI engineering remote internship find 100 jobs in Ahmedabad"

In [82]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(template="""
        You are job searching helping agent you need to pass the 
        data to job searchng API for that you need to structure the 
        output from the given user prompt : {user_prompt}
        """,
        input_variables=['user_prompt'])

In [83]:
job_info_agent = (
    prompt | 
    llm.with_structured_output(JobInfo)
    )

In [84]:
res = job_info_agent.invoke({'user_prompt':user_prompt})

  PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [field_name='remote', input_value='2', input_type=str])
  return self.__pydantic_serializer__.to_python(


In [85]:
res

JobInfo(title='AI engineer', location='Ahmedabad', days=7, companyName=None, companyId=None, skipJobId=None, jobType=[<JobType.REMOTE: 'remote'>], experience_level=[<ExperienceLevel.Internship: 'Internship'>], limit=3, datePosted='r604800', remote=['2'], experienceLevel=['1'])

In [16]:
type(res)

__main__.JobInfo

In [86]:
job_info_dict = res.model_dump()

In [87]:
job_info_dict

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'days': 7,
 'companyName': None,
 'companyId': None,
 'skipJobId': None,
 'jobType': [<JobType.REMOTE: 'remote'>],
 'experience_level': [<ExperienceLevel.Internship: 'Internship'>],
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'experienceLevel': ['1']}

In [19]:
class Job_Info_state(BaseModel):
    title : str  | None
    location : str | None
    datePosted : str | None
    companyName : List[str] | None
    companyId : List[str] | None
    skipJobId : List[str] | None
    remote : List[str] | None
    experienceLevel : List[str] | None
    contractType : List[str] | None
    limit : int | None
    
{
  "title": "Java",
  "location": "New York",
  "datePosted": "r604800",
  "companyName": [
    "Google",
    "Apple"
  ],
  "companyId": [
    "1441",
    "162479"
  ],
  "contractType": [
    "F",
    "P"
  ],
  "experienceLevel": [
    "1",
    "2"
  ],
  "remote": [
    "2",
    "3"
  ],
  "limit": 100,
  "urlParam": [
    {
      "key": "f_TPR",
      "value": "r3600"
    }
  ],
  "skipJobId": [
    "4219847745",
    "4219847746"
  ]
}

{'title': 'Java',
 'location': 'New York',
 'datePosted': 'r604800',
 'companyName': ['Google', 'Apple'],
 'companyId': ['1441', '162479'],
 'contractType': ['F', 'P'],
 'experienceLevel': ['1', '2'],
 'remote': ['2', '3'],
 'limit': 100,
 'urlParam': [{'key': 'f_TPR', 'value': 'r3600'}],
 'skipJobId': ['4219847745', '4219847746']}

In [20]:
job_info_obj = Job_Info_state.model_validate(job_info_dict)

In [21]:
job_info_obj

Job_Info_state(title='AI engineer', location='Ahmedabad', datePosted='r604800', companyName=None, companyId=None, skipJobId=None, remote=['2'], experienceLevel=None, contractType=['I'], limit=3)

In [22]:
Job_Info_state(**job_info_dict)

Job_Info_state(title='AI engineer', location='Ahmedabad', datePosted='r604800', companyName=None, companyId=None, skipJobId=None, remote=['2'], experienceLevel=None, contractType=['I'], limit=3)

In [88]:
job_info_dict

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'days': 7,
 'companyName': None,
 'companyId': None,
 'skipJobId': None,
 'jobType': [<JobType.REMOTE: 'remote'>],
 'experience_level': [<ExperienceLevel.Internship: 'Internship'>],
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'experienceLevel': ['1']}

In [89]:
clean_payload = res.model_dump(
    mode="json",
    exclude = ['days','jobType','experience_level'],
    exclude_none=True
)

  PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [field_name='remote', input_value='2', input_type=str])
  return self.__pydantic_serializer__.to_python(


In [90]:
clean_payload

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'experienceLevel': ['1']}

In [91]:
load_dotenv()

True

In [None]:
import os 
APIFY_TOKEN = os.getenv('APIFY_TOKEN') 
APIFY_ACTOR_NAME = os.getenv('APIFY_ACTOR_NAME')

In [30]:
APIFY_ACTOR_NAME

'valig/linkedin-jobs-scraper'

In [None]:
from apify_client import ApifyClient

apify_client = ApifyClient(APIFY_TOKEN)

# Define the input for the Actor
actor_input = {'title': 'AI engineer',
 'location': 'India',
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'experienceLevel': ['1']}
# actor_input = clean_payload

# Run an Actor with an input
print("Running the Actor...")
actor_name = APIFY_ACTOR_NAME
actor_run = apify_client.actor(actor_name).start(run_input=actor_input)

print("üöÄ Actor was started")
print("üíæ Check your run here: https://console.apify.com/actors/runs/%(id)s" % {"id": actor_run["id"]})

Running the Actor...
üöÄ Actor was started
üíæ Check your run here: https://console.apify.com/actors/runs/deA7vtUpFTc1nNric


In [100]:
class Job(BaseModel):
    id : str
    url : str
    title : str
    location: str
    companyName: str 
    companyUrl:str
    recruiterName:str
    recruiterUrl:str
    experienceLevel: str
    contractType:str
    workType:str
    sector: str
    salary:  str
    applyType: str
    applyUrl:str 
    postedTimeAgo:str  
    postedDate: str
    applicationsCount: str
    description:str

In [102]:
l = []
for item in apify_client.dataset(actor_run["defaultDatasetId"]).iterate_items():
    print(item)
    print(type(item))
    l.append(Job(**item))
print(l)

{'id': '4347051914', 'url': 'https://www.linkedin.com/jobs/view/machine-learning-engineer-intern-at-upstart-4347051914', 'title': 'Machine Learning Engineer Intern', 'location': 'United States', 'companyName': 'Upstart', 'companyUrl': 'https://www.linkedin.com/company/upstart-network', 'recruiterName': '', 'recruiterUrl': '', 'experienceLevel': 'Internship', 'contractType': 'Internship', 'workType': 'Engineering and Information Technology', 'sector': 'Financial Services', 'salary': '$141,000.00/yr - $150,000.00/yr', 'applyType': 'EXTERNAL', 'applyUrl': '', 'postedTimeAgo': '3 days ago', 'postedDate': '2025-12-13T00:00:00.000Z', 'applicationsCount': 'Over 200 applicants', 'description': "About UpstartUpstart is the leading AI lending marketplace partnering with banks and credit unions to expand access to affordable credit. By leveraging Upstart's AI marketplace, Upstart-powered banks and credit unions can have higher approval rates and lower loss rates across races, ages, and genders, w

In [103]:
l

[Job(id='4347051914', url='https://www.linkedin.com/jobs/view/machine-learning-engineer-intern-at-upstart-4347051914', title='Machine Learning Engineer Intern', location='United States', companyName='Upstart', companyUrl='https://www.linkedin.com/company/upstart-network', recruiterName='', recruiterUrl='', experienceLevel='Internship', contractType='Internship', workType='Engineering and Information Technology', sector='Financial Services', salary='$141,000.00/yr - $150,000.00/yr', applyType='EXTERNAL', applyUrl='', postedTimeAgo='3 days ago', postedDate='2025-12-13T00:00:00.000Z', applicationsCount='Over 200 applicants', description="About UpstartUpstart is the leading AI lending marketplace partnering with banks and credit unions to expand access to affordable credit. By leveraging Upstart's AI marketplace, Upstart-powered banks and credit unions can have higher approval rates and lower loss rates across races, ages, and genders, while simultaneously delivering the exceptional digita

In [107]:
for job in l:
    print(job.description)

About UpstartUpstart is the leading AI lending marketplace partnering with banks and credit unions to expand access to affordable credit. By leveraging Upstart's AI marketplace, Upstart-powered banks and credit unions can have higher approval rates and lower loss rates across races, ages, and genders, while simultaneously delivering the exceptional digital-first lending experience their customers demand. More than 80% of borrowers are approved instantly, with zero documentation to upload.Upstart is a digital-first company, which means that most Upstarters live and work anywhere in the United States. However, we also have offices in San Mateo, California; Columbus, Ohio; Austin, Texas; and New York City, NY (opening Summer 2026).Most Upstarters join us because they connect with our mission of enabling access to effortless credit based on true risk. If you are energized by the impact you can make at Upstart, we‚Äôd love to hear from you!The TeamMachine Learning is at the heart of Upstart

In [34]:
actor_input

{'title': 'AI Engineer',
 'location': 'India',
 'datePosted': 'r604800',
 'experienceLevel': ['1', '2'],
 'remote': ['2', '3'],
 'limit': 1}

In [35]:
clean_payload

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'days': 7,
 'jobType': ['remote'],
 'typeOfContract': ['Internship'],
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'contractType': ['I']}

In [None]:
clean_payload.pop('days')
clean_payload.pop('jobType')
clean_payload.pop('experience_level')

['Internship']

In [39]:
clean_payload

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2'],
 'contractType': ['I']}

In [None]:
actor_input = {
  "title": "AI Engineer",
  "location": "Ahmedabad",
  "limit": 3,
  "datePosted": "r604800",
  "remote": ["2"]
}

In [55]:
actor_input

{'title': 'AI Engineer',
 'location': 'Ahmedabad',
 'limit': 1,
 'datePosted': 'r604800',
 'remote': ['2'],
 'contractType': ['F']}

In [59]:
clean_payload_final.pop('contractType')

['I']

In [62]:
clean_payload_final

{'title': 'AI engineer',
 'location': 'Ahmedabad',
 'limit': 3,
 'datePosted': 'r604800',
 'remote': ['2']}

In [58]:
from apify_client import ApifyClient

apify_client = ApifyClient(APIFY_TOKEN)

# Define the input for the Actor
# actor_input = {
#   "title": "AI Engineer",
#   "location": "India",
#   "datePosted": "r604800",
#   "experienceLevel": [
#     "1",
#     "2"
#   ],
#   "remote": [
#     "2",
#     "3"
#   ],
#   "limit": 1,
# }

# Run an Actor with an input
print("Running the Actor...")
actor_name = APIFY_ACTOR_NAME
actor_run = apify_client.actor(actor_name).start(run_input=actor_input)

print("üöÄ Actor was started")
print("üíæ Check your run here: https://console.apify.com/actors/runs/%(id)s" % {"id": actor_run["id"]})

Running the Actor...
üöÄ Actor was started
üíæ Check your run here: https://console.apify.com/actors/runs/EQbZFXPvedQcWQLW4


In [65]:
from apify_client import ApifyClient

apify_client = ApifyClient(APIFY_TOKEN)

# Define the input for the Actor
actor_input = {
  "title": "AI Engineer",
  "location": "India",
  "datePosted": "r604800",
  "experienceLevel": [
    "1",
    "2"
  ],
  "remote": [
    "2",
    "3"
  ],
  "limit": 1,
  "experienceLevel" : ['0']
}

# Run an Actor with an input
print("Running the Actor...")
actor_name = APIFY_ACTOR_NAME
actor_run = apify_client.actor(actor_name).start(run_input=actor_input)

print("üöÄ Actor was started")
print("üíæ Check your run here: https://console.apify.com/actors/runs/%(id)s" % {"id": actor_run["id"]})

Running the Actor...


ApifyApiError: Input is not valid: Field input.experienceLevel.0 must be equal to one of the allowed values: "1", "2", "3", "4", "5", "6"

In [4]:
import os

In [5]:
APIFY_TOKEN = os.getenv('APIFY_TOKEN') 
APIFY_ACTOR_NAME = os.getenv('APIFY_ACTOR_NAME')

In [10]:
"""
Prompt Templates for AI Agents.

This module contains all the prompt templates used by various AI agents
in the job search and resume analysis workflow.
"""

# Job Search Prompt Template
JOB_SERCHING_PROMPT = """
        You are job searching helping agent you need to pass the 
        data to job searchng API for that you need to structure the 
        output from the given user prompt : {user_prompt}
        """


# Resume Analysis Prompt Template
RESUME = """
        You are thoughtful agent help in extracting the Important features 
        from the resume text. Resume text is not structures as it is extracted 
        from a knowledge so use your llm skills to extract the following:
        Skills : The main programing and technikal skills focus more on the domain specific skills rather than soft skills
        Profile : The brief info about the user found in the resume text
        Projects : The Projects that are built by the user found in the resume
        Certifications : THe certifications of the user in the resume
        Experience : The Experience of the user mentioned in the resume
        Education : The education oof the yser found in the resume
        The Remuse text is : {resume_text}
"""


# Resume Feedback and Similarity Prompt Template
RESUME_FEEDBACK = """
        You are a agent that hepls users to find the best job be hepling them improve there resume
        Your job is to tell what are the things that lack In there resume.
        What are those keywords, skills or projects what will help him to improve the resume,
        Along with that give the similarity score between 0 to 100 how well the jon suits the user
        
        Resemu text : {resume_text}
        Job description : {job_description}
"""

In [11]:
"""
Structured Output Models for AI Agents.

This module defines Pydantic models that structure the outputs from
various AI agents, ensuring type safety and data validation.
"""

from pydantic import BaseModel, Field, computed_field
from typing import List
from enum import Enum


class JobType(str, Enum):
    """
    Enumeration of job location/work arrangement types.
    
    Attributes:
        REMOTE: Fully remote work.
        HYBRID: Mix of remote and office work.
        ONSITE: Work from office only.
    """
    REMOTE = 'remote'
    HYBRID = 'hybrid'
    ONSITE = 'onsite'


class ExperienceLevel(str, Enum):
    """
    Enumeration of professional experience levels.
    
    Attributes:
        Internship: Internship positions.
        Entry_level: Entry-level positions (0-2 years).
        Associate: Associate level (2-5 years).
        Mid_Senior_level: Mid to senior level (5-10 years).
        Director: Director level positions.
        Executive: Executive/C-level positions.
    """
    Internship = "Internship"
    Entry_level = "Entry_level"
    Associate = "Associate"
    Mid_Senior_level = "Mid_Senior_level"
    Director = "Director"
    Executive = "Executive"


class JobInfo(BaseModel):
    """
    Structured job search parameters extracted from user input.
    
    This model is used to parse natural language job search queries into
    structured parameters that can be used with job search APIs.
    
    Attributes:
        title: Primary job title or role to search for.
        location: Country where jobs should be searched.
        days: Number of days to look back for job postings.
        companyName: Preferred companies, ordered by priority.
        companyId: Company IDs to filter by, ordered by priority.
        skipJobId: Job IDs to exclude from results.
        jobType: Preferred work arrangements (remote, hybrid, onsite).
        experience_level: Preferred experience levels, ordered by priority.
        limit: Maximum number of jobs to retrieve (capped at 3).
    """
    title: str | None = Field(
        default=None,
        description="Primary job title or role to search for.This represents the main occupation or position of interest.",
        examples=['AI engineer', 'Data Scientist', 'SQL', 'Java', 'Software Engineer']
    )
    location: str | None = Field(
        default=None,
        description="The name of the country where job needs to be find, If any city name is entered then think of the contry in which the city exist",
        examples=['India', 'America']
    )
    days: int | None = Field(
        default=7,
        description="Job posted within the last days",
        examples=[1, 3, 7, 14]
    )
    companyName: List[str] | None = Field(
        default=None,
        description="The List of companies which needs to consider first or which is only needs to considered, ordered by priority",
        examples=['Google', 'Microsoft']
    )
    companyId: List[str] | None = Field(
        default=None,
        description="The List of Ids of the companies which needs to consider first or which is only needs to considered, ordered by priority",
        examples=['21345', '5567483']
    )
    skipJobId: List[str] | None = Field(
        default=None,
        description="The List of Ids of the companies which needs to  be skiped or not considered",
        examples=['21345', '5567483']
    )
    jobType: List[JobType] | None = Field(
        default=None,
        description="This is the list of type of job the user preferred, ordered by priority",
        examples=[['remote', 'hybrid'], ['onsite']]
    )
    experience_level: List[ExperienceLevel] | None = Field(
        default=None,
        description="Preferred experience levels for the job, ordered by priority (from most to least preferred)."
    )
    limit: int | None = Field(
        default=3,
        description="The number of jobs the user wants to find even if the user will say a big number limit it up to 3",
        le=3,
        ge=1,
        examples=[3, 1, 2]
    )
    
    @computed_field
    @property
    def datePosted(self) -> str:
        """
        Convert days to LinkedIn API date format.
        
        Returns:
            str: Date filter in format 'r{seconds}' for LinkedIn API.
        """
        return "r" + str(self.days * 86400)

    @computed_field
    @property
    def remote(self) -> List[int] | None:
        """
        Convert JobType enum to LinkedIn API remote filter codes.
        
        Returns:
            List[int] | None: List of remote type codes (1=onsite, 2=remote, 3=hybrid).
        """
        if not self.jobType:
            return None

        mapping = {
            JobType.ONSITE: '1',
            JobType.REMOTE: '2',
            JobType.HYBRID: '3',
        }

        return [mapping[jt] for jt in self.jobType]
    
    @computed_field
    @property
    def experienceLevel(self) -> List[str] | None:
        """
        Convert ExperienceLevel enum to LinkedIn API experience filter codes.
        
        Returns:
            List[str] | None: List of experience level codes (1-6).
        """
        if not self.experience_level:
            return None

        mapping = {
            ExperienceLevel.Internship: "1",
            ExperienceLevel.Entry_level: "2",
            ExperienceLevel.Associate: "3",
            ExperienceLevel.Mid_Senior_level: "4",
            ExperienceLevel.Director: "5",
            ExperienceLevel.Executive: "6",
        }

        return [mapping[ctype] for ctype in self.experience_level]  


class Resume(BaseModel):
    """
    Structured fields extracted from a resume.
    
    Attributes:
        skills: Technical and domain-specific skills (not soft skills).
        profile: Brief professional summary about the candidate.
        Projects: List of projects mentioned in the resume.
        Certifications: Professional certifications held.
        Experience: Work experience details.
        Education: Educational qualifications.
    """
    skills: List[str] = Field(
        default=['No skills'],
        description="The main programing and technikal skills focus more on the domain specific skills rather than soft skills"
    )
    profile: str = Field(
        default='No profile',
        description="The brief info about the user found in the resume text"
    )
    Projects: List[str] = Field(
        default=['No Projects'],
        description="The Projects that are built by the user found in the resume"
    )
    Certifications: List[str] = Field(
        default=['No Certifications'],
        description='THe certifications of the user in the resume'
    )
    Experience: List[str] = Field(
        default=['No Experience'],
        description="The Experience of the user mentioned in the resume"
    )
    Education: List[str] = Field(
        default='[No Education]',
        description='The education oof the yser found in the resume'
    )


class SimilarAndFeedback(BaseModel):
    """
    Similarity score and feedback for resume-job matching.
    
    Attributes:
        similarity: Match score from 0-100 indicating job suitability.
        feedback: Detailed feedback on what's missing or needs improvement.
    """
    similarity: int = Field(
        ...,
        description='Judge how good the Job is matching the resume text The similarity between 0 to 100 to tell him will this job suits him by his resume'
    )
    feedback: str = Field(
        default='No feedback',
        description='Tell him what is missing from the user resume to get this job, kind of the feedback, what sector is lacking and what is lacking'
    )


class Job_Summary(BaseModel):
    """
    Extracted summary and key information from job description.
    
    Attributes:
        job_skills: List of required skills mentioned in the job.
        job_info: 3-line summary of the job role.
    """
    job_skills: List[str] = Field(
        ...,
        description='Extract the skills that are mentioned in the Job description'
    )
    job_info: str = Field(
        ...,
        description="Summary of the job in 3 lines"
    )

In [13]:
"""
AI Agents Module for Job Search Application.

This module defines all the AI agents (LLM chains) used for various tasks
including job search parsing, resume analysis, and feedback generation.
"""

import logging
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
# from src.prompts import *
# from src.structure_outputs import *
from dotenv import load_dotenv

load_dotenv()

# Configure logging
logger = logging.getLogger(__name__)


class Agents:
    """
    Collection of AI agents for job search and resume analysis.
    
    This class initializes and manages various LLM-based agents that perform
    different tasks in the workflow:
    - Job search input parsing
    - Job description summarization
    - Resume field extraction
    - Resume-job matching and feedback
    
    Attributes:
        job_input_agent: Agent for parsing user job search queries.
        job_summary_agent: Agent for extracting key info from job descriptions.
        resume_agent: Agent for extracting structured fields from resumes.
        resume_feedback_agent: Agent for generating feedback and similarity scores.
    """
    
    def __init__(self):
        """
        Initialize all AI agents with their respective models and prompts.
        
        Sets up:
        - Google Gemini LLM for text generation
        - HuggingFace embeddings for semantic search
        - Prompt templates for each agent
        - Structured output schemas
        """
        logger.info("Initializing AI agents")
        
        # Initialize LLM
        logger.debug("Loading Gemini 2.5 Flash Lite model")
        llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite")
        
        # Initialize embeddings
        logger.debug("Loading HuggingFace embeddings model")
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        
        # Job Search Input Agent
        logger.debug("Setting up job searching agent")
        job_searching_prompt = PromptTemplate(
            template=JOB_SERCHING_PROMPT,
            input_variables=['user_prompt']
        )
        
        self.job_input_agent = (
            job_searching_prompt |
            llm.with_structured_output(JobInfo)
        )
        logger.debug("Job searching agent initialized")
        
        # Job Description Summary Agent
        logger.debug("Setting up job summary agent")
        job_description_template = PromptTemplate(
            template="""{job_description}""",
            input_variables=['job_description']
        )
        
        self.job_summary_agent = (
            job_description_template |
            llm.with_structured_output(Job_Summary)
        )
        logger.debug("Job summary agent initialized")
        
        # Resume Extraction Agent
        logger.debug("Setting up resume extraction agent")
        resume_template = PromptTemplate(
            template=RESUME,
            input_variables=['resume_text']
        )
        
        self.resume_agent = (
            resume_template | 
            llm.with_structured_output(Resume)
        )
        logger.debug("Resume extraction agent initialized")
        
        # Resume Feedback Agent
        logger.debug("Setting up resume feedback agent")
        resume_feedback_template = PromptTemplate(
            template=RESUME_FEEDBACK,
            input_variables=['resume_skills', 'resume_profile', 'job_skills', 'job_info'],
            input_types={
                'resume_skills': List[str],
                'resume_profile': str,
                'job_skills': List[str],
                'job_info': str
            }
        )
        
        self.resume_feedback_agent = (
            resume_feedback_template |
            llm.with_structured_output(SimilarAndFeedback)
        )
        logger.debug("Resume feedback agent initialized")
        
        logger.info("All AI agents initialized successfully")
        
        

In [15]:
from apify_client import ApifyClient

def linkedin_scrapper(actor_input: dict):
    """
    Scrape LinkedIn job listings using Apify Actor.
    
    This function uses the Apify platform to run a LinkedIn scraping actor
    that searches for jobs based on the provided input parameters.
    
    Args:
        actor_input (dict): Dictionary containing job search parameters.
            Expected keys include:
            - title: Job title to search for
            - location: Geographic location
            - datePosted: Date range filter
            - experienceLevel: Experience level filters
            - remote: Remote work type filters
            - limit: Maximum number of results
            
    Returns:
        ApifyDataset: Dataset object containing scraped job listings.
        Use dataset.iterate_items() to access individual job records.
        
    Raises:
        ValueError: If APIFY_TOKEN or APIFY_ACTOR_NAME not configured.
        ApifyClientError: If scraping fails or actor errors occur.
        
    Example:
        >>> job_params = {
        ...     'title': 'Data Scientist',
        ...     'location': 'India',
        ...     'limit': 10
        ... }
        >>> dataset = linkedin_scrapper(job_params)
        >>> for job in dataset.iterate_items():
        ...     print(job['title'])
    """
    # Validate environment variables
    if not APIFY_TOKEN:
        # logger.error("APIFY_TOKEN environment variable not set")
        raise ValueError("APIFY_TOKEN environment variable is required")
    
    if not APIFY_ACTOR_NAME:
        # logger.error("APIFY_ACTOR_NAME environment variable not set")
        raise ValueError("APIFY_ACTOR_NAME environment variable is required")
    
    # logger.info("Initializing Apify client for LinkedIn scraping")
    # logger.debug(f"Actor input parameters: {actor_input}")
    
    try:
        # Initialize Apify client
        apify_client = ApifyClient(APIFY_TOKEN)

        # Run the Actor with input parameters
        # logger.info(f"Starting Apify Actor: {APIFY_ACTOR_NAME}")
        actor_name = APIFY_ACTOR_NAME
        actor_run = apify_client.actor(actor_name).start(run_input=actor_input)

        # logger.info(f"üöÄ Actor started successfully. Run ID: {actor_run['id']}")
        # logger.info(f"üíæ Monitor run at: https://console.apify.com/actors/runs/{actor_run['id']}")
        
        # Return dataset for iteration
        dataset = apify_client.dataset(actor_run["defaultDatasetId"])
        # logger.info(f"Dataset ID: {actor_run['defaultDatasetId']}")
        
        return dataset
        
    except Exception as e:
        # logger.error(f"Failed to scrape LinkedIn jobs: {str(e)}", exc_info=True)
        raise

In [None]:
# def job_searching_node(self, state: GraphState) -> GraphState:
"""
Search for jobs based on user input and scrape job listings.

This node:
1. Takes user input from the state
2. Uses the job input agent to structure the search query
3. Calls the LinkedIn scraper with the structured query
4. Returns a list of Job objects

Args:
    state (GraphState): Current workflow state containing user_input.
    
Returns:
    GraphState: Updated state with 'jobs' list populated.
    
Raises:
    Exception: If job scraping fails.
"""
# logger.info("Starting job search node")
agents = Agents()

user_input = {
  "title": "Software engineer",
  "location": "India",
  "limit": 1,
  "datePosted": "r604800",
  "experienceLevel": [
    "4"
  ]
}
# logger.debug(f"User input: {user_input}")

try:
    # Structure the user input into job search parameters
    # logger.debug("Invoking job input agent")
    res = agents.job_input_agent.invoke({'user_prompt': user_input})
    
    # Convert to dictionary format for scraper
    job_info_dict = res.model_dump(
        mode="json",
        exclude=['days', 'jobType', 'experience_level'],
        exclude_none=True
    )
    # logger.info(f"Job search parameters: {job_info_dict}")
    
    # Scrape jobs from LinkedIn
    # logger.info("Initiating LinkedIn job scraping")
    scrap_job = linkedin_scrapper(user_input)
    print("scap jobs:",scrap_job)
    # Convert scraped items to Job objects
    l = []
    for item in scrap_job.iterate_items():
        print(item)
        l.append(Job(**item))
    
    # logger.info(f"Successfully scraped {len(l)} jobs")
    print("list:",l)
    
except Exception as e:
    # logger.error(f"Error in job searching node: {str(e)}", exc_info=True)
    raise

scap jobs: <apify_client.clients.resource_clients.dataset.DatasetClient object at 0x000001F0D2636190>
list: []


In [18]:
scrap_job

<apify_client.clients.resource_clients.dataset.DatasetClient at 0x1f0d2636190>

In [20]:
class Job(BaseModel):
    """
    Complete job listing information scraped from LinkedIn.
    
    Attributes:
        id (str): Unique job identifier.
        url (str): Direct URL to the job posting.
        title (str): Job title/position name.
        location (str): Job location (city, state, country).
        companyName (str): Name of the hiring company.
        companyUrl (str): LinkedIn URL of the company.
        recruiterName (str): Name of the recruiter (if available).
        recruiterUrl (str): LinkedIn URL of the recruiter.
        experienceLevel (str): Required experience level (Entry, Mid, Senior, etc.).
        contractType (str): Type of employment (Full-time, Part-time, Contract, etc.).
        workType (str): Nature of work/department.
        sector (str): Industry sector of the company.
        salary (str): Salary information (if provided).
        applyType (str): Application method (EASY_APPLY, EXTERNAL, etc.).
        applyUrl (str): URL to apply for the job.
        postedTimeAgo (str): Human-readable time since posting.
        postedDate (str): ISO format date of posting.
        applicationsCount (str): Number of applicants.
        description (str): Full job description text.
    """
    id: str
    url: str
    title: str
    location: str
    companyName: str 
    companyUrl: str
    recruiterName: str
    recruiterUrl: str
    experienceLevel: str
    contractType: str
    workType: str
    sector: str
    salary: str
    applyType: str
    applyUrl: str 
    postedTimeAgo: str  
    postedDate: str
    applicationsCount: str
    description: str

In [24]:
from typing import TypedDict

In [26]:
class GraphState(TypedDict):
    """
    Main workflow state that flows through all nodes in the LangGraph.
    
    This TypedDict defines the complete state structure that gets passed
    between nodes in the workflow. Some fields use Annotated with operators
    to specify how they should be combined when multiple nodes write to them.
    
    Attributes:
        user_input (str): Original user query/request for job search.
        job_info (Job_Info_state): Structured job search parameters.
        resume_text (str): Raw text extracted from resume PDF.
        jobs (List[Job]): List of scraped job postings.
        visited_ids (Set[int]): Set of job IDs already processed for summaries.
        job_summaries (Annotated[List[Job_Summary], operator.add]): 
            Accumulated list of job summaries (nodes can append to it).
        resume_fields (Resume_Fields): Structured resume information.
        job_feedbacks (Annotated[List[Job_Feedback], operator.add]): 
            Accumulated feedback and similarity scores for jobs.
        visited_ids_feedback (Set[int]): Set of job IDs already processed for feedback.
    """
    user_input: str
    # job_info: Job_Info_state
    resume_text: str
    jobs: List[Job]
    # visited_ids: Set[int]
    # job_summaries: Annotated[List[Job_Summary], operator.add]
    # # resume_fields: Resume_Fields
    # job_feedbacks: Annotated[List[Job_Feedback], operator.add]
    # visited_ids_feedback: Set[int]
    

In [28]:
for item in scrap_job.iterate_items():
    print(item)
    l.append(Job(**item))

# logger.info(f"Successfully scraped {len(l)} jobs")
print("list:",l)
GraphState['jobs' : l]

{'id': '4294598179', 'url': 'https://in.linkedin.com/jobs/view/python-developer-at-persistent-systems-4294598179', 'title': 'Python Developer', 'location': 'Bengaluru, Karnataka, India', 'companyName': 'Persistent Systems', 'companyUrl': 'https://in.linkedin.com/company/persistent-systems', 'recruiterName': '', 'recruiterUrl': '', 'experienceLevel': 'Entry level', 'contractType': 'Full-time', 'workType': 'Engineering and Information Technology', 'sector': 'IT Services and IT Consulting', 'salary': '', 'applyType': 'EASY_APPLY', 'applyUrl': 'https://in.linkedin.com/jobs/view/python-developer-at-persistent-systems-4294598179', 'postedTimeAgo': '6 days ago', 'postedDate': '2025-12-18T00:00:00.000Z', 'applicationsCount': 'Over 200 applicants', 'description': "About PersistentWe are an AI-led, platform-driven Digital Engineering and Enterprise Modernization partner, combining deep technical expertise and industry experience to help our clients anticipate what‚Äôs next. Our offerings and pro

__main__.GraphState[slice('jobs', [Job(id='4294598179', url='https://in.linkedin.com/jobs/view/python-developer-at-persistent-systems-4294598179', title='Python Developer', location='Bengaluru, Karnataka, India', companyName='Persistent Systems', companyUrl='https://in.linkedin.com/company/persistent-systems', recruiterName='', recruiterUrl='', experienceLevel='Entry level', contractType='Full-time', workType='Engineering and Information Technology', sector='IT Services and IT Consulting', salary='', applyType='EASY_APPLY', applyUrl='https://in.linkedin.com/jobs/view/python-developer-at-persistent-systems-4294598179', postedTimeAgo='6 days ago', postedDate='2025-12-18T00:00:00.000Z', applicationsCount='Over 200 applicants', description="About PersistentWe are an AI-led, platform-driven Digital Engineering and Enterprise Modernization partner, combining deep technical expertise and industry experience to help our clients anticipate what‚Äôs next. Our offerings and proven solutions creat

In [35]:
if GraphState['jobs']:
    print(GraphState['jobs'])

__main__.GraphState['jobs']
