In [1]:
import pprint
from typing import Any

import pandas as pd
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

In [2]:
import os

from dotenv import load_dotenv

# Read key/value pairs from the local .env file into the process environment.
load_dotenv(".env")

# LangSmith tracing configuration consumed by LangChain at runtime.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "FYP-Goo"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# The API key must come from .env or the surrounding environment. Re-assigning
# os.environ.get(...) to the same key does nothing when the key exists and raises
# "TypeError: str expected, not NoneType" when it does not, so fail with a clear
# message instead.
if "LANGCHAIN_API_KEY" not in os.environ:
    raise RuntimeError(
        "LANGCHAIN_API_KEY is not set; define it in .env or in the environment before running this notebook."
    )

In [15]:
from datetime import datetime

# Print the current calendar year from the local clock; the same expression is
# interpolated into the year_of_graduation field description of Criteria.
current_moment = datetime.now()
print(current_moment.year)

2024


In [16]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Optional,Dict
from datetime import datetime

# Pydantic schema for the hiring criteria that the LLM extracts or suggests.
# NOTE: the class docstring and every Field description are part of the schema
# generated from this model, so treat them as prompt text rather than as comments.
class Criteria(BaseModel):
    """Hiring criteria based on the job details/ Suggested hiring criteria if not specified."""

    # Every field uses `...` as its default, so the key must be present in the
    # payload; Optional[...] still allows the value itself to be None.
    education_background: Optional[str] = Field(
        ..., description="Preferred education backgrounds. If not specified, suggest it based on the job title."
    )
    cgpa: Optional[str] = Field(
        ..., description="Minimum threshold of cgpa for the candidate required for the job. If not specified, suggest it."
    )
    # List fields are typed as List[str] (previously a bare List) so the schema
    # states the item type instead of leaving array items unconstrained.
    technical_skill: Optional[List[str]] = Field(
        ..., description="Technical skills that are relevant to the job details. If not specified, suggest it based on the job title."
    )
    total_experience_year: Optional[str] = Field(
        ..., description="Minimum/preferred total years of working experience required for the job. If not specified, suggest it based on the applicant category."
    )
    professional_certificate: Optional[List[str]] = Field(
        ..., description="Preferred professional certifications,licenses or accreditations required for the job. If not specified, suggest it based on the job title."
    )
    total_similar_experience_year: Optional[str] = Field(
        ..., description="Minimum/preferred total years of working experience that is related to the job title required for the job. If not specified, suggest it based on the applicant category."
    )
    language: Optional[List[str]] = Field(
        ..., description="Preferred language required for the job. If not specified, suggest it."
    )
    soft_skill: Optional[List[str]] = Field(
        ..., description="Preferred soft skills required for the job. If not specified, suggest the soft skills needed based on the job title for this applicant category."
    )
    # The year is evaluated once, when this class body runs, and baked into the
    # description string.
    year_of_graduation: Optional[str] = Field(
        ..., description=f"Preferred year of graduation required for the job. If not specified, {datetime.now().year}"
    )
    expected_salary: Optional[str] = Field(
        ..., description="Preferred salary range required for the job. If not specified, suggest the market range in Ringgit Malaysia based on the job title for the applicant category."
    )


# Pydantic schema for the per-criterion weightage (one `<criterion>_weigh` field
# for each field declared on Criteria).
class Weightage(BaseModel):
    """In the scale of 1-10, weightage assigned to the criteria based on the importance of the criteria in the job details"""

    # All fields are required (`default=...`) and hold the weight as text.
    education_background_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the education_background criteria",
    )
    cgpa_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the cgpa criteria",
    )
    technical_skill_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the technical_skill criteria",
    )
    total_experience_year_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the total_experience_year criteria",
    )
    professional_certificate_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the professional_certificate criteria",
    )
    total_similar_experience_year_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the total_similar_experience_year criteria",
    )
    language_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the language criteria",
    )
    soft_skill_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the soft_skill criteria",
    )
    year_of_graduation_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the year_of_graduation criteria",
    )
    expected_salary_weigh: Optional[str] = Field(
        default=...,
        description="Weightage assigned to the expected_salary criteria",
    )


class Job(BaseModel):
    """Data about the job criteria and its weightage."""

    # Top-level schema passed to `llm.with_structured_output(schema=Job)`.
    # Declared as a list so more than one Criteria entry can be extracted;
    # the cells that build the DataFrame only read element [0].
    criteria: List[Criteria]
    # Declared as a list so more than one Weightage entry can be extracted;
    # the cells that build the DataFrame only read element [0].
    weightage: List[Weightage]


In [17]:
# Sample job posting used as input for the criteria-extraction chain.
job_title = "Data Scientist"

# Free-text description; passed to the prompt verbatim.
job_description = """Job Description

We are looking for experienced Data Scientists to join our team. The ideal candidate should have a minimum of 3 years of work experience in data science projects and possess a strong background in Python, statistical analysis, EDA, Machine Learning, Deep Learning, model deployment and MLOps.


The successful candidate will be responsible for leveraging their expertise in data science to identify the most appropriate models that address business pain points, developing end-to-end solutions for data science projects, and deploying them into production environments The ideal candidate will also possess excellent communication skills to effectively collaborate with stakeholders across the organization.


Key Accountabilities

Working with stakeholders to understand their needs and develop solutions accordingly.
Developing predictive models using machine learning algorithms such as regression analysis, classification trees, neural networks etc.
Analyzing large datasets to identify patterns and trends that can be used to inform decision-making processes.
Utilizing Python programming language for data manipulation and visualization, models development and deployment.
Deploying machine learning and deep learning models into production environments
Developing MLOps pipelines for automated model deployment and monitoring
Documenting all project activities including code development, data analysis results etc."""

# One requirement per line. The backslash in "Generative AI\Reinforcement" is
# written as an escaped backslash: a lone "\R" is not a valid escape sequence and
# triggers an invalid-escape warning, while "\\R" yields the same runtime text.
job_requirement = """
At least bachelor’s degree in Data Science, Computer Science, Engineering, or related field
At least 3 years of work experience in data science projects
Proficiency in Python, data structure and algorithms
Expertise in EDA, Statistical Analysis and Modeling
Expertise in Machine Learning model development & deployment
Expertise in at least one of these areas is preferable; deep learning, text analytic, computer vision, optimization, and simulation.
Experience with Generative AI\\Reinforcement learning is preferable.
Strong problem-solving skills & ability to think critically.
"""

# Seniority band that the prompt uses when suggesting unspecified criteria.
applicant_category = "Entry-Level"

In [20]:
from langchain_core.prompts import ChatPromptTemplate

# System message: role, the three tasks and the rules the model must follow.
# NOTE(review): the last rule reads "You will penalized" (missing "be"); the text
# is left untouched here so the prompt sent to the model does not change.
system_instructions = (
    "You are an expert recruiting algorithm with 20 years experience in the recruiting industry. You will be provided with the job details (job title, applicant category, job description, job requirement). Execute all the tasks from step 1 to step 3 by strictly following the rules.\n"
    "\n[Tasks]\n"
    "1. Fill in relevant criteria's information based on the following job details with their properties.\n"
    "2. If the criteria are not specified, you should apply your hiring knowledge to suggest details to the criteria.\n"
    "3. Assign weightage to each of the criteria based on how important you feel they are in the job details.\n"
    "\n[Rules]\n"
    "- Make sure every criteria has one suggested detail.\n"
    "- Do not return 'Not Specified' as detail, suggest at least one detail based on common market hiring criteria.\n"
    "- You will penalized if you return 'Not Specified' as answer"
)

# Human message: template with one placeholder per job detail.
human_template = (
    "[Job Details]\n"
    "Job Title : {job_title}\n"
    "Applicant Category : {applicant_category}\n"
    "Job Description : {job_description}\n"
    "Job Requirement : {job_requirement}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        ("human", human_template),
    ]
)

# Chat model whose output is constrained to the Job schema.
llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0.4)
structured_llm = llm.with_structured_output(schema=Job)
runnable = prompt | structured_llm

# Values substituted into the human message placeholders.
job_inputs = {
    "job_title": job_title,
    "job_description": job_description,
    "job_requirement": job_requirement,
    "applicant_category": applicant_category,
}
result = runnable.invoke(job_inputs)

In [22]:
# Display the value returned by `runnable.invoke(...)` in the previous cell.
result

Job(criteria=[Criteria(education_background='Data Science, Computer Science, Engineering, or related field', cgpa='3.0', technical_skill=['Python', 'Data Structure and Algorithms', 'EDA', 'Statistical Analysis', 'Machine Learning', 'Model Deployment', 'Deep Learning', 'Text Analytics', 'Computer Vision', 'Optimization', 'Simulation', 'Generative AI', 'Reinforcement Learning'], total_experience_year='3 years', professional_certificate=[], total_similar_experience_year='3 years', language=['English'], soft_skill=['Problem-solving', 'Critical Thinking', 'Effective Communication'], year_of_graduation='2020', expected_salary='Market range for entry-level Data Scientist')], weightage=[Weightage(education_background_weigh='9', cgpa_weigh='6', technical_skill_weigh='10', total_experience_year_weigh='10', professional_certificate_weigh='4', total_similar_experience_year_weigh='10', language_weigh='7', soft_skill_weigh='8', year_of_graduation_weigh='5', expected_salary_weigh='3')])

In [2]:
import pandas as pd
from models import Job,Criteria,Weightage

# Hard-coded Job instance with one Criteria entry and one Weightage entry.
job_object = Job(
    criteria=[
        Criteria(
            education_background='Data Science, Computer Science, Engineering, or related field',
            cgpa='3.0',
            technical_skill=['Python', 'Data Structure and Algorithms', 'EDA', 'Statistical Analysis', 'Machine Learning', 'Model Deployment', 'Deep Learning', 'Text Analytics', 'Computer Vision', 'Optimization', 'Simulation', 'Generative AI', 'Reinforcement Learning'],
            total_experience_year='3 years',
            professional_certificate=[],
            total_similar_experience_year='3 years',
            language=['English'],
            soft_skill=['Problem-solving', 'Critical Thinking', 'Effective Communication'],
            year_of_graduation='2020',
            expected_salary='Market range for entry-level Data Scientist',
        )
    ],
    weightage=[
        Weightage(
            education_background_weigh='9',
            cgpa_weigh='6',
            technical_skill_weigh='10',
            total_experience_year_weigh='10',
            professional_certificate_weigh='4',
            total_similar_experience_year_weigh='10',
            language_weigh='7',
            soft_skill_weigh='8',
            year_of_graduation_weigh='5',
            expected_salary_weigh='3',
        )
    ],
)

# Only the first entry of each list is tabulated.
criteria_entry = job_object.criteria[0]
weightage_entry = job_object.weightage[0]

# Collect the field values in declaration order; the two lists are paired row by
# row, so this relies on both models declaring their fields in matching order.
criteria_data = [getattr(criteria_entry, name) for name in criteria_entry.__fields__]
weightage_data = [getattr(weightage_entry, name) for name in weightage_entry.__fields__]

# One row per criterion, labelled with the Criteria field name.
df = pd.DataFrame(
    {'criteria': criteria_data, 'weightage': weightage_data},
    index=pd.Index(list(criteria_entry.__fields__), name="criteria"),
)

df

Unnamed: 0_level_0,criteria,weightage
criteria,Unnamed: 1_level_1,Unnamed: 2_level_1
education_background,"Data Science, Computer Science, Engineering, o...",9
cgpa,3.0,6
technical_skill,"[Python, Data Structure and Algorithms, EDA, S...",10
total_experience_year,3 years,10
professional_certificate,[],4
total_similar_experience_year,3 years,10
language,[English],7
soft_skill,"[Problem-solving, Critical Thinking, Effective...",8
year_of_graduation,2020,5
expected_salary,Market range for entry-level Data Scientist,3


In [15]:
# Check the Python type stored in the third row of the 'criteria' column.
# The index holds string labels, so the row is selected by position with .iloc;
# a bare integer key in [] relies on a deprecated positional fallback.
type(df['criteria'].iloc[2])

list

In [9]:
# Gather the value of every field on the first Criteria entry, in declaration order.
first_criteria = job_object.criteria[0]
criteria_data = [getattr(first_criteria, name) for name in first_criteria.__fields__]

print(criteria_data)


['Data Science, Computer Science, Engineering, or related field', '3.0', ['Python', 'Data Structure and Algorithms', 'EDA', 'Statistical Analysis', 'Machine Learning', 'Model Deployment', 'Deep Learning', 'Text Analytics', 'Computer Vision', 'Optimization', 'Simulation', 'Generative AI', 'Reinforcement Learning'], '3 years', [], '3 years', ['English'], ['Problem-solving', 'Critical Thinking', 'Effective Communication'], '2020', 'Market range for entry-level Data Scientist']


In [10]:
# Gather the value of every field on the first Weightage entry, in declaration order.
first_weightage = job_object.weightage[0]
weightage_data = [getattr(first_weightage, name) for name in first_weightage.__fields__]

print(weightage_data)


['9', '6', '10', '10', '4', '10', '7', '8', '5', '3']
