In [3]:
#for loading data
import json
import os
import re

import pandas as pd
import requests

#for llm
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

#structured output
from pydantic import BaseModel, Field

### Set up occupation data sample

In [17]:
#set up industry data
industries = pd.read_excel('datasets/industry.xlsx').astype(str)
industries['industry'] = industries["industry"].str.replace("Occupations", "", regex=False)
# set up occupation data
occupations = pd.read_excel('datasets/occupation_data.xlsx')
occupations = occupations.dropna()
occupations.columns = occupations.columns.str.lower()
#rename the column
occupations = occupations.rename(columns={'o*net-soc code':'code'})
#drop rows with "all other" in the title; copy so the later column assignment
#acts on an independent frame instead of a slice of the original
occupations = occupations[~occupations['title'].str.contains("All Other", regex=False)].copy()
#change data type
occupations = occupations.astype({'code': str, 'title': str, 'description': str})
#match industry code (first two characters of the occupation code) with industry name from industries
industry_names = industries.set_index("industry_code")["industry"]
occupations["industry"] = occupations['code'].str[:2].map(industry_names)
#drop occupations without a matching industry and renumber the rows;
#dropna/reset_index return new frames, so the result has to be assigned back
occupations = occupations.dropna().reset_index(drop=True)
#stratified sampling by industry
sampled_occupation = occupations.groupby('industry', group_keys=False).sample(frac=0.05, random_state=1)
test_sample = sampled_occupation.sample(5, random_state= 1)
#position of the occupation inside test_sample that the following cells work with
i = 2
print(test_sample.iloc[i].code, test_sample.iloc[i].title)

17-3024.01 Robotics Technicians


In [18]:
# Take the i-th row of the test sample and keep its occupation code and title
# for the cells below; the title is displayed as the cell output.
selected_occupation = test_sample.iloc[i]
code = selected_occupation["code"]  # e.g. "19-3033.00"
title = selected_occupation["title"]
title

'Robotics Technicians'

### Load question bank

In [19]:
# Read the question bank and keep the question texts as a plain list.
with open("datasets/60qs.json") as question_file:
    qs = json.load(question_file)

# The question records sit under questions -> question in the JSON document.
test = qs["questions"]["question"]

# Keep text / area / _index and give them friendlier column names.
df = (
    pd.DataFrame(test)
    .loc[:, ['text', 'area', '_index']]
    .rename(columns={'text': 'question', '_index': 'index'})
)
qlist = df["question"].tolist()

### Set up related occupation data

In [20]:
# Whole related-occupations dataset, every column as string, with lower-case
# snake_case column names and the two O*NET code columns shortened.
related = (
    pd.read_excel('datasets/related_occupations.xlsx')
    .astype(str)
    .rename(columns=str.lower)
    .rename(columns={'o*net-soc code': 'code', 'related o*net-soc code': 'related_code'})
    .rename(columns=lambda column_name: column_name.replace(" ", "_"))
)
related

Unnamed: 0,code,title,related_code,related_title,relatedness_tier,index
0,11-1011.00,Chief Executives,11-1021.00,General and Operations Managers,Primary-Short,1
1,11-1011.00,Chief Executives,11-2032.00,Public Relations Managers,Primary-Short,2
2,11-1011.00,Chief Executives,11-9151.00,Social and Community Service Managers,Primary-Short,3
3,11-1011.00,Chief Executives,11-3031.01,Treasurers and Controllers,Primary-Short,4
4,11-1011.00,Chief Executives,11-9199.02,Compliance Managers,Primary-Short,5
...,...,...,...,...,...,...
18455,53-7121.00,"Tank Car, Truck, and Ship Loaders",51-9012.00,"Separating, Filtering, Clarifying, Precipitati...",Supplemental,16
18456,53-7121.00,"Tank Car, Truck, and Ship Loaders",53-7071.00,Gas Compressor and Gas Pumping Station Operators,Supplemental,17
18457,53-7121.00,"Tank Car, Truck, and Ship Loaders",51-8091.00,Chemical Plant and System Operators,Supplemental,18
18458,53-7121.00,"Tank Car, Truck, and Ship Loaders",53-7062.04,Recycling and Reclamation Workers,Supplemental,19


### API call score function

In [21]:
#access the api to get the job titles
def get_career(answer):
    """Fetch Interest Profiler career suggestions for a set of answers.

    Calls the ``mnm/interestprofiler/careers`` endpoint on
    services.onetcenter.org with HTTP Basic authentication.

    Credentials are read from the environment variables ``ONET_USERNAME`` and
    ``ONET_PASSWORD`` so that no secret is stored in the notebook.

    Args:
        answer: The answers to send as the ``answers`` query parameter. Either
            a string, or an iterable of ratings that is joined into one string
            (e.g. ``[4, 2, 1]`` becomes ``"421"``).

    Returns:
        pandas.DataFrame built from the ``career`` list of the JSON response,
        without the ``href`` and ``tags`` columns when they are present.

    Raises:
        RuntimeError: if the credential environment variables are not set.
        requests.HTTPError: if the service answers with an error status.
        KeyError: if the response has no ``career`` entry.
    """
    username = os.environ.get("ONET_USERNAME")
    password = os.environ.get("ONET_PASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Set the ONET_USERNAME and ONET_PASSWORD environment variables "
            "before calling get_career()."
        )

    # Accept a list of ratings as well as the already-joined string.
    if not isinstance(answer, str):
        answer = "".join(str(rating) for rating in answer)

    url = 'https://services.onetcenter.org/ws/mnm/interestprofiler/careers'
    # Every query parameter is passed exactly once; requests takes care of the
    # URL encoding. 'end' is large so that the whole result list is requested.
    params = {
        'answers': answer,
        'start': '1',
        'end': '1000',
    }
    headers = {
        'Accept': 'application/json',
    }

    response = requests.get(
        url,
        params=params,
        headers=headers,
        auth=(username, password),
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    data = response.json()["career"]
    #select only title and fit (plus the code); tolerate responses lacking href/tags
    career = pd.DataFrame(data).drop(columns=["href", "tags"], errors="ignore")
    return career

In [22]:
#match suggested career to the related career
def matching_score(code, career, total_related=10):
    """Score how well the suggested careers match the target occupation.

    Args:
        code: Occupation code of the target occupation.
        career: DataFrame of suggested careers with at least the columns
            ``fit`` and ``code`` (as returned by ``get_career``).
        total_related: Denominator for the related-occupation score; the
            default of 10 is the value the score was originally divided by.

    Returns:
        float: ``1.0`` when ``code`` is among the suggestions whose fit is
        ``"Best"``. Otherwise the number of suggested careers that appear as
        "Primary-Short" or "Primary-Long" related occupations of ``code`` in
        the module-level ``related`` frame, divided by ``total_related``.
    """
    #get direct match
    best_fit_codes = career.loc[career['fit'] == 'Best', 'code']
    # any() rather than "exactly one" so a duplicated row still counts as a match
    if (best_fit_codes == code).any():
        return 1.0

    #get related match: primary related occupations of the target code ...
    primary_tiers = ["Primary-Short", "Primary-Long"]
    is_primary_relation = (related["code"] == code) & related["relatedness_tier"].isin(primary_tiers)
    # ... that also show up among the suggested careers
    matched = related[is_primary_relation].merge(
        career, how="inner", left_on="related_code", right_on="code"
    )
    return len(matched) / total_related

### set up LLM

In [None]:
# First five questions, used for quick experiments.
small_list = qlist[0:5]

# JSON schema of the structured answer: a free-text reasoning plus an integer rating.
interest_property = {
  "type": "integer",
  "description": "final rating, from 1(strongly dislike) to 5(strongly like)",
}
reasoning_property = {
  "type": "string",
  "description": "The thinking and reasoning",
}
json_schema = {
  "type": "object",
  "title": "interest",
  "description": "reasoning and rating of an items. reason first, then rate",
  "properties": {
    "interest": interest_property,
    "_reasoning": reasoning_property,
  },
  "required": ["_reasoning", "interest"],
}

model = ChatOllama(model="llama3.1", temperature=1, format='json')
structured_llm = model.with_structured_output(schema=json_schema)

# Role-play prompt: system persona, earlier turns, then the statement to rate.
task = """rate the follow statement from 1 (strongly dislike) to 5 (strongly like) depending on your interest: """
prompt_template = ChatPromptTemplate(
    [
        ("system", "your role is a {name}. Think, feel, perceive, judge, behave, observe, respond, and act like a {name}."),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

chat_history = []
answer = []

# Rate every question in turn; only the ten most recent messages are replayed to the model.
for question in qlist:
  recent_turns = chat_history[-10:]
  prompt = prompt_template.invoke(
      {"name": title, "chat_history": recent_turns, "input": task + question}
  )
  response = structured_llm.invoke(prompt)
  chat_history.extend([
      HumanMessage(content=question),
      AIMessage(content=str(response)),
  ])
  answer.append(response["interest"])




In [78]:
def _parse_interest(response):
  """Return the rating stored under 'interest' in a structured reply as an int."""
  if not isinstance(response, dict) or "interest" not in response:
    raise ValueError(f"LLM reply has no 'interest' field: {response!r}")
  # The model sometimes returns the rating as a string ('4'); normalise to int.
  return int(response["interest"])


def askllm(title, questions=None, history_window=10):
  """Let the LLM rate a list of statements while role-playing an occupation.

  Args:
    title: Occupation title the model is asked to impersonate.
    questions: Statements to rate. Defaults to the module-level ``small_list``
      when omitted, which is what the function always used before.
    history_window: Number of most recent chat messages replayed to the model
      with each new statement.

  Returns:
    tuple: ``(chat_history, answer)`` where ``chat_history`` is the list of
    HumanMessage/AIMessage objects of the whole conversation and ``answer`` is
    the list of integer ratings, one per statement.

  Raises:
    ValueError: if a reply lacks the ``interest`` field or it is not a number.
  """
  if questions is None:
    questions = small_list

  #json
  json_schema ={
    "type": "object",
    "title": "interest",
    "description": "reasoning and rating of an items. reason first, then rate",
    "properties": {
      "interest": {
        "type": "integer",
        "description": "final rating, from 1(strongly dislike) to 5(strongly like)"
      },
      "_reasoning": {
        "type": "string",
        "description": "The thinking and reasoning"
      }

    },
    "required": ["_reasoning", "interest"]
  }

  model= ChatOllama(model="llama3.1", temperature=1, format= 'json')

  chat_history = []
  answer = []
  task = """rate the follow statement from 1 (strongly dislike) to 5 (strongly like) depending on your interest: """

  prompt_template = ChatPromptTemplate([
          ("system", "your role is a {name}. Think, feel, perceive, judge, behave, observe, respond, and act like a {name}."),
          ("placeholder", "{chat_history}"),
          ("human","{input}")
          ]
  )
  structured_llm = model.with_structured_output(schema=json_schema)

  for q in questions:
    # Slice from the end only for a positive window; [-0:] would replay everything.
    recent_turns = chat_history[-history_window:] if history_window > 0 else []
    prompt = prompt_template.invoke({"name": title, "chat_history" : recent_turns, "input": task +q})
    response = structured_llm.invoke(prompt)
    chat_history.append(HumanMessage(content=q))
    chat_history.append(AIMessage(str(response)))
    answer.append(_parse_interest(response))

  return chat_history, answer




In [79]:
# Try askllm with an ad-hoc role title; the cell output is the returned
# (chat_history, answer) tuple.
askllm("robot educator")

([HumanMessage(content='Build kitchen cabinets', additional_kwargs={}, response_metadata={}),
  AIMessage(content="{'_reasoning': 'Building kitchen cabinets is an enjoyable activity for me.', 'interest': '4'}", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Lay brick or tile', additional_kwargs={}, response_metadata={}),
  AIMessage(content="{'_reasoning': 'I strongly prefer building with bricks over tiles.', 'interest': '4'}", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Develop a new medicine', additional_kwargs={}, response_metadata={}),
  AIMessage(content="{'_reasoning': 'This topic holds little interest for me.', 'interest': '2'}", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Study ways to reduce water pollution', additional_kwargs={}, response_metadata={}),
  AIMessage(content="{'_reasoning': 'This topic holds strong importance and interest for me.', 'interest': '4'}", additional_kwargs={}, response_metadata={

In [None]:
# Initialize the model
model = ChatOllama(model="llama3.1", temperature=1)

# Chat history as a list of message objects
chat_history = []  # This would contain AIMessage and HumanMessage objects

# Define the task
task = """Rate the statement from 1 to 5 depending on your interest."""

# Prompt layout: persona, then the earlier turns, then the statement to rate.
# The history has to come before the current input so the conversation reaches
# the model in chronological order and ends with the open question.
messages = [
    (
        "system",
        "You are an experienced {name}. Follow the instruction given to you, but think and behave like a {name}. Your response must be valid JSON matching the provided schema.",
    ),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
]

prompt_template = ChatPromptTemplate.from_messages(messages)

# Create structured output model (json_schema is the module-level schema
# defined in the earlier LLM set-up cell)
structured_llm = model.with_structured_output(json_schema)

for q in small_list:
    # Generate the prompt; the space keeps the instruction and the statement
    # from running together
    prompt = prompt_template.format_messages(
        name=title,
        input=f"{task} {q}",
        chat_history=chat_history,
    )

    # Get the response
    response = structured_llm.invoke(prompt)

    # Record the turn so it is replayed with the next statement
    chat_history.append(HumanMessage(content=q))
    chat_history.append(AIMessage(content=str(response)))

In [62]:
# Display the current contents of chat_history.
chat_history

[HumanMessage(content='Build kitchen cabinets', additional_kwargs={}, response_metadata={}),
 AIMessage(content="{'_reasoning': 'The task of building kitchen cabinets is moderately challenging, requiring precision and attention to detail. It involves measuring, cutting, assembling, and finishing various components.', 'interest': 4}", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Lay brick or tile', additional_kwargs={}, response_metadata={}),
 AIMessage(content="{'_reasoning': 'The task of laying brick or tile is challenging, requiring manual dexterity and attention to detail. It involves measuring, cutting, and placing tiles or bricks with precision.', 'interest': 3}", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Develop a new medicine', additional_kwargs={}, response_metadata={}),
 AIMessage(content="{'_reasoning': 'The task of developing a new medicine is challenging, requiring advanced knowledge and skills in various fields such as chemi

In [None]:
# Variant without structured output: the JSON reply format is requested in the
# instruction text and the raw model reply is stored in the history.
model= ChatOllama(model="llama3.1", temperature=1)

chat_history = []
task = """rating the statement from 1 to 5 depending on your interest. reply with the follow valid json format: {'reason': "A short sentence explaining the reasoning","answer": <number between 1-5>}"""

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you are an experienced {name}. follow the instruction given to you, but think and feel like a {name}",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        (
            "human",
            "{input}"
        )
    ]
)

for q in qlist:
    # The space separates the instruction (which ends in "}") from the statement.
    prompt = prompt_template.invoke({"name": "Clinical and Counseling Psychologists", "chat_history" : chat_history[-10:], "input": f"{task} {q}"})
    response = model.invoke(prompt)
    chat_history.append(HumanMessage(content=q))
    chat_history.append(response)

In [61]:
# Display the current contents of chat_history.
chat_history

[HumanMessage(content='Build kitchen cabinets', additional_kwargs={}, response_metadata={}),
 "{'reason': 'This is not a psychological topic, more of a practical/home improvement task.', 'answer': 1}",
 HumanMessage(content='Lay brick or tile', additional_kwargs={}, response_metadata={}),
 "{'reason': 'Not aligned with my training as a Clinical and Counseling Psychologist', 'answer': 2}",
 HumanMessage(content='Develop a new medicine', additional_kwargs={}, response_metadata={}),
 "{'reason': 'This aligns closely with my training in Clinical Psychology, potentially involving understanding of human health and well-being.', 'answer': 4}",
 HumanMessage(content='Study ways to reduce water pollution', additional_kwargs={}, response_metadata={}),
 '{\'reason\': "Aligns closely with my training in Clinical Psychology, potentially involving understanding of human health and well-being, as it also affects psychological well-being.", \'answer\': 4}',
 HumanMessage(content='Write books or plays'

In [88]:
def _extract_rating(reply):
    """Return the 1-5 rating of one model reply as a one-character string.

    Accepts either a message object with a ``content`` attribute or a plain
    string, and looks for the value stored under the ``answer`` key.
    """
    text = str(getattr(reply, "content", reply))
    found = re.search(r"""['"]answer['"]\s*:\s*['"]?([1-5])""", text)
    if found is None:
        raise ValueError(f"no rating from 1 to 5 found in reply: {text!r}")
    return found.group(1)


# Every second entry of chat_history (starting at index 1) is a model reply;
# collect the ratings and join them into the answer string sent to the API.
ans = [_extract_rating(item) for item in chat_history[1::2]]
answer = "".join(ans)


### answer extraction

In [77]:
# Scratch check: show the last five characters of the second entry in ans.
[ans[1][-5:]]

["': 2}"]

### Job matching

In [92]:
# Call the API with the answer string and keep the suggested careers;
# the resulting DataFrame is displayed as the cell output.
career = get_career(answer)
career

Unnamed: 0,fit,code,title
0,Best,25-3011.00,"Adult Basic Education, Adult Secondary Educati..."
1,Best,25-1031.00,"Architecture Teachers, Postsecondary"
2,Best,25-1062.00,"Area, Ethnic, & Cultural Studies Teachers, Pos..."
3,Best,29-1129.01,Art Therapists
4,Best,25-1121.00,"Art, Drama, & Music Teachers, Postsecondary"
...,...,...,...
78,Good,31-1122.00,Personal Care Aides
79,Good,31-2022.00,Physical Therapist Aides
80,Good,31-1133.00,Psychiatric Aides
81,Good,33-9094.00,School Bus Monitors


In [100]:
# Compare the suggested careers with the target occupation code: a direct
# "Best"-fit match counts as 1, otherwise the number of matched primary related
# occupations is divided by 10.
score = matching_score(code, career)
score

1


### Some LLM magic

Prompt definitions for the rating experiments

In [None]:
system_prompt = "you are a {name}"

# Instruction text shared by the prompt variants below.
_instruction_base = """
Your task is to evaluate the given work activity based on two factors:
1. Interest Level: How engaging and appealing this activity is for you based on your occupational profile.  
2. Suitability Level: How well this activity aligns with the typical skills, personality traits, and abilities required for success.  

## **Instructions**  
- Carefully analyze the given work activity.  
- Consider its nature, required skills, and typical appeal for different personality types.  
- Provide a **step-by-step explanation** of why you assigned each rating.  
- Output the result in **valid JSON** format as follows:  """

# Reply format appended to the instruction; the code fence is closed so the
# example reads as one complete JSON block.
_rating_json_format = """

```json
{
  "interest_reasoning": "<Logical explanation for interest rating>",
  "suitability_reasoning": "<Logical explanation for suitability rating>",
  "final_rating": <Number from 1 to 5>
}
```"""

#few shot
# TODO: this variant ends with "as follows:" but carries neither the reply
# format nor any examples yet.
instruction = _instruction_base

#COT
instruction_cot = _instruction_base + _rating_json_format

# Number-only variant: the persona comes from {name}, the conversation is
# supplied through the "messages" placeholder.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you are an experienced {name}. Your task is to evaluate statements and respond with a single number from 1 to 5, based on how you feel about the described type of work: 1: Strongly Dislike; 2: Dislike; 3: Unsure; 4: Like; 5: Strongly Like. Guidelines for Your Response: Respond with ONLY a single number (1, 2, 3, 4, or 5). Do not provide any explanations, additional text, or analysis. Ignore factors like your education, training, or potential earnings—just focus on your personal preference. Example: Question: You like apples. Answer: 3; If your answer is not a single number, rewrite your response to a number.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

### Set up LLM

In [None]:
# Hard-coded answer string of 60 digits, each between 1 and 5 — presumably one
# rating per question of the 60-question bank; verify before reuse.
answer = "421534352241325443253451234552435521435542432451235434542312"