In [None]:
#for loading data
import pandas as pd
import json
import requests

#for llm
import os
from langchain_ollama import ChatOllama

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict
from typing import Sequence

### Set up occupation data sample

In [2]:
# Industry lookup table: every column is read as text so that the industry
# code can be matched against the first two characters of an occupation code.
industries = pd.read_excel('datasets/industry.xlsx').astype(str)
# Strip the word "Occupations" from the industry labels.
industries['industry'] = industries["industry"].str.replace("Occupations", "")

# Occupation table: drop incomplete rows and lower-case the column names.
occupations = pd.read_excel('datasets/occupation_data.xlsx')
occupations = occupations.dropna()
occupations.columns = occupations.columns.str.lower()
# Rename the O*NET-SOC code column to the shorter "code".
occupations = occupations.rename(columns={'o*net-soc code': 'code'})
# Drop rows whose title contains "All Other".
occupations = occupations[~occupations['title'].str.contains("All Other")]
# Make sure the text columns are plain strings.
occupations = occupations.astype({'code': str, 'title': str, 'description': str})

# Match the industry code (first two characters of the occupation code)
# with the industry name from `industries`.
occupations["industry"] = occupations['code'].str[:2].map(
    industries.set_index("industry_code")["industry"]
)
# dropna()/reset_index() return new frames, so the result has to be assigned;
# otherwise rows without a matching industry silently stay in the table.
occupations = occupations.dropna().reset_index(drop=True)

# Stratified sampling by industry: 5% of each industry, then 5 test rows.
# Fixed random_state values keep the sample reproducible.
sampled_occupation = occupations.groupby('industry', group_keys=False).sample(frac=0.05, random_state=1)
test_sample = sampled_occupation.sample(5, random_state=1)
test_sample.iloc[1].code


'41-3091.00'

In [3]:
# Occupation code used as the target in the matching cells below. It is
# hard-coded here; the trailing comment keeps the alternative of taking it
# from test_sample.
code = "19-3033.00" #test_sample["code"].iloc[i]

### Set up related occupation data

In [4]:
# Full related-occupations table, read as text. Headers are lower-cased, the
# two O*NET-SOC code columns get short names, and remaining spaces in header
# names become underscores.
related = (
    pd.read_excel('datasets/related_occupations.xlsx')
    .astype(str)
    .rename(columns=str.lower)
    .rename(columns={'o*net-soc code':'code','related o*net-soc code': 'related_code' })
    .rename(columns=lambda header: header.replace(" ","_"))
)
related

Unnamed: 0,code,title,related_code,related_title,relatedness_tier,index
0,11-1011.00,Chief Executives,11-1021.00,General and Operations Managers,Primary-Short,1
1,11-1011.00,Chief Executives,11-2032.00,Public Relations Managers,Primary-Short,2
2,11-1011.00,Chief Executives,11-9151.00,Social and Community Service Managers,Primary-Short,3
3,11-1011.00,Chief Executives,11-3031.01,Treasurers and Controllers,Primary-Short,4
4,11-1011.00,Chief Executives,11-9199.02,Compliance Managers,Primary-Short,5
...,...,...,...,...,...,...
18455,53-7121.00,"Tank Car, Truck, and Ship Loaders",51-9012.00,"Separating, Filtering, Clarifying, Precipitati...",Supplemental,16
18456,53-7121.00,"Tank Car, Truck, and Ship Loaders",53-7071.00,Gas Compressor and Gas Pumping Station Operators,Supplemental,17
18457,53-7121.00,"Tank Car, Truck, and Ship Loaders",51-8091.00,Chemical Plant and System Operators,Supplemental,18
18458,53-7121.00,"Tank Car, Truck, and Ship Loaders",53-7062.04,Recycling and Reclamation Workers,Supplemental,19


### Some LLM magic

Prompts used to set up the LLM persona and the rating task.

In [None]:
# Minimal persona prompt; {name} is a placeholder for the occupation title.
# NOTE(review): not referenced by any other cell visible in this notebook.
system_prompt = "you are a {name}"

#few shot
# TODO: this variant ends with "as follows:" but no output format or examples
# follow it, so it is incomplete as written. Left unchanged until the intended
# format is decided.
instruction = """
Your task is to evaluate the given work activity based on two factors:
1. Interest Level: How engaging and appealing this activity is for you based on your occupational profile.  
2. Suitability Level: How well this activity aligns with the typical skills, personality traits, and abilities required for success.  

## **Instructions**  
- Carefully analyze the given work activity.  
- Consider its nature, required skills, and typical appeal for different personality types.  
- Provide a **step-by-step explanation** of why you assigned each rating.  
- Output the result in **valid JSON** format as follows:  """

#COT
# Chain-of-thought variant: asks for reasoning plus a final 1-5 rating as JSON.
# The ```json fence is now closed; it was previously left open, which made the
# requested output format ambiguous.
# Note: the literal braces below would be read as template variables if this
# text were passed through a prompt template without escaping.
instruction_cot = """
Your task is to evaluate the given work activity based on two factors:
1. Interest Level: How engaging and appealing this activity is for you based on your occupational profile.  
2. Suitability Level: How well this activity aligns with the typical skills, personality traits, and abilities required for success.  

## **Instructions**  
- Carefully analyze the given work activity.  
- Consider its nature, required skills, and typical appeal for different personality types.  
- Provide a **step-by-step explanation** of why you assigned each rating.  
- Output the result in **valid JSON** format as follows:  

```json
{
  "interest_reasoning": "<Logical explanation for interest rating>",
  "suitability_reasoning": "<Logical explanation for suitability rating>",
  "final_rating": <Number from 1 to 5>
}
```"""

# Chat prompt used by the graph: a system message that sets the persona
# ({name}) and the 1-5 rating rules, followed by the running conversation,
# which is injected through the "messages" placeholder.
# The system text previously read "our task"; corrected to "Your task" so the
# instruction addresses the model.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you are an experienced {name}. Your task is to evaluate statements and respond with a single number from 1 to 5, based on how you feel about the described type of work: 1: Strongly Dislike; 2: Dislike; 3: Unsure; 4: Like; 5: Strongly Like. Guidelines for Your Response: Respond with ONLY a single number (1, 2, 3, 4, or 5). Do not provide any explanations, additional text, or analysis. Ignore factors like your education, training, or potential earnings—just focus on your personal preference. Example: Question: You like apples.Answer: 3; If your answer is not a single number, rewrite your response to a number.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

Load the question bank

In [12]:
# Read the question bank from JSON and collect the question texts in a list.
with open("datasets/60qs.json") as f:
    qs = json.load(f)
test = qs["questions"]["question"]
# Keep the text, area and index fields, with friendlier column names.
df = (
    pd.DataFrame(test)
    .loc[:, ['text', 'area', '_index']]
    .rename(columns={'text': 'question', '_index': 'index'})
)
qlist = df["question"].tolist()

Set up the LLM

In [None]:
# LangSmith tracing configuration.
# SECURITY: an API key used to be hard-coded in this cell. Treat that key as
# leaked and revoke it. The key is now read from the LANGCHAIN_API_KEY
# environment variable, which must be set before the notebook is started.
apikey = os.environ.get("LANGCHAIN_API_KEY", "")

# Only switch tracing on when a key is available.
os.environ["LANGCHAIN_TRACING_V2"] = "true" if apikey else "false"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"]="pr-oily-danger-57"


# Chat model served through Ollama. temperature=1 means answers are sampled,
# so repeated runs can give different ratings.
model= ChatOllama(model="llama3.1", temperature=1)

In [None]:
# Conversation graph with a single model node and a MemorySaver checkpointer.

class State(TypedDict):
    """State passed through the graph: the message list and the persona name."""
    # add_messages is the reducer attached to this field.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {name} placeholder of the prompt template.
    name: str


def call_model(state: State):
    """Render the prompt from the state, call the model, return its reply.

    The reply is returned under the "messages" key.
    """
    rendered = prompt_template.invoke(state)
    return {"messages": model.invoke(rendered)}


# Build the graph: one node called "model", entered directly from START.
workflow = StateGraph(state_schema=State)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with the checkpointer attached.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [7]:
# Answer string passed to get_career(): 60 digits, each between 1 and 5.
# Presumably one rating per question of the 60-question bank — TODO confirm.
answer = "421534352241325443253451234552435521435542432451235434542312"

### Job matching

In [8]:
#access the api to get the job titles
def get_career(answer, start=1, end=1000, timeout=30):
    """Fetch career suggestions for an answer string from the O*NET web service.

    Parameters
    ----------
    answer : str
        Answer string sent as the ``answers`` query parameter.
    start, end : int
        Range of results to request. The previous version sent both
        ``end=1000`` (in the URL) and ``end=60`` (in ``params``), so the query
        string carried conflicting duplicates. A single range is sent now; the
        default keeps the value that was written into the URL.
    timeout : float
        Seconds to wait for the service before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per suggested career, with the ``href`` and ``tags`` fields
        removed.

    Raises
    ------
    RuntimeError
        If the ONET_USERNAME / ONET_PASSWORD environment variables are missing.
    requests.HTTPError
        If the service answers with an error status.
    KeyError
        If the response has no ``career`` entry.

    SECURITY: credentials used to be hard-coded in this function (Basic auth
    header and login cookie). Treat them as leaked and rotate them. They are
    now read from the environment.
    """
    username = os.environ.get("ONET_USERNAME")
    password = os.environ.get("ONET_PASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Set the ONET_USERNAME and ONET_PASSWORD environment variables "
            "before calling get_career()."
        )

    url = 'https://services.onetcenter.org/ws/mnm/interestprofiler/careers'
    # Let requests build and escape the query string instead of concatenating.
    params = {
        'answers': answer,
        'start': start,
        'end': end,
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
        'Accept': 'application/json',
    }

    response = requests.get(
        url,
        params=params,
        headers=headers,
        auth=(username, password),
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    #search for the the target occupation in the response
    data = response.json()["career"]
    #select only title and fit
    career = pd.DataFrame(data).drop(columns=["href", "tags"])
    return career

In [9]:
#match suggested career to the related career
def matching_score(code, career, related_df=None, total_related=10):
    """Print how well the suggested careers match the target occupation.

    Parameters
    ----------
    code : str
        Code of the target occupation.
    career : pandas.DataFrame
        Suggested careers with at least the columns ``fit`` and ``code``.
    related_df : pandas.DataFrame, optional
        Related-occupations table with the columns ``code``, ``related_code``
        and ``relatedness_tier``. Defaults to the notebook-level ``related``.
    total_related : int
        Denominator of the printed score (previously the hard-coded 10).

    Returns
    -------
    None
        When ``code`` itself is among the "Best" fits (score 1 is printed).
    pandas.DataFrame
        Otherwise: the primary-tier related occupations of ``code`` that also
        appear in ``career``. The printed score is the number of these rows
        divided by ``total_related``.
    """
    table = related if related_df is None else related_df

    #get direct match
    best_fit = career[career['fit'] == 'Best']
    # .any() instead of "count == 1": a code listed more than once among the
    # best fits is still a direct match.
    if (best_fit["code"] == code).any():
        print(1)
        return None

    #get related match
    is_primary = table["relatedness_tier"].isin(["Primary-Short", "Primary-Long"])
    related_occ = table[(table["code"] == code) & is_primary]
    related_occ = related_occ.merge(career, how="inner", left_on="related_code", right_on="code")
    print(len(related_occ) / total_related)
    return related_occ

In [10]:
# Call the API with the answer string and display the suggested careers.
# This performs a network request every time the cell is run.
career = get_career(answer)
career

Unnamed: 0,fit,code,title
0,Best,25-1062.00,"Area, Ethnic, & Cultural Studies Teachers, Pos..."
1,Best,21-1013.00,Marriage & Family Therapists
2,Best,19-3039.02,Neuropsychologists
3,Best,19-1031.03,Park Naturalists
4,Best,25-1065.00,"Political Science Teachers, Postsecondary"
5,Best,19-3041.00,Sociologists
6,Great,25-1061.00,"Anthropology & Archeology Teachers, Postsecondary"
7,Great,25-1031.00,"Architecture Teachers, Postsecondary"
8,Great,25-1042.00,"Biological Science Teachers, Postsecondary"
9,Great,19-3033.00,Clinical & Counseling Psychologists


In [11]:
# Score = number of matched titles / total number of related occupations (10).
# Note: matching_score() prints the score; the value stored in `score` is what
# the function returns — the table of matched related occupations, or None
# when the target code is a direct "Best" match.
score = matching_score(code, career)
score

0.4


Unnamed: 0,code_x,title_x,related_code,related_title,relatedness_tier,index,fit,code_y,title_y
0,19-3033.00,Clinical and Counseling Psychologists,21-1013.00,Marriage and Family Therapists,Primary-Short,1,Best,21-1013.00,Marriage & Family Therapists
1,19-3033.00,Clinical and Counseling Psychologists,29-1223.00,Psychiatrists,Primary-Short,4,Great,29-1223.00,Psychiatrists
2,19-3033.00,Clinical and Counseling Psychologists,19-3039.03,Clinical Neuropsychologists,Primary-Short,5,Great,19-3039.03,Clinical Neuropsychologists
3,19-3033.00,Clinical and Counseling Psychologists,19-3039.02,Neuropsychologists,Primary-Long,6,Best,19-3039.02,Neuropsychologists


In [13]:
# NOTE(review): DATABASE_URL is not used by the code below, and its password
# ("my_password") differs from the one in the inline connection string
# ("mypassword"). Credentials should come from the environment, not the notebook.
DATABASE_URL = "postgresql+psycopg2://postgres:my_password@localhost:5432/chat_history"
# NOTE(review): this import sits mid-notebook; the other imports are in the first cell.
from langchain_postgres import PostgresChatMessageHistory

# NOTE(review): the recorded output of this cell is
# "ValueError: Must provide sync_connection or async_connection", presumably
# raised by this constructor call: it is given a connection URL string, while
# the error asks for a connection object passed as sync_connection= or
# async_connection=. TODO: confirm against the installed langchain_postgres
# version and pass an open connection.
history = PostgresChatMessageHistory(
    "postgresql://postgres:mypassword@localhost/chat_history","foo",
)

# Store one user message and one AI message in the history.
history.add_user_message("hi!")

history.add_ai_message("whats up?")


ValueError: Must provide sync_connection or async_connection