In [2]:
import pandas as pd
import json
import os
import requests
from tqdm import tqdm

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage
#add memory
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict
from typing import Sequence

In [3]:
# LangSmith tracing configuration and local model selection.
#
# The API key is a secret and must not be stored in the notebook. It is read
# from the LANGCHAIN_API_KEY environment variable; if that is not set, the user
# is prompted for it (the input is not echoed).
# NOTE(review): the key that used to be hard-coded in this cell has been
# exposed to anyone with access to the notebook and should be rotated.
from getpass import getpass

apikey = os.environ.get("LANGCHAIN_API_KEY") or getpass("LangSmith API key: ")
# define langchain requirements and set the model

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = apikey
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"]="pr-abandoned-estrogen-14"

from langchain_ollama import ChatOllama

# temperature=1 is kept from the original configuration.
model= ChatOllama(model="llama3.1", temperature=1)

In [4]:
#creating prompt from jd and name.

# System instructions for the interest questionnaire. `{name}` is a template
# variable filled in when the prompt is invoked.
# NOTE(review): "our task" reads like a typo for "Your task"; the text is left
# unchanged here so model responses stay comparable with earlier runs.
interest_system_message = "you are an experienced {name}. our task is to evaluate statements and respond with a single number from 1 to 5, based on how you feel about the described type of work: 1: Strongly Dislike; 2: Dislike; 3: Unsure; 4: Like; 5: Strongly Like. Guidelines for Your Response: Respond with ONLY a single number (1, 2, 3, 4, or 5). Do not provide any explanations, additional text, or analysis. Ignore factors like your education, training, or potential earnings—just focus on your personal preference. Example: Question: You like apples.Answer: 3"

# The system message comes first, followed by whatever is passed as "messages".
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", interest_system_message),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [5]:
# Define a new graph

class State(TypedDict):
    # State schema for the graph (passed to StateGraph as `state_schema`).
    # `messages` is annotated with the `add_messages` function imported from
    # langgraph.graph.message; `name` supplies the `{name}` variable of
    # `prompt_template` when the state is passed to `prompt_template.invoke`.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    name: str


# Graph builder; the node and edge are attached to it further down.
workflow = StateGraph(state_schema=State)


# Define the function that calls the model
def call_model(state: State):
    """Graph node: render the chat prompt from ``state`` and query the model.

    Args:
        state: Current graph state; it is passed unchanged to
            ``prompt_template.invoke``.

    Returns:
        A dict with a single ``"messages"`` key holding the value returned by
        ``model.invoke``.
    """
    return {"messages": model.invoke(prompt_template.invoke(state))}


# Define the (single) node in the graph
# The edge from START to "model" is declared first, then the node named
# "model" is registered with `call_model` as its callable.
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
# The MemorySaver instance is passed as the checkpointer when compiling.
# Presumably it keeps message history per `thread_id` given in the invoke
# config — confirm against the LangGraph persistence docs.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [6]:
# set up occupation data
# The cleaned table is built in a single chain so that every step returns a new
# frame. The earlier version assigned columns on a boolean-filtered slice,
# which can trigger pandas' SettingWithCopyWarning.
occupations = (
    pd.read_excel('Occupation Data.xlsx')
    .dropna()
    # lower-case every column name
    .rename(columns=str.lower)
    # rename the column
    .rename(columns={'o*net-soc code': 'code'})
    # drop rows with "All Other" in the title; regex=False makes this a plain
    # substring match instead of a regular-expression search
    .loc[lambda frame: ~frame['title'].str.contains("All Other", regex=False)]
    # change data type
    .astype({'code': str, 'title': str, 'description': str})
)

# sample data: fixed seeds keep the two sampled titles reproducible
sample1 = occupations["title"].sample(1, random_state=3).iloc[0]
sample2 = occupations["title"].sample(1, random_state=4).iloc[0]

print(sample1)
print(sample2)

Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders
Billing and Posting Clerks


In [7]:
#get the questions into a list
# The encoding is passed explicitly so that reading the file does not depend on
# the platform's default locale encoding (JSON files are conventionally UTF-8).
with open("60qs.json", encoding="utf-8") as f:
    qs = json.load(f)
test = qs["questions"]["question"]
# Keep only the three fields used later and give them friendlier names.
df = pd.DataFrame(test)[['text', 'area', '_index']]
df.columns = ['question', 'area', 'index']
# Plain Python list of the question texts, in file order.
qlist = df["question"].tolist()
qlist


['Build kitchen cabinets',
 'Lay brick or tile',
 'Develop a new medicine',
 'Study ways to reduce water pollution',
 'Write books or plays',
 'Play a musical instrument',
 'Teach an individual an exercise routine',
 'Help people with personal or emotional problems',
 'Buy and sell stocks and bonds',
 'Manage a retail store',
 'Develop a spreadsheet using computer software',
 'Proofread records or forms',
 'Repair household appliances',
 'Raise fish in a fish hatchery',
 'Conduct chemical experiments',
 'Study the movement of planets',
 'Compose or arrange music',
 'Draw pictures',
 'Give career guidance to people',
 'Perform rehabilitation therapy',
 'Operate a beauty salon or barber shop',
 'Manage a department within a large company',
 'Install software across computers on a large network',
 'Operate a calculator',
 'Assemble electronic parts',
 'Drive a truck to deliver packages to offices and homes',
 'Examine blood samples using a microscope',
 'Investigate the cause of a fire',


In [151]:
from pydantic import BaseModel, PositiveInt

class score(BaseModel):
    """Validated rating parsed from a model reply."""

    # PositiveInt accepts any integer > 0; it does not enforce the 1-5 range.
    interest: PositiveInt

query = "what have we discussed so far?"
config = {"configurable": {"thread_id": "1"}}
input_messages = [HumanMessage(query)]
# `app.invoke` returns the graph state, whose "messages" list ends with the
# model's reply. Pydantic models only accept keyword arguments, so passing the
# state dict positionally (as before) raises TypeError; the reply text is
# extracted and validated as the `interest` field instead. A reply that is not
# a positive integer raises pydantic's ValidationError.
state = app.invoke({"messages": input_messages, "name" : sample1}, config)
output = score(interest=state["messages"][-1].content.strip())
output


KeyboardInterrupt: 

In [9]:
#provide job zones context
# Explicit encoding so the read does not depend on the platform default.
with open("job_zones.json", encoding="utf-8") as f:
    jz = json.load(f)
# Build the frame and drop the unused column in one expression (no in-place
# mutation, so re-running the cell is safe).
jz_df = pd.DataFrame(jz["job_zones"]["job_zone"]).drop(columns=['svp_range'])

#combine text of all columns of each row into one new column
jz_df['description'] = (
    jz_df['title']
    + "; Education: " + jz_df['education']
    + "; Experience: " + jz_df['experience']
    + "; Job Training: " + jz_df['job_training']
    + "; Examples: " + jz_df['examples']
    + "; Zone Number: " + jz_df['_value']
)
# First five descriptions as a plain list. Positional slicing replaces the
# label lookups in `range(5)`, which raised KeyError for fewer than five rows.
jz_prompt = jz_df["description"].iloc[:5].tolist()
jz_prompt

['Job Zone One: Little or No Preparation Needed; Education: Some of these careers may need a high school diploma or GED certificate.; Experience: Little or no previous work-related skill, knowledge, or experience is needed for these careers. For example, a person can become a waiter or waitress even if he/she has never worked before.; Job Training: Employees in these careers need from a few days to a few months of training. Usually, an experienced worker can show you how to do the job.; Examples: These careers involve following instructions and helping others. Examples include agricultural equipment operators, dishwashers, floor sanders and finishers, landscaping and groundskeeping workers, logging equipment operators, baristas, and maids and housekeeping cleaners.; Zone Number: 1',
 'Job Zone Two: Some Preparation Needed; Education: These careers usually need a high school diploma.; Experience: Some previous work-related skill, knowledge, or experience is usually needed. For example, 

In [10]:
#set up llm

# Ask every question in `qlist` as the occupation in `sample1`, then ask for
# the job zone, all within one conversation thread.
# The config does not change between questions, so it is built once.
config = {"configurable": {"thread_id": "1"}}

answers = []
for question in qlist:
    output = app.invoke({"messages": [HumanMessage(question)], "name" : sample1}, config)
    # Strip surrounding whitespace so a stray space or newline in a reply does
    # not end up inside the concatenated answer string.
    answers.append(output["messages"][-1].content.strip())

query = "think about how much preparation your job requires, and provide the zone number. Remember to only provide your output as 1 number. here is the information for the job zones: " + str(jz_prompt) + "reminder: one single number only."
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages, "name" : sample1}, config)
job_zone = output["messages"][-1].content.strip()
print("job zone" + job_zone)

# `answer` is the final string (one character per question when the model
# follows the instructions); the list lives in `answers` so the name `answer`
# is never both a list and a string.
answer = "".join(answers)
print(answer)

job zone4
251432543245123455431255513455342135424352431245413324253432


In [113]:
#set up llm and run for 5 times

# The job-zone question is identical for every run, so build it once.
job_zone_query = "think about how much preparation your job requires, and provide the zone number. Remember to only provide your output as 1 number. here is the information for the job zones: " + str(jz_prompt) + "reminder: one single number only."

for run in range(5):
    # One conversation thread per run. Previously the inner loop reused the
    # outer loop variable, so `thread_id` became the question text: every
    # question was asked in its own thread and the job-zone query went to the
    # thread of the last question. The id is prefixed so that it cannot collide
    # with thread "1" used by the single-run cell.
    # NOTE: the checkpointer is shared, so re-running this cell in the same
    # kernel continues these threads rather than starting fresh ones.
    config = {"configurable": {"thread_id": f"sample2-run-{run}"}}

    answers = []
    for question in qlist:
        output = app.invoke({"messages": [HumanMessage(question)], "name" : sample2}, config)
        # Strip surrounding whitespace before concatenating the replies.
        answers.append(output["messages"][-1].content.strip())

    output = app.invoke({"messages": [HumanMessage(job_zone_query)], "name" : sample2}, config)
    job_zone = output["messages"][-1].content.strip()
    print("job zone" + job_zone)
    answer = "".join(answers)
    print(answer)

job zone3
444444224244441554324244444444424422245244424245455514242444
job zone3
45552441245212214121315522315243523442I would strongly like doing research and development related to advanced weather forecasting models, such as incorporating machine learning algorithms and high-performance computing to analyze large datasets of atmospheric conditions, satellite imagery, and other environmental factors. This would help me utilize my attention to detail and analytical skills to contribute to more accurate predictions.

However, I might dislike working in the field collecting data from remote locations or conducting physical experiments, as this might require frequent travel or exposure to harsh weather conditions.

I would strongly dislike being solely responsible for predicting extreme weather events, such as hurricanes or tornadoes, due to their high level of unpredictability and potential for significant loss of life.154541455144 

(I'm unsure about the logistics and feasibility of cr

In [129]:
# NOTE(review): this cell replaces whatever `answer` currently holds with a
# hard-coded string, and defines a second hard-coded string `sa`. Where these
# values come from is not shown in the notebook — confirm before relying on
# them.
answer = "334425234233132453142345334433413411232115534235414153411532"
sa="432514321432541324543251432513452143541324531254312543254342"
# Displays the number of characters in `answer`.
len(answer)

60

In [173]:
#combine all answers into a long string
# "".join concatenates a list of strings; applied to a plain string it returns
# an equal string, so this line is safe for either form of `answer`.
answer = "".join(answer)
answer  # not displayed: only the last expression of a cell is shown
sa="432514321432541324543251432513452143541324531254312543254342"
# Displays the number of characters in `sa`.
len(sa)

'432514321432541324543251432513452143541324531254312543254342'

In [141]:
#transform the item in the list into a string
# NOTE(review): the item assignment below only works when `answer` is a list.
# If `answer` is a string at this point (other cells rebind it to a joined
# string), `answer[i] = ...` raises TypeError — confirm the intended run order.
for i in range(len(answer)):
    # Joins the i-th element into a single string and stores it back in place.
    answer[i] = "".join(answer[i])
answer

['2',
 '4',
 '1',
 '3',
 '2',
 '4',
 '5',
 '3',
 '2',
 '4',
 '2',
 '3',
 '1',
 '4',
 '2',
 '1',
 '4',
 '3',
 '2',
 '5',
 '4',
 '1',
 '2',
 '3',
 '4',
 '1',
 '2',
 '2',
 '1',
 '4',
 '3',
 '5',
 '2',
 '1',
 '3',
 '4',
 '5',
 '2',
 '2',
 '2',
 '1',
 '4',
 '3',
 '2',
 '1',
 '2',
 '3',
 '4',
 '5',
 '1',
 '2',
 '1',
 '4',
 '2',
 '3',
 '1',
 '2',
 '4',
 '3',
 '5']

In [11]:
# Query the O*NET Interest Profiler "careers" endpoint with the answer string.
#
# Credentials are secrets and must not be stored in the notebook. They are read
# from ONET_USERNAME / ONET_PASSWORD, with an interactive prompt as fallback,
# and sent as HTTP Basic auth through `auth=`.
# NOTE(review): the Authorization header and `developer_login` cookie that were
# hard-coded here have been exposed and should be rotated. The cookie is no
# longer sent; Basic auth is assumed to be sufficient — confirm against the
# O*NET Web Services documentation.
from getpass import getpass

onet_username = os.environ.get("ONET_USERNAME") or input("O*NET Web Services username: ")
onet_password = os.environ.get("ONET_PASSWORD") or getpass("O*NET Web Services password: ")

url = 'https://services.onetcenter.org/ws/mnm/interestprofiler/careers'
cookies = {}

# Non-secret headers are kept as they were (presumably copied from a browser
# request); only the Authorization header has been removed.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    'Accept': 'application/json',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
    'Priority': 'u=0, i',
}

# All query parameters go through `params` so they are URL-encoded and each is
# sent exactly once. Previously `start`/`end` appeared both in the URL (end=1000)
# and in `params` (end=60), producing duplicate, conflicting parameters.
# NOTE(review): end=1000 (the value from the URL) is kept so the full match list
# is returned — confirm this is the intended page size.
params = {
    'answers': answer,
    # 'job_zone': job_zone,  # optional filter, disabled as in the original URL
    'start': '1',
    'end': '1000',
}

response = requests.get(
    url,
    params=params,
    cookies=cookies,
    headers=headers,
    auth=(onet_username, onet_password),
    timeout=30,  # do not hang forever if the service is unreachable
)
# Fail here with a clear HTTP error instead of later while parsing the body.
response.raise_for_status()

In [96]:
# Displays the raw response body as text (the whole payload is shown).
response.text

'{\n   "answers" : "111211114244131113331125512112413415325251551122513331232231",\n   "start" : 1,\n   "end" : 120,\n   "total" : 120,\n   "career" : [\n      {\n         "href" : "https://services.onetcenter.org/ws/mnm/careers/39-6011.00/",\n         "fit" : "Best",\n         "code" : "39-6011.00",\n         "title" : "Baggage Porters & Bellhops",\n         "tags" : {\n            "bright_outlook" : false,\n            "green" : false,\n            "apprenticeship" : false\n         }\n      },\n      {\n         "href" : "https://services.onetcenter.org/ws/mnm/careers/15-1232.00/",\n         "fit" : "Best",\n         "code" : "15-1232.00",\n         "title" : "Computer User Support Specialists",\n         "tags" : {\n            "bright_outlook" : false,\n            "green" : false,\n            "apprenticeship" : true\n         }\n      },\n      {\n         "href" : "https://services.onetcenter.org/ws/mnm/careers/41-2021.00/",\n         "fit" : "Best",\n         "code" : "41-2021

In [13]:
# Parse the JSON body of the careers response.
data = json.loads(response.text)
#select only title and fit
career = pd.DataFrame(data["career"]).loc[:, ['title', 'fit']]
#exact-match lookup: rows whose title equals the occupation held in `sample2`
title_matches = career['title'] == sample2
result = career[title_matches]
print(result)
#filter out the ones in fit that are not Best
#career = career[career['fit'] == 'Best']
career



Empty DataFrame
Columns: [title, fit]
Index: []


Unnamed: 0,title,fit
0,Interpreters & Translators,Best
1,Proofreaders & Copy Markers,Best
2,Speech-Language Pathology Assistants,Best
3,Archivists,Great
4,Court Reporters & Simultaneous Captioners,Great
5,Desktop Publishers,Great
6,Historians,Great
7,Librarians & Media Collections Specialists,Great
8,Medical Transcriptionists,Great
9,"Special Education Teachers, Kindergarten",Great


In [60]:
# Model configuration list, passed below as `config_list` to the autogen agent.
# NOTE(review): this rebinds `config`, which other cells use for the dict
# passed to `app.invoke`; re-run those cells' own `config = ...` lines before
# invoking the graph again.
config = [{"model": "llama3.1","api_key":'ollama', "base_url": 'http://localhost:11434/v1'}]

In [63]:
from autogen import ConversableAgent

# Role-play instructions; `{role}` is filled with the occupation in `sample1`.
system_prompt = "you are a {role}. Be helpful and concise with your responses. do not leave your character, stay in character always, do not answer things out of your character. you will receive statements regarding your identitdy. Read each question carefully and decide how you would feel about doing each type of work: Strongly Dislike (1), Dislike (2), Unsure (3), Like (4), Strongly Like (5). Try NOT to think about: If you have enough education or training to do the work; or how much money you would make doing the work. There are no right or wrong answers! Just think about if you would like or dislike doing the following work. Example: Question: Examine blood samples using a microscope. Answer: [your answer in 1 to 5]".format(role=sample1)


def _is_goodbye(msg):
    """Return True when the message text contains "bye".

    A missing or None "content" is treated as empty text, so the check cannot
    raise TypeError on messages that carry no text.
    """
    return "bye" in (msg.get("content") or "")


# LLM-backed agent; it never asks for human input and stops on a "bye" message.
assistant = ConversableAgent("sample1", llm_config={"config_list": config}, system_message= system_prompt, human_input_mode='NEVER', is_termination_msg=_is_goodbye)

human_proxy = ConversableAgent(
    "human_proxy",
    llm_config=False,  # no LLM used for human proxy
    human_input_mode="ALWAYS",  # always ask for human input
)

In [71]:
from typing import Optional

from pydantic import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    # NOTE(review): this class is handed to `model.with_structured_output`, so
    # the docstring and the Field descriptions presumably reach the model as
    # part of the schema — treat them as prompt text, not only documentation.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional field: defaults to None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Wrap the chat model so its output is requested in the shape of `Joke`.
structured_llm = model.with_structured_output(Joke)

# Single example call; the result is displayed as the cell output.
structured_llm.invoke("Tell me a joke about cats")

Joke(setup='Because it pawsed for a moment!', punchline="Why was cat sitting on mouse's computer?", rating=None)