In [1]:
import pandas as pd
import json
import os
import requests
from tqdm import tqdm

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage
#add memory
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict
from typing import Sequence

In [2]:
apikey = "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"
# define langchain requirements and set the model

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = apikey
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"]="pr-oily-danger-57"

from langchain_ollama import ChatOllama

model= ChatOllama(model="llama3.1", temperature=1)

In [3]:
#creating prompt from jd and name.

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you are an experienced {name}. our task is to evaluate statements and respond with a single number from 1 to 5, based on how you feel about the described type of work: 1: Strongly Dislike; 2: Dislike; 3: Unsure; 4: Like; 5: Strongly Like. Guidelines for Your Response: Respond with ONLY a single number (1, 2, 3, 4, or 5). Do not provide any explanations, additional text, or analysis. Ignore factors like your education, training, or potential earnings—just focus on your personal preference. Example: Question: You like apples.Answer: 3; If your answer is not a single number, rewrite your response to a number.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [4]:
# Define a new graph

class State(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]
    name: str


workflow = StateGraph(state_schema=State)


# Define the function that calls the model
def call_model(state: State):
    prompt = prompt_template.invoke(state)
    response = model.invoke(prompt)
    return {"messages": response}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [52]:
# set up occupation data
occupations = pd.read_excel('Occupation Data.xlsx')
occupations = occupations.dropna()
occupations.columns = occupations.columns.str.lower()
#rename the column 
occupations = occupations.rename(columns={'o*net-soc code':'code'})   
#drop rows with "all other" in the content
occupations = occupations[~occupations['title'].str.contains("All Other")]
#change data type
occupations['code'] = occupations['code'].astype(str)
occupations['title'] = occupations['title'].astype(str)
occupations['description'] = occupations['description'].astype(str)
#sample data
sample1= occupations["title"].sample(1, random_state= 3).iloc[0]

print(sample1)

Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders


In [14]:
#get the questions into a list
with open("60qs.json") as f:
    qs = json.load(f)
test = qs["questions"]["question"]
df = pd.DataFrame(test)[['text', 'area', '_index']]
df.columns = ['question', 'area', 'index']
qlist = list(df["question"])
qlist


['Build kitchen cabinets',
 'Lay brick or tile',
 'Develop a new medicine',
 'Study ways to reduce water pollution',
 'Write books or plays',
 'Play a musical instrument',
 'Teach an individual an exercise routine',
 'Help people with personal or emotional problems',
 'Buy and sell stocks and bonds',
 'Manage a retail store',
 'Develop a spreadsheet using computer software',
 'Proofread records or forms',
 'Repair household appliances',
 'Raise fish in a fish hatchery',
 'Conduct chemical experiments',
 'Study the movement of planets',
 'Compose or arrange music',
 'Draw pictures',
 'Give career guidance to people',
 'Perform rehabilitation therapy',
 'Operate a beauty salon or barber shop',
 'Manage a department within a large company',
 'Install software across computers on a large network',
 'Operate a calculator',
 'Assemble electronic parts',
 'Drive a truck to deliver packages to offices and homes',
 'Examine blood samples using a microscope',
 'Investigate the cause of a fire',


In [9]:
#set up llm 

answer = []
job_zone = 0 #default
for i in qlist:
    query = i
    config = {"configurable": {"thread_id": "1"}}
    input_messages = [HumanMessage(query)]
    output = app.invoke({"messages": input_messages, "name" : sample2}, config)
    answer.append(output["messages"][-1].content)
# query = "think about how much preparation your job requires, and provide the zone number. Remember to only provide your output as 1 number. here is the information for the job zones: " + str(jz_prompt) + "reminder: one single number only."
# config = config
# input_messages = [HumanMessage(query)]
# output = app.invoke({"messages": input_messages, "name" : sample1}, config)
# job_zone = output["messages"][-1].content
# print("job zone" + job_zone)
answer = "".join(answer)
print(answer)

455312541324535214352431245534543215434513452145321534231253


In [15]:
#set up llm and run for 5 times
ans_list = []
for x in range(5):
    answer = []
    job_zone = 0 #default
    for i in qlist:
        query = i 
        config = {"configurable": {"thread_id": str(x)+sample1}}
        input_messages = [HumanMessage(query)]
        output = app.invoke({"messages": input_messages, "name" : sample1}, config)
        answer.append(output["messages"][-1].content)
    # query = "think about how much preparation your job requires, and provide the zone number. Remember to only provide your output as 1 number. here is the information for the job zones: " + str(jz_prompt) + "reminder: one single number only."
    # config = config
    # input_messages = [HumanMessage(query)]
    # output = app.invoke({"messages": input_messages, "name" : sample2}, config)
    # job_zone = output["messages"][-1].content
    # print("job zone" + job_zone)
    answer = "".join(answer)
    print(answer)
    ans_list.append(answer)
print(ans_list)

215434552342153454253441325455434521435554352432543555432321
421534215432145355423551235453425543425345432541325543234542
514523452143525434542351245153454323455342532451214534452311
421534352241325443253451234552435521435542432451235434542312
451423314253124533521452135443524312435432543251425334532412
['215434552342153454253441325455434521435554352432543555432321', '421534215432145355423551235453425543425345432541325543234542', '514523452143525434542351245153454323455342532451214534452311', '421534352241325443253451234552435521435542432451235434542312', '451423314253124533521452135443524312435432543251425334532412']


In [54]:
#new system prompt for new question
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you are an experienced {name}. When you respond, try to be structural and start your sentence with a verb. example:'Direct or coordinate activities of businesses or departments concerned with production, pricing, sales, or distribution of products.'",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

query = "what are your task at work as an experienced {name}?"
config = {"configurable": {"thread_id": str(3)+sample1}}
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages, "name" : sample1}, config) 
print(output["messages"][-1].content)

As an experienced Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders, my primary responsibilities revolve around the setup, operation, and maintenance of machines used in various welding, soldering, and brazing processes.

Specifically, I am responsible for operating, setting up, and tending to machines such as resistance brazing units, induction brazing machines, and gas metal arc welding (GMAW) and flux cored arc welding (FCAW) units, among others. My tasks include preparing the machine setup according to production requirements, adjusting settings to ensure proper heating and control temperatures, monitoring the process for optimal results, and performing routine maintenance activities such as cleaning, lubricating, and inspecting equipment to prevent downtime.

I also assist in resolving mechanical issues by utilizing my problem-solving skills and referring complex problems to higher-level technicians or supervisors. Furthermore, I collaborate with production l

In [22]:
#access the api to get the job titles
def get_career(answer):

    url = 'https://services.onetcenter.org/ws/mnm/interestprofiler/careers?answers='+answer+'&start=1&end=1000'
    cookies = {
        'developer_login': 'dW5pX21hbm5oZWltX2RlMTowMDU1ODEyOTFiYzRjYTYxNGE5YmJlM2E4ZjgyNjk2NWQxNzFiY2Y0',
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.5',
        'Authorization': 'Basic dW5pX21hbm5oZWltX2RlMTo3MzM5Y3R1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'cross-site',
        'Priority': 'u=0, i',
    }

    params = {
        'start': '1',
        'end': '60',
    }

    response = requests.get(
        url,
        params=params,
        cookies=cookies,
        headers=headers,
    )
    #search for the the target occupation in the response
    data = json.loads(response.text)
    data["career"]
    #select only title and fit
    career = pd.DataFrame(data["career"])
    career = career[['title', 'fit']]
    #use query to check if title contains the occupation in the variable sample1
    result = career.query('title == @sample1')
    print(result)
    #filter out the ones in fit that are not Best
    #career = career[career['fit'] == 'Best']
    print(career)

In [23]:
for answer in ans_list:
    try:
        get_career(answer)
    except:
        print("error")
        continue

Empty DataFrame
Columns: [title, fit]
Index: []
                                                title    fit
0                                    Music Therapists   Best
1                                              Actors  Great
2                                      Art Therapists  Great
3         Art, Drama, & Music Teachers, Postsecondary  Great
4                                    Park Naturalists  Great
5                 Poets, Lyricists & Creative Writers  Great
6                Architecture Teachers, Postsecondary   Good
7   Area, Ethnic, & Cultural Studies Teachers, Pos...   Good
8   Career/Technical Education Teachers, Postsecon...   Good
9                                      Choreographers   Good
10                                 Costume Attendants   Good
11                                            Dancers   Good
12                                  Fashion Designers   Good
13  Fine Artists, Including Painters, Sculptors, &...   Good
14           Makeup Artists, Theatric

In [30]:
get_career(answer)

          title   fit
53  Pharmacists  Best
                                                 title   fit
0                                    Acute Care Nurses  Best
1                 Advanced Practice Psychiatric Nurses  Best
2        Agricultural Sciences Teachers, Postsecondary  Best
3    Anthropology & Archeology Teachers, Postsecondary  Best
4    Area, Ethnic, & Cultural Studies Teachers, Pos...  Best
..                                                 ...   ...
118                           Physical Therapist Aides  Good
119                 Receptionists & Information Clerks  Good
120                  Sales Representatives of Services  Good
121                                School Bus Monitors  Good
122                                      Travel Guides  Good

[123 rows x 2 columns]


In [21]:
#search for the the target occupation in the response
data = json.loads(response.text)
data["career"]
#select only title and fit
career = pd.DataFrame(data["career"])
career = career[['title', 'fit']]
#use query to check if title contains the occupation in the variable sample1
result = career.query('title == @sample2')
print(result)
#filter out the ones in fit that are not Best
#career = career[career['fit'] == 'Best']
career



NameError: name 'response' is not defined

In [None]:
#provide job zones context
with open("job_zones.json") as f:
    jz = json.load(f)
jz_df = pd.DataFrame(jz["job_zones"]["job_zone"])
jz_df.drop(columns=['svp_range'], inplace=True)
jz_df

#combine text of all columns of each row into one new column
jz_df['description'] = jz_df['title'] +"; Education: " + jz_df['education'] + "; Experience: " + jz_df['experience'] + "; Job Training: " +jz_df['job_training'] + "; Examples: " +jz_df['examples'] + "; Zone Number: " + jz_df['_value']
jz_prompt = []
for x in range(5):
    jz_prompt.append(jz_df["description"][x])
jz_prompt

['Job Zone One: Little or No Preparation Needed; Education: Some of these careers may need a high school diploma or GED certificate.; Experience: Little or no previous work-related skill, knowledge, or experience is needed for these careers. For example, a person can become a waiter or waitress even if he/she has never worked before.; Job Training: Employees in these careers need from a few days to a few months of training. Usually, an experienced worker can show you how to do the job.; Examples: These careers involve following instructions and helping others. Examples include agricultural equipment operators, dishwashers, floor sanders and finishers, landscaping and groundskeeping workers, logging equipment operators, baristas, and maids and housekeeping cleaners.; Zone Number: 1',
 'Job Zone Two: Some Preparation Needed; Education: These careers usually need a high school diploma.; Experience: Some previous work-related skill, knowledge, or experience is usually needed. For example, 

In [53]:
refs= ["""Monitor systems for intrusions or denial of service attacks, and report security breaches to appropriate personnel.
Identify or document backup or recovery plans.
Back up or modify applications and related data to provide for disaster recovery.
Correct testing-identified problems, or recommend actions for their resolution.
Identify, standardize, and communicate levels of access and security.
Determine sources of Web page or server problems, and take action to correct such problems.
Implement updates, upgrades, and patches in a timely manner to limit loss of service.
Implement Web site security measures, such as firewalls or message encryption.
Collaborate with development teams to discuss, analyze, or resolve usability issues.
Test issues such as system integration, performance, and system security on a regular schedule or after any major program modifications.
Perform user testing or usage analyses to determine Web sites' effectiveness or usability.
Document application and Web site changes or change procedures.
Track, compile, and analyze Web site usage data.
Test backup or recovery plans regularly and resolve any problems.
Recommend Web site improvements, and develop budgets to support recommendations.
Review or update Web page content or links in a timely manner, using appropriate tools.
Install or configure Web server software or hardware to ensure that directory structure is well-defined, logical, and secure, and that files are named properly.
Gather, analyze, or document user feedback to locate or resolve sources of problems.
Set up or maintain monitoring tools on Web servers or Web sites.
Monitor Web developments through continuing education, reading, or participation in professional conferences, workshops, or groups.
Develop or document style guidelines for Web site content.
Develop Web site performance metrics.
Collaborate with Web developers to create and operate internal and external Web sites, or to manage projects, such as e-marketing campaigns.
Identify or address interoperability requirements.
Develop or implement procedures for ongoing Web site revision.
Check and analyze operating system or application log files regularly to verify proper system performance.
Provide training or technical assistance in Web site implementation or use.
Evaluate testing routines or procedures for adequacy, sufficiency, and effectiveness.
Inform Web site users of problems, problem resolutions, or application changes and updates.
Document installation or configuration procedures to allow maintenance and repetition.
Develop testing routines and procedures.
Test new software packages for use in Web operations or other applications.
Develop and implement marketing plans for home pages, including print advertising or advertisement rotation.
Evaluate or recommend server hardware or software."""]

cands = ["""As an experienced Web Administrator:

Design, implement, and maintain web servers, firewalls, and network infrastructure in a highly available and scalable manner.
Develop and enforce policies for website security, backups, and disaster recovery to prevent data loss and ensure business continuity.
Collaborate with cross-functional teams, including developers, designers, and project managers, to plan, execute, and deploy new website features, updates, and migrations.
Monitor website performance metrics, troubleshoot issues, and implement solutions to improve user experience and reduce downtime.
Manage budgets for infrastructure upgrades, vendor relationships, and third-party services to ensure efficient resource allocation.
Conduct regular security audits, risk assessments, and vulnerability testing to identify and address potential threats and weaknesses.
Stay up-to-date with industry trends, best practices, and emerging technologies to recommend strategic investments in web infrastructure and services."""]


In [2]:
refs1 =["""Read blueprints, work orders, or production schedules to determine product or job instructions or specifications.
Inspect, measure, or test completed metal workpieces to ensure conformance to specifications, using measuring and testing devices.
Record operational information on specified production reports.
Correct problems by adjusting controls or by stopping machines and opening holding devices.
Set up, operate, or tend welding machines that join or bond components to fabricate metal products or assemblies.
Select torch tips, alloys, flux, coil, tubing, or wire, according to metal types or thicknesses, data charts, or records.
Lay out, fit, or connect parts to be bonded, calculating production measurements, as necessary.
Prepare metal surfaces or workpieces, using hand-operated equipment, such as grinders, cutters, or drills.
Mark weld points and positions of components on workpieces, using rules, squares, templates, or scribes.
Set dials and timing controls to regulate electrical current, gas flow pressure, heating or cooling cycles, or shut-off.
Turn and press knobs and buttons or enter operating instructions into computers to adjust and start welding machines.
Assemble, align, and clamp workpieces into holding fixtures to bond, heat-treat, or solder fabricated metal components.
Conduct trial runs before welding, soldering, or brazing, and make necessary adjustments to equipment.
Give directions to other workers regarding machine set-up and use.
Clean, lubricate, maintain, and adjust equipment to maintain efficient operation, using air hoses, cleaning fluids, and hand tools.
Select, position, align, and bolt jigs, holding fixtures, guides, or stops onto machines, using measuring instruments and hand tools.
Remove completed workpieces or parts from machinery, using hand tools.
Observe meters, gauges, or machine operations to ensure that soldering or brazing processes meet specifications.
Transfer components, metal products, or assemblies, using moving equipment.
Devise or build fixtures or jigs used to hold parts in place during welding, brazing, or soldering.
"""]

cands1 =["""As an experienced Welding, Soldering, and Brazing Machine Setters, Operators, and Tenders, my primary responsibilities revolve around the setup, operation, and maintenance of machines used in various welding, soldering, and brazing processes.

Specifically, I am responsible for operating, setting up, and tending to machines such as resistance brazing units, induction brazing machines, and gas metal arc welding (GMAW) and flux cored arc welding (FCAW) units, among others. My tasks include preparing the machine setup according to production requirements, adjusting settings to ensure proper heating and control temperatures, monitoring the process for optimal results, and performing routine maintenance activities such as cleaning, lubricating, and inspecting equipment to prevent downtime.

I also assist in resolving mechanical issues by utilizing my problem-solving skills and referring complex problems to higher-level technicians or supervisors. Furthermore, I collaborate with production line operators to ensure that the machine setup meets the necessary specifications for the specific product being manufactured.

Additionally, as part of a manufacturing team, I contribute to quality control efforts by inspecting the final products to identify any defects, irregularities, or imperfections resulting from the welding process. This ensures that high-quality products are delivered to customers and helps in maintaining the company's reputation for excellence."""]

In [3]:
from bert_score import score
P, R, F1 = score(cands1, refs1, lang="en", verbose=True, model_type="microsoft/deberta-xlarge-mnli")

KeyboardInterrupt: 

In [1]:
F1

NameError: name 'F1' is not defined

In [48]:
from bert_score import score
cands = [output["messages"][-1].content]

P, R, F1 = score(cands, refs, lang="en", verbose=True, model_type="microsoft/deberta-xlarge-mnli")

calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 2.01 seconds, 0.50 sentences/sec


In [49]:
F1

tensor([0.6773])

In [50]:
P
	

tensor([0.6993])

In [51]:
R

tensor([0.6566])