In [16]:
import os
from typing import Literal

import tiktoken
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

# Populate the process environment from the .env file two directories up,
# then read the OpenAI key back out (None if the variable is not set).
load_dotenv("../../.env")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# INPUT Cost Estimate
- Longest file
    - o3: ~ $0.50
    - 4.1 nano: ~ $0.005
    - 4.1 mini: $0.02
- Average file
    - o3: ~ $0.20
    - 4.1 nano: ~ $0.002
    - 4.1 mini: $0.007

Takeaway: All three models are affordable for testing, but o3 would probably get expensive at scale, since total cost grows with (number of transcripts × number of topics searched).

In [9]:
# Define model options
#   processing_cost: USD per input token (price per 1M tokens / 1,000,000)
#   tokenizer:       tiktoken encoding name used to count tokens for the model
# NOTE(review): tiktoken's model table appears to map the gpt-4.1 family to
# 'o200k_base'; 'cl100k_base' is kept here so the figures match earlier runs.
# Confirm, and switch the two gpt-4.1 entries if so.
models = {
    'gpt_41_nano': {
        'processing_cost': .1/1000000,
        'tokenizer': 'cl100k_base'
    },
    'gpt_41_mini': {
        'processing_cost': .4/1000000,
        'tokenizer': 'cl100k_base'
    },
    'o3': {
        'processing_cost': 10/1000000,
        'tokenizer': 'o200k_base'
    }
}
# Define relevant transcripts
transcripts = ['sa-dolir', 'dss-non-medicaid-pt2', 'ded', 'ltgov-dhss-p1']

# Read every transcript once up front instead of once per model.
# An explicit encoding keeps token counts independent of the platform default.
transcript_texts = {}
for transcript in transcripts:
    with open(f'../data/{transcript}.txt', encoding='utf-8') as f:
        transcript_texts[transcript] = f.read()

# Iterate through models to id cost for each model for the transcripts
for model, model_spec in models.items():
    print(model)
    # The encoding depends only on the model, so look it up once per model.
    encoding = tiktoken.get_encoding(model_spec['tokenizer'])
    for transcript, transcript_text in transcript_texts.items():
        num_tokens = len(encoding.encode(transcript_text))
        print(f"* {transcript} ({num_tokens} tokens): {model_spec['processing_cost'] * num_tokens}")

gpt_41_nano
* sa-dolir (6250 tokens): 0.000625
* dss-non-medicaid-pt2 (49384 tokens): 0.004938400000000001
* ded (5138 tokens): 0.0005138
* ltgov-dhss-p1 (17929 tokens): 0.0017929
gpt_41_mini
* sa-dolir (6250 tokens): 0.0025
* dss-non-medicaid-pt2 (49384 tokens): 0.019753600000000003
* ded (5138 tokens): 0.0020552
* ltgov-dhss-p1 (17929 tokens): 0.0071716
o3
* sa-dolir (6146 tokens): 0.06146000000000001
* dss-non-medicaid-pt2 (47888 tokens): 0.47888000000000003
* ded (5026 tokens): 0.050260000000000006
* ltgov-dhss-p1 (17441 tokens): 0.17441


# Initial Testing

## Define the prompt

In [17]:
# Prompt template for topic tagging. It takes two variables:
#   {topic}   - the topic to look for
#   {passage} - the text to inspect
# The instructions deliberately bias the model toward a positive tag when it is
# unsure, because a missed reference (false negative) is treated as more costly
# than a spurious one (false positive).
tagging_prompt = ChatPromptTemplate.from_template(
    """
    Determine whether the following passage contains a reference to the provided topic.
    
    If you are unsure, assume the passage covers the topic as false negatives are more impactful than false positives

    Provide the properties mentioned in the 'Classification' function.

    Topic:
    {topic}
    
    Passage:
    {passage}
    """
)

## Define the classification function

In [18]:
# Structured-output schema for one tagging decision: whether the topic appears
# in the passage, how confident the model is, and the supporting excerpt.
class Classification(BaseModel):
    # True when the passage covers the topic.
    tag: bool = Field(description="whether the topic is covered in the passage")
    # Literal restricts the value to 1-5 at validation time and still yields an
    # enum in the generated JSON schema. The previous `enum=[...]` keyword on
    # Field was schema-only metadata that never rejected out-of-range values
    # (and is a deprecated extra Field kwarg under Pydantic v2).
    confidence: Literal[1, 2, 3, 4, 5] = Field(
        ...,
        description="the confidence you feel with your tagging, the higher the number the greater the confidence",
    )
    # Excerpt of the passage that justified a positive tag.
    relevant_section: str = Field(
        description="if you tagged the passage as containing the topic, extract the portion of the passage that led you to this conclusion"
    )

## Instantiate structured LLM

In [24]:
# Create the OpenAI chat model and bind the Classification schema in one
# expression, so `llm` only ever refers to the structured-output runnable.
llm = init_chat_model(
    'gpt-4.1-mini',
    model_provider="openai",
).with_structured_output(Classification)

## Pull in labeled text for validation

In [26]:
sample_txt = "Okay. So why don't we get started just because I think we've got the books situated this way on our desks for our robust committee this morning. We'll start with the Department of Labor. And then just a quick reminder, unless committee members have specific questions as we go through this, we generally just ask the departments to hit on core changes and new decision items. Okay? Should I do my opening statement? If you want to, sure. Great. Welcome. All right. Good morning, Mr. Chairman, Ranking Member, and members of the committee. Good morning again. Thank you so much for the opportunity to present the FY26 Budget Request for the Missouri Department of Labor and Industrial Relations. I'm Anna Hugh, Department Director, and with me at the table is Julie Kuhn, our agency's Chief Fiscal Officer. In addition, some members of our executive team are also here with us today. I'm Deputy Director Matt Hankins, Division of Employment Security Director Alan Andrews, Division Workers' Compensation Acting Director Ben Qualls, Missouri Commission on Human Rights Executive Director Dr. Elisa Warren, the Labor and Industrial Relations Commission Chairman Rodney Campbell, and the Department's Legislative Liaison Stephen Merriam. This year's budget request is for $384.6 million and $788.63 FTE. The funding sources are 1.22% GR, 29.5% federal, and 69.28% other funds. Starting with the Department Director's Office, which includes administration, the Office of General Counsel, communications, and strategic planning, we have 51.63 FTE in our funding through a combination of sources, including GR, federal funds, and other funds. These units provide the services that support the entire agency in the areas of accounting, communications, facilities management, human resources, legal services, procurement, staff training, strategic planning, supplies, and website management. 
Dolder's largest division is the Division of Employment Security, led by Director Alan Andrews, with 519.72 FTE, and is almost completely federally funded. DES administers unemployment benefits, disaster unemployment, shared work, the war on terror unemployment benefits, collection of unemployment insurance taxes, and employment training. Starting calendar year 2023, the Division's Unemployment Automation Fund began receiving funds. These funds allow the division to maintain a modernized, secure, efficient, and customer-friendly platform for the unemployment insurance program now and into the future. Utilizing this fund, along with federal funding, DES has moved U-Interact, Missouri's web-based unemployment insurance program, to the Amazon Cloud to improve system performance and scalability, making U-Interact the largest Missouri state government application hosted in a cloud environment. We are currently in the process of modernizing the system foundation, which will strengthen the agency's ability to continually innovate. Not only has the division continued its commitment to a modern, responsive unemployment insurance program for all Missouri citizens, it's also committed to maintaining the proper balance between the health of the UI Trust Fund and the tax levied on employers that fund the program. I'm excited to report that in calendar year 2025, a 12 percent rate reduction will be applied for the majority of contributing employers, and the taxable wage base for employers will be reduced by $500 to $9,500 for calendar year 2025. The department's second largest division is the Division of Workers' Compensation, led by Acting Director Ben Qualls, with 139.25 FTE, and is supported by other funds, primarily the Workers' Compensation Administration Fund. 
This division is responsible for assisting injured workers and their employers, resolving disputes between workers, employers, and insurers, the Tort Victims' Compensation Program, line-of-duty payments, second injury fund claims, and adjusting the workers' compensation surcharges. The DWC continues to work on Work Comp Connect, a multi-year modernization project to streamline processes and automate many facets of the Work Comp Program, improving citizen engagement with the division and efficiency across all the administration of the program. Phase one is scheduled to go live in December of 2025, and will include functionalities for case management, medical fee disputes, adjudication, benefit administration, and order entry. Our third largest division is the Division of Labor Standards, led by Director Logan Hobbs, with 37.22 FTE, and funded through GR, federal funds, and other funds. It's comprised of the Wage and Hour Division that covers youth employment, minimum wage overtime, and prevailing wage, three work safety programs, on-site safety and health consultation program, worker safety program, mine and cave safety program, as well as our research and analysis unit. The SAFE at Work Program, which is a collaboration between the Divisions of Workers' Compensation and Labor Standards, is in its fifth year, resulting in workplace data and informational publications that provide businesses educational materials on how to improve their workplace safety. The mine and cave safety program continues to offer training to miners, mine owners, operators, and contractors, and the on-site safety consultation program visits workplaces to partner with them to identify and correct hazards, and award Missouri businesses that model exemplary safety standards. The next three units are three independent commissions for which DOLER handles all administrative functions. First, is the Missouri Commission on Human Rights, headed by Executive Director Dr. 
Elisa Warren, has 25.7 FTE, and receives both GR and federal funds. This independent commission works to eliminate discrimination in housing, public accommodations, and employment. They investigate complaints of discrimination in those three areas, and enforce the Missouri Human Rights Act. They also work with the independent Martin Luther King Commission. The State Board of Mediation is led by Acting Chair Logan Hobson, has 1.5 FTE, and is GR funded. They work with public employers and employees on the designation of the employee groups who wish to be represented by unions, hold certification elections, and certify the election results. Our appellate body is the Labor Industrial Relations Commission, chaired by Rodney Campbell, and has 13.59 FTE, and is funded by a combination of GR, federal, and other funds. The LIRC handles appeals from administrative decisions and workers' compensation, unemployment compensation, tort victims' compensation, and prevailing wage. As Governor Kehoe mentioned in his State of State address, he expects his administration to work at the speed of business. To meet that objective, our focus as a department has been to utilize data-driven solutions to better meet the needs of our citizens, and to prepare and address the challenges before us. This not only means including an emphasis on program administration, solution innovation, and delivery of benefits and services, but also a concerted effort to provide impactful improvements and support our state workforce. We are thankful for the investments Governor Kehoe has proposed to improve compensation benefits and professional development opportunities that make a difference in state workforce retention, recruitment, succession planning, and professionalism. These all provide resourcing and support to deliver good, consistent citizen service. 
I'm proud of the work we've done, and are doing better to serve people across Missouri, improve program administration and access through technology, and foster a culture of professionalism and excellence in our workforce. We'll now present the department's FY26 budget with all the NDIs that were in Governor Kehoe's proposal. At this time, I invite Julie Kuhn to assist me with continuing the budget presentation, and we can answer specific questions as we go through the budget. Thank you, Director Hugh. Just to clarify real quick, I think you welcomed our ranking minority member. She's not actually here. She's not here yet. No. Okay. But Rusty's here. He's the vice chair. Yes, vice chair. Yeah. Thank you very much. Julie, whenever you're ready. Thank you. Good morning, Mr. Chairman, committee members. My name is Julie Kuhn. I'm the chief fiscal officer for the Department of Labor and Industrial Relations. Before we get started going through the department's budget book, I would like to cover the department's flexibility requests and summarize overall core reductions found in our FY26 requests. The department's flexibility includes 10 percent for the Division of Labor Standards Programs, State Board of Mediation, Commission on Human Rights, the Labor-Industrial Relations Commission, as well as in FY26, the department's also asked for 10 percent flexibility for the Division of Workers' Compensation funds. The department also has 25 percent flexibility for the Division of Employment Security and is requesting 25 percent for the Division of Labor Standards On-Site Program. The department's overall core reductions in the FY26 budget total $22 million in federal and other funds and $200,000 of general revenue. The department's first new decision item can be found on page 2 of your budget book. 
This is the department's statewide time of service adjustment for full-time state employees, includes a 1 percent increase for every 2 years of state service with a cap of 10 percent for 20 years of state service. The governor recommends this new decision item totals $838,451 of general revenue, federal and other funds. The second new decision item can be found on page 9 of your budget book. This is the general revenue pickup item for the statewide pay plan just discussed. The request is needed from general revenue due to fund solvency issues or federal funds that just have not been awarded through our grants. The governor's recommendation for this totals $1.7 million of general revenue. Our first core item with changes can be found on page 13, or I'm sorry. The next new decision item is found on page 20 of your budget book. This is the new decision item for the dolar department director salary increase. It provides appropriation authority to increase the salary for our department director, and the governor's recommendation totals $10,000 in federal funds. Our first core item with changes is the administrative fund transfers. They can be found on page 22 and 29 of your budget book for a combined total of $16 million. This core does include some reallocations between federal and other funds that just align the authority with plan spending based on our timesheet data and our approved cost allocation plan. On page 35, the labor industrial relations commission totals $1.2 million of general revenue, federal and other funds, and 13.59 FTE. This core, too, also has core reconciliations between federal and other funds that are going to align with plan spending based on our approved cost allocation plan. The next core with new decision item is found on page 83. This new decision item is for the administrative law judge salary increase. 
The new decision item totals $50,000 in other funds, and is to adjust the salary for the administrative law judges to align with the associate circuit court judge salaries. So can I ask a question there? Are we, so we've got an additional increase for the ALJ's salary on top of the governor's up to 10%? So the ALJ salaries are statutory set salaries, and they're not included in the statewide. Oh, so they're not included on, see, I feel like we've included them on the increases, right? Yes."

## Invoke LLM using defined prompt

In [29]:
# Fill the tagging template for a single (topic, passage) pair...
prompt = tagging_prompt.invoke(
    dict(topic='Information Technology', passage=sample_txt)
)
# ...and run it through the structured-output model.
response = llm.invoke(prompt)
# Bare last expression so the notebook displays the result.
response

Classification(tag=True, confidence=5, relevant_section="Utilizing this fund, along with federal funding, DES has moved U-Interact, Missouri's web-based unemployment insurance program, to the Amazon Cloud to improve system performance and scalability, making U-Interact the largest Missouri state government application hosted in a cloud environment. We are currently in the process of modernizing the system foundation, which will strengthen the agency's ability to continually innovate. Not only has the division continued its commitment to a modern, responsive unemployment insurance program for all Missouri citizens, it's also committed to maintaining the proper balance between the health of the UI Trust Fund and the tax levied on employers that fund the program. The DWC continues to work on Work Comp Connect, a multi-year modernization project to streamline processes and automate many facets of the Work Comp Program, improving citizen engagement with the division and efficiency across al

# Holistic Testing

## Define model options

## Define target transcripts

## Define function for tagging transcript

In [None]:
# Inputs: Specific model (str) and transcript (filename str)
# 1. Read in the transcript
# 2. Instantiate the 