In [None]:
import json
# Load the raw query dataset. randomize_queries iterates this object and
# indexes it by each iterated key, so the JSON is presumably an object mapping
# group names to lists of queries — TODO confirm against the data file.
with open('queries_dataset.json') as f:
    queries_list = json.load(f)
import random

def randomize_queries(queries_list, seed=42, percentage=0.8):
    """Shuffle all queries and split them into train, test and validation sets.

    Args:
        queries_list: Mapping of group name to a list of queries. The
            'sample_data' group is left out of the split.
        seed: Seed for the shuffle; the same seed always gives the same order.
        percentage: Fraction of the pooled queries assigned to the train
            split. The remainder is divided between test and validation.

    Returns:
        A ``(train, test, validation)`` tuple of lists.
    """
    # Pool every group except 'sample_data' into one flat list.
    pooled = []
    for group in queries_list:
        if group != 'sample_data':
            pooled.extend(queries_list[group])

    # Honour the caller's seed (it used to be hard-coded to 42). The module
    # level generator is seeded, as before, so code that runs afterwards sees
    # the same global random state it always did for the default seed.
    random.seed(seed)
    random.shuffle(pooled)

    total = len(pooled)
    train_end = int(total * percentage)
    # Halve the hold-out with integer arithmetic. The float expression
    # total * (1 - percentage) / 2 truncates unevenly: 100 items at 0.8 gave
    # 9 test and 11 validation items instead of 10 and 10.
    test_end = train_end + (total - train_end) // 2

    train = pooled[:train_end]
    test = pooled[train_end:test_end]
    validation = pooled[test_end:]

    return train, test, validation

# Build the three splits with the default seed and train fraction.
train, test, validation = randomize_queries(queries_list)
# Split sizes (bare expression; its value is discarded when run as a script).
len(train), len(test), len(validation)
# Persist each split as indented JSON under ./data, train first.
for _split_path, _split_items in (
    ('./data/train_queries.json', train),
    ('./data/test_queries.json', test),
    ('./data/validation_queries.json', validation),
):
    with open(_split_path, 'w') as f:
        json.dump(_split_items, f, indent=2)

# Prompts
# Llama 3 style chat template used to build inference prompts. get_prompt
# fills {{system_message}}, {{user_message}} and {{prefix}} via str.replace.
# The prompt carries exactly one <|begin_of_text|> token, at the very start;
# the user turn previously repeated it, which does not match the fine-tuning
# template defined below.
PROMPT_TEMPLATE_INFERENCE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>{{system_message}}<|eot_id|>
<|start_header_id|>user<|end_header_id|>{{user_message}}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
{{prefix}}"""

# Chat template for fine-tuning examples, with {{system}}, {{user}} and
# {{assistant}} placeholders. NOTE(review): nothing in the visible part of
# this file fills or references it yet.
PROMPT_TEMPLATE_FINETUNING = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>{{system}}<|eot_id|>
<|start_header_id|>user<|end_header_id|>{{user}}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>{{assistant}}<|eot_id|>"""

# get_prompt substitutes this for {{prefix}}, which sits right after the
# assistant header in PROMPT_TEMPLATE_INFERENCE, so the prompt ends with an
# already-opened <rationale> tag.
prefix = """<rationale>"""

# System message of the extraction prompt; get_prompt substitutes it for
# {{system_message}}.
NER_MANAGEMENT_LEVEL_TITLE_SYSTEM_PROMPT = """You are an AI assistant tasked with extracting management levels and job titles from a given query. Your goal is to analyze the query, identify relevant key phrases, and categorize them appropriately as either management levels or job titles.
"""

# User message of the extraction prompt. get_prompt replaces {{QUERY}} with
# the raw query text via str.replace. The model is asked to answer with a
# <rationale> block followed by a <json_output> block.
# NOTE(review): the step list numbers "5." twice, and the predefined
# management-level set opens with a stray `","` after "Partners,". Both are
# left untouched here because this text is sent to the model verbatim —
# confirm before editing.
NER_MANAGEMENT_LEVEL_TITLE_USER_PROMPT = """
Here is the query you need to analyze:
<query>
{{QUERY}}
</query>

Follow these steps to extract management levels and titles:

1. Management Levels: Only return management levels that match the predefined set: ["Partners,"," "Founder or Co-founder," "Board of Directors," "CSuite/Chiefs," "Executive VP or Sr. VP," "General Manager," "Manager," "Senior Partner," "Junior Partner," "VP," "Director," "Senior (All Senior-Level Individual Contributors)," "Mid (All Mid-Level Individual Contributors)," "Junior (All Junior-Level Individual Contributors)"]. MANAGEMENT CAN ONLY BE FROM THIS PREDEFINED SET, Nothing ELSE.
2. Job Titles: Normalize the job title after extracting it from the text. For example, convert "ceo" to "Chief Executive Officer" and always include both the full title and its abbreviation (confirmed ones), e.g., "VP of Engineering" and "Vice President of Engineering." or "Chief Innovation Officer" and "CINO". ENSURE LOGICAL and EXACT job titles such as 'Architect' NOT 'Architect who is skilled in VR'. Job titles MUST BE CONCISE AND TO THE POINT and shouldn't include company names or region names. Do not change the title for normalization.
3. Response Format: Your response must be a dictionary with two keys: "management_levels" and "titles". Each key should have a list of management levels and titles respectively.
4. Identify the key phrases in a prompt. Key phrase is a title and its function, IF THE function is mentioned. If the function would be mentioned, it will be classified as a "Job Title". For example, "CEOs working in Automotive Industry and VP of Engineering of Microsoft" has the "CEOs" and "VP of Engineering" as Key Phrases. In this case ONLY VP CANNOT be considered a key phrase. ONLY CONSIDER THE KEY PHRASES MENTIONED, DO NOT ASSUME. Past and current designations dont matter.
5. Check whether the KEY PHRASE should be classified as a Job Title or a Management Level. IT SHOULD NEVER BE CLASSIFIED INTO BOTH. This is ESSENTIAL.
5. If a key phrase is classified as title, don't include it in the management levels. For example, if "VP of Engineering" is classified as title then don't include "VP" in management levels. Industry or company names will not be included in job titles.
6. If a key phrase is classified as a management level, don't include it in the title. For example, if "Vice Presidents" is classified as management level then don't include "Vice Presidents" in titles. A job title will ONLY be a title and its business function. No other DETAIL should be added. A Management Level cannot COME FROM WITHIN A JOB TITLE.
7. Remember: One instance of a key phrase should be considered for either management level or job title, not both. Each will fall either into management levels or job title but WILL NEVER FALL INTO BOTH. A KEY PHRASE CANNOT BE IN MANAGEMENT LEVELS AND TITLE, BOTH. A Management Level cannot COME FROM WITHIN A JOB TITLE. 
8. If the word 'executive' is mentioned, specific considerations should be taken into account.
9. If terms like 'leader', 'expert', 'specialist', or similar are mentioned, extract a maximum of 2-3 relevant job titles associated with those terms based on the context, focusing on the most appropriate leadership or expertise roles.


Take a deep breath and understand.
Query: "Give me VPs working in Microsoft": # VPs is the KEY PHRASE. It will be evaluated as a whole key phrase ONLY. It cannot be classified into a management level and a title both.
Management Level Focus: In this query, "VPs" should refer to individuals at the management level of Vice President within Microsoft. This means you are asking for a list of people who occupy the VP rank across various departments or divisions within the company. VPs can cover the complete domain of of 'VP' in management level. The emphasis is on their standing in the organizational hierarchy, regardless of their specific job titles. A management title will only be selected if it covers the complete domain in the predefined set.
Job Title Focus: If you were asking about "VPs" in terms of job titles, you'd be interested in individuals whose specific title is "VP" of a certain business function, such as "VP of Marketing" (Marketing is a business function) or "VP of Engineering" (Engineering is a business function). If a function is clearly mentioned then it would be JOB TITLE. "VP of Microsoft" does NOT have a function (Microsoft is an organization) neither does "Automotive VPs" (Automotive can ONLY be an industry). Identify the BUSINESS functions accurately. Then they CANNOT come under management levels.
Output: {"management_levels": ["VP"], "titles": []}

Query: "The CFOs working in google or facebook": #CFOs is a KEY PHRASE. It will be evaluated as a whole key phrase ONLY. It cannot be classified into a management level and a title both.
Management Level Focus: In this query, "CFOs" does not cover the complete 'C-Suite' domain. ONLY IF COMPLETE DOMAIN IS COVERED then the key phrase will be in management level. One job title, even if it is on the top or head of the heirarchy, does not cover the complete domain. If a user wants all 'executives', without any business function specified then three management levels will be covered, namely "CSuite," "Executive VP or Sr. VP" and "VP" so all MUST come. However, the word 'executive' is mentioned in relation to a business function, only titles specific to that function should be included. For example, if 'Marketing Executives' is mentioned, titles such as 'CMO', 'Chief Marketing Officer', 'Senior VP of Marketing', 'Senior Vice President of Marketing', 'VP of Marketing', and 'Vice President of Marketing' should be included. The word 'executive' or 'executives' would, thus, NEVER be included neither as job title nor management level.
Job Title Focus: As a CFO would only be a chief in finance, the CFO being discussed here comes under job title, not management level.
Output: {"management_levels": [], "titles": ["CFO", "Chief Finance Officer"]} # Job titles MUST BE CONCISE and TO THE POINT, mentioning ONLY the TITLE and the BUSINESS FUNCTION if the business function is given. No added details, such as company name or group.

If terms like 'leader', 'expert', 'specialist', or similar are mentioned, extract a maximum of 2-3 relevant job titles associated with those terms based on the context, focusing on the most appropriate leadership or expertise roles.

For each management level and title, also tell why you put it there. If a business function can be clearly identified, the key phrase will be a JOB title. ALWAYS make the necessary changes when the word 'executive' or 'leader' or 'expert', etc., is mentioned in the user query and get LOGICAL titles. Management level of 'Manager' will not be chosen when a specific type of 'manager' (senior managers, project manager, etc.) are asked for. ONLY identify and consider complete key phrases EXPLICITLY MENTIONED IN THE PROMPT, and each key phrase will either be in management level or title, NEVER consider THE SAME KEY PHRASE for BOTH. Evaluate key phrases separately.

After analyzing the query, generate two outputs:

1. A reasoning paragraph that explains your thought process step-by-step. Include:
   - Identification of key phrases
   - Evaluation of each key phrase (management level or job title)
   - Reasoning behind your classifications
   - Any special considerations (e.g., handling of 'executive' or 'leader' terms)
   - Explanation of domain coverage for management levels
   - Keep this moderate, not too long and not too concise
   - While writing the reasoning, refrain from using I and addressing yourself.

2. A JSON object with two keys: "management_levels" and "titles". Each key should have a list of extracted management levels and titles respectively.   

Present your output in the following format:


<rationale>
Your step-by-step reasoning and rationale paragraph goes here.
</rationale>

<json_output>
{
  "management_levels": [...],
  "titles": [...]
}
</json_output>


Remember to adhere strictly to the guidelines provided, especially regarding the classification of key phrases and the handling of special terms like 'executive'.
"""

def get_prompt(query):
    """Build the complete inference prompt for a single query.

    The query is inserted into the user prompt first. The system message,
    the user message and the assistant prefix are then substituted into
    PROMPT_TEMPLATE_INFERENCE, in that order, using plain str.replace.
    """
    filled_user_prompt = NER_MANAGEMENT_LEVEL_TITLE_USER_PROMPT.replace('{{QUERY}}', query)
    substitutions = (
        ('{{system_message}}', NER_MANAGEMENT_LEVEL_TITLE_SYSTEM_PROMPT),
        ('{{user_message}}', filled_user_prompt),
        ('{{prefix}}', prefix),
    )
    prompt = PROMPT_TEMPLATE_INFERENCE
    for placeholder, value in substitutions:
        prompt = prompt.replace(placeholder, value)
    return prompt
# Reload the three query splits from ./data.
with open('./data/train_queries.json') as f:
    train_queries = json.load(f)
with open('./data/test_queries.json') as f:
    test_queries = json.load(f)
with open('./data/validation_queries.json') as f:
    validation_queries = json.load(f)

# Wrap every query of every split in the full inference prompt.
prompt_train_queries = [get_prompt(query) for query in train_queries]
prompt_test_queries = [get_prompt(query) for query in test_queries]
prompt_validation_queries = [get_prompt(query) for query in validation_queries]


# Write the prompt lists next to the splits, train first.
for _prompt_path, _prompt_list in (
    ('./data/inference_prompt_train_queries.json', prompt_train_queries),
    ('./data/inference_prompt_test_queries.json', prompt_test_queries),
    ('./data/inference_prompt_validation_queries.json', prompt_validation_queries),
):
    with open(_prompt_path, 'w') as f:
        json.dump(_prompt_list, f, indent=2)

# Llama 3.1 8B Generation

In [34]:
# System message for the validator model; chatgpt_response_a sends it as the
# "system" role content of every request.
SYSTEM = """You are a highly skilled validator agent specializing in assessing the accuracy of management level and job title extractions from given queries. Your expertise is crucial for ensuring the quality and correctness of these extractions. You are given a query and two other inputs: rationale and json_output generated from a smaller model and you need to provide feedback, corrected_rationale, and corrected_output"""

# User message template for the validator model. chatgpt_response_a renders it
# with jinja2, filling {{QUERY}}, {{RATIONALE}} and {{JSON_OUTPUT}}.
# Each requested output section is described inside its own tag pair. The
# <feedback> instructions used to sit after an already-closed </feedback>
# tag; they are now enclosed like the other sections.
USER = """

You will analyze three inputs:

1. The original query:
<query>
{{QUERY}}
</query>

2. A rationale generate by smaller model explaining reasoning for the extraction of json_output below:
<rationale>
{{RATIONALE}}
</rationale>

3. A JSON output, generated by smaller model based on the above rationale:
<json_output>
{{JSON_OUTPUT}}
</json_output>



Your task is to thoroughly analyze these inputs and provide a comprehensive evaluation. Follow these steps:

1. Review the query, rationale and json_output carefully.

2. Extract and classify key phrases according to these guidelines:
    - Management Levels: Only return management levels that match the predefined set: ["Partners,"," "Founder or Co-founder," "Board of Directors," "CSuite/Chiefs," "Executive VP or Sr. VP," "General Manager," "Manager," "Senior Partner," "Junior Partner," "VP," "Director," "Senior (All Senior-Level Individual Contributors)," "Mid (All Mid-Level Individual Contributors)," "Junior (All Junior-Level Individual Contributors)"]. MANAGEMENT CAN ONLY BE FROM THIS PREDEFINED SET, Nothing ELSE.
    - Job Titles: Normalize the job title after extracting it from the text. For example, convert "ceo" to "Chief Executive Officer" and always include both the full title and its abbreviation (confirmed ones), e.g., "VP of Engineering" and "Vice President of Engineering." or "Chief Innovation Officer" and "CINO". ENSURE LOGICAL and EXACT job titles such as 'Architect' NOT 'Architect who is skilled in VR'. Job titles MUST BE CONCISE AND TO THE POINT and shouldn't include company names or region names. Do not change the title for normalization.
    - Response Format: Your response must be a dictionary with two keys: "management_levels" and "titles". Each key should have a list of management levels and titles respectively.
    - Identify the key phrases in a prompt. Key phrase is a title and its function, IF THE function is mentioned. If the function would be mentioned, it will be classified as a "Job Title". For example, "CEOs working in Automotive Industry and VP of Engineering of Microsoft" has the "CEOs" and "VP of Engineering" as Key Phrases. In this case ONLY VP CANNOT be considered a key phrase. ONLY CONSIDER THE KEY PHRASES MENTIONED, DO NOT ASSUME. Past and current designations dont matter.
    - While writing the Job Title, one distinction that you need to make is identify the difference between a functional role, departments and categories. For example "VP of Retail" is a valid job title but "VP of Grocery", "VP of Sanitary" are not.
    - Check whether the KEY PHRASE should be classified as a Job Title or a Management Level. IT SHOULD NEVER BE CLASSIFIED INTO BOTH. This is ESSENTIAL.
    - If a key phrase is classified as title, don't include it in the management levels. For example, if "VP of Engineering" is classified as title then don't include "VP" in management levels. Industry or company names will not be included in job titles.
    - If a key phrase is classified as a management level, don't include it in the title. For example, if "Vice Presidents" is classified as management level then don't include "Vice Presidents" in titles. A job title will ONLY be a title and its business function. No other DETAIL should be added. A Management Level cannot COME FROM WITHIN A JOB TITLE.
    - Remember: One instance of a key phrase should be considered for either management level or job title, not both. Each will fall either into management levels or job title but WILL NEVER FALL INTO BOTH. A KEY PHRASE CANNOT BE IN MANAGEMENT LEVELS AND TITLE, BOTH. A Management Level cannot COME FROM WITHIN A JOB TITLE. 
    - If the word 'executive' is mentioned, specific considerations should be taken into account.
    - If terms like 'leader', 'expert', 'specialist', or similar are mentioned, extract a maximum of 2-3 relevant job titles associated with those terms based on the context, focusing on the most appropriate leadership or expertise roles.


3. Analyze the extraction and provide detailed feedback.

4. Offer correct reasoning and approach to the query.

5. Generate an accurate JSON output based on your analysis.

Wrap your work in the following tags:

<phrase_classification>
List all key phrases from the query that could potentially be management levels or job titles. For each phrase:
1. Write down the phrase.
2. Consider arguments for classifying it as a management level.
3. Consider arguments for classifying it as a job title.
4. Make a final classification based on the guidelines, providing reasoning for your decision.

After classifying all phrases, count the number of management levels and job titles you've identified. It's OK for this section to be quite long.
</phrase_classification>

<feedback>
Provide detailed feedback on the extraction in the rationale and JSON output. Highlight any issues or praise correct classifications. Explain why certain elements should or should not be included in management levels or job titles. While writing this, identify the issues with the reasoning as well as with the json_output, and include sentences like "The rationale is incorrect because..." or "The JSON output is inaccurate because..." and also like "This should not be present in mangement_title because it is in the title (if such a case occurs)" Be thorough in your analysis.
</feedback>

<correct_reasoning>
Present the correct approach to analyzing the query and generating accurate extractions. Explain your reasoning thoroughly. While writing this, identify the issues with the reasoning as well as with the json_output, and include sentences like "This should be in the management title because" or "I include this in title because...". Be thorough in your analysis.
</correct_reasoning>

<json_output>
Provide the correct JSON output based on your analysis. Use this format:
{
  "management_levels": [...],
  "titles": [...]
}
</json_output>

Remember to be thorough in your analysis, provide clear explanations for your decisions, and ensure that your feedback and reasoning are based on the established guidelines for management level and job title extraction.
"""

In [28]:
import asyncio
import os
from openai import AsyncOpenAI
import pandas as pd
from jinja2 import Template
import json

In [29]:
from dotenv import load_dotenv
# Run dotenv's loader first, then read the OpenAI key from the process
# environment; the value is None when OPENAI_API_KEY is not set.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [30]:
async def chatgpt_response_a(input, temperature=0.1, model="gpt-4o", **kwargs):
    """Ask the validator model to review one extraction and return the raw response.

    Args:
        input (dict): Must provide 'query', 'rationale' and 'json_output';
            these fill the matching placeholders of the USER template.
        temperature (float, optional): Sampling temperature. Defaults to 0.1.
        model (str, optional): Model to call. Defaults to "gpt-4o".
        **kwargs: Extra request fields merged into the chat-completion call.
            A key that collides with "model", "messages" or "temperature"
            replaces the value built here.

    Returns:
        dict: The response object's ``__dict__`` in which ``choices`` is a
        list of dicts and each choice's ``message`` is a dict, so the reply
        text is at ``result['choices'][0]['message']['content']``. Other
        nested attributes are left as the objects the client returned.
    """
    rendered_user = Template(USER).render({
        "QUERY": input['query'],
        "RATIONALE": input['rationale'],
        "JSON_OUTPUT": input['json_output'],
    })

    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": f"User Query: {rendered_user}"},
        ],
        "temperature": temperature,
    }
    # A fresh client is created on every call, using the module-level key.
    client = AsyncOpenAI(api_key=openai_api_key)
    request.update(kwargs)

    completion = await client.chat.completions.create(**request)

    # Unwrap the response, its choices and their messages into plain dicts.
    result = completion.__dict__
    choice_dicts = []
    for choice in result["choices"]:
        choice_dict = choice.__dict__
        choice_dict["message"] = choice_dict["message"].__dict__
        choice_dicts.append(choice_dict)
    result["choices"] = choice_dicts
    return result

In [31]:
# Example validator input holding the three keys chatgpt_response_a reads:
# the original query plus the smaller model's rationale and JSON output.
# NOTE(review): the name `input` shadows the builtin; it is kept because a
# later cell passes it to chatgpt_response_a by this name.
input = {
    "query": "GM or VP for grocery companies in finland, make sure they have relevant experience",
    "rationale": "To address the user query, we need to evaluate the key phrases \"GM\" and \"VP\" separately, considering the context provided.\n\n1. **Key Phrase: \"GM\" (General Manager)**\n   - **Management Level Focus**: The term \"GM\" refers to the management level of General Manager. Since no specific business function is mentioned alongside \"GM,\" it should be classified under management levels. The query does not specify a business function for \"GM,\" so it remains a management level.\n   - **Job Title Focus**: Since no specific business function is mentioned, \"GM\" does not qualify as a job title.\n\n2. **Key Phrase: \"VP\" (Vice President)**\n   - **Management Level Focus**: The term \"VP\" refers to the management level of Vice President. However, since the query specifies \"VP for grocery companies,\" it implies a business function (grocery), which means it should be classified as a job title.\n   - **Job Title Focus**: The query specifies \"VP for grocery companies,\" which indicates a business function (grocery). Therefore, \"VP\" should be classified as a job title, specifically \"VP of Grocery\" or \"Vice President of Grocery.\"\n\nBased on the analysis, the output should be:\n- \"GM\" is classified as a management level because no specific business function is mentioned.\n- \"VP\" is classified as a job title because a business function (grocery) is specified.",
    "json_output": "{'management_levels': ['GM'], 'titles': ['VP of Grocery', 'Vice President of Grocery']}"
}

In [32]:
# Top-level await: this works in a notebook/IPython cell but is a syntax
# error in a plain Python script, where it must run inside an async function.
result = await chatgpt_response_a(input)

In [33]:
# Show the reply text of the first choice returned by the validator model.
print(result['choices'][0]['message']['content'])

<phrase_classification>
1. **Phrase: "GM" (General Manager)**
   - **Arguments for Management Level**: "GM" stands for General Manager, which is a recognized management level. The query does not specify a business function or department, so it should be classified as a management level.
   - **Arguments for Job Title**: There are no specific business functions or departments mentioned alongside "GM," so it does not qualify as a job title.
   - **Final Classification**: Management Level. "GM" is classified as a management level because it fits the predefined set and lacks a specific business function.

2. **Phrase: "VP" (Vice President)**
   - **Arguments for Management Level**: "VP" stands for Vice President, which is a recognized management level. However, the query specifies "VP for grocery companies," which implies a business function.
   - **Arguments for Job Title**: The phrase "VP for grocery companies" suggests a business function related to grocery. However, "VP of Grocery" is 

In [None]:
with open('problematic_queries_v2.json') as f:
    