In [1]:
import asyncio
import os
from openai import AsyncOpenAI
from jinja2 import Template
import pandas as pd
import json
from copy import deepcopy

In [2]:
from dotenv import load_dotenv
# load_dotenv() (python-dotenv) is expected to merge a local .env file into the
# process environment; the key is then read from there (None when it is unset).
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

## Loading the test data

In [92]:
# Test records, concatenated in file order. Each JSONL line is one JSON object.
data = []
for jsonl_path in ('test_data.jsonl', 'test_data_v2.jsonl'):
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. an extra newline at EOF) is not a record
                # and would otherwise make json.loads raise.
                continue
            data.append(json.loads(line))

In [95]:
len(data)

51

In [96]:
data[1]

{'messages': [{'role': 'system',
   'content': 'You are an AI assistant tasked with extracting management levels and job titles from a given query. Your goal is to analyze the query, identify relevant key phrases, and categorize them appropriately as either management levels or job titles while generating proper reasoning and thinking step by step. \nYour output format should be like below:\n<rationale>\nProvide reasoning and step-by-step thinking inside here\n</rationale>\n<json_output>\nProvide json output inside here\n</json_output>\n'},
  {'role': 'user',
   'content': 'User Query: \nInstructions:\n1. Identify key phrases that are management levels or job titles.\n2. Categorize each key phrase as either a management level or a job title.\n3. Use predefined management levels: ["Partners,"," "Founder or Co-founder," "Board of Directors," "CSuite/Chiefs," "Executive VP or Sr. VP," "General Manager," "Manager," "Head," "Senior Partner," "Junior Partner," "VP," "Director," "Senior (All 

In [97]:
# Two independent, empty accumulators: model inputs and their reference answers.
inference_data, ground_truths_list = [], []

In [98]:
import re

def extract_rationale_and_json_output(input_string):
    """
    Pull the text wrapped in <rationale> and <json_output> tags out of a response.

    Args:
        input_string (str | None): Raw response text. ``None`` or an empty
            string (for example a completion that carried no text content) is
            treated as "nothing to extract" instead of raising.

    Returns:
        tuple: ``(rationale, json_output)``. Each element is the stripped tag
        content as a string, or ``None`` when that tag pair is absent.
    """
    if not input_string:
        return None, None

    def _tag_body(tag):
        # DOTALL lets the non-greedy group span several lines; only the first
        # occurrence of the tag pair is considered.
        match = re.search(rf"<{tag}>\n?(.*?)\n?</{tag}>", input_string, re.DOTALL)
        return match.group(1).strip() if match else None

    return _tag_body("rationale"), _tag_body("json_output")


def extract_query(input_string):
    """
    Return the backtick-quoted query that follows the marker sentence
    'Here is the query you need to analyze:'.

    Args:
        input_string (str): Prompt text that embeds the query.

    Returns:
        str: The query with surrounding whitespace stripped, or None when the
        marker followed by a backtick-quoted span is not present.
    """
    # At least one whitespace character must separate the marker from the
    # opening backtick; DOTALL allows the quoted query to contain newlines.
    found = re.search(
        r"Here is the query you need to analyze:\s+`(.*?)`",
        input_string,
        re.DOTALL,
    )
    if found is None:
        return None
    return found.group(1).strip()



In [99]:
# Start from empty accumulators before the split loop fills them.
inference_data, ground_truths_list = [], []

In [100]:
# Rebuild both lists from scratch so that re-running this cell does not append
# a second copy of every record.
inference_data = []
ground_truths_list = []
for item in data:
    # The first two messages (system + user in the sample record shown earlier)
    # are what gets sent for inference.
    inference_item = item["messages"][:2]
    # The last message carries the reference answer in <rationale>/<json_output> tags.
    ground_truth_rationale, ground_truth_json_output = extract_rationale_and_json_output(
        item["messages"][-1]["content"]
    )
    inference_data.append({"messages": inference_item})
    ground_truths_list.append({
        # None when the user message lacks the "Here is the query you need to analyze:" marker.
        "query": extract_query(item["messages"][1]["content"]),
        "gt_rationale": ground_truth_rationale,
        "gt_json_output": ground_truth_json_output,
    })

In [101]:
ground_truths_list[0]

{'query': 'I need sales directors in healthcare AI companies based in Germany.',
 'gt_rationale': 'To address the user query, we need to identify and evaluate the key phrases mentioned. The key phrase in this query is "sales directors."\n\n1. **Key Phrase Identification**: \n   - "Sales directors" is the key phrase. It refers to individuals holding the title of "Director" with a specific business function, which is "Sales."\n\n2. **Classification**:\n   - **Management Level**: The term "Director" is part of the predefined management levels. However, since "Sales" is a specific business function mentioned, it should be classified as a job title rather than a management level.\n   - **Job Title**: "Sales directors" refers to the job title of "Director of Sales," which includes the business function "Sales."\n\n3. **Conclusion**:\n   - Since "Sales directors" specifies a business function, it falls under the category of job titles.\n\nTherefore, the output should reflect this classificati

In [102]:
# Persist the extracted ground truths as pretty-printed JSON.
with open("test_data_gts.json", "w") as f:
    f.write(json.dumps(ground_truths_list, indent=2))

In [103]:
len(inference_data), len(ground_truths_list)

(51, 51)

In [104]:
inference_data[:2]

[{'messages': [{'role': 'system',
    'content': 'You are an AI assistant tasked with extracting management levels and job titles from a given query. Your goal is to analyze the query, identify relevant key phrases, and categorize them appropriately as either management levels or job titles while generating proper reasoning and thinking step by step. \nYour output format should be like below:\n<rationale>\nProvide reasoning and step-by-step thinking inside here\n</rationale>\n<json_output>\nProvide json output inside here\n</json_output>\n'},
   {'role': 'user',
    'content': 'User Query: \nInstructions:\n1. Identify key phrases that are management levels or job titles.\n2. Categorize each key phrase as either a management level or a job title.\n3. Use predefined management levels: ["Partners,"," "Founder or Co-founder," "Board of Directors," "CSuite/Chiefs," "Executive VP or Sr. VP," "General Manager," "Manager," "Head," "Senior Partner," "Junior Partner," "VP," "Director," "Senior (

In [105]:
# System message for the student model. It fixes the response layout: reasoning
# inside <rationale>...</rationale>, then the answer inside
# <json_output>...</json_output> -- the two tag pairs that
# extract_rationale_and_json_output() parses. The text is sent verbatim to the
# model, so any edit here changes model behaviour.
STUDENT_MODEL_PROMPT_SYSTEM = """You are an AI assistant tasked with extracting management levels and job titles from a given query. Your goal is to analyze the query, identify relevant key phrases, and categorize them appropriately as either management levels or job titles while generating proper reasoning and thinking step by step. 
Your output format should be like below:
<rationale>
Provide reasoning and step-by-step thinking inside here
</rationale>
<json_output>
Provide json output inside here
</json_output>
"""

# Long-form user prompt (Jinja2 template; {{QUERY}} is its only placeholder).
# This is the template chatgpt_response_a() renders.
# NOTE(review): it does not contain the "Here is the query you need to analyze:"
# marker that extract_query() looks for, and its instruction list numbers two
# consecutive items as "5." -- both left untouched because the text is sent
# verbatim to the model.
STUDENT_MODEL_PROMPT_USER_ORIGINAL = """Instructions:
1. Management Levels: Only return management levels that match the predefined set: ["Partners,"," "Founder or Co-founder," "Board of Directors," "CSuite/Chiefs," "Executive VP or Sr. VP," "General Manager," "Manager," "Head," "Senior Partner," "Junior Partner," "VP," "Director," "Senior (All Senior-Level Individual Contributors)," "Mid (All Mid-Level Individual Contributors)," "Junior (All Junior-Level Individual Contributors)"]. MANAGEMENT CAN ONLY BE FROM THIS PREDEFINED SET, Nothing ELSE.
2. Job Titles: Normalize the job title after extracting it from the text. For example, convert "ceo" to "Chief Executive Officer" and always include both the full title and its abbreviation (confirmed ones), e.g., "VP of Engineering" and "Vice President of Engineering." or "Chief Innovation Officer" and "CINO". ENSURE LOGICAL and EXACT job titles such as 'Architect' NOT 'Architect who is skilled in VR'. Job titles MUST BE CONCISE AND TO THE POINT and shouldn't include company names or region names. Do not change the title for normalization.
3. Response Format: Your response must be a dictionary with two keys: "management_levels" and "titles". Each key should have a list of management levels and titles respectively.
4. Identify the key phrases in a prompt. Key phrase is a title and its function, IF THE function is mentioned. If the function would be mentioned, it will be classified as a "Job Title". For example, "CEOs working in Automotive Industry and VP of Engineering of Microsoft" has the "CEOs" and "VP of Engineering" as Key Phrases. In this case ONLY VP CANNOT be considered a key phrase. ONLY CONSIDER THE KEY PHRASES MENTIONED, DO NOT ASSUME. Past and current designations dont matter.
5. Check whether the KEY PHRASE should be classified as a Job Title or a Management Level. IT SHOULD NEVER BE CLASSIFIED INTO BOTH. This is ESSENTIAL.
5. If a key phrase is classified as title, don't include it in the management levels. For example, if "VP of Engineering" is classified as title then don't include "VP" in management levels. Industry or company names will not be included in job titles.
6. If a key phrase is classified as a management level, don't include it in the title. For example, if "Vice Presidents" is classified as management level then don't include "Vice Presidents" in titles. A job title will ONLY be a title and its business function. No other DETAIL should be added. A Management Level cannot COME FROM WITHIN A JOB TITLE.
7. Remember: One instance of a key phrase should be considered for either management level or job title, not both. Each will fall either into management levels or job title but WILL NEVER FALL INTO BOTH. A KEY PHRASE CANNOT BE IN MANAGEMENT LEVELS AND TITLE, BOTH. A Management Level cannot COME FROM WITHIN A JOB TITLE. 
8. If the word 'executive' is mentioned, specific considerations should be taken into account.

Take a deep breath and understand.
Query: "Give me VPs working in Microsoft": # VPs is the KEY PHRASE. It will be evaluated as a whole key phrase ONLY. It cannot be classified into a management level and a title both.
Management Level Focus: In this query, "VPs" should refer to individuals at the management level of Vice President within Microsoft. This means you are asking for a list of people who occupy the VP rank across various departments or divisions within the company. VPs can cover the complete domain of of 'VP' in management level. The emphasis is on their standing in the organizational hierarchy, regardless of their specific job titles. A management title will only be selected if it covers the complete domain in the predefined set.
Job Title Focus: If you were asking about "VPs" in terms of job titles, you'd be interested in individuals whose specific title is "VP" of a certain business function, such as "VP of Marketing" (Marketing is a business function) or "VP of Engineering" (Engineering is a business function). If a function is clearly mentioned then it would be JOB TITLE. "VP of Microsoft" does NOT have a function (Microsoft is an organization) neither does "Automotive VPs" (Automotive can ONLY be an industry). Identify the BUSINESS functions accurately. Then they CANNOT come under management levels.
Output: {"management_levels": ["VP"], "titles": []}

Query: "The CFOs working in google or facebook": #CFOs is a KEY PHRASE. It will be evaluated as a whole key phrase ONLY. It cannot be classified into a management level and a title both.
Management Level Focus: In this query, "CFOs" does not cover the complete 'C-Suite' domain. ONLY IF COMPLETE DOMAIN IS COVERED then the key phrase will be in management level. One job title, even if it is on the top or head of the heirarchy, does not cover the complete domain. If a user wants all 'executives', without any business function specified then three management levels will be covered, namely "CSuite," "Executive VP or Sr. VP" and "VP" so all MUST come. However, the word 'executive' is mentioned in relation to a business function, only titles specific to that function should be included. For example, if 'Marketing Executives' is mentioned, titles such as 'CMO', 'Chief Marketing Officer', 'Senior VP of Marketing', 'Senior Vice President of Marketing', 'VP of Marketing', and 'Vice President of Marketing' should be included. The word 'executive' or 'executives' would, thus, NEVER be included neither as job title nor management level.
Job Title Focus: As a CFO would only be a chief in finance (not covering the CSuite domain), the CFO being discussed here comes under job title, not management level, same if the title was "heads of finance" (wouldn't cover Head domain), etc.
Output: {"management_levels": [], "titles": ["CFO", "Chief Finance Officer"]} # Job titles MUST BE CONCISE and TO THE POINT, mentioning ONLY the TITLE and the BUSINESS FUNCTION if the business function is given. No added details, such as company name or group.

If terms like 'leader', 'expert', 'specialist', or similar are mentioned, extract a maximum of 2-3 relevant job titles associated with those terms based on the context, focusing on the most appropriate leadership or expertise roles. However, do not extract titles based on names, companies, etc.

For each management level and title, also tell why you put it there. If a business function can be clearly identified, the key phrase will be a JOB title. ALWAYS make the necessary changes when the word 'executive' or 'leader' or 'expert', etc., is mentioned in the user query and get LOGICAL titles. Management level of 'Manager' will not be chosen when a specific type of 'manager' (senior managers, project manager, etc.) are asked for. ONLY identify and consider complete key phrases EXPLICITLY MENTIONED IN THE PROMPT, and each key phrase will either be in management level or title, NEVER consider THE SAME KEY PHRASE for BOTH. Evaluate key phrases separately.
Always return a JSON object in your output.

User Query: {{QUERY}}
Let's think step by step about each key phrase."""

# Compact user prompt (Jinja2 template; {{QUERY}} is its only placeholder).
# Its "Here is the query you need to analyze: `...`" line is the marker that
# extract_query() matches. The user message in the test-data sample displayed
# earlier appears to follow this layout -- TODO confirm.
# NOTE(review): chatgpt_response_a() renders the ORIGINAL template, not this one.
STUDENT_MODEL_PROMPT_USER_V3 = """
Instructions:
1. Identify key phrases that are management levels or job titles.
2. Categorize each key phrase as either a management level or a job title.
3. Use predefined management levels: ['Partners,',' 'Founder or Co-founder,' 'Board of Directors,' 'CSuite/Chiefs,' 'Executive VP or Sr. VP,' 'General Manager,' 'Manager,' 'Head,' 'Senior Partner,' 'Junior Partner,' 'VP,' 'Director,' 'Senior (All Senior-Level Individual Contributors),' 'Mid (All Mid-Level Individual Contributors),' 'Junior (All Junior-Level Individual Contributors)'].
4. For job titles:
   - Normalize and include abbreviations if applicable.
   - Exclude company or region names.
   
Always return a JSON object. 


Here is the query you need to analyze: `{{QUERY}}`
Let's think step by step about each key phrase.
"""




# Layout of an assistant turn with {{rationale}} and {{json_output}}
# placeholders, mirroring the tag format requested in the system prompt.
# NOTE(review): not referenced anywhere else in the visible part of this notebook.
ASSISTANT_RESPONSE_TEMPLATE = """
<rationale>
{{rationale}}
</rationale>

<json_output>
{{json_output}}
</json_output>
"""


In [135]:

# Fine-tuned checkpoints kept for switching the model under evaluation:
# model_name = "ft:gpt-4o-mini-2024-07-18:qlu:ner-v1-338-training-examples-08-01-2024-3-epochs:AnSGTc0m:ckpt-step-676"
# model_name = "ft:gpt-4o-mini-2024-07-18:qlu:ner-v2-49-improved-training-examples-09-01-2024-2-epochs:AnjrNkjW"
# Active model. chatgpt_response_a() captures this value as its default `model`
# argument when the function is defined, so re-run that definition after changing it.
model_name = "gpt-4o-mini-2024-07-18"
async def chatgpt_response_a(query, temperature=0.1, model=model_name, **kwargs):
    """
    Send one query to the OpenAI chat-completions API using the student prompts.

    The query is rendered into STUDENT_MODEL_PROMPT_USER_ORIGINAL, the result is
    prefixed with "User Query: " and sent together with STUDENT_MODEL_PROMPT_SYSTEM.

    Args:
        query (str): Text substituted for the {{QUERY}} placeholder of the user prompt.
        temperature (float, optional): Sampling temperature. Defaults to 0.1.
        model (str, optional): Model identifier. Defaults to the value `model_name`
            had when this function was defined; reassigning the global later does
            not change the default.
        **kwargs: Extra request fields. They are merged last, so they can override
            "model", "messages" and "temperature".

    Returns:
        dict: The completion's attributes as a plain dict. Every entry of
        "choices" is a dict and every choice's "message" is a dict; values nested
        deeper are left exactly as the SDK returned them.
    """
    user_message = Template(STUDENT_MODEL_PROMPT_USER_ORIGINAL).render({"QUERY": query})
    # NOTE(review): the template already has its own "User Query: {{QUERY}}" line,
    # so the prefix below labels the instruction block rather than the query. The
    # test-data sample shows the same leading "User Query: ", so it is kept as-is.
    messages = [
        {"role": "system", "content": STUDENT_MODEL_PROMPT_SYSTEM},
        {"role": "user", "content": f"User Query: {user_message}"},
    ]
    openai_object = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    openai_object.update(kwargs)

    # A client is created for every call; the context manager closes its HTTP
    # connections as soon as the request is done instead of leaving them open.
    async with AsyncOpenAI(api_key=openai_api_key) as aclient:
        response = await aclient.chat.completions.create(**openai_object)

    # Build the dict view from shallow copies so the SDK objects are not mutated.
    payload = dict(vars(response))
    payload["choices"] = [dict(vars(choice)) for choice in response.choices]
    for choice in payload["choices"]:
        choice["message"] = dict(vars(choice["message"]))
    return payload

In [136]:
import asyncio
from typing import List, Dict
import math

# Fine-tuned checkpoints kept for switching the model under evaluation:
# model_name = "ft:gpt-4o-mini-2024-07-18:qlu:ner-v1-338-training-examples-08-01-2024-3-epochs:AnSGTc0m:ckpt-step-676"
# model_name = "ft:gpt-4o-mini-2024-07-18:qlu:ner-v2-49-improved-training-examples-09-01-2024-2-epochs:AnjrNkjW"
# Repeats the assignment from the earlier cell. process_batch() reads this global
# at call time and passes it explicitly as `model` for every request.
model_name = "gpt-4o-mini-2024-07-18"

async def process_batch(texts: List[str]) -> List[Dict]:
    """Send every text of one batch to the model concurrently and parse the replies.

    Args:
        texts: Queries to run; each becomes one chatgpt_response_a() call with
            temperature 0.1 and the module-level `model_name`.

    Returns:
        One dict per input text, in input order, with the keys "query",
        "generated_rationale", "generated_json_output" and "unparsed_response".
        An exception from any single request propagates out of this function.
    """
    pending = [
        chatgpt_response_a(text, temperature=0.1, model=model_name)
        for text in texts
    ]
    # gather() yields results in the order of `pending`, so zip() pairs each
    # reply with the query that produced it.
    completions = await asyncio.gather(*pending)

    parsed = []
    for query, completion in zip(texts, completions):
        content = completion["choices"][0]["message"]["content"]
        rationale, json_output = extract_rationale_and_json_output(content)
        parsed.append({
            "query": query,
            "generated_rationale": rationale,
            "generated_json_output": json_output,
            "unparsed_response" : content
        })
    return parsed

async def batched_title_management(texts: List[str], batch_size: int = 10) -> List[Dict]:
    """Process texts in consecutive batches and return exactly one result per text.

    Callers pair the returned list with other lists by position, so a failed
    batch must not shrink it. When a batch fails, the error is printed and one
    placeholder entry per query of that batch is emitted instead: the usual keys
    ("query", "generated_rationale", "generated_json_output",
    "unparsed_response") with None for the generated fields, plus an "error"
    key holding the exception text.

    Args:
        texts: Queries to process, in the order results should be returned.
        batch_size: Number of queries sent concurrently per batch (>= 1).

    Returns:
        A list with len(texts) entries, in the same order as `texts`.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    all_results = []
    num_batches = math.ceil(len(texts) / batch_size)

    for i in range(num_batches):
        # Slicing clamps at the end of the list, so the last batch may be shorter.
        batch = texts[i * batch_size:(i + 1) * batch_size]

        try:
            batch_results = await process_batch(batch)
        except Exception as e:
            print(f"Error processing batch {i+1}/{num_batches}: {str(e)}")
            # Keep positions aligned with `texts` even though this batch failed.
            batch_results = [
                {
                    "query": query,
                    "generated_rationale": None,
                    "generated_json_output": None,
                    "unparsed_response": None,
                    "error": str(e),
                }
                for query in batch
            ]
        all_results.extend(batch_results)

        # Small pause between batches to stay clear of rate limits.
        if i < num_batches - 1:
            await asyncio.sleep(1)

    return all_results

In [137]:
# Queries in ground-truth order; predictions are later matched back to the
# ground truths by position, so this order must be preserved.
list_of_queries = [entry["query"] for entry in ground_truths_list]


In [138]:
len(list_of_queries)

51

In [110]:
# (len(list_of_queries) - 7)/ len(list_of_queries) * 100

In [139]:
# Run the full query list through the model in batches of 10.
# The empty-list assignment only guarantees the name exists; the awaited call
# below replaces it. Top-level `await` is notebook (IPython) syntax -- a plain
# script would need asyncio.run(...) instead.
results = []
results = await batched_title_management(list_of_queries, batch_size=10)

In [112]:
results[1]

{'query': 'Find operations managers at logistics startups using AI for route optimization in Southeast Asia.',
 'generated_rationale': 'To address the user query, we need to identify the key phrases and determine whether they should be classified as management levels or job titles.\n\n1. **Key Phrase Identification**:\n   - "operations managers" is the key phrase here. It specifies a type of manager, which is "operations managers."\n\n2. **Classification**:\n   - **Management Level Focus**: The term "operations managers" specifies a particular type of manager, which means it does not cover the complete "Manager" domain in the predefined set. Therefore, it cannot be classified as a management level.\n   - **Job Title Focus**: Since "operations managers" specifies a business function (operations), it should be classified as a job title. The job title would be concise and to the point, focusing on the title and the business function.\n\n3. **Output**:\n   - Since "operations managers" is 

In [113]:
# Placeholder; the next cell replaces it with the rows loaded from the saved comparison file.
comparison_list = []

In [140]:
# Load the previously saved comparison rows for the fine-tuned model.
with open("finetunied_model_v2_results_comparison.json") as f:
    comparison_list = json.loads(f.read())

In [142]:
len(comparison_list)

51

In [143]:
# Accumulator for the merged per-query rows (ground truth, base prediction, fine-tuned prediction).
final_comparison_list = []

In [144]:
# The three lists are paired purely by position, so refuse to merge when their
# lengths differ -- zip() would silently drop the extra rows and hide the problem.
if not (len(ground_truths_list) == len(results) == len(comparison_list)):
    raise ValueError(
        "Cannot align rows: "
        f"{len(ground_truths_list)} ground truths, {len(results)} base predictions, "
        f"{len(comparison_list)} fine-tuned rows"
    )

# Rebuilt from scratch so re-running this cell does not duplicate rows.
final_comparison_list = []
for gt, pred, cl in zip(ground_truths_list, results, comparison_list):
    # Each prediction carries the query it was generated for; a mismatch means
    # the lists are out of step and the merged row would be meaningless.
    if pred["query"] != gt["query"]:
        raise ValueError(
            f"Prediction/ground-truth mismatch: {pred['query']!r} vs {gt['query']!r}"
        )
    final_comparison_list.append({
        "query" : gt["query"],
        "Ground Truth" : gt["gt_json_output"],
        "Base Prediction" : pred["generated_json_output"],
        "FT Prediction" : cl["Prediction"],
        "GT Rationale" : gt["gt_rationale"],
        "PD Rationale" : pred["generated_rationale"],
        # NOTE(review): "Ratinale" looks like a typo for "Rationale"; the key is
        # kept unchanged because the saved JSON files already use it.
        "FT Ratinale" : cl["PD Rationale"],
        "unparsed_model_response" : pred["unparsed_response"]
    })


In [146]:
len(final_comparison_list)

51

In [147]:
# Save the merged comparison rows as pretty-printed JSON.
# NOTE(review): a later cell reads "base_problem"/"ft_problem" keys from this
# same file, which are not written here; if they are added to the file by hand,
# re-running this cell would overwrite them -- confirm before re-running.
with open("finetunied_model_v2_results_comparison_2.json", "w") as f:
    f.write(json.dumps(final_comparison_list, indent=2))

In [50]:
result = await chatgpt_response_a("Give me VP of Sales")

In [51]:
rationale, json_output = extract_rationale_and_json_output(result['choices'][0]["message"]["content"])

In [52]:
print(rationale)

1. **Identify Key Phrases**: The query is "Give me VP of Sales". The key phrase here is "VP of Sales". This is a specific title with a business function (Sales) mentioned.

2. **Determine Classification**: 
   - "VP of Sales" is a job title because it specifies a business function (Sales). According to the instructions, if a business function is mentioned, it should be classified as a job title.
   - The management level "VP" is not considered separately because it is part of the job title "VP of Sales".

3. **Normalization**: 
   - Normalize "VP of Sales" to include both the full title and its abbreviation: "VP of Sales" and "Vice President of Sales".

4. **Final Categorization**: 
   - Since "VP of Sales" is classified as a job title, it will not be included in the management levels.
   - The management levels list remains empty as no standalone management level is identified.

5. **Output Construction**: 
   - The job title list includes "VP of Sales" and "Vice President of Sales".


In [53]:
json_output

'{\n  "management_levels": [],\n  "titles": ["VP of Sales", "Vice President of Sales"]\n}'

## Comparing Results

In [152]:
# Placeholder; the next cell replaces it with the rows loaded from the comparison file.
comparison_list_data = []

In [153]:
# Load the comparison rows to be scored.
# NOTE(review): the scoring loop reads "base_problem" and "ft_problem" keys that
# the cell writing this file does not emit -- presumably they are annotated by
# hand in the JSON between the two steps; confirm.
with open("finetunied_model_v2_results_comparison_2.json") as f:
    comparison_list_data = json.loads(f.read())

In [154]:
# Tallies for the scoring loop: wrong base-model answers, wrong fine-tuned
# answers, and fine-tuned answers flagged "parsing".
base_problem_count = ft_problem_count = parsing_count = 0

In [155]:
# Rows flagged as wrong for either model; written to problematic_queries_v2.json at the end.
problematic_queries = []

In [165]:
# Reset the tallies here so that re-running this cell does not add to counts
# (or list entries) left over from a previous run.
base_problem_count = 0
ft_problem_count = 0
parsing_count = 0
problematic_queries = []

for item in comparison_list_data:
    # Flags are stored as strings ("true" / "parsing"); a missing key means "no problem".
    base_failed = item.get("base_problem") == "true"
    ft_failed = item.get("ft_problem") == "true"

    if base_failed:
        base_problem_count += 1
    if ft_failed:
        ft_problem_count += 1
        # Show the rows the fine-tuned model got wrong for manual inspection.
        print(json.dumps(item, indent=2))
    if item.get("ft_problem") == "parsing":
        # Counted separately and not added to problematic_queries.
        parsing_count += 1

    if base_failed or ft_failed:
        # Record each flagged row once, even when both models failed on it.
        problematic_queries.append(item)

{
  "query": "GM or VP for grocery companies in finland, make sure they have relevant experience",
  "Ground Truth": "{'management_levels': ['General Manager', 'VP'], 'titles': []}",
  "Base Prediction": "{\n  \"management_levels\": [\"GM\", \"VP\"],\n  \"titles\": []\n}",
  "FT Prediction": "{'management_levels': ['GM'], 'titles': ['VP of Grocery', 'Vice President of Grocery']}",
  "GT Rationale": "To address the user query, we need to identify the key phrases and determine whether they should be classified as management levels or job titles. Let's break it down:\n\n1. **Key Phrase: \"GM\"**\n   - **Analysis**: \"GM\" stands for \"General Manager.\" In this context, it refers to a management level within grocery companies in Finland. The term \"General Manager\" is part of the predefined set of management levels.\n   - **Classification**: Management Level\n   - **Reason**: \"GM\" covers the complete domain of \"General Manager\" in the predefined management levels set.\n\n2. **Key Phr

In [161]:
# Accuracy (%) = share of rows NOT flagged "true" for the given model.
# NOTE(review): rows flagged "parsing" are tallied in parsing_count but still
# count as correct here -- confirm that is intended.
# Raises ZeroDivisionError when comparison_list_data is empty.
total_queries = len(comparison_list_data)
base_accuracy = (total_queries - base_problem_count)/total_queries * 100
ft_accuracy = (total_queries - ft_problem_count)/total_queries*100

In [162]:
base_accuracy, ft_accuracy

(76.47058823529412, 90.19607843137256)

In [163]:
# Save the flagged rows as pretty-printed JSON for follow-up review.
with open('problematic_queries_v2.json', "w") as f:
    f.write(json.dumps(problematic_queries, indent=2))