In [1]:
import ast
import asyncio
import json
import os

import pandas as pd
from openai import AsyncOpenAI

In [2]:
from dotenv import load_dotenv
import os

# Pull the variables declared in a local .env file into the process environment.
load_dotenv()

# OpenAI credential taken from the environment; None when the variable is unset.
key = os.environ.get("OPENAI_API_KEY")

In [3]:
import pandas as pd

# Absolute path of the results CSV that is loaded into `df`.
RESULTS_CSV_PATH = "/home/raza/Documents/onboarding-task/phi-3-results_temp_10/10-epoch-results.csv"

with open(RESULTS_CSV_PATH) as f:
    df = pd.read_csv(f)

In [8]:
# Materialize the "Query" column of `df` as a plain Python list.
queries_list = list(df["Query"])

63

In [10]:
async def chatgpt_response_a(messages, temperature=0.7, model="gpt-4o-mini", **kwargs):
    """
    Send one chat-completion request to OpenAI and return the reply as nested dicts.

    The API key is not a parameter: it is read from the module-level ``key``
    variable.

    Args:
        messages (list): chat messages to send, i.e. the system, assistant and user prompts.
        temperature (float, optional): Sampling temperature. Defaults to 0.7.
        model (str, optional): The model you want to use. Defaults to "gpt-4o-mini".
        **kwargs: Extra request fields passed to ``chat.completions.create``. A key
            that repeats ``model``, ``messages`` or ``temperature`` overrides it.

    Returns:
        dict: the response's attributes, where ``choices`` is a list of dicts and
        each choice's ``message`` is a dict as well, so the reply text is at
        ``result["choices"][0]["message"]["content"]``. Other nested values are
        left as the objects returned by the SDK.
    """
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    request.update(kwargs)

    # A client is created per call; the context manager closes its HTTP
    # connections as soon as the request is done instead of waiting for
    # garbage collection.
    async with AsyncOpenAI(api_key=key) as aclient:
        completion = await aclient.chat.completions.create(**request)

    # Build fresh dicts rather than editing the SDK objects' own __dict__.
    result = dict(vars(completion))
    choices = []
    for choice in result["choices"]:
        choice_dict = dict(vars(choice))
        choice_dict["message"] = dict(vars(choice_dict["message"]))
        choices.append(choice_dict)
    result["choices"] = choices
    return result

In [25]:
# System prompt for the management-level / job-title extraction calls.
# It asks the model to reply with a dictionary holding two list-valued keys,
# "management_levels" and "titles". The text is sent to the model verbatim,
# so any wording change here changes the extraction behaviour.
MANAGEMENT_LEVEL_TITLE_PROMPT = """
You are an intelligent assistant dedicated to extracting management levels and job titles from user queries. Before doing so, you must understand what a functional area is.

Definition of a Functional Area:
- A functional area is a department or group of personnel tasked with a specific organizational function. These include departments like finance, marketing, engineering, etc.

Definition of Management Level:
- A management level refers to a hierarchical position within an organization without a specific functional area. It encompasses broader titles that may include roles across different functional areas.
- Management levels include: "Board of Directors," "CSuite and President," "Executive and Sr. VP," "General Manager," "VP," "Director," "Manager," "Senior (Individual Contributor)," "Mid (Individual Contributor)," and "Junior."

Definition of a Job Title:
- A job title refers to a specific employment position combined with a functional area.
- Examples include 'VP of Engineering' (functional area: Engineering) and 'Director of Finance' (functional area: Finance).

Instructions:
1. Management Levels: Only return management levels that match the predefined set: ["Board of Directors," "CSuite and President," "Executive and Sr. VP," "General Manager," "VP," "Director," "Manager," "Senior (Individual Contributor)," "Mid (Individual Contributor)," "Junior"].
2. Job Titles: Normalize the job title after extracting it from the text. For example, convert "ceo" to "Chief Executive Officer" and include both the full title and its abbreviation if mentioned in the query, e.g., "VP of Engineering" and "Vice President of Engineering."
3. Response Format: Your response must be a dictionary with two keys: "management_levels" and "titles". Each key should have a list of management levels and titles respectively.
4. If a keyword is classified as title, don't include it in the management levels and vice versa. e.g if "VP of Engineering" is classified as title then don't include "VP" in management levels.

Examples:

Query: Provide a list of CFOs and VPs working in the technology sector
Output: {"management_levels": ["VP"], "titles": ["Chief Financial Officer", "CFO"]}

Guidelines:
- "Leaders" implies CSuite/President level
- Titles without functional areas (e.g., "Founder", "Managing Director") go in titles list
- Use knowledge base for educated guesses about classifications

"""

In [26]:
def _parse_extraction_reply(content):
    """Turn one raw model reply into the extracted dict, or "" when there is none.

    The outermost ``{...}`` span of the reply is parsed as JSON first and as a
    Python literal second (models sometimes answer with single-quoted dicts).
    Neither parser executes code, unlike ``eval``.

    Args:
        content (str | None): text of the assistant message.

    Returns:
        dict | str: the parsed, non-empty dict; ``""`` when the reply is empty,
        has no braces, cannot be parsed, or parses to an empty or non-dict value.
    """
    if not content:
        return ""
    start, end = content.find("{"), content.rfind("}")
    if start == -1 or end < start:
        return ""
    snippet = content[start : end + 1]
    try:
        parsed = json.loads(snippet)
    except ValueError:
        try:
            parsed = ast.literal_eval(snippet)
        except (ValueError, SyntaxError, TypeError):
            return ""
    if not isinstance(parsed, dict) or not parsed:
        return ""
    return parsed


async def title_management(texts):
    """Extract management levels and job titles for each query, concurrently.

    One chat request per text is issued through ``chatgpt_response_a`` with
    ``MANAGEMENT_LEVEL_TITLE_PROMPT`` as the system message, and all requests
    are awaited together.

    Args:
        texts (iterable of str): user queries to analyse.

    Returns:
        list: exactly one entry per input text, in input order. Each entry is
        the dict parsed from the model reply, or ``""`` as a placeholder when
        the reply held no usable dict. Keeping one entry per input is what lets
        callers pair results with their queries by position.
    """
    tasks = [
        chatgpt_response_a(
            [
                {"role": "system", "content": MANAGEMENT_LEVEL_TITLE_PROMPT},
                {"role": "user", "content": f"User Query: {text}"},
            ],
            temperature=0.3,
            model="gpt-4o",
        )
        for text in texts
    ]

    # gather() returns results in the order the tasks were passed in.
    results = await asyncio.gather(*tasks)
    return [
        _parse_extraction_reply(res["choices"][0]["message"]["content"])
        for res in results
    ]

In [33]:
# NOTE(review): this rebinds `df`, replacing the frame loaded earlier from the results CSV.
df = pd.read_csv("sample_file.csv")
# One "Query" value per row, in row order.
# NOTE(review): `texts` is not used by any other cell visible here — confirm it is still needed.
texts = [row['Query'] for idx, row in df.iterrows()]

In [27]:
# Run the extraction over every query in `queries_list`; the top-level `await`
# relies on the notebook kernel's support for awaiting directly in a cell.
ground_truths = await title_management(queries_list)

In [28]:
len(ground_truths)

63

In [29]:
ground_truths

[{'management_levels': [], 'titles': []},
 {'management_levels': [], 'titles': ['Director of Human-AI Collaboration']},
 {'management_levels': ['Junior'], 'titles': []},
 {'management_levels': ['Senior (Individual Contributor)'],
  'titles': ['Engineer']},
 {'management_levels': [], 'titles': ['Director of Mental Health Programs']},
 {'management_levels': ['CSuite and President'], 'titles': []},
 {'management_levels': [],
  'titles': ['VP of Planetary Science',
   'Vice President of Planetary Science']},
 {'management_levels': [],
  'titles': ['VP of Construction Innovation',
   'Vice President of Construction Innovation']},
 {'management_levels': ['Director', 'Manager'], 'titles': []},
 {'management_levels': [], 'titles': ['Chief Archaeologist']},
 {'management_levels': [],
  'titles': ['VP of Marine Innovation',
   'Vice President of Marine Innovation']},
 {'management_levels': [], 'titles': ['Head of Marketing']},
 {'management_levels': [], 'titles': ['Chief Data Ethics Officer']},


In [30]:
# Tally the results that carry both expected keys.
count = 0
for item in ground_truths:
    # The membership test yields a bool; int() turns it into 0 or 1.
    count += int('management_levels' in item and 'titles' in item)

In [31]:
count

63

In [32]:
ground_truths_dicts = []

In [33]:
# zip() would silently drop the tail of the longer list, pairing queries with
# the wrong results, so fail loudly if the two lists ever differ in length.
assert len(queries_list) == len(ground_truths), "queries and results are misaligned"

# Start from an empty list every time so that re-running this cell cannot
# append a second copy of every record.
ground_truths_dicts = []
for query, gt in zip(queries_list, ground_truths):
    ground_truths_dicts.append({"query": query, "GT": gt})

In [34]:
ground_truths_dicts

[{'query': 'From the latest report of Genentech, tell me the key highlights of the company.',
  'GT': {'management_levels': [], 'titles': []}},
 {'query': 'Director of Human-AI Collaboration',
  'GT': {'management_levels': [],
   'titles': ['Director of Human-AI Collaboration']}},
 {'query': 'Identify junior roles in cloud services.',
  'GT': {'management_levels': ['Junior'], 'titles': []}},
 {'query': 'List all senior engineers in robotics innovation labs.',
  'GT': {'management_levels': ['Senior (Individual Contributor)'],
   'titles': ['Engineer']}},
 {'query': 'Director Mental Health Programs',
  'GT': {'management_levels': [],
   'titles': ['Director of Mental Health Programs']}},
 {'query': 'Leaders of top energy companies in the US.',
  'GT': {'management_levels': ['CSuite and President'], 'titles': []}},
 {'query': 'VP Planetary Science',
  'GT': {'management_levels': [],
   'titles': ['VP of Planetary Science',
    'Vice President of Planetary Science']}},
 {'query': 'VP Const