In [6]:
! pip install pyautogen



In [7]:
import os
import re

import autogen
import json
import pandas as pd

import time
# Wall-clock start of the run; the final cell subtracts this to report total time.
start_time = time.time()

import logging
# Configure the root logger at ERROR level.
logging.basicConfig(level=logging.ERROR)

# LLM connection settings shared by every agent in this notebook.
# The API key is read from the ANTHROPIC_API_KEY environment variable so that it
# never has to be written into the notebook. Export it before starting the
# kernel; when it is unset the key falls back to the empty string, as before.
llm_config = {
    "model": "claude-3-5-sonnet-20241022",
    "api_key": os.environ.get("ANTHROPIC_API_KEY", ""),
    "base_url": "https://api.anthropic.com/",
    "api_type": "anthropic",
}

In [8]:
# Create the agents, the GroupChat, and the GroupChatManager used by this notebook.
# gpt4_config is only an alias: it refers to the same dict object as llm_config.
gpt4_config = llm_config

# User Proxy Agent: the agent that get_seniority_output() uses to start each
# chat with the manager (user_proxy.initiate_chat(manager, ...)).
user_proxy = autogen.UserProxyAgent(
    name="User",
    human_input_mode="NEVER",  # NOTE(review): presumably disables prompting a human for input — confirm in the AutoGen docs
    max_consecutive_auto_reply=3,  # upper bound on consecutive automatic replies from this agent
    code_execution_config=False,  # no code-execution configuration is given to this agent
)

# Planner Agent: a router. Its system prompt instructs it to answer with exactly
# one keyword (SALARY, SENIORITY, or ARRANGEMENT) chosen from the wording of the
# user's instruction, and to stop when it sees 'TERMINATE'.
# The prompt below is sent to the model verbatim, so its text is left untouched.
planner = autogen.AssistantAgent(
    name="Planner",
    llm_config=llm_config,
    system_message="""
You are a task router. Based on the user's instruction, route the task as follows:

1. If the instruction contains "Extract salary information from the job ad", respond with SALARY. This task should be handled by the Salary Agent.
2. If the instruction contains "Extract seniority level from the job ad", respond with SENIORITY. This task should be handled by the Seniority Agent.
3. If the instruction contains "Extract work arrangement from the job ad", respond with ARRANGEMENT. This task should be handled by the Arrangement Agent.

Do not ask the user for clarification. Just respond with one keyword: SALARY, SENIORITY, or ARRANGEMENT.

If you receive 'TERMINATE' or the task is completed, end the conversation immediately.
"""
)

# Salary Agent: its system prompt asks for salary information in the fixed
# format [min]-[max]-[currency]-[time_unit], or 0-0-None-None when the ad has
# no salary information.
# NOTE(review): this agent is not in the `agents` list of the GroupChat built in
# this notebook; it is only referenced by process_prompt().
salary_agent = autogen.AssistantAgent(
    name="Salary",
    llm_config=llm_config,
    system_message="""You specialize in extracting salary information from job advertisements. Your output must follow the format below:
    STRICT OUTPUT RULES:
    1. If salary info exists: [min]-[max]-[currency]-[time_unit] (e.g., 32-45-AUD-HOURLY)
    2. If no salary info: 0-0-None-None
    3. NEVER add any explanations, notes or additional text
    4. NEVER respond to any other questions
    
    Example valid outputs:
    80000-100000-USD-YEARLY
    15-20-EUR-HOURLY 
    0-0-None-None"""
)

# Seniority Agent: its system prompt asks it to pick the most appropriate
# seniority level from a fixed list of labels.
# NOTE(review): the list in this prompt does not contain "unknown", although
# get_seniority_output() accepts 'unknown' as a valid label and uses it as its
# fallback value — confirm whether the prompt should offer it as an option.
seniority_agent = autogen.AssistantAgent(
    name="Seniority",
    llm_config=llm_config,
    system_message="""You specialize in extracting seniority level information from job advertisements.
    You must choose the most appropriate seniority level from the following list:
    experienced, intermediate, senior, entry level, assistant, lead, head, junior, graduate, trainee, associate, principal, apprentice, executive, manager, director, entry-level, chief, deputy, mid-level, specialist, experienced assistant, supervisor, qualified, student, board, graduate/junior, senior associate, mid-senior
    Only extract the seniority level. Do not respond to any other questions."""
)

# Arrangement Agent: its system prompt asks for a single word — OnSite, Remote,
# or Hybrid — or UNKNOWN when the work arrangement cannot be determined.
# NOTE(review): this agent is not in the `agents` list of the GroupChat built in
# this notebook; it is only referenced by process_prompt().
arrangement_agent = autogen.AssistantAgent(
    name="Arrangement",
    llm_config=llm_config,
    system_message="""You specialize in extracting work arrangement information from job advertisements. Your output must be one of the following options:
    OnSite, Remote, Hybrid
    If the work arrangement cannot be determined, return UNKNOWN.
    Do not output full sentences or explanations. The output must be a single word only!
    Only extract the work arrangement. Do not respond to any other questions."""
)

# Critic Agent: its system prompt asks it to check the format of another
# agent's output and to answer with exactly TERMINATE (valid) or INVALID.
# NOTE(review): the prompt refers to "the predefined seniority levels" but does
# not list them, so the allowed labels are not part of this agent's own
# instructions — confirm whether the list should be repeated here.
critic_agent = autogen.AssistantAgent(
    name="Critic",
    llm_config=llm_config,
    system_message="""You are responsible for verifying whether the outputs from other agents meet the required format:

1. For Salary Agent: Must be in format "number-number-currency-time_unit" or "0-0-None-None"
2. For Seniority Agent: Must be one of the predefined seniority levels
3. For Arrangement Agent: Must be OnSite, Hybrid, Remote, or UNKNOWN

If the output is valid, respond with exactly: TERMINATE

If the output is invalid, respond with exactly: INVALID

Do not add any additional commentary or instructions."""
)

# Group chat used for the seniority evaluation. Only the Seniority worker is
# included; salary_agent and arrangement_agent are defined above but are not
# members of this chat.
groupchat = autogen.GroupChat(
    agents=[user_proxy, planner, seniority_agent, critic_agent],  # listed order: User, Planner, Seniority, Critic
    messages=[],
    max_round=6,  # round limit passed to GroupChat
    speaker_selection_method="round_robin"  # speaker selection mode passed to GroupChat
)

# Manager for the group chat; it is the recipient of every initiate_chat() call in this notebook.
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

def print_groupchat_log(groupchat):
    """Print a numbered, truncated transcript of a group chat.

    Args:
        groupchat: Object exposing ``messages``, an iterable of dict-like
            messages. Each message may carry a ``name`` and a ``content`` entry.

    Each message is printed on one line as ``[index] name: content``. The
    content is stripped and cut to 200 characters, with ``...`` appended when
    it was longer. A missing name is shown as ``Unknown``.
    """
    max_chars = 200
    print("\n📜 GroupChat Log:")
    for idx, msg in enumerate(groupchat.messages, start=1):
        name = msg.get("name", "Unknown")
        # "content" can be present but None; the default of .get() only covers
        # a missing key, so normalise None (and non-string content) explicitly.
        content = str(msg.get("content") or "").strip()
        suffix = "..." if len(content) > max_chars else ""
        print(f"🔹 [{idx}] {name}: {content[:max_chars]}{suffix}")

def process_prompt(prompt_text):
    """Route a prompt through the planner, run the chosen worker, then the critic.

    Steps, each followed by a dump of the group-chat log:
      1. The planner starts a chat with the manager; the upper-cased, stripped
         chat summary is taken as the route keyword.
      2. The worker agent mapped to that keyword starts a chat with the manager
         using the same prompt; the content of the last message in that chat's
         history is the extraction.
      3. The critic starts a chat with the manager using that last message.

    Args:
        prompt_text: The instruction / job-ad text to process.

    Returns:
        ``None`` when the route is not SALARY, SENIORITY, or ARRANGEMENT;
        otherwise a dict with the keys ``route``, ``extracted`` and ``critic``.
    """
    routing_chat = planner.initiate_chat(recipient=manager, message=prompt_text)
    route = routing_chat.summary.strip().upper()

    print_groupchat_log(groupchat)

    # Keyword -> worker agent; any other keyword means there is nothing to run.
    workers_by_route = {
        "SALARY": salary_agent,
        "SENIORITY": seniority_agent,
        "ARRANGEMENT": arrangement_agent,
    }
    worker = workers_by_route.get(route)
    if worker is None:
        return None

    worker_chat = worker.initiate_chat(recipient=manager, message=prompt_text)
    last_worker_message = worker_chat.chat_history[-1]["content"]
    extraction = last_worker_message.strip()

    print_groupchat_log(groupchat)

    # The critic receives the worker's last message exactly as stored (unstripped).
    critic_chat = critic_agent.initiate_chat(recipient=manager, message=last_worker_message)

    print_groupchat_log(groupchat)

    return {
        "route": route,
        "extracted": extraction,
        "critic": critic_chat.summary.strip(),
    }

# Labels accepted as a seniority prediction (all lower-case). 'unknown' is also
# the value returned when no label can be recognised.
_SENIORITY_LEVELS = frozenset({
    'experienced', 'intermediate', 'senior', 'entry level',
    'assistant', 'lead', 'head', 'junior', 'graduate',
    'trainee', 'associate', 'principal', 'apprentice',
    'executive', 'manager', 'director', 'entry-level',
    'chief', 'deputy', 'mid-level', 'specialist',
    'experienced assistant', 'supervisor', 'qualified',
    'student', 'board', 'graduate/junior', 'senior associate',
    'mid-senior', 'unknown',
})

# One whole-word pattern per label, longest label first (name as tie-break).
# - Longest first: compound labels such as 'senior associate' or 'mid-senior'
#   win over the shorter labels they contain.
# - Fixed order: iterating the set directly would make the winner depend on
#   string hash order, which can differ from one interpreter run to the next.
# - Whole words: 'senior' must not match inside 'seniority', 'lead' inside
#   'leadership', and so on.
_SENIORITY_PATTERNS = tuple(
    (level, re.compile(r"(?<!\w)" + re.escape(level) + r"(?!\w)"))
    for level in sorted(_SENIORITY_LEVELS, key=lambda level: (-len(level), level))
)


def _match_seniority_level(text):
    """Map free text to one label from _SENIORITY_LEVELS, or None if none fits."""
    # Lower-case and collapse whitespace; None / non-string content becomes "".
    content = " ".join(str(text or "").lower().split())
    if content in _SENIORITY_LEVELS:
        return content
    for level, pattern in _SENIORITY_PATTERNS:
        if pattern.search(content):
            return level
    # Loose fallback for wording such as "entry to mid level".
    if 'entry' in content and 'level' in content:
        return 'entry-level'
    return None


def get_seniority_output(prompt_text):
    """Run the group chat on one prompt and return the predicted seniority label.

    The user proxy starts a chat with the manager (silent, at most 4 turns).
    The chat history is then scanned from the newest message backwards; for
    each message whose ``name`` is "Seniority", the content is matched against
    the known labels: an exact match first, then the longest label that appears
    as a whole word, then a loose entry/level fallback.

    Args:
        prompt_text: The instruction / job-ad text sent as the first message.

    Returns:
        A lower-case label from the known seniority levels, or ``'unknown'``
        when no Seniority message yields a recognisable label.
    """
    chat_result = user_proxy.initiate_chat(
        manager,
        message=prompt_text,
        silent=True,
        max_turns=4,
    )

    for msg in reversed(chat_result.chat_history):
        if msg.get("name") != "Seniority":
            continue
        level = _match_seniority_level(msg.get("content"))
        if level is not None:
            return level

    return 'unknown'

# Load the evaluation set. Each entry is read below via its 'prompt' key (model
# input) and its 'complete' key (expected label).
with open('seniority_test_combined.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

print("✅ y_true values from JSON (Expected):")
# Preview at most the first five expected labels. Slicing (instead of indexing
# 0..4) avoids an IndexError when the dataset has fewer than five entries.
for i, entry in enumerate(data[:5]):
    expected = entry['complete']
    print(f"Item {i+1} - y_true: {expected}")

# Run the seniority extraction on every dataset entry and collect one record
# per entry (index, expected label, predicted label, elapsed seconds).
results = []
for i, entry in enumerate(data):
    prompt = entry['prompt']
    expected = entry['complete']

    print(f"\n🔍 Processing item {i+1}:")

    try:
        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by system clock adjustments during the call.
        start_item_time = time.perf_counter()

        predicted = get_seniority_output(prompt)

        end_item_time = time.perf_counter()
        elapsed_item = end_item_time - start_item_time

        print("✅ Extracted seniority:", predicted)
        print(f"⏱️ Time used: {elapsed_item:.2f} seconds")

    except Exception as e:
        # Best effort: a failure on one item is recorded and the run continues.
        print("❌ Error:", e)
        predicted = "ERROR"
        elapsed_item = None

    results.append({
        "index": i,
        "y_true": expected,
        "y_predicted": predicted,
        "time_seconds": round(elapsed_item, 2) if elapsed_item is not None else "ERROR"
    })

# Tabulate the per-item records and echo the table under its heading.
df = pd.DataFrame(results)
print("\n📋 Results Table:", df, sep="\n")

# Persist the predictions; the DataFrame index is not written to the file.
df.to_csv("seniority_predictions.csv", index=False)

# Total wall-clock time since start_time was recorded.
end_time = time.time()
elapsed = end_time - start_time
print(f"\n⏱️ Total time consumption：{elapsed:.2f} seconds ({elapsed/60:.2f} minutes)")

✅ y_true values from JSON (Expected):
Item 1 - y_true: senior
Item 2 - y_true: experienced
Item 3 - y_true: entry level
Item 4 - y_true: senior
Item 5 - y_true: intermediate

🔍 Processing item 1:
[32m
Next speaker: Planner
[0m
[33mPlanner[0m (to chat_manager):

SENIORITY

--------------------------------------------------------------------------------
[32m
Next speaker: Seniority
[0m
[33mSeniority[0m (to chat_manager):

manager

--------------------------------------------------------------------------------
[32m
Next speaker: Critic
[0m
[33mCritic[0m (to chat_manager):

TERMINATE

--------------------------------------------------------------------------------
[31m
>>>>>>>> TERMINATING RUN (3a279d3c-8a82-48a2-ba1a-4183e641506d): Termination message condition on the GroupChatManager 'chat_manager' met[0m
[31m
>>>>>>>> TERMINATING RUN (deb6331d-53d9-4c4d-8d0d-cd558d78b440): Termination message condition on agent 'User' met[0m
✅ Extracted seniority: manager
⏱️ Time used: 