## Init For Agents Testing

In [None]:
# Setup: Load environment variables and dependencies
import os
import sys
from pathlib import Path

from jinja2 import Environment, FileSystemLoader

# Locate the project root so the local `src/` directory can be put on sys.path.
# When the kernel runs from a `notebooks/` sub-directory the root is its parent;
# otherwise it is the current working directory. This is the same rule this cell
# applies when it looks for the `.env` file.
_cwd = Path.cwd()
project_root = _cwd.parent if _cwd.name == "notebooks" else _cwd
src_path = project_root / "src"

# Prepend only once so that re-running the cell does not add duplicate entries.
src_path_str = str(src_path)
if src_path_str not in sys.path:
    sys.path.insert(0, src_path_str)
    print(f"✓ Added to sys.path: {src_path}")

from LabAgentSkill import skills_utils
from LabAgentSkill.SkillAwareAgent import SkillAwareAgent

def parse_env_text(text: str) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines from the contents of a ``.env`` file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are skipped.
    Each remaining line is split on its first ``=``; key and value are stripped
    of surrounding whitespace, and one pair of matching single or double quotes
    around the value is removed (so ``KEY="abc"`` yields ``abc``).

    Args:
        text: Full contents of the ``.env`` file.

    Returns:
        Mapping of variable names to their string values. Later duplicates of
        a key overwrite earlier ones.
    """
    values: dict[str, str] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # Drop one pair of matching surrounding quotes, as dotenv-style files allow.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        values[key.strip()] = value
    return values


# Project root: parent directory when running from `notebooks/`, else the cwd.
root_dir = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
env_path = root_dir / ".env"

# Kept under its own name so it is not confused with the Jinja `env` object
# that the rest of the notebook uses.
if env_path.exists():
    dotenv_values = parse_env_text(env_path.read_text(encoding="utf-8"))
else:
    dotenv_values = {}

# Set API key: the `.env` value wins, then the already-exported variable.
# When neither provides a non-empty key the environment is left untouched
# instead of being overwritten with an empty string.
openai_api_key = dotenv_values.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key
# Jinja environment used by later cells to load the prompt templates.
# Anchored to `root_dir` rather than the process cwd so the templates are found
# no matter which directory the kernel was started from.
env = Environment(loader=FileSystemLoader(str(root_dir / "prompts")))

# Skills hub for the FiNER task, resolved relative to the project root instead
# of a machine-specific absolute path.
skills_folder = root_dir / "skillsHub" / "skills_finer"
all_skills = skills_utils.read_all_skills_metadata(skills_folder)
for skill in all_skills:
    print(f"  - {skill['name']}: {skill['description']}")

# Model under test. `base_url=None` is passed through to SkillAwareAgent as-is;
# set it to an OpenAI-compatible endpoint to use a locally served model.
# NOTE(review): only OPENAI_API_KEY is exported by this cell — confirm that
# SkillAwareAgent can authenticate for the model selected here.
model_name = "claude-opus-4-6"
base_url = None
# model_name = "google/gemma-3-270m-it"
# model_name = "Qwen/Qwen2.5-0.5B-Instruct"
# base_url = "http://127.0.0.1:8001/v1"

✓ Added to sys.path: /home/snt/projects_lujun/LabAgentSkill/src


  from .autonotebook import tqdm as notebook_tqdm


  - XBRL-tag-classification: Classify financial text into specific XBRL tags by analyzing semantic cues, context, and category boundaries.
  - algorithmic-art: Creating algorithmic art using p5.js with seeded randomness and interactive parameter exploration. Use this when users request creating art using code, generative art, algorithmic art, flow fields, or particle systems. Create original algorithmic art rather than copying existing artists' work to avoid copyright violations.
  - brand-guidelines: Applies Anthropic's official brand colors and typography to any sort of artifact that may benefit from having Anthropic's look-and-feel. Use it when brand colors or style guidelines, visual formatting, or company design standards apply.
  - canvas-design: Create beautiful visual art in .png and .pdf documents using design philosophy. You should use this skill when the user asks to create a poster, piece of art, design, or other static piece. Create original visual designs, never copying e

## Load Data - FiNER-139 XBRL Tag Classification

In [2]:
from datasets import load_dataset
# Identifier of the dataset handed to `datasets.load_dataset`.
dataset_name = "Volavion/finer-139-numeric-sampled"

# Only the "train" split is read.
loaded_dataset = load_dataset(dataset_name, split="train")

# Convert to pandas and renumber the rows 0..N-1, dropping the previous index.
loaded_df = loaded_dataset.to_pandas().reset_index(drop=True)

## Select Skills, Run the Agent, and Record Baselines

In [3]:
import time
import json
from datetime import datetime
import pandas as pd
from tqdm import tqdm

from LabAgentSkill.evaluate import get_predicted_label, get_prediction_XBRL_TAGS

print(f"Using model: {model_name}")

# Initialize agents. All three share the same model/endpoint:
#   - agent_skill_aware:      step-1 skill selection
#   - agent_skill_exec_agent: skill discovery and the final skill-based answer
#   - agent_simple:           baselines (message trimming disabled)
agent_skill_aware = SkillAwareAgent(use_chat_history=True, use_trim_messages=True, model=model_name, base_url=base_url)
agent_skill_exec_agent = SkillAwareAgent(use_chat_history=True, use_trim_messages=True, model=model_name, base_url=base_url)
agent_simple = SkillAwareAgent(use_chat_history=True, use_trim_messages=False, model=model_name, base_url=base_url)

# Prompt templates, loaded through the Jinja environment created earlier.
p_exec_finer_temp = env.get_template('p_exec_finer.jinja')
p_skill_select_temp = env.get_template('p_skill_select.jinja')
p_skill_discov_temp = env.get_template('p_skill_discov.jinja')
p_default_system_temp = env.get_template('p_default_system.jinja')
p_skill_exec_temp = env.get_template('p_skill_exec.jinja')

# JSONL output path, resolved relative to the project root and created on
# demand. `output_dir` stays a string ending in a path separator.
output_dir = os.path.join(str(root_dir), "assets", "results", "")
os.makedirs(output_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
jsonl_path = os.path.join(output_dir, f"finer_standard_{model_name.split('/')[-1]}_{timestamp}.jsonl")
print(f"Results will be saved to: {jsonl_path}")

# The processing loop appends three records per sample (task types
# "agent_skill_based", "agent_simple" and "agent_skill_full_context"), so the
# number of finished samples is the record count divided by three. Floor
# division means a partially written sample is processed again.
RECORDS_PER_SAMPLE = 3

skill_count = 0
count_row = 0
# NOTE(review): `jsonl_path` embeds a fresh timestamp, so this branch only runs
# when `jsonl_path` is pointed at the file of an earlier run before the loop.
if os.path.exists(jsonl_path):
    df_exist = pd.read_json(jsonl_path, lines=True)
    count_row = len(df_exist) // RECORDS_PER_SAMPLE
    print(f"Resume from row: {count_row}")

# Process each sample
# Name of the skill that step 1 is expected to pick for this task.
TARGET_SKILL_NAME = "XBRL-tag-classification"
# Upper bound on skill-discovery rounds so that a model which keeps proposing
# skills cannot stall the run indefinitely.
MAX_DISCOVERY_ROUNDS = 10


def format_skill_section(skill_meta, position):
    """Render one skill as a numbered section of the execution context.

    Args:
        skill_meta: Skill dict; its ``description`` and ``body`` entries are read.
        position: 1-based number shown in the section header.

    Returns:
        The header line, the description, and the body with its first line
        removed, terminated by a blank line.
    """
    # Built outside the f-string: a backslash inside an f-string expression is
    # a SyntaxError before Python 3.12.
    body_without_first_line = "\n".join(skill_meta["body"].split("\n")[1:])
    return (
        f"SKill {position}: \n"
        f"{skill_meta['description']}\n"
        f"{body_without_first_line}\n\n"
    )


def score_response(raw_response, true_label):
    """Return ``(predicted_label, is_correct)`` for one model answer.

    ``is_correct`` is a case-insensitive substring test of ``true_label``
    against the whitespace-stripped response.
    NOTE(review): a longer tag that merely contains the gold tag also counts as
    correct, and ``predicted_label`` is not used for scoring — confirm intended.
    """
    is_correct = true_label.lower() in raw_response.strip().lower()
    predicted_label = get_prediction_XBRL_TAGS(raw_response)
    return predicted_label, is_correct


def build_result_record(
    *,
    idx,
    sentence,
    tag_name,
    tag_token,
    true_label,
    predicted_label,
    raw_response,
    correct,
    elapsed_seconds,
    chat_history_agent_exec,
    task_type,
    selected_skills_step1="",
    hit_target_skill="",
    new_skills_discovered="",
    discovery_rounds="",
    chat_history_agent_skill_select="",
):
    """Assemble one output record; key order defines the JSONL field order.

    The skill-related fields default to empty strings, which is what the
    baseline runs store. ``model`` is read from the notebook-level
    ``model_name`` and ``timestamp`` is taken when the record is built.
    """
    return {
        "index": int(idx),
        "sentence": sentence,
        "tag_name": tag_name,
        "tag_token": tag_token,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "raw_response": raw_response,
        "correct": correct,
        "selected_skills_step1": selected_skills_step1,
        "hit_target_skill": hit_target_skill,
        "new_skills_discovered": new_skills_discovered,
        "discovery_rounds": discovery_rounds,
        "elapsed_seconds": round(elapsed_seconds, 4),
        "model": model_name,
        "timestamp": datetime.now().isoformat(),
        "chat_history_agent_skill_select": chat_history_agent_skill_select,
        "chat_history_agent_exec": chat_history_agent_exec,
        "task_type": task_type,
    }


def append_record(record, path):
    """Append ``record`` as one JSON line to ``path``, creating the file if needed."""
    pd.DataFrame([record]).to_json(
        path, orient="records", lines=True, mode="a" if os.path.exists(path) else "w"
    )


# Prompts that do not depend on the sample are built once, outside the loop.
skill_context = "\n".join(
    f"- **{skill['name']}**: {skill['description']}" for skill in all_skills
)
p_skill_select = p_skill_select_temp.render(SKILL_CONTEXT=skill_context)
default_system_prompt = p_default_system_temp.render()
# Full-context baseline: the header sentence comes first, followed by every
# skill. (It used to be the `join` separator, so it appeared between skills and
# never before the first one.)
skill_context_all = (
    "The following are skills informaiton you can use as a reference for user request:\n"
    + "\n".join(
        f"- **{skill['name']}**:\n {skill['description']} **:\n {skill['body']}"
        for skill in all_skills
    )
)

# Process each sample
for idx, row in tqdm(loaded_df.iterrows(), total=len(loaded_df), desc="Processing samples"):

    # Skip samples already covered by a previous run.
    if idx < count_row:
        continue

    sample_start_time = time.time()
    sentence, tag_token = row.sentence, row.tag_token
    # Drops the first two characters of the tag
    # (presumably a BIO prefix such as "B-" — TODO confirm against the dataset).
    tag_name = row.tag_name[2:]
    true_label = tag_name
    p_exec_finer = p_exec_finer_temp.render(SENTENCE_CONTENT=sentence, NUMERIC_ENTITY=tag_token)

    # ------------------------------------------------------------------
    # Run 1: skill-based agent (select -> discover -> execute)
    # ------------------------------------------------------------------
    # Step 1: Skill Selection
    skill_select_resp = agent_skill_aware.chat(user_input=p_exec_finer, custom_system_prompt=p_skill_select)
    selected_skills = skills_utils.parse_skills_from_json_response(json_response=skill_select_resp, skills_hub_dir=skills_folder)

    # Track whether the target skill was selected in Step 1.
    selected_skill_names_step1 = [s["name"] for s in selected_skills]
    hit_target_skill = TARGET_SKILL_NAME in selected_skill_names_step1

    # Skill numbering restarts for every sample so the "SKill N" headers in the
    # prompt do not keep growing across the dataset.
    skill_count = 0
    skill_execution_context = ""
    for skill_meta in selected_skills:
        skill_count += 1
        skill_execution_context += format_skill_section(skill_meta, skill_count)

    skill_count_prev = skill_count

    # Step 2: Skill Discovery — keep asking while the last round returned
    # skills, up to MAX_DISCOVERY_ROUNDS.
    discovery_rounds = 0
    while len(selected_skills) > 0 and discovery_rounds < MAX_DISCOVERY_ROUNDS:
        p_skill_discov = p_skill_discov_temp.render(SKILL_CONTEXT=skill_execution_context)
        skill_discov_resp = agent_skill_exec_agent.chat(user_input=p_skill_discov, custom_system_prompt=default_system_prompt)
        selected_skills = skills_utils.parse_skills_from_json_response(json_response=skill_discov_resp, skills_hub_dir=skills_folder)

        for skill_meta in selected_skills:
            skill_count += 1
            skill_execution_context += format_skill_section(skill_meta, skill_count)
        discovery_rounds += 1
    new_skills_found = skill_count - skill_count_prev

    # Step 3: Query Execution with the accumulated skill context as system prompt.
    p_exec_finer_sys = p_skill_exec_temp.render(SKILL_CONTEXT=skill_execution_context)
    finer_exec_response = agent_skill_exec_agent.chat(user_input=p_exec_finer, custom_system_prompt=p_exec_finer_sys)
    message_classification = skills_utils.parse_message_from_json_response(finer_exec_response)
    predicted_label, is_correct = score_response(message_classification, true_label)
    sample_elapsed = time.time() - sample_start_time

    chat_history_agent_skill_select = agent_skill_aware.get_human_ai_message_history()
    chat_history_agent_exec = agent_skill_exec_agent.get_human_ai_message_history()

    # Build record and append to JSONL
    record = build_result_record(
        idx=idx,
        sentence=sentence,
        tag_name=tag_name,
        tag_token=tag_token,
        true_label=true_label,
        predicted_label=predicted_label,
        raw_response=message_classification,
        correct=is_correct,
        elapsed_seconds=sample_elapsed,
        chat_history_agent_exec=chat_history_agent_exec,
        task_type="agent_skill_based",
        selected_skills_step1=selected_skill_names_step1,
        hit_target_skill=hit_target_skill,
        new_skills_discovered=new_skills_found,
        discovery_rounds=discovery_rounds,
        chat_history_agent_skill_select=chat_history_agent_skill_select,
    )
    append_record(record, jsonl_path)
    agent_skill_aware.clear_history()
    agent_skill_exec_agent.clear_history()

    # ------------------------------------------------------------------
    # Run 2: plain baseline — same user prompt, default system prompt, no skills
    # ------------------------------------------------------------------
    sample_start_time = time.time()
    p_exec_finer_sys = default_system_prompt
    finer_exec_response = agent_simple.chat(user_input=p_exec_finer, custom_system_prompt=p_exec_finer_sys)
    message_classification = skills_utils.parse_message_from_json_response(finer_exec_response)
    predicted_label, is_correct = score_response(message_classification, true_label)
    sample_elapsed = time.time() - sample_start_time
    chat_history_agent_exec = agent_simple.get_human_ai_message_history()

    # Build record and append to JSONL
    record = build_result_record(
        idx=idx,
        sentence=sentence,
        tag_name=tag_name,
        tag_token=tag_token,
        true_label=true_label,
        predicted_label=predicted_label,
        raw_response=message_classification,
        correct=is_correct,
        elapsed_seconds=sample_elapsed,
        chat_history_agent_exec=chat_history_agent_exec,
        task_type="agent_simple",
    )
    append_record(record, jsonl_path)
    agent_simple.clear_history()

    # ------------------------------------------------------------------
    # Run 3: full-context baseline — every skill appended to the user prompt
    # ------------------------------------------------------------------
    sample_start_time = time.time()
    # The skill dump is passed through the NUMERIC_ENTITY template variable,
    # directly after the entity token.
    p_exec_finer_full_context = p_exec_finer_temp.render(
        SENTENCE_CONTENT=sentence, NUMERIC_ENTITY=tag_token + "\n" + skill_context_all
    )
    p_exec_finer_sys = default_system_prompt

    finer_exec_response = agent_simple.chat(user_input=p_exec_finer_full_context, custom_system_prompt=p_exec_finer_sys)
    message_classification = skills_utils.parse_message_from_json_response(finer_exec_response)
    predicted_label, is_correct = score_response(message_classification, true_label)
    sample_elapsed = time.time() - sample_start_time
    chat_history_agent_exec = agent_simple.get_human_ai_message_history()

    # Build record and append to JSONL
    record = build_result_record(
        idx=idx,
        sentence=sentence,
        tag_name=tag_name,
        tag_token=tag_token,
        true_label=true_label,
        predicted_label=predicted_label,
        raw_response=message_classification,
        correct=is_correct,
        elapsed_seconds=sample_elapsed,
        chat_history_agent_exec=chat_history_agent_exec,
        task_type="agent_skill_full_context",
    )
    append_record(record, jsonl_path)
    agent_simple.clear_history()

# Closing banner: blank line, rule, summary line, rule.
banner_rule = "=" * 60
print("\n" + banner_rule)
print(f"All {len(loaded_df)} samples processed. Results saved to: {jsonl_path}")
print(banner_rule)

Using model: gpt-4o-mini
✓ SkillAwareAgent initialized
  Model: gpt-4o-mini
  Chat History: ENABLED ✓
  Trim Messages: ENABLED ✓
✓ SkillAwareAgent initialized
  Model: gpt-4o-mini
  Chat History: ENABLED ✓
  Trim Messages: ENABLED ✓
✓ SkillAwareAgent initialized
  Model: gpt-4o-mini
  Chat History: ENABLED ✓
  Trim Messages: DISABLED ✗
Results will be saved to: /home/snt/projects_lujun/LabAgentSkill/assets/results/finer_standard_gpt-4o-mini_20260211_233411.jsonl


Processing samples:   0%|          | 0/403 [00:00<?, ?it/s]

Start Skill Selection Phase for Sample 1/403


Processing samples:   0%|          | 1/403 [00:15<1:40:48, 15.05s/it]

Start Skill Selection Phase for Sample 2/403


Processing samples:   0%|          | 2/403 [00:42<2:29:18, 22.34s/it]

Start Skill Selection Phase for Sample 3/403


Processing samples:   1%|          | 3/403 [01:04<2:27:46, 22.17s/it]

Start Skill Selection Phase for Sample 4/403


Processing samples:   1%|          | 4/403 [01:25<2:24:12, 21.69s/it]

Start Skill Selection Phase for Sample 5/403


Processing samples:   1%|          | 5/403 [01:48<2:26:40, 22.11s/it]

Start Skill Selection Phase for Sample 6/403


Processing samples:   1%|▏         | 6/403 [02:05<2:15:35, 20.49s/it]

Start Skill Selection Phase for Sample 7/403


Processing samples:   2%|▏         | 7/403 [02:15<1:53:18, 17.17s/it]

Start Skill Selection Phase for Sample 8/403


Processing samples:   2%|▏         | 8/403 [02:33<1:54:40, 17.42s/it]

Start Skill Selection Phase for Sample 9/403


Processing samples:   2%|▏         | 9/403 [02:54<2:00:27, 18.35s/it]

Start Skill Selection Phase for Sample 10/403


Processing samples:   2%|▏         | 10/403 [03:16<2:07:04, 19.40s/it]

Start Skill Selection Phase for Sample 11/403


Processing samples:   3%|▎         | 11/403 [03:30<1:57:18, 17.96s/it]

Start Skill Selection Phase for Sample 12/403


Processing samples:   3%|▎         | 12/403 [03:41<1:43:21, 15.86s/it]

Start Skill Selection Phase for Sample 13/403


Processing samples:   3%|▎         | 13/403 [04:00<1:47:48, 16.59s/it]

Start Skill Selection Phase for Sample 14/403


Processing samples:   3%|▎         | 14/403 [04:18<1:50:19, 17.02s/it]

Start Skill Selection Phase for Sample 15/403


Processing samples:   4%|▎         | 15/403 [04:35<1:49:54, 17.00s/it]

Start Skill Selection Phase for Sample 16/403


Processing samples:   4%|▍         | 16/403 [04:58<2:01:54, 18.90s/it]

Start Skill Selection Phase for Sample 17/403


Processing samples:   4%|▍         | 17/403 [05:11<1:50:39, 17.20s/it]

Start Skill Selection Phase for Sample 18/403


Processing samples:   4%|▍         | 18/403 [05:21<1:36:08, 14.98s/it]

Start Skill Selection Phase for Sample 19/403


Processing samples:   5%|▍         | 19/403 [05:32<1:28:42, 13.86s/it]

Start Skill Selection Phase for Sample 20/403


Processing samples:   5%|▍         | 20/403 [05:49<1:34:20, 14.78s/it]

Start Skill Selection Phase for Sample 21/403


Processing samples:   5%|▌         | 21/403 [06:13<1:51:27, 17.51s/it]

Start Skill Selection Phase for Sample 22/403


Processing samples:   5%|▌         | 22/403 [06:32<1:53:14, 17.83s/it]

Start Skill Selection Phase for Sample 23/403


Processing samples:   6%|▌         | 23/403 [06:59<2:12:06, 20.86s/it]

Start Skill Selection Phase for Sample 24/403


Processing samples:   6%|▌         | 24/403 [07:24<2:19:01, 22.01s/it]

Start Skill Selection Phase for Sample 25/403


Processing samples:   6%|▌         | 25/403 [07:45<2:16:00, 21.59s/it]

Start Skill Selection Phase for Sample 26/403


Processing samples:   6%|▋         | 26/403 [08:14<2:30:44, 23.99s/it]

Start Skill Selection Phase for Sample 27/403


Processing samples:   7%|▋         | 27/403 [08:44<2:41:21, 25.75s/it]

Start Skill Selection Phase for Sample 28/403


Processing samples:   7%|▋         | 28/403 [09:17<2:53:29, 27.76s/it]

Start Skill Selection Phase for Sample 29/403


Processing samples:   7%|▋         | 29/403 [09:41<2:47:28, 26.87s/it]

Start Skill Selection Phase for Sample 30/403


Processing samples:   7%|▋         | 30/403 [10:03<2:37:02, 25.26s/it]

Start Skill Selection Phase for Sample 31/403


Processing samples:   8%|▊         | 31/403 [10:37<2:52:49, 27.87s/it]

Start Skill Selection Phase for Sample 32/403


Processing samples:   8%|▊         | 32/403 [11:03<2:48:43, 27.29s/it]

Start Skill Selection Phase for Sample 33/403


Processing samples:   8%|▊         | 33/403 [11:21<2:31:30, 24.57s/it]

Start Skill Selection Phase for Sample 34/403


Processing samples:   8%|▊         | 34/403 [11:32<2:06:27, 20.56s/it]

Start Skill Selection Phase for Sample 35/403


Processing samples:   9%|▊         | 35/403 [11:43<1:47:37, 17.55s/it]

Start Skill Selection Phase for Sample 36/403


Processing samples:   9%|▉         | 36/403 [11:53<1:34:42, 15.48s/it]

Start Skill Selection Phase for Sample 37/403


Processing samples:   9%|▉         | 37/403 [12:05<1:27:18, 14.31s/it]

Start Skill Selection Phase for Sample 38/403


Processing samples:   9%|▉         | 38/403 [12:33<1:52:02, 18.42s/it]

Start Skill Selection Phase for Sample 39/403


Processing samples:  10%|▉         | 39/403 [12:53<1:54:58, 18.95s/it]

Start Skill Selection Phase for Sample 40/403


Processing samples:  10%|▉         | 40/403 [13:13<1:55:55, 19.16s/it]

Start Skill Selection Phase for Sample 41/403


Processing samples:  10%|█         | 41/403 [13:29<1:50:33, 18.33s/it]

Start Skill Selection Phase for Sample 42/403


Processing samples:  10%|█         | 42/403 [13:59<2:11:10, 21.80s/it]

Start Skill Selection Phase for Sample 43/403


Processing samples:  11%|█         | 43/403 [14:27<2:21:11, 23.53s/it]

Start Skill Selection Phase for Sample 44/403


Processing samples:  11%|█         | 44/403 [15:01<2:39:43, 26.70s/it]

Start Skill Selection Phase for Sample 45/403


Processing samples:  11%|█         | 45/403 [15:37<2:55:25, 29.40s/it]

Start Skill Selection Phase for Sample 46/403


Processing samples:  11%|█▏        | 46/403 [16:03<2:49:38, 28.51s/it]

Start Skill Selection Phase for Sample 47/403


Processing samples:  12%|█▏        | 47/403 [16:36<2:56:45, 29.79s/it]

Start Skill Selection Phase for Sample 48/403


Processing samples:  12%|█▏        | 48/403 [16:53<2:34:27, 26.11s/it]

Start Skill Selection Phase for Sample 49/403


Processing samples:  12%|█▏        | 49/403 [17:13<2:22:53, 24.22s/it]

Start Skill Selection Phase for Sample 50/403


Processing samples:  12%|█▏        | 50/403 [17:37<2:21:25, 24.04s/it]

Start Skill Selection Phase for Sample 51/403


Processing samples:  13%|█▎        | 51/403 [18:10<2:36:55, 26.75s/it]

Start Skill Selection Phase for Sample 52/403


Processing samples:  13%|█▎        | 52/403 [18:44<2:50:16, 29.11s/it]

Start Skill Selection Phase for Sample 53/403


Processing samples:  13%|█▎        | 53/403 [19:15<2:51:44, 29.44s/it]

Start Skill Selection Phase for Sample 54/403


Processing samples:  13%|█▎        | 54/403 [19:32<2:30:37, 25.90s/it]

Start Skill Selection Phase for Sample 55/403


Processing samples:  14%|█▎        | 55/403 [19:52<2:20:12, 24.17s/it]

Start Skill Selection Phase for Sample 56/403


Processing samples:  14%|█▍        | 56/403 [20:14<2:16:14, 23.56s/it]

Start Skill Selection Phase for Sample 57/403


Processing samples:  14%|█▍        | 57/403 [20:28<1:58:00, 20.46s/it]

Start Skill Selection Phase for Sample 58/403


Processing samples:  14%|█▍        | 58/403 [20:36<1:35:52, 16.67s/it]

Start Skill Selection Phase for Sample 59/403


Processing samples:  15%|█▍        | 59/403 [20:46<1:24:05, 14.67s/it]

Start Skill Selection Phase for Sample 60/403


Processing samples:  15%|█▍        | 60/403 [20:56<1:16:20, 13.35s/it]

Start Skill Selection Phase for Sample 61/403


Processing samples:  15%|█▌        | 61/403 [21:12<1:21:38, 14.32s/it]

Start Skill Selection Phase for Sample 62/403


Processing samples:  15%|█▌        | 62/403 [21:29<1:25:04, 14.97s/it]

Start Skill Selection Phase for Sample 63/403


Processing samples:  16%|█▌        | 63/403 [21:39<1:16:06, 13.43s/it]

Start Skill Selection Phase for Sample 64/403


Processing samples:  16%|█▌        | 64/403 [21:49<1:11:06, 12.58s/it]

Start Skill Selection Phase for Sample 65/403


Processing samples:  16%|█▌        | 65/403 [22:03<1:13:30, 13.05s/it]

Start Skill Selection Phase for Sample 66/403


Processing samples:  16%|█▋        | 66/403 [22:13<1:06:33, 11.85s/it]

Start Skill Selection Phase for Sample 67/403


Processing samples:  17%|█▋        | 67/403 [22:26<1:08:19, 12.20s/it]

Start Skill Selection Phase for Sample 68/403


Processing samples:  17%|█▋        | 68/403 [22:39<1:09:34, 12.46s/it]

Start Skill Selection Phase for Sample 69/403


Processing samples:  17%|█▋        | 69/403 [22:51<1:09:58, 12.57s/it]

Start Skill Selection Phase for Sample 70/403


Processing samples:  17%|█▋        | 70/403 [23:12<1:23:24, 15.03s/it]

Start Skill Selection Phase for Sample 71/403


Processing samples:  18%|█▊        | 71/403 [23:26<1:21:40, 14.76s/it]

Start Skill Selection Phase for Sample 72/403


Processing samples:  18%|█▊        | 72/403 [23:56<1:45:35, 19.14s/it]

Start Skill Selection Phase for Sample 73/403


Processing samples:  18%|█▊        | 73/403 [24:21<1:55:36, 21.02s/it]

Start Skill Selection Phase for Sample 74/403


Processing samples:  18%|█▊        | 74/403 [24:32<1:38:47, 18.02s/it]

Start Skill Selection Phase for Sample 75/403


Processing samples:  19%|█▊        | 75/403 [24:46<1:32:05, 16.85s/it]

Start Skill Selection Phase for Sample 76/403


Processing samples:  19%|█▉        | 76/403 [24:57<1:21:56, 15.04s/it]

Start Skill Selection Phase for Sample 77/403


Processing samples:  19%|█▉        | 77/403 [25:20<1:33:57, 17.29s/it]

Start Skill Selection Phase for Sample 78/403


Processing samples:  19%|█▉        | 78/403 [25:36<1:32:32, 17.08s/it]

Start Skill Selection Phase for Sample 79/403


Processing samples:  20%|█▉        | 79/403 [26:10<1:59:53, 22.20s/it]

Start Skill Selection Phase for Sample 80/403


Processing samples:  20%|█▉        | 80/403 [26:34<2:01:26, 22.56s/it]

Start Skill Selection Phase for Sample 81/403


Processing samples:  20%|██        | 81/403 [26:58<2:04:21, 23.17s/it]

Start Skill Selection Phase for Sample 82/403


Processing samples:  20%|██        | 82/403 [27:17<1:56:19, 21.74s/it]

Start Skill Selection Phase for Sample 83/403


Processing samples:  21%|██        | 83/403 [27:33<1:47:49, 20.22s/it]

Start Skill Selection Phase for Sample 84/403


Processing samples:  21%|██        | 84/403 [27:50<1:42:23, 19.26s/it]

Start Skill Selection Phase for Sample 85/403


Processing samples:  21%|██        | 85/403 [28:17<1:54:04, 21.52s/it]

Start Skill Selection Phase for Sample 86/403


Processing samples:  21%|██▏       | 86/403 [28:49<2:10:16, 24.66s/it]

Start Skill Selection Phase for Sample 87/403


Processing samples:  22%|██▏       | 87/403 [29:22<2:22:07, 26.99s/it]

Start Skill Selection Phase for Sample 88/403


Processing samples:  22%|██▏       | 88/403 [30:15<3:03:36, 34.97s/it]

Start Skill Selection Phase for Sample 89/403


Processing samples:  22%|██▏       | 89/403 [30:29<2:29:37, 28.59s/it]

Start Skill Selection Phase for Sample 90/403


Processing samples:  22%|██▏       | 90/403 [30:39<2:00:24, 23.08s/it]

Start Skill Selection Phase for Sample 91/403


Processing samples:  23%|██▎       | 91/403 [30:51<1:42:41, 19.75s/it]

Start Skill Selection Phase for Sample 92/403


Processing samples:  23%|██▎       | 92/403 [31:04<1:31:23, 17.63s/it]

Start Skill Selection Phase for Sample 93/403


Processing samples:  23%|██▎       | 93/403 [31:17<1:24:29, 16.35s/it]

Start Skill Selection Phase for Sample 94/403


Processing samples:  23%|██▎       | 94/403 [31:43<1:38:45, 19.18s/it]

Start Skill Selection Phase for Sample 95/403


Processing samples:  24%|██▎       | 95/403 [32:05<1:42:52, 20.04s/it]

Start Skill Selection Phase for Sample 96/403


Processing samples:  24%|██▍       | 96/403 [32:31<1:51:16, 21.75s/it]

Start Skill Selection Phase for Sample 97/403


Processing samples:  24%|██▍       | 97/403 [32:47<1:43:18, 20.26s/it]

Start Skill Selection Phase for Sample 98/403


Processing samples:  24%|██▍       | 98/403 [33:00<1:31:19, 17.96s/it]

Start Skill Selection Phase for Sample 99/403


Processing samples:  25%|██▍       | 99/403 [33:12<1:21:36, 16.11s/it]

Start Skill Selection Phase for Sample 100/403


Processing samples:  25%|██▍       | 100/403 [33:22<1:11:30, 14.16s/it]

Start Skill Selection Phase for Sample 101/403


Processing samples:  25%|██▌       | 101/403 [33:33<1:07:54, 13.49s/it]

Start Skill Selection Phase for Sample 102/403


Processing samples:  25%|██▌       | 102/403 [33:52<1:14:35, 14.87s/it]

Start Skill Selection Phase for Sample 103/403


Processing samples:  26%|██▌       | 103/403 [34:11<1:20:45, 16.15s/it]

Start Skill Selection Phase for Sample 104/403


Processing samples:  26%|██▌       | 104/403 [34:25<1:17:23, 15.53s/it]

Start Skill Selection Phase for Sample 105/403


Processing samples:  26%|██▌       | 105/403 [34:49<1:30:01, 18.13s/it]

Start Skill Selection Phase for Sample 106/403


Processing samples:  26%|██▋       | 106/403 [35:17<1:44:40, 21.15s/it]

Start Skill Selection Phase for Sample 107/403


Processing samples:  27%|██▋       | 107/403 [35:44<1:52:43, 22.85s/it]

Start Skill Selection Phase for Sample 108/403


Processing samples:  27%|██▋       | 108/403 [35:56<1:36:40, 19.66s/it]

Start Skill Selection Phase for Sample 109/403


Processing samples:  27%|██▋       | 109/403 [36:08<1:24:11, 17.18s/it]

Start Skill Selection Phase for Sample 110/403


Processing samples:  27%|██▋       | 110/403 [36:34<1:38:10, 20.10s/it]

Start Skill Selection Phase for Sample 111/403


Processing samples:  28%|██▊       | 111/403 [36:47<1:26:26, 17.76s/it]

Start Skill Selection Phase for Sample 112/403


Processing samples:  28%|██▊       | 112/403 [37:16<1:43:14, 21.29s/it]

Start Skill Selection Phase for Sample 113/403


Processing samples:  28%|██▊       | 113/403 [37:40<1:46:55, 22.12s/it]

Start Skill Selection Phase for Sample 114/403


Processing samples:  28%|██▊       | 114/403 [37:49<1:27:07, 18.09s/it]

Start Skill Selection Phase for Sample 115/403


Processing samples:  29%|██▊       | 115/403 [37:59<1:15:45, 15.78s/it]

Start Skill Selection Phase for Sample 116/403


Processing samples:  29%|██▉       | 116/403 [38:12<1:10:10, 14.67s/it]

Start Skill Selection Phase for Sample 117/403


Processing samples:  29%|██▉       | 117/403 [38:23<1:05:41, 13.78s/it]

Start Skill Selection Phase for Sample 118/403


Processing samples:  29%|██▉       | 118/403 [38:51<1:25:12, 17.94s/it]

Start Skill Selection Phase for Sample 119/403


Processing samples:  30%|██▉       | 119/403 [39:20<1:40:12, 21.17s/it]

Start Skill Selection Phase for Sample 120/403


Processing samples:  30%|██▉       | 120/403 [39:44<1:44:32, 22.17s/it]

Start Skill Selection Phase for Sample 121/403


Processing samples:  30%|███       | 121/403 [40:02<1:37:50, 20.82s/it]

Start Skill Selection Phase for Sample 122/403


Processing samples:  30%|███       | 122/403 [40:16<1:27:34, 18.70s/it]

Start Skill Selection Phase for Sample 123/403


Processing samples:  31%|███       | 123/403 [40:31<1:22:34, 17.69s/it]

Start Skill Selection Phase for Sample 124/403


Processing samples:  31%|███       | 124/403 [40:44<1:15:31, 16.24s/it]

Start Skill Selection Phase for Sample 125/403


Processing samples:  31%|███       | 125/403 [40:58<1:12:04, 15.56s/it]

Start Skill Selection Phase for Sample 126/403


Processing samples:  31%|███▏      | 126/403 [41:26<1:29:36, 19.41s/it]

Start Skill Selection Phase for Sample 127/403


Processing samples:  32%|███▏      | 127/403 [41:47<1:31:34, 19.91s/it]

Start Skill Selection Phase for Sample 128/403


Processing samples:  32%|███▏      | 128/403 [42:04<1:27:19, 19.05s/it]

Start Skill Selection Phase for Sample 129/403


Processing samples:  32%|███▏      | 129/403 [42:21<1:24:02, 18.40s/it]

Start Skill Selection Phase for Sample 130/403


Processing samples:  32%|███▏      | 130/403 [42:31<1:12:08, 15.85s/it]

Start Skill Selection Phase for Sample 131/403


Processing samples:  33%|███▎      | 131/403 [42:41<1:04:16, 14.18s/it]

Start Skill Selection Phase for Sample 132/403


Processing samples:  33%|███▎      | 132/403 [42:54<1:01:28, 13.61s/it]

Start Skill Selection Phase for Sample 133/403


Processing samples:  33%|███▎      | 133/403 [43:15<1:12:27, 16.10s/it]

Start Skill Selection Phase for Sample 134/403


Processing samples:  33%|███▎      | 134/403 [43:39<1:21:53, 18.27s/it]

Start Skill Selection Phase for Sample 135/403


Processing samples:  33%|███▎      | 135/403 [44:13<1:42:41, 22.99s/it]

Start Skill Selection Phase for Sample 136/403


Processing samples:  34%|███▎      | 136/403 [44:34<1:39:32, 22.37s/it]

Start Skill Selection Phase for Sample 137/403


Processing samples:  34%|███▍      | 137/403 [44:49<1:29:40, 20.23s/it]

Start Skill Selection Phase for Sample 138/403


Processing samples:  34%|███▍      | 138/403 [45:04<1:22:04, 18.58s/it]

Start Skill Selection Phase for Sample 139/403


Processing samples:  34%|███▍      | 139/403 [45:24<1:23:45, 19.04s/it]

Start Skill Selection Phase for Sample 140/403


Processing samples:  35%|███▍      | 140/403 [45:39<1:18:47, 17.98s/it]

Start Skill Selection Phase for Sample 141/403


Processing samples:  35%|███▍      | 141/403 [46:15<1:41:26, 23.23s/it]

Start Skill Selection Phase for Sample 142/403


Processing samples:  35%|███▌      | 142/403 [46:33<1:34:50, 21.80s/it]

Start Skill Selection Phase for Sample 143/403


Processing samples:  35%|███▌      | 143/403 [46:51<1:28:42, 20.47s/it]

Start Skill Selection Phase for Sample 144/403


Processing samples:  36%|███▌      | 144/403 [47:43<2:09:24, 29.98s/it]

Start Skill Selection Phase for Sample 145/403


Processing samples:  36%|███▌      | 145/403 [48:07<2:01:39, 28.29s/it]

Start Skill Selection Phase for Sample 146/403


Processing samples:  36%|███▌      | 146/403 [48:26<1:48:38, 25.36s/it]

Start Skill Selection Phase for Sample 147/403


Processing samples:  36%|███▋      | 147/403 [48:55<1:53:22, 26.57s/it]

Start Skill Selection Phase for Sample 148/403


Processing samples:  37%|███▋      | 148/403 [49:21<1:52:31, 26.48s/it]

Start Skill Selection Phase for Sample 149/403


Processing samples:  37%|███▋      | 149/403 [49:51<1:56:21, 27.49s/it]

Start Skill Selection Phase for Sample 150/403


Processing samples:  37%|███▋      | 150/403 [50:17<1:53:16, 26.86s/it]

Start Skill Selection Phase for Sample 151/403


Processing samples:  37%|███▋      | 151/403 [50:44<1:53:26, 27.01s/it]

Start Skill Selection Phase for Sample 152/403


Processing samples:  38%|███▊      | 152/403 [51:06<1:46:30, 25.46s/it]

Start Skill Selection Phase for Sample 153/403


Processing samples:  38%|███▊      | 153/403 [51:21<1:33:37, 22.47s/it]

Start Skill Selection Phase for Sample 154/403


Processing samples:  38%|███▊      | 154/403 [51:35<1:21:50, 19.72s/it]

Start Skill Selection Phase for Sample 155/403


Processing samples:  38%|███▊      | 155/403 [51:47<1:12:31, 17.55s/it]

Start Skill Selection Phase for Sample 156/403


Processing samples:  39%|███▊      | 156/403 [52:02<1:09:23, 16.86s/it]

Start Skill Selection Phase for Sample 157/403


Processing samples:  39%|███▉      | 157/403 [52:23<1:14:15, 18.11s/it]

Start Skill Selection Phase for Sample 158/403


Processing samples:  39%|███▉      | 158/403 [52:45<1:18:11, 19.15s/it]

Start Skill Selection Phase for Sample 159/403


Processing samples:  39%|███▉      | 159/403 [52:59<1:12:18, 17.78s/it]

Start Skill Selection Phase for Sample 160/403


Processing samples:  40%|███▉      | 160/403 [53:35<1:34:03, 23.22s/it]

Start Skill Selection Phase for Sample 161/403


Processing samples:  40%|███▉      | 161/403 [53:53<1:27:18, 21.65s/it]

Start Skill Selection Phase for Sample 162/403


Processing samples:  40%|████      | 162/403 [54:12<1:23:23, 20.76s/it]

Start Skill Selection Phase for Sample 163/403


Processing samples:  40%|████      | 163/403 [54:37<1:28:00, 22.00s/it]

Start Skill Selection Phase for Sample 164/403


Processing samples:  41%|████      | 164/403 [54:56<1:23:45, 21.03s/it]

Start Skill Selection Phase for Sample 165/403


Processing samples:  41%|████      | 165/403 [55:08<1:13:08, 18.44s/it]

Start Skill Selection Phase for Sample 166/403


Processing samples:  41%|████      | 166/403 [55:36<1:24:16, 21.34s/it]

Start Skill Selection Phase for Sample 167/403


Processing samples:  41%|████▏     | 167/403 [55:52<1:17:27, 19.69s/it]

Start Skill Selection Phase for Sample 168/403


Processing samples:  42%|████▏     | 168/403 [56:13<1:18:19, 20.00s/it]

Start Skill Selection Phase for Sample 169/403


Processing samples:  42%|████▏     | 169/403 [56:28<1:12:24, 18.57s/it]

Start Skill Selection Phase for Sample 170/403


Processing samples:  42%|████▏     | 170/403 [56:53<1:19:49, 20.56s/it]

Start Skill Selection Phase for Sample 171/403


Processing samples:  42%|████▏     | 171/403 [57:25<1:32:38, 23.96s/it]

Start Skill Selection Phase for Sample 172/403


Processing samples:  43%|████▎     | 172/403 [57:59<1:43:56, 27.00s/it]

Start Skill Selection Phase for Sample 173/403


Processing samples:  43%|████▎     | 173/403 [58:34<1:52:51, 29.44s/it]

Start Skill Selection Phase for Sample 174/403


Processing samples:  43%|████▎     | 174/403 [59:05<1:54:13, 29.93s/it]

Start Skill Selection Phase for Sample 175/403


Processing samples:  43%|████▎     | 175/403 [59:36<1:54:54, 30.24s/it]

Start Skill Selection Phase for Sample 176/403


Processing samples:  44%|████▎     | 176/403 [1:00:14<2:02:22, 32.35s/it]

Start Skill Selection Phase for Sample 177/403


Processing samples:  44%|████▍     | 177/403 [1:00:39<1:53:33, 30.15s/it]

Start Skill Selection Phase for Sample 178/403


Processing samples:  44%|████▍     | 178/403 [1:00:48<1:29:08, 23.77s/it]

Start Skill Selection Phase for Sample 179/403


Processing samples:  44%|████▍     | 179/403 [1:00:59<1:15:17, 20.17s/it]

Start Skill Selection Phase for Sample 180/403


Processing samples:  45%|████▍     | 180/403 [1:01:12<1:06:14, 17.82s/it]

Start Skill Selection Phase for Sample 181/403


Processing samples:  45%|████▍     | 181/403 [1:01:24<59:27, 16.07s/it]  

Start Skill Selection Phase for Sample 182/403


Processing samples:  45%|████▌     | 182/403 [1:01:37<55:48, 15.15s/it]

Start Skill Selection Phase for Sample 183/403


Processing samples:  45%|████▌     | 183/403 [1:02:01<1:05:46, 17.94s/it]

Start Skill Selection Phase for Sample 184/403


Processing samples:  46%|████▌     | 184/403 [1:02:34<1:21:35, 22.36s/it]

Start Skill Selection Phase for Sample 185/403


Processing samples:  46%|████▌     | 185/403 [1:02:50<1:14:48, 20.59s/it]

Start Skill Selection Phase for Sample 186/403


Processing samples:  46%|████▌     | 186/403 [1:02:58<1:00:56, 16.85s/it]

Start Skill Selection Phase for Sample 187/403


Processing samples:  46%|████▋     | 187/403 [1:03:09<54:21, 15.10s/it]  

Start Skill Selection Phase for Sample 188/403


Processing samples:  47%|████▋     | 188/403 [1:03:19<47:56, 13.38s/it]

Start Skill Selection Phase for Sample 189/403


Processing samples:  47%|████▋     | 189/403 [1:03:43<59:11, 16.60s/it]

Start Skill Selection Phase for Sample 190/403


Processing samples:  47%|████▋     | 190/403 [1:03:54<53:25, 15.05s/it]

Start Skill Selection Phase for Sample 191/403


Processing samples:  47%|████▋     | 191/403 [1:04:26<1:10:49, 20.05s/it]

Start Skill Selection Phase for Sample 192/403


Processing samples:  48%|████▊     | 192/403 [1:04:41<1:05:26, 18.61s/it]

Start Skill Selection Phase for Sample 193/403


Processing samples:  48%|████▊     | 193/403 [1:04:55<59:32, 17.01s/it]  

Start Skill Selection Phase for Sample 194/403


Processing samples:  48%|████▊     | 194/403 [1:05:05<52:56, 15.20s/it]

Start Skill Selection Phase for Sample 195/403


Processing samples:  48%|████▊     | 195/403 [1:05:16<47:19, 13.65s/it]

Start Skill Selection Phase for Sample 196/403


Processing samples:  49%|████▊     | 196/403 [1:05:26<44:13, 12.82s/it]

Start Skill Selection Phase for Sample 197/403


Processing samples:  49%|████▉     | 197/403 [1:05:48<52:33, 15.31s/it]

Start Skill Selection Phase for Sample 198/403


Processing samples:  49%|████▉     | 198/403 [1:05:59<48:10, 14.10s/it]

Start Skill Selection Phase for Sample 199/403


Processing samples:  49%|████▉     | 199/403 [1:06:20<55:00, 16.18s/it]

Start Skill Selection Phase for Sample 200/403


Processing samples:  50%|████▉     | 200/403 [1:06:53<1:11:58, 21.27s/it]

Start Skill Selection Phase for Sample 201/403


Processing samples:  50%|████▉     | 201/403 [1:07:06<1:03:08, 18.76s/it]

Start Skill Selection Phase for Sample 202/403


Processing samples:  50%|█████     | 202/403 [1:07:18<56:19, 16.81s/it]  

Start Skill Selection Phase for Sample 203/403


Processing samples:  50%|█████     | 203/403 [1:07:32<53:31, 16.06s/it]

Start Skill Selection Phase for Sample 204/403


Processing samples:  51%|█████     | 204/403 [1:07:46<50:22, 15.19s/it]

Start Skill Selection Phase for Sample 205/403


Processing samples:  51%|█████     | 205/403 [1:08:11<1:00:03, 18.20s/it]

Start Skill Selection Phase for Sample 206/403


Processing samples:  51%|█████     | 206/403 [1:08:23<54:12, 16.51s/it]  

Start Skill Selection Phase for Sample 207/403


Processing samples:  51%|█████▏    | 207/403 [1:08:48<1:01:53, 18.94s/it]

Start Skill Selection Phase for Sample 208/403


Processing samples:  52%|█████▏    | 208/403 [1:09:21<1:14:54, 23.05s/it]

Start Skill Selection Phase for Sample 209/403


Processing samples:  52%|█████▏    | 209/403 [1:09:31<1:02:36, 19.36s/it]

Start Skill Selection Phase for Sample 210/403


Processing samples:  52%|█████▏    | 210/403 [1:09:39<51:20, 15.96s/it]  

Start Skill Selection Phase for Sample 211/403


Processing samples:  52%|█████▏    | 211/403 [1:09:50<45:50, 14.33s/it]

Start Skill Selection Phase for Sample 212/403


Processing samples:  53%|█████▎    | 212/403 [1:09:57<38:50, 12.20s/it]

Start Skill Selection Phase for Sample 213/403


Processing samples:  53%|█████▎    | 213/403 [1:10:17<45:28, 14.36s/it]

Start Skill Selection Phase for Sample 214/403


Processing samples:  53%|█████▎    | 214/403 [1:10:38<52:05, 16.54s/it]

Start Skill Selection Phase for Sample 215/403


Processing samples:  53%|█████▎    | 215/403 [1:10:55<52:15, 16.68s/it]

Start Skill Selection Phase for Sample 216/403


Processing samples:  54%|█████▎    | 216/403 [1:11:09<49:28, 15.88s/it]

Start Skill Selection Phase for Sample 217/403


Processing samples:  54%|█████▍    | 217/403 [1:11:22<46:12, 14.90s/it]

Start Skill Selection Phase for Sample 218/403


Processing samples:  54%|█████▍    | 218/403 [1:11:33<42:19, 13.73s/it]

Start Skill Selection Phase for Sample 219/403


Processing samples:  54%|█████▍    | 219/403 [1:11:45<40:57, 13.36s/it]

Start Skill Selection Phase for Sample 220/403


Processing samples:  55%|█████▍    | 220/403 [1:11:59<40:35, 13.31s/it]

Start Skill Selection Phase for Sample 221/403


Processing samples:  55%|█████▍    | 221/403 [1:12:24<51:28, 16.97s/it]

Start Skill Selection Phase for Sample 222/403


Processing samples:  55%|█████▌    | 222/403 [1:12:54<1:03:08, 20.93s/it]

Start Skill Selection Phase for Sample 223/403


Processing samples:  55%|█████▌    | 223/403 [1:13:20<1:07:10, 22.39s/it]

Start Skill Selection Phase for Sample 224/403


Processing samples:  56%|█████▌    | 224/403 [1:13:37<1:02:07, 20.82s/it]

Start Skill Selection Phase for Sample 225/403


Processing samples:  56%|█████▌    | 225/403 [1:13:53<57:17, 19.31s/it]  

Start Skill Selection Phase for Sample 226/403


Processing samples:  56%|█████▌    | 226/403 [1:14:01<47:25, 16.08s/it]

Start Skill Selection Phase for Sample 227/403


Processing samples:  56%|█████▋    | 227/403 [1:14:20<49:33, 16.89s/it]

Start Skill Selection Phase for Sample 228/403


Processing samples:  57%|█████▋    | 228/403 [1:14:29<41:45, 14.32s/it]

Start Skill Selection Phase for Sample 229/403


Processing samples:  57%|█████▋    | 229/403 [1:14:39<38:09, 13.16s/it]

Start Skill Selection Phase for Sample 230/403


Processing samples:  57%|█████▋    | 230/403 [1:14:54<39:20, 13.64s/it]

Start Skill Selection Phase for Sample 231/403


Processing samples:  57%|█████▋    | 231/403 [1:15:14<44:27, 15.51s/it]

Start Skill Selection Phase for Sample 232/403


Processing samples:  58%|█████▊    | 232/403 [1:15:37<51:02, 17.91s/it]

Start Skill Selection Phase for Sample 233/403


Processing samples:  58%|█████▊    | 233/403 [1:15:50<46:40, 16.47s/it]

Start Skill Selection Phase for Sample 234/403


Processing samples:  58%|█████▊    | 234/403 [1:16:03<43:22, 15.40s/it]

Start Skill Selection Phase for Sample 235/403


Processing samples:  58%|█████▊    | 235/403 [1:16:15<39:56, 14.26s/it]

Start Skill Selection Phase for Sample 236/403


Processing samples:  59%|█████▊    | 236/403 [1:16:27<37:44, 13.56s/it]

Start Skill Selection Phase for Sample 237/403


Processing samples:  59%|█████▉    | 237/403 [1:16:49<44:42, 16.16s/it]

Start Skill Selection Phase for Sample 238/403


Processing samples:  59%|█████▉    | 238/403 [1:17:16<53:17, 19.38s/it]

Start Skill Selection Phase for Sample 239/403


Processing samples:  59%|█████▉    | 239/403 [1:17:32<50:08, 18.34s/it]

Start Skill Selection Phase for Sample 240/403


Processing samples:  60%|█████▉    | 240/403 [1:17:46<46:43, 17.20s/it]

Start Skill Selection Phase for Sample 241/403


Processing samples:  60%|█████▉    | 241/403 [1:17:58<42:18, 15.67s/it]

Start Skill Selection Phase for Sample 242/403


Processing samples:  60%|██████    | 242/403 [1:18:09<37:55, 14.13s/it]

Start Skill Selection Phase for Sample 243/403


Processing samples:  60%|██████    | 243/403 [1:18:18<33:36, 12.60s/it]

Start Skill Selection Phase for Sample 244/403


Processing samples:  61%|██████    | 244/403 [1:18:36<37:22, 14.11s/it]

Start Skill Selection Phase for Sample 245/403


Processing samples:  61%|██████    | 245/403 [1:18:54<40:37, 15.43s/it]

Start Skill Selection Phase for Sample 246/403


Processing samples:  61%|██████    | 246/403 [1:19:22<50:20, 19.24s/it]

Start Skill Selection Phase for Sample 247/403


Processing samples:  61%|██████▏   | 247/403 [1:19:43<51:21, 19.76s/it]

Start Skill Selection Phase for Sample 248/403


Processing samples:  62%|██████▏   | 248/403 [1:20:04<51:35, 19.97s/it]

Start Skill Selection Phase for Sample 249/403


Processing samples:  62%|██████▏   | 249/403 [1:20:16<45:16, 17.64s/it]

Start Skill Selection Phase for Sample 250/403


Processing samples:  62%|██████▏   | 250/403 [1:20:22<36:23, 14.27s/it]

Start Skill Selection Phase for Sample 251/403


Processing samples:  62%|██████▏   | 251/403 [1:20:28<30:00, 11.84s/it]

Start Skill Selection Phase for Sample 252/403


Processing samples:  63%|██████▎   | 252/403 [1:20:37<27:22, 10.88s/it]

Start Skill Selection Phase for Sample 253/403


Processing samples:  63%|██████▎   | 253/403 [1:20:46<25:59, 10.40s/it]

Start Skill Selection Phase for Sample 254/403


Processing samples:  63%|██████▎   | 254/403 [1:20:59<27:22, 11.03s/it]

Start Skill Selection Phase for Sample 255/403


Processing samples:  63%|██████▎   | 255/403 [1:21:16<31:33, 12.80s/it]

Start Skill Selection Phase for Sample 256/403


Processing samples:  64%|██████▎   | 256/403 [1:21:28<30:44, 12.55s/it]

Start Skill Selection Phase for Sample 257/403


Processing samples:  64%|██████▍   | 257/403 [1:21:38<28:36, 11.76s/it]

Start Skill Selection Phase for Sample 258/403


Processing samples:  64%|██████▍   | 258/403 [1:21:47<26:58, 11.16s/it]

Start Skill Selection Phase for Sample 259/403


Processing samples:  64%|██████▍   | 259/403 [1:22:03<29:41, 12.37s/it]

Start Skill Selection Phase for Sample 260/403


Processing samples:  65%|██████▍   | 260/403 [1:22:11<26:55, 11.30s/it]

Start Skill Selection Phase for Sample 261/403


Processing samples:  65%|██████▍   | 261/403 [1:22:24<27:33, 11.64s/it]

Start Skill Selection Phase for Sample 262/403


Processing samples:  65%|██████▌   | 262/403 [1:22:36<28:02, 11.93s/it]

Start Skill Selection Phase for Sample 263/403


Processing samples:  65%|██████▌   | 263/403 [1:22:50<28:49, 12.35s/it]

Start Skill Selection Phase for Sample 264/403


Processing samples:  66%|██████▌   | 264/403 [1:23:02<28:33, 12.32s/it]

Start Skill Selection Phase for Sample 265/403


Processing samples:  66%|██████▌   | 265/403 [1:23:14<27:56, 12.15s/it]

Start Skill Selection Phase for Sample 266/403


Processing samples:  66%|██████▌   | 266/403 [1:23:23<25:58, 11.37s/it]

Start Skill Selection Phase for Sample 267/403


Processing samples:  66%|██████▋   | 267/403 [1:23:32<24:07, 10.64s/it]

Start Skill Selection Phase for Sample 268/403


Processing samples:  67%|██████▋   | 268/403 [1:23:42<23:37, 10.50s/it]

Start Skill Selection Phase for Sample 269/403


Processing samples:  67%|██████▋   | 269/403 [1:24:05<31:15, 14.00s/it]

Start Skill Selection Phase for Sample 270/403


Processing samples:  67%|██████▋   | 270/403 [1:24:16<29:13, 13.18s/it]

Start Skill Selection Phase for Sample 271/403


Processing samples:  67%|██████▋   | 271/403 [1:24:26<27:09, 12.35s/it]

Start Skill Selection Phase for Sample 272/403


Processing samples:  67%|██████▋   | 272/403 [1:24:38<26:44, 12.25s/it]

Start Skill Selection Phase for Sample 273/403


Processing samples:  68%|██████▊   | 273/403 [1:24:49<25:32, 11.79s/it]

Start Skill Selection Phase for Sample 274/403


Processing samples:  68%|██████▊   | 274/403 [1:24:59<24:22, 11.34s/it]

Start Skill Selection Phase for Sample 275/403


Processing samples:  68%|██████▊   | 275/403 [1:25:12<24:43, 11.59s/it]

Start Skill Selection Phase for Sample 276/403


Processing samples:  68%|██████▊   | 276/403 [1:25:22<23:47, 11.24s/it]

Start Skill Selection Phase for Sample 277/403


Processing samples:  69%|██████▊   | 277/403 [1:25:34<23:58, 11.41s/it]

Start Skill Selection Phase for Sample 278/403


Processing samples:  69%|██████▉   | 278/403 [1:25:53<28:36, 13.73s/it]

Start Skill Selection Phase for Sample 279/403


Processing samples:  69%|██████▉   | 279/403 [1:26:34<45:25, 21.98s/it]

Start Skill Selection Phase for Sample 280/403


Processing samples:  69%|██████▉   | 280/403 [1:27:05<50:33, 24.66s/it]

Start Skill Selection Phase for Sample 281/403


Processing samples:  70%|██████▉   | 281/403 [1:27:29<49:42, 24.45s/it]

Start Skill Selection Phase for Sample 282/403


Processing samples:  70%|██████▉   | 282/403 [1:27:50<47:03, 23.33s/it]

Start Skill Selection Phase for Sample 283/403


Processing samples:  70%|███████   | 283/403 [1:28:21<51:33, 25.78s/it]

Start Skill Selection Phase for Sample 284/403


Processing samples:  70%|███████   | 284/403 [1:28:48<51:56, 26.19s/it]

Start Skill Selection Phase for Sample 285/403


Processing samples:  71%|███████   | 285/403 [1:29:19<54:10, 27.55s/it]

Start Skill Selection Phase for Sample 286/403


Processing samples:  71%|███████   | 286/403 [1:29:45<52:46, 27.06s/it]

Start Skill Selection Phase for Sample 287/403


Processing samples:  71%|███████   | 287/403 [1:29:58<44:05, 22.80s/it]

Start Skill Selection Phase for Sample 288/403


Processing samples:  71%|███████▏  | 288/403 [1:30:26<46:54, 24.47s/it]

Start Skill Selection Phase for Sample 289/403


Processing samples:  72%|███████▏  | 289/403 [1:30:38<39:04, 20.57s/it]

Start Skill Selection Phase for Sample 290/403


Processing samples:  72%|███████▏  | 290/403 [1:30:51<34:41, 18.42s/it]

Start Skill Selection Phase for Sample 291/403


Processing samples:  72%|███████▏  | 291/403 [1:31:01<29:37, 15.87s/it]

Start Skill Selection Phase for Sample 292/403


Processing samples:  72%|███████▏  | 292/403 [1:31:12<26:34, 14.37s/it]

Start Skill Selection Phase for Sample 293/403


Processing samples:  73%|███████▎  | 293/403 [1:31:21<23:37, 12.89s/it]

Start Skill Selection Phase for Sample 294/403


Processing samples:  73%|███████▎  | 294/403 [1:31:46<29:49, 16.42s/it]

Start Skill Selection Phase for Sample 295/403


Processing samples:  73%|███████▎  | 295/403 [1:32:13<35:32, 19.74s/it]

Start Skill Selection Phase for Sample 296/403


Processing samples:  73%|███████▎  | 296/403 [1:32:27<32:03, 17.98s/it]

Start Skill Selection Phase for Sample 297/403


Processing samples:  74%|███████▎  | 297/403 [1:32:48<33:16, 18.84s/it]

Start Skill Selection Phase for Sample 298/403


Processing samples:  74%|███████▍  | 298/403 [1:33:02<30:15, 17.29s/it]

Start Skill Selection Phase for Sample 299/403


Processing samples:  74%|███████▍  | 299/403 [1:33:18<29:17, 16.90s/it]

Start Skill Selection Phase for Sample 300/403


Processing samples:  74%|███████▍  | 300/403 [1:33:31<26:56, 15.69s/it]

Start Skill Selection Phase for Sample 301/403


Processing samples:  75%|███████▍  | 301/403 [1:33:43<24:44, 14.56s/it]

Start Skill Selection Phase for Sample 302/403


Processing samples:  75%|███████▍  | 302/403 [1:34:06<28:50, 17.13s/it]

Start Skill Selection Phase for Sample 303/403


Processing samples:  75%|███████▌  | 303/403 [1:34:25<29:39, 17.80s/it]

Start Skill Selection Phase for Sample 304/403


Processing samples:  75%|███████▌  | 304/403 [1:34:36<26:05, 15.81s/it]

Start Skill Selection Phase for Sample 305/403


Processing samples:  76%|███████▌  | 305/403 [1:34:48<23:42, 14.51s/it]

Start Skill Selection Phase for Sample 306/403


Processing samples:  76%|███████▌  | 306/403 [1:35:02<23:04, 14.27s/it]

Start Skill Selection Phase for Sample 307/403


Processing samples:  76%|███████▌  | 307/403 [1:35:14<21:54, 13.69s/it]

Start Skill Selection Phase for Sample 308/403


Processing samples:  76%|███████▋  | 308/403 [1:35:27<21:38, 13.67s/it]

Start Skill Selection Phase for Sample 309/403


Processing samples:  77%|███████▋  | 309/403 [1:35:47<24:17, 15.51s/it]

Start Skill Selection Phase for Sample 310/403


Processing samples:  77%|███████▋  | 310/403 [1:36:20<32:13, 20.79s/it]

Start Skill Selection Phase for Sample 311/403


Processing samples:  77%|███████▋  | 311/403 [1:36:51<36:18, 23.68s/it]

Start Skill Selection Phase for Sample 312/403


Processing samples:  77%|███████▋  | 312/403 [1:37:12<34:51, 22.99s/it]

Start Skill Selection Phase for Sample 313/403


Processing samples:  78%|███████▊  | 313/403 [1:37:28<31:24, 20.94s/it]

Start Skill Selection Phase for Sample 314/403


Processing samples:  78%|███████▊  | 314/403 [1:37:39<26:37, 17.95s/it]

Start Skill Selection Phase for Sample 315/403


Processing samples:  78%|███████▊  | 315/403 [1:37:52<23:54, 16.30s/it]

Start Skill Selection Phase for Sample 316/403


Processing samples:  78%|███████▊  | 316/403 [1:38:03<21:36, 14.90s/it]

Start Skill Selection Phase for Sample 317/403


Processing samples:  79%|███████▊  | 317/403 [1:38:17<20:42, 14.45s/it]

Start Skill Selection Phase for Sample 318/403


Processing samples:  79%|███████▉  | 318/403 [1:38:36<22:22, 15.79s/it]

Start Skill Selection Phase for Sample 319/403


Processing samples:  79%|███████▉  | 319/403 [1:38:53<22:53, 16.35s/it]

Start Skill Selection Phase for Sample 320/403


Processing samples:  79%|███████▉  | 320/403 [1:39:07<21:28, 15.53s/it]

Start Skill Selection Phase for Sample 321/403


Processing samples:  80%|███████▉  | 321/403 [1:39:36<26:40, 19.52s/it]

Start Skill Selection Phase for Sample 322/403


Processing samples:  80%|███████▉  | 322/403 [1:39:47<23:10, 17.17s/it]

Start Skill Selection Phase for Sample 323/403


Processing samples:  80%|████████  | 323/403 [1:39:59<20:49, 15.62s/it]

Start Skill Selection Phase for Sample 324/403


Processing samples:  80%|████████  | 324/403 [1:40:14<20:17, 15.41s/it]

Start Skill Selection Phase for Sample 325/403


Processing samples:  81%|████████  | 325/403 [1:40:35<22:11, 17.08s/it]

Start Skill Selection Phase for Sample 326/403


Processing samples:  81%|████████  | 326/403 [1:40:51<21:26, 16.71s/it]

Start Skill Selection Phase for Sample 327/403


Processing samples:  81%|████████  | 327/403 [1:41:07<20:43, 16.37s/it]

Start Skill Selection Phase for Sample 328/403


Processing samples:  81%|████████▏ | 328/403 [1:41:28<22:14, 17.80s/it]

Start Skill Selection Phase for Sample 329/403


Processing samples:  82%|████████▏ | 329/403 [1:41:49<23:14, 18.84s/it]

Start Skill Selection Phase for Sample 330/403


Processing samples:  82%|████████▏ | 330/403 [1:42:00<20:08, 16.56s/it]

Start Skill Selection Phase for Sample 331/403


Processing samples:  82%|████████▏ | 331/403 [1:42:18<20:17, 16.91s/it]

Start Skill Selection Phase for Sample 332/403


Processing samples:  82%|████████▏ | 332/403 [1:42:33<19:19, 16.33s/it]

Start Skill Selection Phase for Sample 333/403


Processing samples:  83%|████████▎ | 333/403 [1:43:04<24:06, 20.67s/it]

Start Skill Selection Phase for Sample 334/403


Processing samples:  83%|████████▎ | 334/403 [1:43:19<21:51, 19.00s/it]

Start Skill Selection Phase for Sample 335/403


Processing samples:  83%|████████▎ | 335/403 [1:43:35<20:39, 18.23s/it]

Start Skill Selection Phase for Sample 336/403


Processing samples:  83%|████████▎ | 336/403 [1:43:52<19:53, 17.82s/it]

Start Skill Selection Phase for Sample 337/403


Processing samples:  84%|████████▎ | 337/403 [1:44:10<19:35, 17.81s/it]

Start Skill Selection Phase for Sample 338/403


Processing samples:  84%|████████▍ | 338/403 [1:44:25<18:14, 16.85s/it]

Start Skill Selection Phase for Sample 339/403


Processing samples:  84%|████████▍ | 339/403 [1:44:47<19:46, 18.54s/it]

Start Skill Selection Phase for Sample 340/403


Processing samples:  84%|████████▍ | 340/403 [1:45:18<23:14, 22.13s/it]

Start Skill Selection Phase for Sample 341/403


Processing samples:  85%|████████▍ | 341/403 [1:45:46<24:48, 24.01s/it]

Start Skill Selection Phase for Sample 342/403


Processing samples:  85%|████████▍ | 342/403 [1:46:17<26:22, 25.95s/it]

Start Skill Selection Phase for Sample 343/403


Processing samples:  85%|████████▌ | 343/403 [1:46:39<24:52, 24.87s/it]

Start Skill Selection Phase for Sample 344/403


Processing samples:  85%|████████▌ | 344/403 [1:46:52<20:51, 21.22s/it]

Start Skill Selection Phase for Sample 345/403


Processing samples:  86%|████████▌ | 345/403 [1:47:11<19:49, 20.52s/it]

Start Skill Selection Phase for Sample 346/403


Processing samples:  86%|████████▌ | 346/403 [1:47:38<21:25, 22.55s/it]

Start Skill Selection Phase for Sample 347/403


Processing samples:  86%|████████▌ | 347/403 [1:48:02<21:28, 23.01s/it]

Start Skill Selection Phase for Sample 348/403


Processing samples:  86%|████████▋ | 348/403 [1:48:23<20:36, 22.47s/it]

Start Skill Selection Phase for Sample 349/403


Processing samples:  87%|████████▋ | 349/403 [1:48:45<19:58, 22.19s/it]

Start Skill Selection Phase for Sample 350/403


Processing samples:  87%|████████▋ | 350/403 [1:48:57<16:52, 19.10s/it]

Start Skill Selection Phase for Sample 351/403


Processing samples:  87%|████████▋ | 351/403 [1:49:10<15:09, 17.49s/it]

Start Skill Selection Phase for Sample 352/403


Processing samples:  87%|████████▋ | 352/403 [1:49:22<13:25, 15.79s/it]

Start Skill Selection Phase for Sample 353/403


Processing samples:  88%|████████▊ | 353/403 [1:49:36<12:35, 15.11s/it]

Start Skill Selection Phase for Sample 354/403


Processing samples:  88%|████████▊ | 354/403 [1:49:43<10:34, 12.95s/it]

Start Skill Selection Phase for Sample 355/403


Processing samples:  88%|████████▊ | 355/403 [1:49:53<09:32, 11.93s/it]

Start Skill Selection Phase for Sample 356/403


Processing samples:  88%|████████▊ | 356/403 [1:50:13<11:09, 14.24s/it]

Start Skill Selection Phase for Sample 357/403


Processing samples:  89%|████████▊ | 357/403 [1:50:32<11:58, 15.63s/it]

Start Skill Selection Phase for Sample 358/403


Processing samples:  89%|████████▉ | 358/403 [1:50:55<13:24, 17.87s/it]

Start Skill Selection Phase for Sample 359/403


Processing samples:  89%|████████▉ | 359/403 [1:51:16<13:49, 18.86s/it]

Start Skill Selection Phase for Sample 360/403


Processing samples:  89%|████████▉ | 360/403 [1:51:32<12:56, 18.06s/it]

Start Skill Selection Phase for Sample 361/403


Processing samples:  90%|████████▉ | 361/403 [1:51:45<11:28, 16.40s/it]

Start Skill Selection Phase for Sample 362/403


Processing samples:  90%|████████▉ | 362/403 [1:51:54<09:42, 14.22s/it]

Start Skill Selection Phase for Sample 363/403


Processing samples:  90%|█████████ | 363/403 [1:52:03<08:31, 12.78s/it]

Start Skill Selection Phase for Sample 364/403


Processing samples:  90%|█████████ | 364/403 [1:52:11<07:20, 11.30s/it]

Start Skill Selection Phase for Sample 365/403


Processing samples:  91%|█████████ | 365/403 [1:52:30<08:39, 13.67s/it]

Start Skill Selection Phase for Sample 366/403


Processing samples:  91%|█████████ | 366/403 [1:52:41<07:51, 12.74s/it]

Start Skill Selection Phase for Sample 367/403


Processing samples:  91%|█████████ | 367/403 [1:52:57<08:21, 13.93s/it]

Start Skill Selection Phase for Sample 368/403


Processing samples:  91%|█████████▏| 368/403 [1:53:12<08:18, 14.26s/it]

Start Skill Selection Phase for Sample 369/403


Processing samples:  92%|█████████▏| 369/403 [1:53:25<07:50, 13.84s/it]

Start Skill Selection Phase for Sample 370/403


Processing samples:  92%|█████████▏| 370/403 [1:53:35<06:54, 12.55s/it]

Start Skill Selection Phase for Sample 371/403


Processing samples:  92%|█████████▏| 371/403 [1:54:05<09:26, 17.70s/it]

Start Skill Selection Phase for Sample 372/403


Processing samples:  92%|█████████▏| 372/403 [1:54:17<08:15, 15.99s/it]

Start Skill Selection Phase for Sample 373/403


Processing samples:  93%|█████████▎| 373/403 [1:54:27<07:08, 14.29s/it]

Start Skill Selection Phase for Sample 374/403


Processing samples:  93%|█████████▎| 374/403 [1:54:39<06:39, 13.77s/it]

Start Skill Selection Phase for Sample 375/403


Processing samples:  93%|█████████▎| 375/403 [1:55:01<07:32, 16.14s/it]

Start Skill Selection Phase for Sample 376/403


Processing samples:  93%|█████████▎| 376/403 [1:55:27<08:35, 19.11s/it]

Start Skill Selection Phase for Sample 377/403


Processing samples:  94%|█████████▎| 377/403 [1:55:47<08:22, 19.31s/it]

Start Skill Selection Phase for Sample 378/403


Processing samples:  94%|█████████▍| 378/403 [1:55:58<06:57, 16.69s/it]

Start Skill Selection Phase for Sample 379/403


Processing samples:  94%|█████████▍| 379/403 [1:56:07<05:45, 14.39s/it]

Start Skill Selection Phase for Sample 380/403


Processing samples:  94%|█████████▍| 380/403 [1:56:16<04:58, 12.97s/it]

Start Skill Selection Phase for Sample 381/403


Processing samples:  95%|█████████▍| 381/403 [1:56:28<04:35, 12.51s/it]

Start Skill Selection Phase for Sample 382/403


Processing samples:  95%|█████████▍| 382/403 [1:56:59<06:22, 18.22s/it]

Start Skill Selection Phase for Sample 383/403


Processing samples:  95%|█████████▌| 383/403 [1:57:19<06:13, 18.66s/it]

Start Skill Selection Phase for Sample 384/403


Processing samples:  95%|█████████▌| 384/403 [1:57:31<05:18, 16.74s/it]

Start Skill Selection Phase for Sample 385/403


Processing samples:  96%|█████████▌| 385/403 [1:57:42<04:30, 15.02s/it]

Start Skill Selection Phase for Sample 386/403


Processing samples:  96%|█████████▌| 386/403 [1:57:53<03:52, 13.67s/it]

Start Skill Selection Phase for Sample 387/403


Processing samples:  96%|█████████▌| 387/403 [1:58:01<03:15, 12.20s/it]

Start Skill Selection Phase for Sample 388/403


Processing samples:  96%|█████████▋| 388/403 [1:58:12<02:54, 11.65s/it]

Start Skill Selection Phase for Sample 389/403


Processing samples:  97%|█████████▋| 389/403 [1:58:39<03:46, 16.17s/it]

Start Skill Selection Phase for Sample 390/403


Processing samples:  97%|█████████▋| 390/403 [1:59:07<04:16, 19.73s/it]

Start Skill Selection Phase for Sample 391/403


Processing samples:  97%|█████████▋| 391/403 [1:59:43<04:58, 24.84s/it]

Start Skill Selection Phase for Sample 392/403


Processing samples:  97%|█████████▋| 392/403 [2:00:13<04:47, 26.18s/it]

Start Skill Selection Phase for Sample 393/403


Processing samples:  98%|█████████▊| 393/403 [2:00:36<04:13, 25.36s/it]

Start Skill Selection Phase for Sample 394/403


Processing samples:  98%|█████████▊| 394/403 [2:01:04<03:55, 26.19s/it]

Start Skill Selection Phase for Sample 395/403


Processing samples:  98%|█████████▊| 395/403 [2:01:49<04:14, 31.76s/it]

Start Skill Selection Phase for Sample 396/403


Processing samples:  98%|█████████▊| 396/403 [2:02:20<03:39, 31.41s/it]

Start Skill Selection Phase for Sample 397/403


Processing samples:  99%|█████████▊| 397/403 [2:02:53<03:12, 32.04s/it]

Start Skill Selection Phase for Sample 398/403


Processing samples:  99%|█████████▉| 398/403 [2:03:28<02:45, 33.05s/it]

Start Skill Selection Phase for Sample 399/403


Processing samples:  99%|█████████▉| 399/403 [2:04:01<02:11, 32.96s/it]

Start Skill Selection Phase for Sample 400/403


Processing samples:  99%|█████████▉| 400/403 [2:04:23<01:29, 29.69s/it]

Start Skill Selection Phase for Sample 401/403


Processing samples: 100%|█████████▉| 401/403 [2:04:38<00:50, 25.11s/it]

Start Skill Selection Phase for Sample 402/403


Processing samples: 100%|█████████▉| 402/403 [2:04:50<00:21, 21.14s/it]

Start Skill Selection Phase for Sample 403/403


Processing samples: 100%|██████████| 403/403 [2:05:01<00:00, 18.61s/it]


All 403 samples processed. Results saved to: /home/snt/projects_lujun/LabAgentSkill/assets/results/finer_standard_gpt-4o-mini_20260211_233411.jsonl





In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score  # F1 supports macro/weighted averages [web:2]

# Evaluation input: the JSON Lines results file reported as saved by the
# sample-processing run (one JSON record per line, hence lines=True below).
# NOTE(review): absolute, machine-specific path — update it when evaluating a
# different run or when working from another checkout.
jsonl_path = "/home/snt/projects_lujun/LabAgentSkill/assets/results/finer_standard_gpt-4o-mini_20260211_233411.jsonl"
results_df = pd.read_json(jsonl_path, lines=True)
# Skill name (kept lower-case) that calc_hit_target_skill looks for inside the
# lower-cased skill-selection message content.
TARGET = "xbrl-tag-classification"

def calc_hit_target_skill(row) -> bool:
    lst = row.get("chat_history_agent_skill_select", "")
    if not lst:
        return False

    try:
        obj = lst[1] 
        content = (obj or {}).get("content", "")
    except (IndexError, TypeError, AttributeError):
        return False

    return TARGET in str(content).lower()

# Per-sample flag: did the skill-selection reply name the target skill?
results_df["hit_target_skill"] = results_df.apply(calc_hit_target_skill, axis=1)

# GPU memory figure (GB) used to weight wall-clock time in the cost columns.
ESTIMATED_GPU_RAM_GB = 100

# Emits one space-separated line per task_type (missing task types form their
# own group because dropna=False):
#   task_type  ACC  F1_weighted  HitRate  AvgMinutes  AvgVRAM-Minutes
for task_type, g in results_df.groupby("task_type", dropna=False):
    n_rows = len(g)

    # --- ACC / F1: rows whose prediction is "unknown" are left out of scoring ---
    pred_lower = g["predicted_label"].astype(str).str.lower()
    valid_mask = pred_lower != "unknown"
    valid_df = g.loc[valid_mask]
    unknown_count = int((~valid_mask).sum())
    denom = len(valid_df)

    if denom > 0:
        # Labels are compared case-insensitively, as strings.
        y_true = valid_df["true_label"].astype(str).str.lower()
        y_pred = pred_lower[valid_mask]

        acc = accuracy_score(y_true, y_pred)
        # zero_division=0: a class with an undefined F1 contributes 0 instead of a warning.
        f1_macro = f1_score(y_true, y_pred, average="macro", zero_division=0)
        f1_weighted = f1_score(y_true, y_pred, average="weighted", zero_division=0)
        correct = int((y_true == y_pred).sum())
    else:
        # Nothing left to score after dropping "unknown": report zeros.
        acc = 0.0
        f1_macro = 0.0
        f1_weighted = 0.0
        correct = 0

    # --- Hit rate: share of rows in the group whose hit_target_skill is True ---
    hit_flags = g["hit_target_skill"].fillna("").astype(str)
    hit_count = int((hit_flags == "True").sum())
    hit_rate = hit_count / n_rows if n_rows > 0 else float("nan")

    # --- Cost: minutes, and VRAM-minutes (GB·min) = minutes * ESTIMATED_GPU_RAM_GB ---
    # Rows with a missing elapsed_seconds count as 0 s but still count in the averages.
    total_time_sec = g["elapsed_seconds"].fillna(0).astype(float).sum()
    total_minutes = total_time_sec / 60.0
    total_vram_minutes = total_minutes * ESTIMATED_GPU_RAM_GB
    if n_rows > 0:
        avg_minutes = total_minutes / n_rows
        avg_vram_minutes = total_vram_minutes / n_rows
    else:
        avg_minutes = float("nan")
        avg_vram_minutes = float("nan")

    # unknown_count, correct, denom and f1_macro are computed above but are not
    # part of the compact line printed here.
    print(f"{task_type} {acc:.4f} {f1_weighted:.4f} {hit_rate:.4f} {avg_minutes:.4f} {avg_vram_minutes:.4f}")


agent_simple 0.4839 0.4444 0.0000 0.1148 11.4810
agent_skill_based 0.3826 0.3395 0.9727 0.0670 6.6986
agent_skill_full_context 0.4613 0.4302 0.0000 0.1281 12.8111


In [5]:
# Distribution of the per-sample hit flag over all task types (NaN kept as its own bucket).
# NOTE(review): the saved output of this cell lists empty-string entries, while
# calc_hit_target_skill as written only returns booleans -- the output appears
# to predate the current code; re-run the notebook to confirm.
results_df["hit_target_skill"].value_counts(dropna=False)

hit_target_skill
         806
True     392
False     11
Name: count, dtype: int64