## Init For Agents Testing

In [2]:
# Setup: Load environment variables and dependencies
import os
import sys
from pathlib import Path

from jinja2 import Environment, FileSystemLoader

project_root = Path.cwd()
src_path = project_root / "src"

if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))
    print(f"✓ Added to sys.path: {src_path}")

from LabAgentSkill import skills_utils
from LabAgentSkill.SkillAwareAgent import SkillAwareAgent

root_dir = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
env_path = root_dir / ".env"
env = {}

if env_path.exists():
    for line in env_path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        env[key.strip()] = value.strip()

# Set API key
os.environ["OPENAI_API_KEY"] = env.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
env = Environment(loader=FileSystemLoader('prompts/'))  
skills_folder = Path("/home/snt/projects_lujun/LabAgentSkill/skillsHub/skills_finer")
all_skills = skills_utils.read_all_skills_metadata(skills_folder)
for skill in all_skills:
    print(f"  - {skill['name']}: {skill['description']}")

# model_name = "gpt-4o-mini"
# base_url = None
model_name = "google/gemma-3-270m-it"
# model_name = "Qwen/Qwen2.5-0.5B-Instruct"
base_url = "http://127.0.0.1:8001/v1"

  - XBRL-tag-classification: Classify financial text into specific XBRL tags by analyzing semantic cues, context, and category boundaries.
  - algorithmic-art: Creating algorithmic art using p5.js with seeded randomness and interactive parameter exploration. Use this when users request creating art using code, generative art, algorithmic art, flow fields, or particle systems. Create original algorithmic art rather than copying existing artists' work to avoid copyright violations.
  - brand-guidelines: Applies Anthropic's official brand colors and typography to any sort of artifact that may benefit from having Anthropic's look-and-feel. Use it when brand colors or style guidelines, visual formatting, or company design standards apply.
  - canvas-design: Create beautiful visual art in .png and .pdf documents using design philosophy. You should use this skill when the user asks to create a poster, piece of art, design, or other static piece. Create original visual designs, never copying e

## Load Data - Sentimental Analysis

In [17]:
from datasets import load_dataset
dataset_name = "Volavion/finer-139-xbrl-nonempty"
loaded_dataset = load_dataset(dataset_name, split="train")
loaded_df = loaded_dataset.to_pandas()
loaded_df = loaded_df.reset_index(drop=True)

In [18]:
import pandas as pd
random_selected_df = loaded_df.sample(n=250, random_state=42)
new_df = pd.DataFrame(columns=["sentence", "tag_token", "tag_name"])
for idx, row in random_selected_df.iterrows():
    sentence_list = row["tokens"]
    tag_indices = row["tag_indices"]
    tag_tokens = [sentence_list[i] for i in tag_indices]
    tag_names = row["tag_names"]
    for tag_token, tag_name in zip(tag_tokens, tag_names):
        print (f"  - {tag_token}: {tag_name}")
        sentence = " ".join(sentence_list)
        print (f"Sentence: {sentence}")
        new_row_dict = {
            "sentence": sentence,
            "tag_token": tag_token,
            "tag_name": tag_name
        }
        new_row_df = pd.DataFrame([new_row_dict])
        new_df = pd.concat([new_df, new_row_df], ignore_index=True)

new_df = new_df[new_df["tag_token"].str.replace(",", "").str.replace(".", "").str.replace("-", "").str.strip().str.isnumeric()].reset_index(drop=True)


  - two: B-NumberOfOperatingSegments
Sentence: Accordingly , CNX Resources is the sole sponsor of the Partnership , and we may refer to CNX Resources as the “ Sponsor ” throughout this Quarterly Report on Form 10-Q . Description of Business Our midstream assets consist of two operating segments that we refer to as our “ Anchor Systems ” and “ Additional Systems ” based on their relative current cash flows , growth profiles , capital expenditure requirements and the timing of their development .
  - 17.43: B-TreasuryStockAcquiredAverageCostPerShare
Sentence: During the nine months ended September 30 , 2019 , the Company repurchased 0.8 million shares of common stock under the Program at an average price per share of $ 17.43 for a total of $ 14.6 million , excluding commissions .
  - 14.6: B-TreasuryStockValueAcquiredCostMethod
Sentence: During the nine months ended September 30 , 2019 , the Company repurchased 0.8 million shares of common stock under the Program at an average price per 

In [21]:
from datasets import Dataset

ds = Dataset.from_pandas(new_df)
ds.push_to_hub("Volavion/finer-139-numeric-sampled", private=False)

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 419.26ba/s]
Processing Files (1 / 1): 100%|██████████| 40.5kB / 40.5kB, 50.6kB/s  
New Data Upload: 100%|██████████| 40.5kB / 40.5kB, 50.6kB/s  
Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.45s/ shards]


CommitInfo(commit_url='https://huggingface.co/datasets/Volavion/finer-139-numeric-sampled/commit/31d85461da7733a376ff6921ec11933eea5c331e', commit_message='Upload dataset', commit_description='', oid='31d85461da7733a376ff6921ec11933eea5c331e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/Volavion/finer-139-numeric-sampled', endpoint='https://huggingface.co', repo_type='dataset', repo_id='Volavion/finer-139-numeric-sampled'), pr_revision=None, pr_num=None)

## Select skills 

In [3]:
import time
import json
from datetime import datetime
import pandas as pd
from tqdm import tqdm

from LabAgentSkill.evaluate import get_predicted_label

print(f"Using model: {model_name}")
# Initialize agents
agent_skill_aware = SkillAwareAgent(use_chat_history=True, use_trim_messages=True, model=model_name, base_url=base_url)
agent_skill_exec_agent = SkillAwareAgent(use_chat_history=True, use_trim_messages=True, model=model_name, base_url=base_url)
agent_simple = SkillAwareAgent(use_chat_history=True, use_trim_messages=False, model=model_name, base_url=base_url)

p_exec_imdb_temp = env.get_template('p_exec_imdb.jinja')
p_skill_select_temp = env.get_template('p_skill_select.jinja')
p_skill_discov_temp = env.get_template('p_skill_discov.jinja')
p_skill_exec_temp = env.get_template('p_skill_exec.jinja')
p_default_system_temp = env.get_template('p_default_system.jinja')

# JSONL output path
output_dir = "/home/snt/projects_lujun/LabAgentSkill/assets/results/"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
jsonl_path = output_dir+f"imdb_300_standard_{model_name.split('/')[-1]}_{timestamp}.jsonl"
print(f"Results will be saved to: {jsonl_path}")

skill_count = 0
count_row = 0
if os.path.exists(jsonl_path):
    df_exist = pd.read_json(jsonl_path, lines=True)
    count_row = len(df_exist)
    print(f"Resume from row: {count_row}")


# Process each sample
for idx, row in tqdm( loaded_df.iterrows(), total=len(loaded_df), desc="Processing samples",):

    if idx < count_row:
        continue
    sample_start_time, text, true_label = time.time(), row.text, row.label_name

    # Step 1: Skill Selection
    print(f"Start Skill Selection Phase for Sample {idx + 1}/{len(loaded_df)}")
    skill_context = "\n".join([
        f"- **{skill['name']}**: {skill['description']}"
        for skill in all_skills
    ])

    p_skill_select = p_skill_select_temp.render(SKILL_CONTEXT=skill_context)
    p_exec_imdb = p_exec_imdb_temp.render(text=text)
    skill_select_resp = agent_skill_aware.chat(user_input=p_exec_imdb, custom_system_prompt=p_skill_select)
    selected_skills = skills_utils.parse_skills_from_json_response(json_response=skill_select_resp, skills_hub_dir=skills_folder)

    # Track whether "movie-sentiment-analysis" was selected in Step 1 
    selected_skill_names_step1 = [s["name"] for s in selected_skills]
    hit_target_skill = "movie-sentiment-analysis" in selected_skill_names_step1 ## This is hard Coded

    skill_execution_context = ""
    for skill_meta in selected_skills:
        skill_execution_context += (
            f"SKill {skill_count + 1}: \n"
            f"{skill_meta['description']}\n"
            f"{'\n'.join(skill_meta['body'].split('\n')[1:])}\n\n"
        )
        skill_count += 1

    skill_count_prev = skill_count

    # Step 2: Skill Discovery
    discovery_rounds = 0
    while len(selected_skills) > 0:
        p_skill_discov = p_skill_discov_temp.render(SKILL_CONTEXT=skill_execution_context)
        skill_discov_resp = agent_skill_exec_agent.chat(user_input=p_skill_discov, custom_system_prompt=p_default_system_temp.render())
        selected_skills = skills_utils.parse_skills_from_json_response(json_response=skill_discov_resp, skills_hub_dir=skills_folder)

        for skill_meta in selected_skills:
            skill_execution_context += (
                f"SKill {skill_count + 1}: \n"
                f"{skill_meta['description']}\n"
                f"{'\n'.join(skill_meta['body'].split('\n')[1:])}\n\n"
            )
            skill_count += 1
        discovery_rounds += 1
    new_skills_found = skill_count - skill_count_prev


    # print(f"End of skill discovery phase. Found total of new skills: {new_skills_found}")
    # Step 3: Query Execution
    p_skill_exec = p_skill_exec_temp.render(SKILL_CONTEXT=skill_execution_context)
    imdb_exec_response = agent_skill_exec_agent.chat(user_input=p_exec_imdb, custom_system_prompt=p_skill_exec)
    message_classification = skills_utils.parse_message_from_json_response(imdb_exec_response)
    is_correct = true_label.lower() in message_classification.strip().lower()

    predicted_label = get_predicted_label(message_classification)
    sample_end_time = time.time()
    sample_elapsed = sample_end_time - sample_start_time
    chat_history_agent_skill_select = agent_skill_aware.get_human_ai_message_history()
    chat_history_agent_exec = agent_skill_exec_agent.get_human_ai_message_history()

    # Build record and append to JSONL
    record = {
        "index": int(idx),
        "text": text,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "raw_response": message_classification,
        "correct": is_correct,
        "selected_skills_step1": selected_skill_names_step1,
        "hit_target_skill": hit_target_skill,
        "new_skills_discovered": new_skills_found,
        "discovery_rounds": discovery_rounds,
        "elapsed_seconds": round(sample_elapsed, 4),
        "model": model_name,
        "timestamp": datetime.now().isoformat(),
        "chat_history_agent_skill_select": chat_history_agent_skill_select,
        "chat_history_agent_exec": chat_history_agent_exec,
        "task_type": "agent_skill_based"
        
    }

    dataframe_record = pd.DataFrame([record])
    dataframe_record.to_json(jsonl_path, orient="records", lines=True, mode="a" if os.path.exists(jsonl_path) else "w")
    agent_skill_aware.clear_history()
    agent_skill_exec_agent.clear_history()

    ######################################################################################################################################
    sample_start_time, text, true_label = time.time(), row.text, row.label_name

    p_exec_imdb = p_exec_imdb_temp.render(text=text)
    p_exec_imdb_sys = p_default_system_temp.render()
    imdb_exec_response = agent_simple.chat(user_input=p_exec_imdb, custom_system_prompt=p_exec_imdb_sys)
    message_classification = skills_utils.parse_message_from_json_response(imdb_exec_response)
    is_correct = true_label.lower() in message_classification.strip().lower()

    predicted_label = get_predicted_label(message_classification)
    sample_end_time = time.time()
    sample_elapsed = sample_end_time - sample_start_time
    chat_history_agent_exec = agent_simple.get_human_ai_message_history()

    # Build record and append to JSONL
    record = {
        "index": int(idx),
        "text": text,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "raw_response": message_classification,
        "correct": is_correct,
        "selected_skills_step1": "",
        "hit_target_skill": "",
        "new_skills_discovered": "",
        "discovery_rounds": "",
        "elapsed_seconds": round(sample_elapsed, 4),
        "model": model_name,
        "timestamp": datetime.now().isoformat(),
        "chat_history_agent_skill_select": "",
        "chat_history_agent_exec": chat_history_agent_exec,
        "task_type": "agent_simple"
        
    }

    dataframe_record = pd.DataFrame([record])
    dataframe_record.to_json(jsonl_path, orient="records", lines=True, mode="a" if os.path.exists(jsonl_path) else "w")
    agent_simple.clear_history()

    ######################################################################################################################################
    sample_start_time, text, true_label = time.time(), row.text, row.label_name
    skill_context_all =  "The following are skills informaiton you can use as a reference for user request:\n".join([
        f"- **{skill['name']}**:\n {skill['description']} **:\n {skill['body']}"
        for skill in all_skills
    ])
    p_exec_imdb = p_exec_imdb_temp.render(text= text + skill_context_all)
    p_exec_imdb_sys = p_default_system_temp.render()

    imdb_exec_response = agent_simple.chat(user_input=p_exec_imdb, custom_system_prompt=p_exec_imdb_sys)
    message_classification = skills_utils.parse_message_from_json_response(imdb_exec_response)
    is_correct = true_label.lower() in message_classification.strip().lower()

    predicted_label = get_predicted_label(message_classification)
    sample_end_time = time.time()
    sample_elapsed = sample_end_time - sample_start_time
    chat_history_agent_exec = agent_simple.get_human_ai_message_history()

    # Build record and append to JSONL
    record = {
        "index": int(idx),
        "text": text,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "raw_response": message_classification,
        "correct": is_correct,
        "selected_skills_step1": "",
        "hit_target_skill": "",
        "new_skills_discovered": "",
        "discovery_rounds": "",
        "elapsed_seconds": round(sample_elapsed, 4),
        "model": model_name,
        "timestamp": datetime.now().isoformat(),
        "chat_history_agent_skill_select": "",
        "chat_history_agent_exec": chat_history_agent_exec,
        "task_type": "agent_skill_full_context"
        
    }

    dataframe_record = pd.DataFrame([record])
    dataframe_record.to_json(jsonl_path, orient="records", lines=True, mode="a" if os.path.exists(jsonl_path) else "w")
    agent_simple.clear_history()

print(f"\n{'='*60}")
print(f"All {len(loaded_df)} samples processed. Results saved to: {jsonl_path}")
print(f"{'='*60}")

Using model: google/gemma-3-270m-it
✓ SkillAwareAgent initialized
  Model: google/gemma-3-270m-it
  Base URL: http://127.0.0.1:8001/v1
  Chat History: ENABLED ✓
  Trim Messages: ENABLED ✓
✓ SkillAwareAgent initialized
  Model: google/gemma-3-270m-it
  Base URL: http://127.0.0.1:8001/v1
  Chat History: ENABLED ✓
  Trim Messages: ENABLED ✓
✓ SkillAwareAgent initialized
  Model: google/gemma-3-270m-it
  Base URL: http://127.0.0.1:8001/v1
  Chat History: ENABLED ✓
  Trim Messages: DISABLED ✗
Results will be saved to: /home/snt/projects_lujun/LabAgentSkill/assets/results/imdb_300_standard_gemma-3-270m-it_20260211_181940.jsonl


Processing samples:   0%|          | 0/300 [00:00<?, ?it/s]

Start Skill Selection Phase for Sample 1/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   0%|          | 1/300 [00:00<03:01,  1.64it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 2/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   1%|          | 2/300 [00:01<02:47,  1.78it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 3/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   1%|          | 3/300 [00:01<03:12,  1.55it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 4/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   1%|▏         | 4/300 [00:02<03:17,  1.50it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 5/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   2%|▏         | 5/300 [00:03<03:37,  1.36it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 6/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   2%|▏         | 6/300 [00:05<06:06,  1.25s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 7/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   2%|▏         | 7/300 [00:06<05:52,  1.20s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 8/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   3%|▎         | 8/300 [00:07<05:15,  1.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 9/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   3%|▎         | 9/300 [00:08<04:45,  1.02it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 10/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   3%|▎         | 10/300 [00:08<04:12,  1.15it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 11/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   4%|▎         | 11/300 [00:09<04:05,  1.18it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 12/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   4%|▍         | 12/300 [00:10<03:58,  1.21it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 13/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   4%|▍         | 13/300 [00:11<03:35,  1.33it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 14/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   5%|▍         | 14/300 [00:12<03:57,  1.20it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 15/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   5%|▌         | 15/300 [00:13<03:58,  1.20it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 16/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   5%|▌         | 16/300 [00:13<03:35,  1.32it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 17/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   6%|▌         | 17/300 [00:14<03:32,  1.33it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 18/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   6%|▌         | 18/300 [00:16<05:35,  1.19s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 19/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   6%|▋         | 19/300 [00:17<04:53,  1.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 20/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   7%|▋         | 20/300 [00:17<04:27,  1.05it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 21/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   7%|▋         | 21/300 [00:18<04:15,  1.09it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 22/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   7%|▋         | 22/300 [00:19<03:54,  1.19it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 23/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   8%|▊         | 23/300 [00:20<03:45,  1.23it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 24/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   8%|▊         | 24/300 [00:21<03:49,  1.20it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 25/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   8%|▊         | 25/300 [00:21<03:36,  1.27it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 26/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   9%|▊         | 26/300 [00:22<03:36,  1.27it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 27/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   9%|▉         | 27/300 [00:24<04:31,  1.00it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 28/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:   9%|▉         | 28/300 [00:25<04:51,  1.07s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 29/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  10%|▉         | 29/300 [00:32<13:36,  3.01s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 30/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  10%|█         | 30/300 [00:33<10:32,  2.34s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 31/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  10%|█         | 31/300 [00:34<08:11,  1.83s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 32/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  11%|█         | 32/300 [00:35<06:50,  1.53s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 33/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  11%|█         | 33/300 [01:38<1:28:46, 19.95s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 34/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  11%|█▏        | 34/300 [01:38<1:03:01, 14.22s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 35/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  12%|█▏        | 35/300 [01:39<44:57, 10.18s/it]  

✓ Chat history cleared
Start Skill Selection Phase for Sample 36/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  12%|█▏        | 36/300 [01:40<32:28,  7.38s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 37/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  12%|█▏        | 37/300 [01:41<23:23,  5.33s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 38/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  13%|█▎        | 38/300 [01:41<17:20,  3.97s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 39/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  13%|█▎        | 39/300 [01:42<13:12,  3.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 40/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  13%|█▎        | 40/300 [01:43<10:21,  2.39s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 41/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  14%|█▎        | 41/300 [01:54<21:13,  4.92s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 42/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  14%|█▍        | 42/300 [01:54<15:32,  3.62s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 43/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  14%|█▍        | 43/300 [01:55<11:41,  2.73s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 44/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  15%|█▍        | 44/300 [01:56<09:11,  2.15s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 45/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  15%|█▌        | 45/300 [01:57<07:17,  1.72s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 46/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  15%|█▌        | 46/300 [01:58<06:13,  1.47s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 47/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  16%|█▌        | 47/300 [02:00<07:20,  1.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 48/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  16%|█▌        | 48/300 [02:01<06:04,  1.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 49/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  16%|█▋        | 49/300 [02:01<05:05,  1.22s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 50/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  17%|█▋        | 50/300 [02:02<04:30,  1.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 51/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  17%|█▋        | 51/300 [02:03<03:54,  1.06it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 52/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  17%|█▋        | 52/300 [02:03<03:38,  1.13it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 53/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  18%|█▊        | 53/300 [03:06<1:19:48, 19.39s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 54/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  18%|█▊        | 54/300 [03:07<56:34, 13.80s/it]  

✓ Chat history cleared
Start Skill Selection Phase for Sample 55/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  18%|█▊        | 55/300 [03:07<40:18,  9.87s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 56/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  19%|█▊        | 56/300 [03:08<28:52,  7.10s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 57/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  19%|█▉        | 57/300 [03:09<20:53,  5.16s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 58/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  19%|█▉        | 58/300 [03:09<15:24,  3.82s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 59/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  20%|█▉        | 59/300 [03:10<11:43,  2.92s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 60/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  20%|██        | 60/300 [03:11<08:55,  2.23s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 61/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  20%|██        | 61/300 [03:12<07:09,  1.80s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 62/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  21%|██        | 62/300 [04:16<1:21:03, 20.43s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 63/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  21%|██        | 63/300 [04:16<57:22, 14.53s/it]  

✓ Chat history cleared
Start Skill Selection Phase for Sample 64/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  21%|██▏       | 64/300 [04:18<41:23, 10.52s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 65/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  22%|██▏       | 65/300 [04:22<34:03,  8.70s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 66/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  22%|██▏       | 66/300 [04:23<24:42,  6.33s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 67/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  22%|██▏       | 67/300 [04:26<21:14,  5.47s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 68/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  23%|██▎       | 68/300 [04:29<18:25,  4.76s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 69/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  23%|██▎       | 69/300 [04:30<13:56,  3.62s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 70/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  23%|██▎       | 70/300 [04:31<10:35,  2.77s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 71/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  24%|██▎       | 71/300 [04:32<08:24,  2.20s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 72/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  24%|██▍       | 72/300 [04:33<06:40,  1.76s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 73/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  24%|██▍       | 73/300 [04:33<05:29,  1.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 74/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  25%|██▍       | 74/300 [04:34<04:35,  1.22s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 75/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  25%|██▌       | 75/300 [04:35<04:03,  1.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 76/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  25%|██▌       | 76/300 [04:36<03:37,  1.03it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 77/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  26%|██▌       | 77/300 [04:36<03:15,  1.14it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 78/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  26%|██▌       | 78/300 [04:37<02:55,  1.26it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 79/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  26%|██▋       | 79/300 [04:38<03:11,  1.15it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 80/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  27%|██▋       | 80/300 [04:54<19:28,  5.31s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 81/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  27%|██▋       | 81/300 [05:16<37:58, 10.40s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 82/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  27%|██▋       | 82/300 [05:17<27:18,  7.51s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 83/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  28%|██▊       | 83/300 [05:17<19:55,  5.51s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 84/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  28%|██▊       | 84/300 [05:20<16:58,  4.71s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 85/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  28%|██▊       | 85/300 [05:22<13:09,  3.67s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 86/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  29%|██▊       | 86/300 [05:23<10:15,  2.88s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 87/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  29%|██▉       | 87/300 [05:24<08:41,  2.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 88/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  29%|██▉       | 88/300 [05:25<06:59,  1.98s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 89/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  30%|██▉       | 89/300 [05:26<05:50,  1.66s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 90/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  30%|███       | 90/300 [05:45<24:14,  6.93s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 91/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  30%|███       | 91/300 [05:46<17:55,  5.15s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 92/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  31%|███       | 92/300 [05:49<16:01,  4.62s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 93/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  31%|███       | 93/300 [05:51<13:11,  3.83s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 94/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  31%|███▏      | 94/300 [05:53<10:57,  3.19s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 95/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  32%|███▏      | 95/300 [05:54<08:40,  2.54s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 96/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  32%|███▏      | 96/300 [05:55<06:57,  2.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 97/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  32%|███▏      | 97/300 [05:56<05:52,  1.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 98/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  33%|███▎      | 98/300 [06:59<1:07:52, 20.16s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 99/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  33%|███▎      | 99/300 [07:00<48:10, 14.38s/it]  

✓ Chat history cleared
Start Skill Selection Phase for Sample 100/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  33%|███▎      | 100/300 [07:01<34:26, 10.33s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 101/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  34%|███▎      | 101/300 [07:03<25:49,  7.79s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 102/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  34%|███▍      | 102/300 [07:04<18:58,  5.75s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 103/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  34%|███▍      | 103/300 [07:05<14:17,  4.35s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 104/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  35%|███▍      | 104/300 [07:06<11:29,  3.52s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 105/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  35%|███▌      | 105/300 [07:07<08:56,  2.75s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 106/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  35%|███▌      | 106/300 [07:08<07:12,  2.23s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 107/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  36%|███▌      | 107/300 [07:10<06:07,  1.90s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 108/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  36%|███▌      | 108/300 [07:10<05:03,  1.58s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 109/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  36%|███▋      | 109/300 [07:11<04:31,  1.42s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 110/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  37%|███▋      | 110/300 [07:14<05:48,  1.83s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 111/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  37%|███▋      | 111/300 [07:15<04:49,  1.53s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 112/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  37%|███▋      | 112/300 [07:16<04:12,  1.34s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 113/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  38%|███▊      | 113/300 [07:17<03:48,  1.22s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 114/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  38%|███▊      | 114/300 [07:18<04:03,  1.31s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 115/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  38%|███▊      | 115/300 [07:28<12:07,  3.93s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 116/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  39%|███▊      | 116/300 [07:29<09:16,  3.03s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 117/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  39%|███▉      | 117/300 [07:30<07:15,  2.38s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 118/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  39%|███▉      | 118/300 [07:33<07:08,  2.35s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 119/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  40%|███▉      | 119/300 [07:34<06:00,  1.99s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 120/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  40%|████      | 120/300 [07:39<08:44,  2.91s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 121/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  40%|████      | 121/300 [07:40<07:04,  2.37s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 122/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  41%|████      | 122/300 [07:41<06:05,  2.05s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 123/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  41%|████      | 123/300 [07:42<05:06,  1.73s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 124/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  41%|████▏     | 124/300 [07:45<06:06,  2.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 125/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  42%|████▏     | 125/300 [07:46<04:59,  1.71s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 126/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  42%|████▏     | 126/300 [07:47<04:29,  1.55s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 127/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  42%|████▏     | 127/300 [07:48<04:09,  1.44s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 128/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  43%|████▎     | 128/300 [07:49<03:38,  1.27s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 129/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  43%|████▎     | 129/300 [07:50<03:19,  1.17s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 130/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  43%|████▎     | 130/300 [07:52<04:09,  1.47s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 131/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  44%|████▎     | 131/300 [07:53<03:37,  1.29s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 132/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  44%|████▍     | 132/300 [07:54<03:17,  1.17s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 133/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  44%|████▍     | 133/300 [07:58<05:24,  1.94s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 134/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  45%|████▍     | 134/300 [08:33<32:47, 11.85s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 135/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  45%|████▌     | 135/300 [08:34<23:37,  8.59s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 136/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  45%|████▌     | 136/300 [08:35<17:31,  6.41s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 137/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  46%|████▌     | 137/300 [09:38<1:03:30, 23.38s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 138/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  46%|████▌     | 138/300 [09:39<44:51, 16.61s/it]  

✓ Chat history cleared
Start Skill Selection Phase for Sample 139/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  46%|████▋     | 139/300 [09:40<31:56, 11.91s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 140/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  47%|████▋     | 140/300 [09:41<23:15,  8.72s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 141/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  47%|████▋     | 141/300 [09:42<16:58,  6.41s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 142/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  47%|████▋     | 142/300 [09:43<12:28,  4.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 143/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  48%|████▊     | 143/300 [09:45<10:17,  3.94s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 144/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  48%|████▊     | 144/300 [09:46<07:58,  3.07s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 145/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  48%|████▊     | 145/300 [09:47<06:20,  2.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 146/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  49%|████▊     | 146/300 [09:48<05:16,  2.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 147/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  49%|████▉     | 147/300 [09:49<04:23,  1.72s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 148/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  49%|████▉     | 148/300 [09:50<03:43,  1.47s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 149/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  50%|████▉     | 149/300 [09:53<04:58,  1.98s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 150/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  50%|█████     | 150/300 [09:54<04:05,  1.64s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 151/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  50%|█████     | 151/300 [09:56<04:15,  1.71s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 152/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  51%|█████     | 152/300 [10:00<05:44,  2.33s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 153/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  51%|█████     | 153/300 [10:01<04:44,  1.94s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 154/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  51%|█████▏    | 154/300 [10:02<04:01,  1.66s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 155/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  52%|█████▏    | 155/300 [10:04<04:43,  1.96s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 156/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  52%|█████▏    | 156/300 [10:05<04:03,  1.69s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 157/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  52%|█████▏    | 157/300 [10:08<04:27,  1.87s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 158/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  53%|█████▎    | 158/300 [10:09<03:53,  1.65s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 159/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  53%|█████▎    | 159/300 [10:09<03:11,  1.36s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 160/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  53%|█████▎    | 160/300 [10:10<02:45,  1.18s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 161/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  54%|█████▎    | 161/300 [10:11<02:34,  1.11s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 162/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  54%|█████▍    | 162/300 [10:12<02:15,  1.02it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 163/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  54%|█████▍    | 163/300 [10:13<02:37,  1.15s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 164/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  55%|█████▍    | 164/300 [10:14<02:23,  1.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 165/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  55%|█████▌    | 165/300 [10:15<02:10,  1.03it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 166/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  55%|█████▌    | 166/300 [10:16<02:01,  1.10it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 167/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  56%|█████▌    | 167/300 [10:17<01:59,  1.12it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 168/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  56%|█████▌    | 168/300 [10:17<01:53,  1.16it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 169/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  56%|█████▋    | 169/300 [10:18<01:51,  1.18it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 170/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  57%|█████▋    | 170/300 [10:19<01:54,  1.14it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 171/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  57%|█████▋    | 171/300 [10:20<01:50,  1.17it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 172/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  57%|█████▋    | 172/300 [10:21<02:00,  1.06it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 173/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  58%|█████▊    | 173/300 [10:23<02:16,  1.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 174/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  58%|█████▊    | 174/300 [11:01<25:36, 12.19s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 175/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  58%|█████▊    | 175/300 [11:01<18:13,  8.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 176/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  59%|█████▊    | 176/300 [11:02<13:06,  6.34s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 177/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  59%|█████▉    | 177/300 [11:03<09:40,  4.72s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 178/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  59%|█████▉    | 178/300 [11:40<29:09, 14.34s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 179/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  60%|█████▉    | 179/300 [11:41<20:45, 10.30s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 180/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  60%|██████    | 180/300 [11:41<14:50,  7.42s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 181/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  60%|██████    | 181/300 [11:42<10:48,  5.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 182/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  61%|██████    | 182/300 [11:43<07:59,  4.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 183/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  61%|██████    | 183/300 [11:44<06:01,  3.09s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 184/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  61%|██████▏   | 184/300 [11:46<05:39,  2.93s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 185/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  62%|██████▏   | 185/300 [11:47<04:22,  2.28s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 186/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  62%|██████▏   | 186/300 [11:48<03:30,  1.84s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 187/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  62%|██████▏   | 187/300 [11:49<02:56,  1.56s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 188/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  63%|██████▎   | 188/300 [11:50<02:27,  1.32s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 189/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  63%|██████▎   | 189/300 [11:51<02:12,  1.20s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 190/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  63%|██████▎   | 190/300 [11:53<02:54,  1.59s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 191/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  64%|██████▎   | 191/300 [11:54<02:36,  1.44s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 192/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  64%|██████▍   | 192/300 [11:55<02:18,  1.28s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 193/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  64%|██████▍   | 193/300 [11:56<02:13,  1.25s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 194/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  65%|██████▍   | 194/300 [11:57<01:59,  1.13s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 195/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  65%|██████▌   | 195/300 [11:58<01:47,  1.03s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 196/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  65%|██████▌   | 196/300 [11:59<01:44,  1.01s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 197/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  66%|██████▌   | 197/300 [12:00<01:36,  1.07it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 198/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  66%|██████▌   | 198/300 [12:01<01:35,  1.07it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 199/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  66%|██████▋   | 199/300 [12:01<01:33,  1.08it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 200/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  67%|██████▋   | 200/300 [12:02<01:28,  1.13it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 201/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  67%|██████▋   | 201/300 [12:04<01:44,  1.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 202/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  67%|██████▋   | 202/300 [12:05<01:37,  1.01it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 203/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  68%|██████▊   | 203/300 [12:05<01:31,  1.07it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 204/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  68%|██████▊   | 204/300 [12:06<01:26,  1.11it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 205/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  68%|██████▊   | 205/300 [12:07<01:25,  1.11it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 206/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  69%|██████▊   | 206/300 [12:09<01:45,  1.13s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 207/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  69%|██████▉   | 207/300 [12:10<02:00,  1.29s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 208/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  69%|██████▉   | 208/300 [12:12<01:53,  1.23s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 209/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  70%|██████▉   | 209/300 [12:12<01:34,  1.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 210/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  70%|███████   | 210/300 [12:13<01:35,  1.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 211/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  70%|███████   | 211/300 [12:16<02:09,  1.45s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 212/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  71%|███████   | 212/300 [12:16<01:44,  1.19s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 213/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  71%|███████   | 213/300 [12:17<01:31,  1.05s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 214/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  71%|███████▏  | 214/300 [12:18<01:19,  1.08it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 215/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  72%|███████▏  | 215/300 [12:18<01:12,  1.17it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 216/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  72%|███████▏  | 216/300 [12:19<01:11,  1.18it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 217/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  72%|███████▏  | 217/300 [12:20<01:12,  1.14it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 218/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  73%|███████▎  | 218/300 [12:21<01:05,  1.26it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 219/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  73%|███████▎  | 219/300 [12:21<01:02,  1.29it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 220/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  73%|███████▎  | 220/300 [12:22<01:01,  1.29it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 221/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  74%|███████▎  | 221/300 [12:23<00:59,  1.33it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 222/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  74%|███████▍  | 222/300 [12:24<00:58,  1.34it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 223/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  74%|███████▍  | 223/300 [12:24<00:55,  1.39it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 224/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  75%|███████▍  | 224/300 [12:25<00:51,  1.48it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 225/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  75%|███████▌  | 225/300 [12:26<00:59,  1.25it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 226/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  75%|███████▌  | 226/300 [13:28<23:43, 19.23s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 227/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  76%|███████▌  | 227/300 [14:19<34:58, 28.75s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 228/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  76%|███████▌  | 228/300 [14:21<24:42, 20.59s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 229/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  76%|███████▋  | 229/300 [14:22<17:22, 14.68s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 230/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  77%|███████▋  | 230/300 [14:25<13:15, 11.36s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 231/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  77%|███████▋  | 231/300 [14:26<09:28,  8.24s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 232/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  77%|███████▋  | 232/300 [14:27<06:56,  6.13s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 233/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  78%|███████▊  | 233/300 [14:29<05:17,  4.73s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 234/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  78%|███████▊  | 234/300 [14:30<03:58,  3.62s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 235/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  78%|███████▊  | 235/300 [14:31<03:06,  2.86s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 236/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  79%|███████▊  | 236/300 [14:32<02:30,  2.35s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 237/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  79%|███████▉  | 237/300 [14:33<02:03,  1.96s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 238/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  79%|███████▉  | 238/300 [14:35<01:52,  1.82s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 239/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  80%|███████▉  | 239/300 [14:36<01:38,  1.61s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 240/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  80%|████████  | 240/300 [14:37<01:24,  1.42s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 241/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  80%|████████  | 241/300 [14:38<01:16,  1.30s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 242/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  81%|████████  | 242/300 [14:39<01:08,  1.18s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 243/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  81%|████████  | 243/300 [14:39<01:01,  1.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 244/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  81%|████████▏ | 244/300 [14:40<00:58,  1.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 245/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  82%|████████▏ | 245/300 [14:41<00:58,  1.06s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 246/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  82%|████████▏ | 246/300 [14:42<00:54,  1.01s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 247/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  82%|████████▏ | 247/300 [14:43<00:52,  1.00it/s]

✓ Chat history cleared
Start Skill Selection Phase for Sample 248/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  83%|████████▎ | 248/300 [15:31<12:59, 14.99s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 249/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  83%|████████▎ | 249/300 [15:32<09:13, 10.85s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 250/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  83%|████████▎ | 250/300 [16:09<15:33, 18.67s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 251/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  84%|████████▎ | 251/300 [16:10<10:56, 13.39s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 252/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  84%|████████▍ | 252/300 [16:11<07:44,  9.67s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 253/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  84%|████████▍ | 253/300 [16:12<05:33,  7.09s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 254/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  85%|████████▍ | 254/300 [16:13<04:01,  5.26s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 255/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  85%|████████▌ | 255/300 [16:14<02:59,  3.99s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 256/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  85%|████████▌ | 256/300 [16:15<02:17,  3.12s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 257/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  86%|████████▌ | 257/300 [16:16<01:45,  2.46s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 258/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  86%|████████▌ | 258/300 [16:17<01:24,  2.00s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 259/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  86%|████████▋ | 259/300 [16:18<01:11,  1.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 260/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  87%|████████▋ | 260/300 [16:19<00:59,  1.49s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 261/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  87%|████████▋ | 261/300 [16:20<00:52,  1.35s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 262/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  87%|████████▋ | 262/300 [16:21<00:48,  1.27s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 263/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  88%|████████▊ | 263/300 [16:22<00:41,  1.13s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 264/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  88%|████████▊ | 264/300 [16:23<00:42,  1.18s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 265/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  88%|████████▊ | 265/300 [16:49<04:53,  8.37s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 266/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  89%|████████▊ | 266/300 [16:50<03:30,  6.20s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 267/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  89%|████████▉ | 267/300 [17:30<08:59, 16.33s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 268/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  89%|████████▉ | 268/300 [17:49<09:15, 17.36s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 269/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  90%|████████▉ | 269/300 [17:51<06:27, 12.50s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 270/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  90%|█████████ | 270/300 [17:55<04:58,  9.94s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 271/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  90%|█████████ | 271/300 [17:56<03:29,  7.24s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 272/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  91%|█████████ | 272/300 [18:04<03:32,  7.58s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 273/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  91%|█████████ | 273/300 [18:05<02:30,  5.58s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 274/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  91%|█████████▏| 274/300 [18:06<01:48,  4.18s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 275/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  92%|█████████▏| 275/300 [18:07<01:20,  3.22s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 276/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  92%|█████████▏| 276/300 [18:08<01:01,  2.57s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 277/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  92%|█████████▏| 277/300 [18:09<00:48,  2.12s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 278/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  93%|█████████▎| 278/300 [19:13<07:35, 20.70s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 279/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  93%|█████████▎| 279/300 [19:14<05:09, 14.76s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 280/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  93%|█████████▎| 280/300 [19:15<03:31, 10.60s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 281/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  94%|█████████▎| 281/300 [19:16<02:29,  7.88s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 282/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  94%|█████████▍| 282/300 [19:18<01:48,  6.04s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 283/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  94%|█████████▍| 283/300 [19:19<01:17,  4.56s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 284/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  95%|█████████▍| 284/300 [20:22<05:53, 22.07s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 285/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  95%|█████████▌| 285/300 [20:23<03:56, 15.74s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 286/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  95%|█████████▌| 286/300 [20:24<02:38, 11.34s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 287/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  96%|█████████▌| 287/300 [20:25<01:47,  8.29s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 288/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  96%|█████████▌| 288/300 [20:26<01:13,  6.14s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 289/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  96%|█████████▋| 289/300 [20:28<00:51,  4.66s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 290/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  97%|█████████▋| 290/300 [20:30<00:40,  4.08s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 291/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  97%|█████████▋| 291/300 [20:31<00:28,  3.20s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 292/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  97%|█████████▋| 292/300 [20:33<00:21,  2.73s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 293/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  98%|█████████▊| 293/300 [20:34<00:15,  2.23s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 294/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  98%|█████████▊| 294/300 [20:36<00:12,  2.03s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 295/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  98%|█████████▊| 295/300 [20:37<00:08,  1.72s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 296/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  99%|█████████▊| 296/300 [20:38<00:06,  1.53s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 297/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  99%|█████████▉| 297/300 [20:39<00:04,  1.38s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 298/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples:  99%|█████████▉| 298/300 [20:40<00:02,  1.27s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 299/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples: 100%|█████████▉| 299/300 [20:41<00:01,  1.27s/it]

✓ Chat history cleared
Start Skill Selection Phase for Sample 300/300
✓ Chat history cleared
✓ Chat history cleared
✓ Chat history cleared


Processing samples: 100%|██████████| 300/300 [20:42<00:00,  4.14s/it]

✓ Chat history cleared

All 300 samples processed. Results saved to: /home/snt/projects_lujun/LabAgentSkill/assets/results/imdb_300_standard_gemma-3-270m-it_20260211_181940.jsonl





In [13]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score  # F1 supports macro/weighted averages [web:2]

results_df = pd.read_json(jsonl_path, lines=True)

ESTIMATED_GPU_RAM_GB = 44.3

for task_type, g in results_df.groupby("task_type", dropna=False):

    # --- ACC / F1: exclude "unknown" predictions ---
    pred_lower = g["predicted_label"].astype(str).str.lower()
    valid_mask = pred_lower.ne("unknown")
    valid_df = g[valid_mask]
    unknown_count = int((~valid_mask).sum())

    if len(valid_df) > 0:
        y_true = valid_df["true_label"].astype(str).str.lower()
        y_pred = valid_df["predicted_label"].astype(str).str.lower()

        acc = accuracy_score(y_true, y_pred)
        f1_macro = f1_score(y_true, y_pred, average="macro", zero_division=0)      # macro avg [web:2]
        f1_weighted = f1_score(y_true, y_pred, average="weighted", zero_division=0)  # weighted avg [web:2]
        correct = int((y_true == y_pred).sum())
        denom = len(valid_df)
    else:
        acc = float("nan")
        f1_macro = float("nan")
        f1_weighted = float("nan")
        correct = 0
        denom = 0

    # --- Hit Rate: count non-empty hit_target_skill ---
    hit_count = int(g["hit_target_skill"].fillna("").astype(str).eq("True").sum())
    hit_rate = hit_count / len(g) if len(g) > 0 else float("nan")

    # --- VRAM-Hours (GB·h): VRAM(GB) * time(hours) ---
    total_time_sec = g["elapsed_seconds"].fillna(0).astype(float).sum()
    total_minutes = (total_time_sec / 60.0)
    total_vram_minutes = total_minutes * ESTIMATED_GPU_RAM_GB
    avg_minutes= total_minutes / len(g) if len(g) > 0 else float("nan")
    avg_vram_minutes = total_vram_minutes / len(g) if len(g) > 0 else float("nan")
    # --- Print only the requested metrics ---
    # print(f"[task_type={task_type}] "
    #       f"ACC={acc:.4f} ({correct}/{denom}, unknown_excluded={unknown_count}) | "
    #       f"F1_weighted={f1_weighted:.4f} | "
    #       f"HitRate={hit_rate:.4f} ({hit_count}/{len(g)}) | "
    #       f"AvgHours={avg_hours:.4f} | AvgVRAM-Hours={avg_vram_hours:.4f} GB·h"
    # )

    print (f"{task_type} {acc:.4f} {f1_weighted:.4f} {hit_rate:.4f} {avg_minutes:.4f} {avg_vram_minutes:.4f}")


agent_simple 0.6373 0.6093 0.0000 0.0024 0.1072
agent_skill_based 0.6667 0.6268 0.0000 0.0063 0.2813
agent_skill_full_context 0.5000 0.4176 0.0000 0.0600 2.6562


In [8]:
results_df.hit_target_skill.value_counts(dropna=False)

hit_target_skill
         600
False    300
Name: count, dtype: int64