In [3]:
%pip install torch --index-url https://download.pytorch.org/whl/cu121  # adjust cu version
%pip install sentence-transformers cross-encoder


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Looking in indexes: https://download.pytorch.org/whl/cu121
Note: you may need to restart the kernel to use updated packages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[31mERROR: Could not find a version that satisfies the requirement cross-encoder (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for cross-encoder[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
import json
import torch
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict
from sentence_transformers import SentenceTransformer, CrossEncoder
 # assuming the import path; your original used from sentence_transformers import CrossEncoder but typically it's from cross_encoder

# --- Device setup ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- Load data ---
with open('speech_all(3).json', 'r') as f:
    data = json.load(f)

# --- Define skills (unchanged) ---
SKILLS = {
    "perceptual": [
        {"name": "Speaker Identification", "description": "Recognizing, counting, or detecting speakers or speech events are present in audio.", "example": "How many speakers are there in the audio?"},
        {"name": "Speaker Demographics", "description": "Identifying characteristics of speakers like age, sex, or background.", "example": "How old is the second speaker?"},
        {"name": "Language Identification", "description": "Determining the language, dialect, or accent spoken.", "example": "Based how they talk, where are the interviewees from?"},
        {"name": "Lexical and Phrase-Level Recognition", "description": "Identifying words and short phrases accurately from speech.", "example": "What is the term the speaker uses to describe themselves?"},
        {"name": "Prosody Detection", "description": "Recognizing rhythm, stress, intonation, and emphasis in speech.", "example": "What word sounds important in the answer to the question?"},
        {"name": "Paralinguistic/Emotion Recognition", "description": "Detecting emotions or non-verbal cues from voice.", "example": "How confidently does the interviewee answer the question?"},
        {"name": "Speech Activity, Turn-Taking and Overlap Detection", "description": "Identifying who speaks when, how turns are exchanged, and detecting overlapping speech.", "example": "Who decides to end the conversation?"},
        {"name": "Audio Quality, Artifacts & Channel Characteristics", "description": "Recognizing sound quality issues, background noise, or distortions to speech.", "example": "Based on how they talk, what illness does the speaker have?"}
    ],
    "reasoning": [
        {"name": "Social Role and Relationship Inference", "description": "Inferring relationships among speakers.", "example": "What is the nickname given to the friend of the first speaker"},
        {"name": "Speaker Intent, Pragmatics and Causal Reasoning", "description": "Understanding why something was said, implied meanings, and cause-effect relationships.", "example": "Why does the first speaker slowly repeat everything the second speaker says"},
        {"name": "Quantitative Reasoning (Counting/Arithmetic Comparison)", "description": "Using numbers, counting, and basic math to understand spoken information.", "example": "How many times does the speaker use foul language?"},
        {"name": "Temporal and Ordering Reasoning", "description": "Understanding sequence, timing, and chronological order of events described.", "example": "Does the man speak before the woman in this clip?"},
        {"name": "Logical/Consistency Reasoning", "description": "Recognizing logical sequences/inconsistencies within spoken content.", "example": "How did the speaker misinterpret the directions given by the GPS?"},
        {"name": "Cross-frontier Entity Linking", "description": "Connecting spoken references to external entities or concepts beyond the immediate context.", "example": "What does the first speaker do that implies they are the CEO of the company?"},
        {"name": "Ground Truth and World Knowledge Integration", "description": "Using general knowledge to interpret and verify spoken content.", "example": "What is the capital of the country mentioned by the speaker?"},
        {"name": "Contextual/Causal Scenario Reasoning", "description": "Understanding situations or events described, including cause-and-effect relationships within a scenario.", "example": "What would happen to the second speaker if the things the first speaker says happen"},
        {"name": "Semantic Abstraction and Summarization", "description": "Identifying main ideas, themes, or concise summaries from spoken content.", "example": "What is the meaning of the poem read by the speaker?"},
        {"name": "Comparative and Preference-Based Judgments", "description": "Evaluating and comparing spoken information, identifying preferences or rankings.", "example": "Which of the phrases mentioned by the speaker is the shortest"}
    ]
}

# --- Initialize models on GPU ---
nli_model = CrossEncoder('cross-encoder/nli-deberta-base', device=device)  # ensure CrossEncoder supports device arg
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)

# --- Precompute skill embedding mapping ---
def skill_repr(skill):
    return f"{skill['name']}: {skill['description']}"

perc_skill_texts = [skill_repr(s) for s in SKILLS["perceptual"]]
reas_skill_texts = [skill_repr(s) for s in SKILLS["reasoning"]]
# Normalize embeddings (SentenceTransformer with normalize_embeddings=True in encode call)
perc_embeddings = embed_model.encode(perc_skill_texts, normalize_embeddings=True)
reas_embeddings = embed_model.encode(reas_skill_texts, normalize_embeddings=True)
perc_names = [s["name"] for s in SKILLS["perceptual"]]
reas_names = [s["name"] for s in SKILLS["reasoning"]]

def map_human_labels_to_defined(human_labels, category):
    if category == "perceptual":
        target_embeddings = perc_embeddings
        target_names = perc_names
    else:
        target_embeddings = reas_embeddings
        target_names = reas_names

    mapped = set()
    for hl in human_labels or []:
        hl_emb = embed_model.encode(hl, normalize_embeddings=True)
        sims = np.dot(target_embeddings, hl_emb)
        best_idx = int(np.argmax(sims))
        mapped.add(target_names[best_idx])
    return mapped

# --- Batched entailment scoring helper ---
def batch_entailment_scores(premises, hypotheses, batch_size=32):
    """
    Both inputs are lists of equal length; returns list of entailment probabilities.
    Assumes the NLI model outputs logits in order [contradiction, neutral, entailment].
    """
    assert len(premises) == len(hypotheses)
    pairs = list(zip(premises, hypotheses))
    entailment_probs = []
    with torch.no_grad():
        # cross-encoder.predict accepts list of tuples
        for i in range(0, len(pairs), batch_size):
            batch = pairs[i : i + batch_size]
            logits = nli_model.predict(batch, batch_size=len(batch))  # we control inner batching
            probs = torch.softmax(torch.tensor(logits), dim=-1)
            # extract entailment index 2
            entailment_probs.extend([p[2].item() for p in probs])
    return entailment_probs

# --- Helper to pick best skill from aux label ---
def best_skill_from_cat_label(cat_label, skills_list, category_name):
    if not cat_label:
        return None
    premise = f"The labeled {category_name} category is '{cat_label}'."
    hypotheses = []
    skill_names = []
    for skill in skills_list:
        if category_name == "perceptual":
            hyp = f"This requires perceptual skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        else:
            hyp = f"This requires reasoning skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        hypotheses.append(hyp)
        skill_names.append(skill["name"])
    premises = [premise] * len(hypotheses)
    scores = batch_entailment_scores(premises, hypotheses, batch_size=16)
    best_idx = int(np.argmax(scores))
    return skill_names[best_idx]

# --- Thresholds ---
perc_thresh = 0.95
reas_thresh = 0.99

# --- Main loop with batching per entry ---
tagged_data = []

for entry in tqdm(data, desc="Tagging entries"):
    qid = entry.get("id")
    qa_text = entry.get("question", "") + " " + entry.get("answer", "")

    # human-labeled mapped to defined skill names
    human_perc = map_human_labels_to_defined(entry.get("perceptual_skills", []), "perceptual")
    human_reas = map_human_labels_to_defined(entry.get("reasoning_skills", []), "reasoning")

    perceptual_skills = set()
    reasoning_skills = set()

    # Prepare NLI pairs for defined skills
    perc_premises = []
    perc_hypotheses = []
    for skill in SKILLS["perceptual"]:
        hypothesis = f"This requires perceptual skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        perc_premises.append(qa_text)
        perc_hypotheses.append(hypothesis)

    reas_premises = []
    reas_hypotheses = []
    for skill in SKILLS["reasoning"]:
        hypothesis = f"This requires reasoning skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        reas_premises.append(qa_text)
        reas_hypotheses.append(hypothesis)

    # Get scores in batch
    perc_scores_list = batch_entailment_scores(perc_premises, perc_hypotheses, batch_size=16)
    reas_scores_list = batch_entailment_scores(reas_premises, reas_hypotheses, batch_size=16)

    perc_scores = list(zip([s["name"] for s in SKILLS["perceptual"]], perc_scores_list))
    reas_scores = list(zip([s["name"] for s in SKILLS["reasoning"]], reas_scores_list))

    for name, score in perc_scores:
        if score > perc_thresh:
            perceptual_skills.add(name)

    for name, score in reas_scores:
        if score > reas_thresh:
            reasoning_skills.add(name)

    # include human-mapped ones
    perceptual_skills |= human_perc
    reasoning_skills |= human_reas

    # cat_p / cat_r enrichment from aux
    aux = entry.get("aux", {})
    cat_p = aux.get("cat_p")
    cat_r = aux.get("cat_r")

    best_from_cat_p = best_skill_from_cat_label(cat_p, SKILLS["perceptual"], "perceptual")
    if best_from_cat_p:
        perceptual_skills.add(best_from_cat_p)
    best_from_cat_r = best_skill_from_cat_label(cat_r, SKILLS["reasoning"], "reasoning")
    if best_from_cat_r:
        reasoning_skills.add(best_from_cat_r)

    # fallback: ensure at least one per category
    if not perceptual_skills:
        best_perc = max(perc_scores, key=lambda x: x[1])[0]
        perceptual_skills.add(best_perc)
    if not reasoning_skills:
        best_reas = max(reas_scores, key=lambda x: x[1])[0]
        reasoning_skills.add(best_reas)

    tagged_data.append({
        "id": qid,
        "question_answer": qa_text,
        "perceptual_skills": sorted(list(perceptual_skills)),
        "reasoning_skills": sorted(list(reasoning_skills)),
        "human_mapped_perceptual": sorted(list(human_perc)),
        "human_mapped_reasoning": sorted(list(human_reas)),
        "cat_p": cat_p,
        "cat_r": cat_r,
        "best_from_cat_p": best_from_cat_p,
        "best_from_cat_r": best_from_cat_r,
        "perceptual_scores": {name: sc for name, sc in perc_scores},
        "reasoning_scores": {name: sc for name, sc in reas_scores},
    })

# --- Save output ---
with open('tagged_ALL.json', 'w') as f:
    json.dump(tagged_data, f, indent=2)

print("Skill tagging complete and results saved.")


Using device: cuda


Tagging entries:   0%|          | 0/1143 [00:00<?, ?it/s]

Skill tagging complete and results saved.


In [18]:
import json
import torch
from tqdm.auto import tqdm
import numpy as np
from collections import defaultdict
from sentence_transformers import SentenceTransformer, CrossEncoder
 # assuming the import path; your original used from sentence_transformers import CrossEncoder but typically it's from cross_encoder

# --- Device setup ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- Load data ---
with open('speech_all(3).json', 'r') as f:
    data = json.load(f)

# --- Define skills (unchanged) ---
SKILLS = {
    "perceptual": [
        {"name": "Speaker Identification", "description": "Recognizing, counting, or detecting speakers or speech events are present in audio.", "example": "How many speakers are there in the audio?"},
        {"name": "Speaker Demographics", "description": "Identifying characteristics of speakers like age, sex, or background.", "example": "How old is the second speaker?"},
        {"name": "Language Identification", "description": "Determining the language, dialect, or accent spoken.", "example": "Based how they talk, where are the interviewees from?"},
        {"name": "Lexical and Phrase-Level Recognition", "description": "Identifying words and short phrases accurately from speech.", "example": "What is the term the speaker uses to describe themselves?"},
        {"name": "Prosody Detection", "description": "Recognizing rhythm, stress, intonation, and emphasis in speech.", "example": "What word sounds important in the answer to the question?"},
        {"name": "Paralinguistic/Emotion Recognition", "description": "Detecting emotions or non-verbal cues from voice.", "example": "How confidently does the interviewee answer the question?"},
        {"name": "Speech Activity, Turn-Taking and Overlap Detection", "description": "Identifying who speaks when, how turns are exchanged, and detecting overlapping speech.", "example": "Who decides to end the conversation?"},
        {"name": "Audio Quality, Artifacts & Channel Characteristics", "description": "Recognizing sound quality issues, background noise, or distortions to speech.", "example": "Based on how they talk, what illness does the speaker have?"}
    ],
    "reasoning": [
        {"name": "Social Role and Relationship Inference", "description": "Inferring relationships among speakers.", "example": "What is the nickname given to the friend of the first speaker"},
        {"name": "Speaker Intent, Pragmatics and Causal Reasoning", "description": "Understanding why something was said, implied meanings, and cause-effect relationships.", "example": "Why does the first speaker slowly repeat everything the second speaker says"},
        {"name": "Quantitative Reasoning (Counting/Arithmetic Comparison)", "description": "Using numbers, counting, and basic math to understand spoken information.", "example": "How many times does the speaker use foul language?"},
        {"name": "Temporal and Ordering Reasoning", "description": "Understanding sequence, timing, and chronological order of events described.", "example": "Does the man speak before the woman in this clip?"},
        {"name": "Logical/Consistency Reasoning", "description": "Recognizing logical sequences/inconsistencies within spoken content.", "example": "How did the speaker misinterpret the directions given by the GPS?"},
        {"name": "Cross-frontier Entity Linking", "description": "Connecting spoken references to external entities or concepts beyond the immediate context.", "example": "What does the first speaker do that implies they are the CEO of the company?"},
        {"name": "Ground Truth and World Knowledge Integration", "description": "Using general knowledge to interpret and verify spoken content.", "example": "What is the capital of the country mentioned by the speaker?"},
        {"name": "Contextual/Causal Scenario Reasoning", "description": "Understanding situations or events described, including cause-and-effect relationships within a scenario.", "example": "What would happen to the second speaker if the things the first speaker says happen"},
        {"name": "Semantic Abstraction and Summarization", "description": "Identifying main ideas, themes, or concise summaries from spoken content.", "example": "What is the meaning of the poem read by the speaker?"},
        {"name": "Comparative and Preference-Based Judgments", "description": "Evaluating and comparing spoken information, identifying preferences or rankings.", "example": "Which of the phrases mentioned by the speaker is the shortest"}
    ]
}

# --- Initialize models on GPU ---
nli_model = CrossEncoder('cross-encoder/nli-deberta-base', device=device)  # ensure CrossEncoder supports device arg
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)

# --- Precompute skill embedding mapping ---
def skill_repr(skill):
    return f"{skill['name']}: {skill['description']}"

perc_skill_texts = [skill_repr(s) for s in SKILLS["perceptual"]]
reas_skill_texts = [skill_repr(s) for s in SKILLS["reasoning"]]
# Normalize embeddings (SentenceTransformer with normalize_embeddings=True in encode call)
perc_embeddings = embed_model.encode(perc_skill_texts, normalize_embeddings=True)
reas_embeddings = embed_model.encode(reas_skill_texts, normalize_embeddings=True)
perc_names = [s["name"] for s in SKILLS["perceptual"]]
reas_names = [s["name"] for s in SKILLS["reasoning"]]

def map_human_labels_to_defined(human_labels, category):
    if category == "perceptual":
        target_embeddings = perc_embeddings
        target_names = perc_names
    else:
        target_embeddings = reas_embeddings
        target_names = reas_names

    mapped = set()
    for hl in human_labels or []:
        # Replace excluded skill with closest alternative
        if hl == "Paralinguistic/Emotion Recognition" and category == "perceptual":
            hl_emb = embed_model.encode(hl, normalize_embeddings=True)
            sims = np.dot(target_embeddings, hl_emb)
            # Mask out the Paralinguistic/Emotion Recognition index
            excluded_idx = target_names.index("Paralinguistic/Emotion Recognition")
            sims[excluded_idx] = -np.inf
            best_idx = int(np.argmax(sims))
            mapped.add(target_names[best_idx])
        else:
            hl_emb = embed_model.encode(hl, normalize_embeddings=True)
            sims = np.dot(target_embeddings, hl_emb)
            best_idx = int(np.argmax(sims))
            mapped.add(target_names[best_idx])
    return mapped


# --- Batched entailment scoring helper ---
def batch_entailment_scores(premises, hypotheses, batch_size=32):
    """
    Both inputs are lists of equal length; returns list of entailment probabilities.
    Assumes the NLI model outputs logits in order [contradiction, neutral, entailment].
    """
    assert len(premises) == len(hypotheses)
    pairs = list(zip(premises, hypotheses))
    entailment_probs = []
    with torch.no_grad():
        # cross-encoder.predict accepts list of tuples
        for i in range(0, len(pairs), batch_size):
            batch = pairs[i : i + batch_size]
            logits = nli_model.predict(batch, batch_size=len(batch))  # we control inner batching
            probs = torch.softmax(torch.tensor(logits), dim=-1)
            # extract entailment index 2
            entailment_probs.extend([p[2].item() for p in probs])
    return entailment_probs

# --- Helper to pick best skill from aux label ---
def best_skill_from_cat_label(cat_label, skills_list, category_name):
    if not cat_label:
        return None
    premise = f"The labeled {category_name} category is '{cat_label}'."
    hypotheses = []
    skill_names = []
    for skill in skills_list:
        if category_name == "perceptual":
            hyp = f"This requires perceptual skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        else:
            hyp = f"This requires reasoning skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        hypotheses.append(hyp)
        skill_names.append(skill["name"])
    premises = [premise] * len(hypotheses)
    scores = batch_entailment_scores(premises, hypotheses, batch_size=16)
    best_idx = int(np.argmax(scores))
    return skill_names[best_idx]

# --- Thresholds ---
perc_thresh = 0.94
reas_thresh = 0.99

# --- Main loop with batching per entry ---
tagged_data = []

for entry in tqdm(data, desc="Tagging entries"):
    qid = entry.get("id")
    qa_text = entry.get("question", "") + " " + entry.get("answer", "")

    # human-labeled mapped to defined skill names
    human_perc = map_human_labels_to_defined(entry.get("perceptual_skills", []), "perceptual")
    human_reas = map_human_labels_to_defined(entry.get("reasoning_skills", []), "reasoning")

    perceptual_skills = set()
    reasoning_skills = set()

    # Prepare NLI pairs for defined skills
    perc_premises = []
    perc_hypotheses = []
    for skill in SKILLS["perceptual"]:
        hypothesis = f"This requires perceptual skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        perc_premises.append(qa_text)
        perc_hypotheses.append(hypothesis)

    reas_premises = []
    reas_hypotheses = []
    for skill in SKILLS["reasoning"]:
        hypothesis = f"This requires reasoning skill: {skill['name']}. {skill['description']} Example: {skill['example']}"
        reas_premises.append(qa_text)
        reas_hypotheses.append(hypothesis)

    # Get scores in batch
    perc_scores_list = batch_entailment_scores(perc_premises, perc_hypotheses, batch_size=16)
    reas_scores_list = batch_entailment_scores(reas_premises, reas_hypotheses, batch_size=16)

    perc_scores = list(zip([s["name"] for s in SKILLS["perceptual"]], perc_scores_list))
    reas_scores = list(zip([s["name"] for s in SKILLS["reasoning"]], reas_scores_list))

    for name, score in perc_scores:
        if score > perc_thresh:
            perceptual_skills.add(name)

    for name, score in reas_scores:
        if score > reas_thresh:
            reasoning_skills.add(name)

    # include human-mapped ones
    perceptual_skills |= human_perc
    reasoning_skills |= human_reas

        # --- cat_p / cat_r enrichment from aux, splitting comma-separated labels ---
    aux = entry.get("aux", {})

    raw_cat_p = aux.get("cat_p")
    raw_cat_r = aux.get("cat_r")

    # Split comma-separated if present, trim whitespace, ignore empties
    def split_labels(raw):
        if not raw:
            return []
        if isinstance(raw, str):
            parts = [p.strip() for p in raw.split(",")]
            return [p for p in parts if p]
        if isinstance(raw, list):
            return [str(p).strip() for p in raw if str(p).strip()]
        return []

    cat_p_labels = split_labels(raw_cat_p)
    cat_r_labels = split_labels(raw_cat_r)

    best_from_cat_p = None
    best_from_cat_r = None

    # For each label, pick best matching perceptual/reasoning skill and add
    for label in cat_p_labels:
        inferred = best_skill_from_cat_label(label, SKILLS["perceptual"], "perceptual")
        if inferred:
            perceptual_skills.add(inferred)
            best_from_cat_p = best_from_cat_p or inferred  # keep first for reporting

    for label in cat_r_labels:
        inferred = best_skill_from_cat_label(label, SKILLS["reasoning"], "reasoning")
        if inferred:
            reasoning_skills.add(inferred)
            best_from_cat_r = best_from_cat_r or inferred

    # Store the original (list) cat_p/cat_r for output
    cat_p = cat_p_labels if cat_p_labels else None
    cat_r = cat_r_labels if cat_r_labels else None


    # fallback: ensure at least one per category
    if not perceptual_skills:
        if not perceptual_skills:
            sorted_perc = sorted(perc_scores, key=lambda x: x[1], reverse=True)
            for skill_name, _ in sorted_perc:
                if skill_name != "Paralinguistic/Emotion Recognition":
                    perceptual_skills.add(skill_name)
                    break
    if not reasoning_skills:
        best_reas = max(reas_scores, key=lambda x: x[1])[0]
        reasoning_skills.add(best_reas)

    tagged_data.append({
        "id": qid,
        "question_answer": qa_text,
        "perceptual_skills": sorted(list(perceptual_skills)),
        "reasoning_skills": sorted(list(reasoning_skills)),
        "human_mapped_perceptual": sorted(list(human_perc)),
        "human_mapped_reasoning": sorted(list(human_reas)),
        "cat_p": cat_p,
        "cat_r": cat_r,
        "best_from_cat_p": best_from_cat_p,
        "best_from_cat_r": best_from_cat_r,
        "perceptual_scores": {name: sc for name, sc in perc_scores},
        "reasoning_scores": {name: sc for name, sc in reas_scores},
    })

# --- Save output ---
with open('tagged_ALL.json', 'w') as f:
    json.dump(tagged_data, f, indent=2)

print("Skill tagging complete and results saved.")


Using device: cuda


Tagging entries:   0%|          | 0/1143 [00:00<?, ?it/s]

Skill tagging complete and results saved.


In [20]:
from collections import Counter
import json

# --- Load the tagged output ---
with open('tagged_ALL.json', 'r') as f:
    tagged_data = json.load(f)

# --- Initialize counters ---
perceptual_counter = Counter()
reasoning_counter = Counter()

# --- Count skill occurrences ---
for entry in tagged_data:
    perceptual_counter.update(entry.get("perceptual_skills", []))
    reasoning_counter.update(entry.get("reasoning_skills", []))

# --- Display the counts sorted ---
print("\n--- Perceptual Skill Counts ---")
for skill, count in perceptual_counter.most_common():
    print(f"{skill}: {count}")

print("\n--- Reasoning Skill Counts ---")
for skill, count in reasoning_counter.most_common():
    print(f"{skill}: {count}")

# Optionally save to JSON
with open('skill_counts.json', 'w') as f:
    json.dump({
        "perceptual": dict(perceptual_counter),
        "reasoning": dict(reasoning_counter)
    }, f, indent=2)



--- Perceptual Skill Counts ---
Paralinguistic/Emotion Recognition: 1115
Speaker Demographics: 735
Prosody Detection: 448
Speech Activity, Turn-Taking and Overlap Detection: 381
Language Identification: 340
Speaker Identification: 285
Lexical and Phrase-Level Recognition: 230
Audio Quality, Artifacts & Channel Characteristics: 58

--- Reasoning Skill Counts ---
Semantic Abstraction and Summarization: 858
Cross-frontier Entity Linking: 556
Speaker Intent, Pragmatics and Causal Reasoning: 316
Ground Truth and World Knowledge Integration: 299
Social Role and Relationship Inference: 200
Temporal and Ordering Reasoning: 188
Comparative and Preference-Based Judgments: 72
Logical/Consistency Reasoning: 65
Quantitative Reasoning (Counting/Arithmetic Comparison): 8
