# Ability Analysis Notebook

This notebook loads `metrics.log` and provides a minimal starting point for analysing
session-level interview reports and media-level multimodal features.


In [None]:
import json
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

# Path to metrics.log (adjust if you run from a different working directory)
log_path = Path("../metrics.log")


def load_metric_rows(path):
    """Read a JSON-lines metrics log and return ``(rows, skipped)``.

    Args:
        path: ``pathlib.Path`` of the log file. Each non-blank line is expected
            to hold one JSON object.

    Returns:
        A tuple ``(rows, skipped)``. ``rows`` is the list of decoded JSON
        objects (dicts) in file order. ``skipped`` counts the non-blank lines
        that were dropped because they were not valid JSON or did not decode
        to an object. A missing file yields ``([], 0)``.
    """
    loaded = []
    skipped = 0
    if not path.exists():
        return loaded, skipped

    with path.open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort loading: keep going, but remember that we dropped data.
                skipped += 1
                continue
            # Valid JSON that is not an object (e.g. a bare number or string) cannot
            # become a metrics row; pd.json_normalize is documented for dict records.
            if isinstance(record, dict):
                loaded.append(record)
            else:
                skipped += 1
    return loaded, skipped


if not log_path.exists():
    # Make a wrong working directory visible instead of silently analysing nothing.
    print(f"metrics log not found at {log_path.resolve()}; continuing with no rows")

rows, skipped_lines = load_metric_rows(log_path)

print(f"Loaded {len(rows)} metric rows")
if skipped_lines:
    print(f"Skipped {skipped_lines} malformed or non-object lines")

# Flatten nested objects into dotted column names (e.g. "evaluation.dimensions").
df_raw = pd.json_normalize(rows)
df_raw.head()


In [None]:
# Session-level dimension score distribution example
#
# Builds one row per (session, dimension) from the nested "evaluation.dimensions"
# lists of session reports, then plots the score histogram for one dimension.

# An empty log yields a frame with no columns at all, so guard the "type" lookup
# instead of letting it raise KeyError.
if "type" in df_raw.columns:
    df_session = df_raw[df_raw["type"] == "session_report"].copy()
else:
    df_session = df_raw.iloc[0:0].copy()

dim_columns = ["session_id", "dimension_name", "dimension_score"]

# Explode evaluation.dimensions into rows if present
if {"session_id", "evaluation.dimensions"} <= set(df_session.columns):
    dims_exploded = df_session[["session_id", "evaluation.dimensions"]].explode(
        "evaluation.dimensions"
    )
    # Sessions without dimensions explode to NaN; keep only real dimension objects
    # so json_normalize receives dicts exclusively.
    is_dimension = dims_exploded["evaluation.dimensions"].map(
        lambda entry: isinstance(entry, dict)
    )
    # explode() repeats the parent index while json_normalize() returns a fresh
    # 0..n-1 index; reset so the column-wise concat lines up row for row.
    dims_exploded = dims_exploded[is_dimension].reset_index(drop=True)
    dimension_fields = pd.json_normalize(dims_exploded["evaluation.dimensions"].tolist())
    dims_exploded = pd.concat(
        [dims_exploded.drop(columns=["evaluation.dimensions"]), dimension_fields],
        axis=1,
    ).rename(columns={"name": "dimension_name", "score": "dimension_score"})
    # Guarantee the columns used below exist even if no entry carried name/score.
    absent = [col for col in dim_columns if col not in dims_exploded.columns]
    if absent:
        dims_exploded = dims_exploded.reindex(columns=[*dims_exploded.columns, *absent])
else:
    dims_exploded = pd.DataFrame(columns=dim_columns)

target_dimension = "结构化表达（STAR）"

mask = dims_exploded["dimension_name"] == target_dimension
# Coerce so that scores logged as strings (or junk) do not break the histogram.
scores = pd.to_numeric(dims_exploded.loc[mask, "dimension_score"], errors="coerce").dropna()

if scores.empty:
    print(f"No scores found for dimension {target_dimension!r}; nothing to plot")
else:
    plt.figure(figsize=(6, 4))
    plt.hist(scores, bins=10, edgecolor="black")
    plt.title(f"{target_dimension} score distribution")
    plt.xlabel("Score")
    plt.ylabel("Count")
    plt.show()


In [None]:
# Example: histogram of a text-related diagnostic (filler_ratio), drawn only when
# the session reports actually carry that flattened column.

filler_col = "diagnostics.filler_ratio"

if filler_col in df_session.columns:
    # Drop sessions where the diagnostic was not logged before plotting.
    filler_values = df_session[filler_col].dropna()

    plt.figure(figsize=(6, 4))
    # Series.hist draws onto the figure that was just made current.
    filler_values.hist(bins=10, edgecolor="black")
    plt.title("Filler ratio distribution")
    plt.xlabel("Filler ratio")
    plt.ylabel("Count")
    plt.show()
