In [1]:
from pathlib import Path
import argparse
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from __future__ import annotations
from pathlib import Path
import pandas as pd
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)

In [2]:
def upsample_to_hz(df: pd.DataFrame, behaviour: str, target_hz: int) -> pd.DataFrame:
    """Forward-fill a label column onto a regular ``target_hz`` timeline.

    A regular grid is built from the earliest to the latest value of the
    ``time`` column with a spacing of ``1 / target_hz``. Each grid point takes
    the label of the most recent source row at or before it (backward
    as-of merge), which forward-fills lower-rate labels (the original note
    mentions 10 Hz input) to the target rate.

    Args:
        df: Frame containing a ``time`` column and a ``behaviour`` column.
            Rows do not need to be sorted by time.
        behaviour: Name of the label column to resample.
        target_hz: Sampling rate of the output timeline; must be positive.

    Returns:
        A frame with columns ``time`` (rounded to 3 decimals) and ``label``.

    Raises:
        ValueError: If ``target_hz`` is not positive or ``df`` has no rows.
    """
    if target_hz <= 0:
        raise ValueError(f"target_hz must be positive, got {target_hz}")
    if df.empty:
        raise ValueError("cannot upsample an empty DataFrame")

    # Sort once and cast the key to float: merge_asof requires a sorted
    # right-hand key whose dtype matches the float timeline built below, and
    # the timeline bounds must come from the sorted data, not from row order.
    labels = (
        df[["time", behaviour]]
        .astype({"time": "float64"})
        .sort_values("time")
    )

    step = 1 / target_hz
    min_t, max_t = labels["time"].iloc[0], labels["time"].iloc[-1]
    # The small epsilon keeps the final timestamp when it falls on the grid.
    timeline = np.arange(min_t, max_t + 1e-9, step)

    dense = pd.merge_asof(
        pd.DataFrame({"time": timeline}),
        labels,
        on="time",
        direction="backward",
    )
    # Round so frames upsampled from different files share identical keys
    # (0.000, 0.033, 0.067, ...) and can be joined on "time".
    dense["time"] = dense["time"].round(3)
    return dense.rename(columns={behaviour: "label"})

In [3]:
def load_and_upsample(csv_path: Path, behaviour: str, target_hz: int) -> pd.DataFrame:
    """Read a label CSV and resample its ``behaviour`` column to ``target_hz``.

    Args:
        csv_path: Location of the CSV file to read.
        behaviour: Name of the label column that must be present in the file.
        target_hz: Target sampling rate handed to ``upsample_to_hz``.

    Returns:
        The frame produced by ``upsample_to_hz`` for this file.

    Raises:
        ValueError: If the CSV lacks a ``time`` column or the ``behaviour``
            column.
    """
    frame = pd.read_csv(csv_path)
    has_required = "time" in frame.columns and behaviour in frame.columns
    if has_required:
        return upsample_to_hz(frame, behaviour, target_hz)
    raise ValueError(f"{csv_path} には 'time' または '{behaviour}' 列がありません")

In [4]:
def calc_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Score predictions against ground truth with four classification metrics.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        A dict with keys ``precision``, ``recall``, ``f1_score`` and
        ``accuracy`` (in that order). The first three are computed with
        ``zero_division=0``.
    """
    # The three scorers that accept zero_division share one call shape.
    zero_safe_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1_score", f1_score),
    )
    scores = {
        metric_name: scorer(y_true, y_pred, zero_division=0)
        for metric_name, scorer in zero_safe_scorers
    }
    scores["accuracy"] = accuracy_score(y_true, y_pred)
    return scores

In [5]:
def main(
    file_list,
    behavior="trophalaxis",
    sample_hz=30,
    asoid_dir="../data/asoid_data",
    boris_dir="../data/BORIS_data",
    out_path="../outputs/result.csv",
):
    """Compare predicted and ground-truth labels per file pair and save metrics.

    For every row of ``file_list`` the prediction CSV (element 0, looked up in
    ``asoid_dir``) and the ground-truth CSV (element 1, looked up in
    ``boris_dir``) are upsampled to ``sample_hz``, inner-joined on ``time``
    and scored. One row per file plus a pooled ``ALL`` row is written to
    ``out_path`` and printed.

    Args:
        file_list: Iterable of rows; ``row[0]`` is the prediction file name
            and ``row[1]`` the ground-truth file name.
        behavior: Name of the label column to evaluate in both CSVs.
        sample_hz: Common rate both label tracks are upsampled to.
        asoid_dir: Directory holding the prediction CSVs.
        boris_dir: Directory holding the ground-truth CSVs.
        out_path: Destination of the metrics CSV; its parent directory is
            created when missing.

    Raises:
        RuntimeError: If ``file_list`` yields no rows.
        ValueError: If a file pair shares no timestamps after upsampling.
    """
    rows = []
    all_true, all_pred = [], []

    for file_line in tqdm(file_list):
        asoid_data_path = Path(asoid_dir) / str(file_line[0])
        boris_data_path = Path(boris_dir) / str(file_line[1])

        # Upsample both label tracks to the common rate and align them on time.
        gt_df = load_and_upsample(boris_data_path, behavior, sample_hz)
        pr_df = load_and_upsample(asoid_data_path, behavior, sample_hz)
        merged = gt_df.merge(pr_df, on="time", suffixes=("_gt", "_pred"), how="inner")

        # Scoring an empty overlap would yield meaningless metrics; fail loudly.
        if merged.empty:
            raise ValueError(
                f"no overlapping timestamps between {boris_data_path} "
                f"and {asoid_data_path}"
            )

        y_true = merged["label_gt"].astype(int).to_numpy()
        y_pred = merged["label_pred"].astype(int).to_numpy()

        m = calc_metrics(y_true, y_pred)
        m["file"] = boris_data_path.name
        m["frames"] = len(merged)
        rows.append(m)

        all_true.append(y_true)
        all_pred.append(y_pred)

    if not rows:
        raise RuntimeError("対応するファイルペアが見つかりません")

    # Pool the frames of all files for the overall ("ALL") row.
    y_true_all = np.concatenate(all_true)
    y_pred_all = np.concatenate(all_pred)
    overall = calc_metrics(y_true_all, y_pred_all)
    overall.update({"file": "ALL", "frames": len(y_true_all)})
    rows.append(overall)

    # Write the metrics table to CSV, creating the output directory if needed.
    df_out = pd.DataFrame(rows).set_index("file")
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df_out.to_csv(out_path, float_format="%.6f")
    print(f"Saved metrics → {out_path}")
    print(df_out)

In [6]:
# Each row of file_list.csv names one pair of CSVs to compare; main() reads
# element 0 as the asoid_data file and element 1 as the BORIS_data file.
df_file_list = pd.read_csv('../data/file_list.csv')
file_list = np.array(df_file_list.values.tolist())

In [7]:
# Evaluate every file pair in file_list; main() prints the per-file and
# pooled metrics table and saves it as a CSV.
main(file_list)


  0%|          | 0/3 [00:00<?, ?it/s]

Saved metrics → ../outputs/result.csv
                precision    recall  f1_score  accuracy  frames
file                                                           
20231003_2.csv   0.961429  0.824805  0.887892  0.937224   54145
20231004_2.csv   0.922270  0.855921  0.887858  0.959366   54142
20231005_1.csv   0.977249  0.754362  0.851461  0.957925   54142
ALL              0.952662  0.816467  0.879322  0.951505  162429
