In [1]:
from pathlib import Path
import argparse
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from __future__ import annotations
from pathlib import Path
import pandas as pd
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
)

In [2]:
def upsample_to_hz(df: pd.DataFrame, behaviour: str, target_hz: int) -> pd.DataFrame:
    """Forward-fill 10 Hz ラベル → target_hz."""
    step = 1 / target_hz
    min_t, max_t = df["time"].iloc[0], df["time"].iloc[-1]
    timeline = np.arange(min_t, max_t + 1e-9, step)
    dense = pd.DataFrame({"time": timeline})
    dense = pd.merge_asof(
        dense, df[["time", behaviour]].sort_values("time"), on="time", direction="backward"
    )
    dense["time"] = dense["time"].round(3)  # 0.000, 0.033, 0.067 …
    return dense.rename(columns={behaviour: "label"})

In [3]:
def load_and_upsample(csv_path: Path, behaviour: str, target_hz: int) -> pd.DataFrame:
    df = pd.read_csv(csv_path)
    if "time" not in df or behaviour not in df:
        raise ValueError(f"{csv_path} には 'time' または '{behaviour}' 列がありません")
    return upsample_to_hz(df, behaviour, target_hz)

In [4]:
def calc_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    return {
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "f1_score": f1_score(y_true, y_pred, zero_division=0),
        "accuracy": accuracy_score(y_true, y_pred),
    }

In [5]:
def main(file_list, file_no):

    behavior ="wing_extension"
    sample_hz = 30
    
    rows = []
    all_true, all_pred = [], []
    
    for file_line in tqdm(file_list):
        asoid_data_path = f"../data/asoid_data/{file_line[0]}"
        boris_data_path = f"../data/BORIS_data/{file_line[1]}"

        # アップサンプルしてマージ
        gt_df = load_and_upsample(boris_data_path, behavior, sample_hz)
        pr_df = load_and_upsample(asoid_data_path, behavior, sample_hz)
        merged = gt_df.merge(pr_df, on="time", suffixes=("_gt", "_pred"), how="inner")

        y_true = merged["label_gt"].astype(int).to_numpy()
        y_pred = merged["label_pred"].astype(int).to_numpy()

        m = calc_metrics(y_true, y_pred)
        m["file"] = Path(boris_data_path).name
        m["frames"] = len(merged)
        rows.append(m)

        all_true.append(y_true)
        all_pred.append(y_pred)

    if not rows:
        raise RuntimeError("対応するファイルペアが見つかりません")

    # 全ファイル統合
    y_true_all = np.concatenate(all_true)
    y_pred_all = np.concatenate(all_pred)
    overall = calc_metrics(y_true_all, y_pred_all)
    overall.update({"file": "ALL", "frames": len(y_true_all)})
    rows.append(overall)

    # CSV 出力
    df_out = pd.DataFrame(rows).set_index("file")
    out_path = f"../outputs/result_{file_no}.csv"
    df_out.to_csv(out_path, float_format="%.6f")
    print(f"Saved metrics → {out_path}")
    print(df_out)

In [6]:
df_file_list = pd.read_csv('../data/file_list_1314.csv')
file_list = df_file_list.values.tolist()
file_list = np.array(file_list)
main(file_list, "1314")

  0%|          | 0/2 [00:00<?, ?it/s]

Saved metrics → ../outputs/result_1314.csv
                                               precision    recall  f1_score  \
file                                                                           
cs-h_movie_0013 23-10-27 15-14-00_cropped.csv   0.425647  0.379495  0.401249   
cs-h_movie_0014 23-10-27 15-28-17_cropped.csv   0.704532  0.583457  0.638304   
ALL                                             0.565758  0.485725  0.522696   

                                               accuracy  frames  
file                                                             
cs-h_movie_0013 23-10-27 15-14-00_cropped.csv  0.665875   12630  
cs-h_movie_0014 23-10-27 15-28-17_cropped.csv  0.779043   12120  
ALL                                            0.721293   24750  


In [7]:
df_file_list = pd.read_csv('../data/file_list_1516.csv')
file_list = df_file_list.values.tolist()
file_list = np.array(file_list)
main(file_list, "1516")

  0%|          | 0/2 [00:00<?, ?it/s]

Saved metrics → ../outputs/result_1516.csv
                                               precision    recall  f1_score  \
file                                                                           
cs-h_movie_0015 23-10-27 15-40-40_cropped.csv   0.755322  0.348747  0.477174   
cs-h_movie_0016 23-10-27 15-54-23_cropped.csv   0.944533  0.441852  0.602060   
ALL                                             0.836501  0.388394  0.530482   

                                               accuracy  frames  
file                                                             
cs-h_movie_0015 23-10-27 15-40-40_cropped.csv  0.674336   16950  
cs-h_movie_0016 23-10-27 15-54-23_cropped.csv  0.617949    8190  
ALL                                            0.655967   25140  


In [8]:
df_file_list = pd.read_csv('../data/file_list_1718.csv')
file_list = df_file_list.values.tolist()
file_list = np.array(file_list)
main(file_list, "1718")

  0%|          | 0/2 [00:00<?, ?it/s]

Saved metrics → ../outputs/result_1718.csv
                                               precision    recall  f1_score  \
file                                                                           
cs-h_movie_0017 23-10-27 16-01-24_cropped.csv   0.874070  0.350793  0.500656   
cs-h_movie_0018 23-10-27 16-15-23_cropped.csv   0.892102  0.433243  0.583239   
ALL                                             0.884350  0.393906  0.545041   

                                               accuracy  frames  
file                                                             
cs-h_movie_0017 23-10-27 16-01-24_cropped.csv  0.692323    9900  
cs-h_movie_0018 23-10-27 16-15-23_cropped.csv  0.732428   11040  
ALL                                            0.713467   20940  


In [9]:
df_file_list = pd.read_csv('../data/file_list_1920.csv')
file_list = df_file_list.values.tolist()
file_list = np.array(file_list)
main(file_list, "1920")

  0%|          | 0/2 [00:00<?, ?it/s]

Saved metrics → ../outputs/result_1920.csv
                                               precision    recall  f1_score  \
file                                                                           
cs-h_movie_0019 23-10-27 16-26-54_cropped.csv   0.889948  0.450735  0.598398   
cs-h_movie_0020 23-10-27 16-38-01_cropped.csv   0.988679  0.225862  0.367719   
ALL                                             0.900117  0.405108  0.558745   

                                               accuracy  frames  
file                                                             
cs-h_movie_0019 23-10-27 16-26-54_cropped.csv  0.718960    9810  
cs-h_movie_0020 23-10-27 16-38-01_cropped.csv  0.614957    2340  
ALL                                            0.698930   12150  
