In [1]:
import pandas as pd
from pathlib import Path

# Dataset folder layout
base_folder = Path("MABe-mouse-behavior-detection")
tracking_folder = base_folder.joinpath("train_tracking")
annotation_folder = base_folder.joinpath("train_annotation")

# Metadata table read from train.csv in the dataset root
df_meta = pd.read_csv(base_folder.joinpath("train.csv"))

# Destination for the processed per-video files; created if it is missing
output_folder = base_folder.joinpath("processed_videos")
output_folder.mkdir(exist_ok=True)

def load_parquet(file_path: Path, engine: str = "fastparquet") -> pd.DataFrame:
    """Read a parquet file into a DataFrame.

    Args:
        file_path: Location of the parquet file.
        engine: Parquet backend handed to ``pandas.read_parquet``. Defaults
            to ``"fastparquet"``, the backend this helper has always used;
            pass ``"pyarrow"`` or ``"auto"`` where fastparquet is not
            installed.

    Returns:
        The contents of the file as a DataFrame.
    """
    return pd.read_parquet(file_path, engine=engine)

def extract_video_id(file_path: Path) -> int:
    """Return the video id encoded in a file name.

    The id is the file name without its final suffix, parsed as an integer;
    a name that is not a valid integer raises ``ValueError``.
    """
    stem = file_path.stem
    return int(stem)

def pivot_tracking(df_track: pd.DataFrame) -> pd.DataFrame:
    """Reshape long-format tracking into wide format.

    The result has one row per frame and one column per coordinate, named
    ``<mouse_id>_<bodypart>_x`` / ``<mouse_id>_<bodypart>_y``; all x columns
    come first, followed by all y columns. The ``video_frame`` column is
    exposed as ``frame``. Duplicate (frame, column) entries are resolved with
    pandas' ``first`` aggregation.
    """
    # rename() hands back a new frame, so the helper columns added below
    # never appear on the caller's DataFrame.
    long_df = df_track.rename(columns={"video_frame": "frame"})

    # Shared "<mouse_id>_<bodypart>_" prefix for both coordinate labels
    prefix = long_df["mouse_id"].astype(str) + "_" + long_df["bodypart"] + "_"
    long_df["col_x"] = prefix + "x"
    long_df["col_y"] = prefix + "y"

    # One pivot per coordinate axis, x before y
    wide_parts = [
        long_df.pivot_table(
            index="frame",
            columns=f"col_{axis}",
            values=axis,
            aggfunc="first",
        )
        for axis in ("x", "y")
    ]

    combined = pd.concat(wide_parts, axis=1)
    return combined.reset_index()

def expand_annotations(df_ann: pd.DataFrame) -> pd.DataFrame:
    """Expand annotation intervals into one row per annotated frame.

    Args:
        df_ann: Table with ``start_frame``, ``stop_frame`` and ``action``
            columns. ``stop_frame`` is treated as inclusive. The columns are
            only read when the table has at least one row.

    Returns:
        A DataFrame with columns ``frame`` and ``behavior``, in input row
        order. Overlapping intervals yield one row per (frame, action) pair.
        The two columns are always present, even when nothing is expanded.
    """
    result_columns = ["frame", "behavior"]
    if df_ann.empty:
        return pd.DataFrame(columns=result_columns)

    frames = []
    behaviors = []
    intervals = zip(df_ann["start_frame"], df_ann["stop_frame"], df_ann["action"])
    for start, stop, action in intervals:
        span = range(int(start), int(stop) + 1)
        frames.extend(span)
        behaviors.extend([action] * len(span))

    # Passing the column names explicitly keeps the schema stable when every
    # interval is empty (stop < start); without it the result would have no
    # "frame" column and a later merge on "frame" would fail.
    return pd.DataFrame({"frame": frames, "behavior": behaviors}, columns=result_columns)

# Walk every lab directory under the tracking folder
for lab_folder in tracking_folder.iterdir():
    if not lab_folder.is_dir():
        continue

    print(f"ðŸ“‚ Processando laboratÃ³rio {lab_folder.name}...")

    tracking_files = list(lab_folder.glob("*.parquet"))
    annotation_lab_folder = annotation_folder / lab_folder.name
    annotation_files = list(annotation_lab_folder.glob("*.parquet"))

    # Annotation table of each video in this lab, keyed by video id
    videoid_to_annotation = {}
    for annotation_file in annotation_files:
        annotation_vid = extract_video_id(annotation_file)
        videoid_to_annotation[annotation_vid] = load_parquet(annotation_file)

    for track_file in tracking_files:
        vid = extract_video_id(track_file)
        df_track_raw = load_parquet(track_file)

        # Wide layout: one row per frame
        df_track = pivot_tracking(df_track_raw)

        # Per-frame behavior labels; a video without an annotation file
        # falls back to an empty annotation table
        if vid in videoid_to_annotation:
            df_ann = videoid_to_annotation[vid]
        else:
            df_ann = pd.DataFrame(columns=['start_frame','stop_frame','action'])
        df_ann_expanded = expand_annotations(df_ann)

        # Left join keeps every tracked frame, labelled or not
        df_merged = df_track.merge(df_ann_expanded, on="frame", how="left")

        # Broadcast the first matching metadata row onto every frame row,
        # leaving columns that already exist untouched
        meta_row = df_meta.loc[df_meta["video_id"] == vid]
        if len(meta_row) > 0:
            for col in df_meta.columns:
                if col in df_merged.columns:
                    continue
                df_merged[col] = meta_row.iloc[0][col]

        # Write this video's processed table to its own file
        df_merged.to_parquet(output_folder / f"{vid}_processed.parquet", index=False)
        print(f"âœ… VÃ­deo {vid} processado e salvo em {output_folder}")


ðŸ“‚ Processando laboratÃ³rio NiftyGoldfinch...
âœ… VÃ­deo 1233426234 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 2103620137 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 984382096 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 425399867 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 1654932902 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 101686631 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 1223664597 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 1269061587 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 1705186224 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 960582355 processado e salvo em MABe-mouse-behavior-detection/processed_videos
âœ… VÃ­deo 1539947238 processado e salvo em MA