In [1]:
# NOTE: this pipeline proved impractical because 5 seconds of video yields too many images.
import pandas as pd
import os
import subprocess
from tqdm import tqdm

In [2]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================
# --- IMPORTANT: Update these paths before running ---

# Full path to the original source video that was analyzed.
# Review clips are cut from this file with ffmpeg.
SOURCE_VIDEO_PATH = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/sample_vids/StreetQs.mp4"

# Path to the versioned output folder of the last analysis run.
# It must contain "multi_region_filtered_log.csv"; the generated clips and the
# curation spreadsheet are written to a "review_clips" subfolder inside it.
RUN_DIRECTORY = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V6_20250716_112248"

# --- Filtering Options ---
# Emotion labels (matched against the log's 'predicted_label' column) for which
# clips are generated. Set to None to review all logged events.
EMOTIONS_TO_REVIEW = ['contempt', 'disgust']

# Duration of each generated clip, in seconds. A clip starts CLIP_DURATION / 2
# seconds before the event's timestamp (never before 0).
CLIP_DURATION = 5

In [3]:
# ==============================================================================
# 2. SCRIPT LOGIC
# ==============================================================================

def generate_review_clips(run_dir, source_video, emotions_to_review, clip_duration):
    """Cut one review clip per logged event and write a starter curation sheet.

    Reads ``multi_region_filtered_log.csv`` from ``run_dir``, optionally filters
    its rows by ``predicted_label``, and runs ``ffmpeg`` once per remaining row
    to cut a clip out of ``source_video``. The clips and a CSV named
    ``manual_curation_starter_sheet.csv`` are written to
    ``<run_dir>/review_clips``. The sheet lists only clips that ffmpeg actually
    produced, so every row points at an existing file.

    Args:
        run_dir: Directory holding the log; outputs go to its ``review_clips``
            subdirectory (created if needed).
        source_video: Path of the video passed to ffmpeg as ``-i``.
        emotions_to_review: Collection of labels to keep. Any falsy value
            (``None`` or an empty list) keeps every row of the log.
        clip_duration: Clip length in seconds. Each clip starts
            ``clip_duration / 2`` seconds before the row's
            ``timestamp_seconds``, clamped to 0.

    Returns:
        None. Progress, per-clip failures and the final status are printed.

    Raises:
        FileNotFoundError: Propagated from ``subprocess.run`` when the
            ``ffmpeg`` executable cannot be launched.
    """
    log_path = os.path.join(run_dir, "multi_region_filtered_log.csv")
    if not os.path.exists(log_path):
        print(f"❌ Error: '{log_path}' not found.")
        return

    df = pd.read_csv(log_path)
    print(f"✅ Loaded '{log_path}' with {len(df)} relevant emotional events.")

    # Fail with a readable message instead of a KeyError deep inside the loop.
    required_columns = ('predicted_label', 'timestamp_seconds', 'frame_number')
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"❌ Error: '{log_path}' is missing required column(s): {missing_columns}")
        return

    # --- Filter for the emotions we want to review ---
    review_df = df[df['predicted_label'].isin(emotions_to_review)] if emotions_to_review else df

    if review_df.empty:
        print("✅ No events found for the specified emotions. Nothing to do.")
        return

    # --- Create a directory to save the new clips ---
    clips_output_dir = os.path.join(run_dir, "review_clips")
    os.makedirs(clips_output_dir, exist_ok=True)

    sheet_columns = ["clip_filename", "predicted_label", "actual_label", "notes"]
    curation_log = []   # One record per clip that was actually created
    failed_clips = []   # (clip filename, ffmpeg error text) for every failure

    print(f"\n--- Generating {len(review_df)} Video Clips & Curation Sheet ---")

    # --- Create Clips and Log Data for Spreadsheet ---
    for _, row in tqdm(review_df.iterrows(), total=review_df.shape[0], desc="Creating Artifacts"):
        start_time = max(0, row['timestamp_seconds'] - (clip_duration / 2))
        emotion_label = row['predicted_label']
        frame_num = int(row['frame_number'])

        output_clip_name = f"{emotion_label}_frame_{frame_num}.mp4"
        output_path = os.path.join(clips_output_dir, output_clip_name)

        # The command is a list of arguments (no shell), so paths containing
        # spaces or shell metacharacters are passed through safely.
        # NOTE: '-ss' before '-i' combined with '-c copy' avoids re-encoding;
        # per the ffmpeg docs the cut may then snap to a nearby keyframe
        # rather than the exact timestamp.
        ffmpeg_command = [
            'ffmpeg',
            '-ss', str(start_time),
            '-i', source_video,
            '-t', str(clip_duration),
            '-c', 'copy',
            '-y',
            '-loglevel', 'error',
            output_path
        ]

        # Capture stderr so ffmpeg's error text can be reported from here
        # instead of being lost with the child process.
        result = subprocess.run(ffmpeg_command, stderr=subprocess.PIPE)
        if result.returncode != 0:
            error_text = (result.stderr or b"").decode(errors="replace").strip()
            failed_clips.append((output_clip_name, error_text))
            continue

        # Only successfully created clips are offered for manual curation.
        curation_log.append({
            "clip_filename": output_clip_name,
            "predicted_label": emotion_label,
            "actual_label": "",  # Leave blank for manual entry
            "notes": ""          # Leave blank for manual entry
        })

    # --- Save the Starter Spreadsheet ---
    # Explicit columns keep the header row even when no clip succeeded.
    curation_df = pd.DataFrame(curation_log, columns=sheet_columns)
    spreadsheet_path = os.path.join(clips_output_dir, "manual_curation_starter_sheet.csv")
    curation_df.to_csv(spreadsheet_path, index=False)

    if failed_clips:
        print(f"\n⚠️ {len(failed_clips)} of {len(review_df)} clips could not be created:")
        for clip_name, error_text in failed_clips:
            print(f"   - {clip_name}: {error_text or 'ffmpeg exited with a non-zero status'}")
        print(f"✅ {len(curation_log)} clips created in: {clips_output_dir}")
    else:
        print(f"\n✅ Success! All clips created in: {clips_output_dir}")
    print(f"✅ Starter spreadsheet for curation saved to: {spreadsheet_path}")

In [4]:
# ==============================================================================
# 3. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == '__main__':
    # Both inputs have to exist on disk before any clip can be generated:
    # the source video must be a file and the run folder must be a directory.
    inputs_ready = os.path.isfile(SOURCE_VIDEO_PATH) and os.path.isdir(RUN_DIRECTORY)

    if not inputs_ready:
        print("❌ Error: Please ensure SOURCE_VIDEO_PATH and RUN_DIRECTORY are correct.")
    else:
        # Arguments follow the function's positional order:
        # run_dir, source_video, emotions_to_review, clip_duration.
        generate_review_clips(
            RUN_DIRECTORY,
            SOURCE_VIDEO_PATH,
            EMOTIONS_TO_REVIEW,
            CLIP_DURATION,
        )

✅ Loaded '/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V6_20250716_112248/multi_region_filtered_log.csv' with 361 relevant emotional events.

--- Generating 91 Video Clips & Curation Sheet ---


Creating Artifacts: 100%|███████████████████████| 91/91 [00:04<00:00, 21.62it/s]


✅ Success! All clips created in: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V6_20250716_112248/review_clips
✅ Starter spreadsheet for curation saved to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V6_20250716_112248/review_clips/manual_curation_starter_sheet.csv



