In [1]:
# V8 changes: added a parallel process for micro-expressions

In [2]:
import pandas as pd
import os
import glob
import re
import shutil

In [3]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================
# Root folder that is scanned for run directories whose names start with "V".
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
ANALYSIS_OUTPUT_ROOT = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel"

# --- Filtering Thresholds ---
CONFIDENCE_THRESHOLD = 0.88  # keep only predictions whose confidence is strictly greater than 0.88
TOP_N_TO_REVIEW = 150        # number of highest-entropy rows copied into each review queue

In [4]:
# ==============================================================================
# 2. UTILITY FUNCTIONS
# ==============================================================================

def extract_version_from_path(path):
    """Return the integer N of the first ``V<N>`` token in the last path component.

    Only ``os.path.basename(path)`` is searched. When it contains no ``V``
    followed by at least one digit, ``-1`` is returned.
    """
    dir_name = os.path.basename(path)
    found = re.search(r"V(\d+)", dir_name)
    if found is None:
        return -1
    return int(found.group(1))

def create_review_queue(log_df, run_dir, confidence_thresh, top_n, queue_name, description):
    """Build a manual-review queue of confident but high-entropy predictions.

    Rows of ``log_df`` whose ``confidence`` is strictly greater than
    ``confidence_thresh`` are kept, ordered by ``entropy`` (highest first) and
    truncated to ``top_n`` rows. The files named in ``face_crop_path`` are copied
    into ``<run_dir>/<queue_name>`` and the selected rows are saved beside them
    as ``simplified_review_log.csv``.

    Args:
        log_df: DataFrame with at least ``confidence`` and ``entropy`` columns.
            ``face_crop_path`` and ``frame_number`` are used when present.
        run_dir: Directory in which the queue folder is created.
        confidence_thresh: Exclusive lower bound applied to ``confidence``.
        top_n: Maximum number of rows placed in the queue.
        queue_name: Name of the folder created inside ``run_dir``.
        description: Label used in the progress messages.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        KeyError: If ``log_df`` has no ``confidence`` or ``entropy`` column.
    """
    print(f"\n--- Creating Prioritized '{description}' Review Queue ---")

    missing_columns = [col for col in ('confidence', 'entropy') if col not in log_df.columns]
    if missing_columns:
        raise KeyError(f"log_df is missing required column(s): {missing_columns}")

    # Filter for confident predictions
    confident_df = log_df[log_df['confidence'] > confidence_thresh]
    print(f"-> Found {len(confident_df)} predictions with confidence > {confidence_thresh}.")

    if confident_df.empty:
        print("⚠️ No predictions met the confidence threshold. Nothing to review.")
        return

    # Sort by entropy to find the most uncertain and take the top N
    review_df = confident_df.sort_values(by='entropy', ascending=False).head(top_n)
    print(f"-> Selected the Top {len(review_df)} most uncertain images for this queue.")

    # Create the dedicated review folder and copy images
    review_folder_path = os.path.join(run_dir, queue_name)
    os.makedirs(review_folder_path, exist_ok=True)

    has_path_column = 'face_crop_path' in review_df.columns
    if has_path_column:
        source_paths = review_df['face_crop_path'].tolist()
    else:
        source_paths = [None] * len(review_df)

    copied_count = 0
    for source_path in source_paths:
        # Empty CSV cells load as NaN (a truthy float); passing that to
        # os.path.exists raises TypeError, so only real strings are probed.
        is_usable_path = isinstance(source_path, str) and bool(source_path)
        if is_usable_path and os.path.exists(source_path):
            try:
                shutil.copy(source_path, review_folder_path)
                copied_count += 1
            except Exception as e:
                print(f"⚠️ Could not copy file {source_path}. Error: {e}")
        else:
            print(f"⚠️ File not found and could not be copied: {source_path}")

    print(f"\n✅ Success! Copied {copied_count} images to: {review_folder_path}")

    # Write the review log next to the copied images. review_df can still be
    # empty here when top_n <= 0, in which case no CSV is produced.
    if not review_df.empty:
        simplified_df = review_df.copy()

        # Reduce each path to its filename; non-string entries (NaN/None) are kept as-is
        if has_path_column:
            simplified_df['face_crop_path'] = simplified_df['face_crop_path'].map(
                lambda p: os.path.basename(p) if isinstance(p, str) else p
            )

        # Order by frame number when the log provides one
        if 'frame_number' in simplified_df.columns:
            simplified_df = simplified_df.sort_values(by='frame_number')
        simplified_df = simplified_df.reset_index(drop=True)

        # Save the CSV inside the review queue folder
        simplified_csv_path = os.path.join(review_folder_path, "simplified_review_log.csv")
        simplified_df.to_csv(simplified_csv_path, index=False)
        print(f"✅ Created a sortable, simplified log for manual curation at: {simplified_csv_path}")

In [5]:
# ==============================================================================
# 3. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == '__main__':

    # Collect run directories: sub-folders of the root whose name starts with "V"
    # and carries a version number. A missing root is treated like an empty one
    # rather than letting os.listdir raise.
    if os.path.isdir(ANALYSIS_OUTPUT_ROOT):
        root_entries = os.listdir(ANALYSIS_OUTPUT_ROOT)
    else:
        root_entries = []

    all_run_dirs = []
    for entry in root_entries:
        entry_path = os.path.join(ANALYSIS_OUTPUT_ROOT, entry)
        if (
            entry.startswith("V")
            and os.path.isdir(entry_path)
            and extract_version_from_path(entry_path) >= 0
        ):
            all_run_dirs.append(entry_path)

    if not all_run_dirs:
        print(f"❌ Error: No run directories found in {ANALYSIS_OUTPUT_ROOT}")
    else:
        # Highest version wins; the folder name breaks ties so that, among runs of
        # the same version, the lexicographically greatest name is chosen instead
        # of whichever one os.listdir happened to return first.
        latest_run_dir = max(
            all_run_dirs,
            key=lambda p: (extract_version_from_path(p), os.path.basename(p)),
        )
        print(f"✅ Automatically analyzing latest run: {os.path.basename(latest_run_dir)}")

        # --- Define paths to both log files ---
        raw_log_path = os.path.join(latest_run_dir, "emotion_log_before_stability_filter.csv")
        stable_log_path = os.path.join(latest_run_dir, "final_stable_emotion_log.csv")

        # (label for the error message, log path, queue folder, queue description)
        queue_specs = [
            ("raw", raw_log_path, "micro_expression_review_queue", "Micro-Expression"),
            ("stable", stable_log_path, "stable_emotion_review_queue", "Stable Emotion"),
        ]

        # Both logs go through the same pipeline; a missing log is reported and
        # does not prevent the other one from being processed.
        for log_kind, log_path, queue_name, description in queue_specs:
            if not os.path.exists(log_path):
                print(f"❌ Error: Could not find {log_kind} log file: {log_path}")
                continue

            create_review_queue(
                log_df=pd.read_csv(log_path),
                run_dir=latest_run_dir,
                confidence_thresh=CONFIDENCE_THRESHOLD,
                top_n=TOP_N_TO_REVIEW,
                queue_name=queue_name,
                description=description
            )

✅ Automatically analyzing latest run: V8_20250722_130942

--- Creating Prioritized 'Micro-Expression' Review Queue ---
-> Found 3601 predictions with confidence > 0.88.
-> Selected the Top 150 most uncertain images for this queue.

✅ Success! Copied 150 images to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V8_20250722_130942/micro_expression_review_queue
✅ Created a sortable, simplified log for manual curation at: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V8_20250722_130942/micro_expression_review_queue/simplified_review_log.csv

--- Creating Prioritized 'Stable Emotion' Review Queue ---
-> Found 3035 predictions with confidence > 0.88.
-> Selected the Top 150 most uncertain images for this queue.

✅ Success! Copied 150 images to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V8_20250722_130942/stable_emotion_review_queue
✅ Created a sortable, simplified log for manual curation at: /Users/natalyagrokh/AI/ml_expres