In [1]:
import pandas as pd
import os
import glob
import re
from datetime import datetime
import argparse

In [2]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================
# Root folder that holds the versioned run directories (e.g. "V2_20250711_114129").
# Set the ANALYSIS_OUTPUT_ROOT environment variable to point the notebook at a
# different location; when it is unset or empty the original default is used.
ANALYSIS_OUTPUT_ROOT = (
    os.environ.get("ANALYSIS_OUTPUT_ROOT")
    or "/Users/natalyagrokh/AI/ml_expressions/img_expressions/flywheel"
)

In [3]:
# ==============================================================================
# 2. UTILITY FUNCTIONS
# ==============================================================================

# Extracts the integer version number (e.g., 23) from a directory name.
def extract_version_from_path(path):
    """Return the integer following the first 'V' in the last path component.

    Args:
        path: Path to a run directory, e.g. ".../V23_20250711_114129".
            A trailing path separator is tolerated.

    Returns:
        The version as an int, or -1 when the last path component contains
        no "V<digits>" pattern.
    """
    # normpath drops trailing separators; without it basename(".../V23/")
    # is "" and the directory would wrongly be reported as version -1.
    dir_name = os.path.basename(os.path.normpath(path))
    match = re.search(r"V(\d+)", dir_name)
    return int(match.group(1)) if match else -1

# Analyzes an emotion log and selects the predictions with the highest confidence.
def find_top_n_certain_images(log_df, top_n=200):
    """Return the `top_n` rows of `log_df` with the highest 'confidence'.

    Args:
        log_df: DataFrame holding the prediction log; must contain a
            'confidence' column. It is not modified.
        top_n: Maximum number of rows to return (default 200).

    Returns:
        A DataFrame with at most `top_n` rows, ordered by 'confidence' from
        highest to lowest. Rows with equal confidence keep their original
        relative order; rows with a missing confidence sort last.

    Raises:
        KeyError: If `log_df` has no 'confidence' column.
    """
    if 'confidence' not in log_df.columns:
        raise KeyError("log_df must contain a 'confidence' column")

    print(f"\n--- Finding the Top {top_n} Most Confident Predictions ---")

    # Sort the entire log by confidence in descending order. A stable sort
    # (mergesort) keeps tied rows in their original order, so the selection at
    # the top_n cut-off is reproducible from run to run.
    review_df_sorted = log_df.sort_values(
        by='confidence', ascending=False, kind='mergesort'
    )

    # Take the top N from the sorted list
    top_n_df = review_df_sorted.head(top_n)

    print(f"✅ Found and selected the top {len(top_n_df)} most confident images for review.")

    return top_n_df

In [4]:
# ==============================================================================
# 3. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == '__main__':

    # --- Part A: Automatically find the most recent run directory ---
    # A missing root is treated like an empty one so the user sees the error
    # message below instead of an unhandled FileNotFoundError from listdir.
    if os.path.isdir(ANALYSIS_OUTPUT_ROOT):
        all_run_dirs = [
            os.path.join(ANALYSIS_OUTPUT_ROOT, d)
            for d in os.listdir(ANALYSIS_OUTPUT_ROOT)
            if d.startswith("V") and os.path.isdir(os.path.join(ANALYSIS_OUTPUT_ROOT, d))
        ]
    else:
        all_run_dirs = []

    if not all_run_dirs:
        print(f"❌ Error: No run directories found in {ANALYSIS_OUTPUT_ROOT}")
    else:
        # Several runs can share a version number (e.g. two "V2_<timestamp>"
        # directories). Breaking ties on the directory name makes the choice
        # independent of os.listdir order; for names ending in a
        # YYYYMMDD_HHMMSS timestamp the greatest name is the newest run.
        latest_run_dir = max(
            all_run_dirs,
            key=lambda run_dir: (
                extract_version_from_path(run_dir),
                os.path.basename(run_dir),
            ),
        )
        print(f"✅ Automatically detected latest run directory by version: {os.path.basename(latest_run_dir)}")

        # --- Part B: Run the analysis ---
        log_path = os.path.join(latest_run_dir, "emotion_log.csv")

        if not os.path.exists(log_path):
            print(f"❌ Error: Could not find 'emotion_log.csv' in the directory: {latest_run_dir}")
        else:
            log_df = pd.read_csv(log_path)

            if 'confidence' not in log_df.columns:
                # Report an unusable log in the same style as the other
                # errors rather than failing with a KeyError during sorting.
                print(f"❌ Error: 'emotion_log.csv' has no 'confidence' column: {log_path}")
            else:
                # Define how many of the most confident predictions you want to review.
                IMAGES_TO_REVIEW = 200

                # Find the top N most confident predictions across ALL classes.
                review_df = find_top_n_certain_images(
                    log_df,
                    top_n=IMAGES_TO_REVIEW
                )

                if not review_df.empty:
                    # Save the results to a new CSV file
                    output_path = os.path.join(latest_run_dir, "top_confidence_review_queue.csv")
                    review_df.to_csv(output_path, index=False)
                    print(f"\n✅ Successfully saved review queue with {len(review_df)} images to: {output_path}")
                else:
                    print("\n✅ No predictions found in the log file.")

✅ Automatically detected latest run directory by version: V2_20250711_114129

--- Finding the Top 200 Most Confident Predictions ---
✅ Found and selected the top 200 most confident images for review.

✅ Successfully saved review queue with 200 images to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/flywheel/V2_20250711_114129/top_confidence_review_queue.csv
