In [4]:
import pandas as pd
import os
import shutil
from tqdm import tqdm

# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================

# --- IMPORTANT: Point this to the versioned folder from your V7 run ---
# This folder should contain the 'top_confidence_review.csv' file.
RUN_DIRECTORY = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V7_20250717_093451" 

# ==============================================================================
# 2. SCRIPT LOGIC
# ==============================================================================

def extract_review_images(run_dir):
    """
    Reads a review CSV and copies the specified face crops into a new folder
    for easy manual curation.
    """
    review_csv_path = os.path.join(run_dir, "top_confidence_review.csv")
    if not os.path.exists(review_csv_path):
        print(f"❌ Error: '{os.path.basename(review_csv_path)}' not found in the specified directory.")
        return

    # --- Load the list of images to review ---
    df_review = pd.read_csv(review_csv_path)
    print(f"✅ Loaded '{os.path.basename(review_csv_path)}' with {len(df_review)} images to extract.")

    # --- Create the new folder for the curation set ---
    review_queue_path = os.path.join(run_dir, "manual_review_queue")
    os.makedirs(review_queue_path, exist_ok=True)
    print(f"✅ Created review folder at: {review_queue_path}")

    # --- Find and copy the images ---
    copied_count = 0
    for index, row in tqdm(df_review.iterrows(), total=df_review.shape[0], desc="Copying images"):
        # The full path to the source image should be in this column
        source_path = row.get('/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V7_20250717_093451/face_crops')
        
        if source_path and os.path.exists(source_path):
            try:
                shutil.copy(source_path, review_queue_path)
                copied_count += 1
            except Exception as e:
                print(f"⚠️ Could not copy file {source_path}. Error: {e}")
        else:
            print(f"⚠️ Source file not found: {source_path}")

    print(f"\n✅ Success! Copied {copied_count} images to the review queue.")

# ==============================================================================
# 3. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == '__main__':
    if not os.path.isdir(RUN_DIRECTORY):
        print(f"❌ Error: The specified RUN_DIRECTORY does not exist: {RUN_DIRECTORY}")
    else:
        extract_review_images(RUN_DIRECTORY)

✅ Loaded 'top_confidence_review.csv' with 200 images to extract.
✅ Created review folder at: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V7_20250717_093451/manual_review_queue


Copying images: 100%|██████████████████████| 200/200 [00:00<00:00, 47079.40it/s]

⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Source file not found: None
⚠️ Sourc


