In [1]:
# V9 changes:
#   Section 2: create_review_queue was renamed to certainty_review_queue.

def _padded_crop_box(top, right, bottom, left, frame_height, frame_width):
    """Expand a face box with padding and clamp it to the frame bounds.

    The box is grown by 40% of the face height above, 10% of that vertical
    padding below, and 15% of the face width on each side.

    Returns:
        Tuple ``(top_pad, bottom_pad, left_pad, right_pad)`` of pixel indices.
    """
    v_pad = int((bottom - top) * 0.40)
    h_pad = int((right - left) * 0.15)
    return (
        max(0, top - v_pad),
        min(frame_height, bottom + int(v_pad * 0.1)),
        max(0, left - h_pad),
        min(frame_width, right + h_pad),
    )


def _is_static_location(loc_key, frame_count, frame_stride, tracker, ignored_locations, static_threshold):
    """Update the static-object tracker for one box and report whether to skip it.

    A box whose exact coordinates repeat on consecutive processed frames more
    than ``static_threshold`` times is added to ``ignored_locations``. Both
    ``tracker`` and ``ignored_locations`` are mutated in place.

    Returns:
        True when the box is (or has just become) ignored, False otherwise.
    """
    if loc_key in ignored_locations:
        return True

    entry = tracker.get(loc_key)
    if entry is None:
        entry = tracker[loc_key] = {"count": 1, "last_frame": frame_count}
    else:
        # Only an unbroken run of processed frames counts towards "static".
        if frame_count == entry["last_frame"] + frame_stride:
            entry["count"] += 1
        else:
            entry["count"] = 1
        entry["last_frame"] = frame_count

    if entry["count"] > static_threshold:
        ignored_locations.add(loc_key)
        return True
    return False


def analyze_video_with_filters(video_path, save_dir, emotion_model, gatekeeper_model, processor, device, static_threshold, process_every_n_frames=1):
    """
    Processes video with all filters, including a corrected static object filter.

    For every ``process_every_n_frames``-th frame, faces are located, boxes
    that stay at identical coordinates are dropped by the static filter,
    crops labelled "Non-Emotional" by the gatekeeper are dropped, and the
    remaining faces are assigned a person id, classified, saved as PNG crops
    and logged.

    Args:
        video_path: Path of the video file to analyse.
        save_dir: Directory under which a ``face_crops`` folder is created.
        emotion_model: Forwarded to ``get_emotion_predictions``.
        gatekeeper_model: Called on the processor output; its ``.logits`` and
            ``.config.id2label`` are read to obtain a label string.
        processor: Called as ``processor(images=..., return_tensors="pt")``.
        device: Target passed to ``.to(device)`` for the gatekeeper inputs.
        static_threshold: Number of consecutive processed frames a box may
            repeat before its location is ignored.
        process_every_n_frames: Frame stride; only frames whose index is a
            multiple of this value are analysed.

    Returns:
        A list of dicts, one per logged face, holding ``timestamp``,
        ``person_id``, ``face_crop_path`` and whatever keys
        ``get_emotion_predictions`` returns. An empty list is returned when
        the file is missing or its first frame cannot be read.
    """
    if not os.path.exists(video_path):
        print(f"❌ Error: Video file not found at {video_path}")
        return []

    video_capture = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released on every exit
    # path, including the early return below and any exception in the loop.
    try:
        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        reported_fps = video_capture.get(cv2.CAP_PROP_FPS)
        fps = reported_fps if reported_fps > 0 else 30

        # Probe the first frame for the frame size, then rewind.
        ret, frame = video_capture.read()
        if not ret:
            print("❌ Error: Could not read the first frame.")
            return []
        frame_height, frame_width, _ = frame.shape
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, 0)
        print(f"✅ Opened video: {os.path.basename(video_path)} ({total_frames} frames at {fps:.2f} fps)")

        face_crop_dir = os.path.join(save_dir, "face_crops")
        os.makedirs(face_crop_dir, exist_ok=True)

        static_object_tracker, ignored_locations = {}, set()
        known_face_encodings, known_face_ids = [], []
        next_person_id = 1
        all_results_log = []

        pbar = tqdm(total=total_frames, desc="Analyzing Video")
        try:
            for frame_count in range(total_frames):
                ret, frame = video_capture.read()
                if not ret:
                    break

                if frame_count % process_every_n_frames == 0:
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    face_locations = face_recognition.face_locations(rgb_frame)
                    current_face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

                    for loc_key, face_encoding in zip(face_locations, current_face_encodings):
                        top, right, bottom, left = loc_key
                        loc_key = (top, right, bottom, left)

                        # --- Static Object Filter ---
                        if _is_static_location(loc_key, frame_count, process_every_n_frames,
                                               static_object_tracker, ignored_locations, static_threshold):
                            continue

                        # --- Padded face crop (built only for boxes that passed the static filter) ---
                        top_pad, bottom_pad, left_pad, right_pad = _padded_crop_box(
                            top, right, bottom, left, frame_height, frame_width
                        )
                        face_image = Image.fromarray(rgb_frame[top_pad:bottom_pad, left_pad:right_pad])

                        # --- Gatekeeper Filter ---
                        gatekeeper_inputs = processor(images=face_image, return_tensors="pt").to(device)
                        with torch.no_grad():
                            gatekeeper_logits = gatekeeper_model(**gatekeeper_inputs).logits
                        gatekeeper_pred = gatekeeper_model.config.id2label[gatekeeper_logits.argmax(-1).item()]
                        if "Non-Emotional" in gatekeeper_pred:
                            continue

                        # --- Face Identification ---
                        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
                        if True in matches:
                            person_id = known_face_ids[matches.index(True)]
                        else:
                            person_id = f"Person_{next_person_id}"
                            known_face_encodings.append(face_encoding)
                            known_face_ids.append(person_id)
                            next_person_id += 1

                        # --- Emotion Classification and Logging ---
                        emotion_results = get_emotion_predictions(face_image, emotion_model, processor, device)
                        face_filename = os.path.join(face_crop_dir, f"frame_{frame_count}_{person_id}.png")
                        face_image.save(face_filename)

                        all_results_log.append({
                            "timestamp": frame_count / fps,
                            "person_id": person_id,
                            "face_crop_path": face_filename,
                            **emotion_results,
                        })

                    # Forget boxes that did not reappear in this processed frame
                    # so their consecutive-frame streak restarts from scratch.
                    current_frame_locations = set(face_locations)
                    stale_keys = [k for k in static_object_tracker if k not in current_frame_locations]
                    for k in stale_keys:
                        del static_object_tracker[k]

                pbar.update(1)
        finally:
            pbar.close()
    finally:
        video_capture.release()

    print("\n--- Video Processing Summary ---")
    print(f"✅ Discovered {len(known_face_ids)} unique person(s).")
    print(f"⚠️ Detected and ignored {len(ignored_locations)} static object(s).")
    print(f"✅ Logged {len(all_results_log)} potentially emotional events.")

    return all_results_log

In [2]:
import pandas as pd
import os
import glob
import re
import shutil

In [3]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================
# Root folder that holds the run directories. Set the ANALYSIS_OUTPUT_ROOT
# environment variable to point the notebook at a different machine's data;
# when it is unset, the original location is used unchanged.
ANALYSIS_OUTPUT_ROOT = os.environ.get(
    "ANALYSIS_OUTPUT_ROOT",
    "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel",
)

# Number of most CERTAIN predictions pulled into each review queue.
TOP_N_TO_REVIEW = 200

In [4]:
# ==============================================================================
# 2. UTILITY FUNCTIONS
# ==============================================================================

def extract_version_from_path(path):
    """Extract the integer version number (e.g. V1, V2) from a directory name.

    Only the final path component is inspected; the first ``V<digits>``
    occurrence in it is used.

    Args:
        path: Path to a run directory, with or without a trailing separator.

    Returns:
        The version as an int, or -1 when the name contains no ``V<digits>``.
    """
    # normpath strips trailing separators, so "runs/V9_x/" resolves to the
    # directory name instead of an empty basename.
    dir_name = os.path.basename(os.path.normpath(path))
    match = re.search(r"V(\d+)", dir_name)
    return int(match.group(1)) if match else -1

def certainty_review_queue(log_df, run_dir, top_n, queue_name, description):
    """Build a review queue of the Top N most CERTAIN predictions.

    Certainty is scored as ``confidence - entropy`` (high confidence, low
    entropy). The face crops of the ``top_n`` highest-scoring rows are copied
    into ``<run_dir>/<queue_name>`` and a ``simplified_review_log.csv`` with
    blank ``actual_label`` / ``notes`` columns is written next to them for a
    ground-truth audit. ``log_df`` itself is left unmodified.

    Args:
        log_df: DataFrame with at least the columns ``confidence``,
            ``entropy``, ``face_crop_path``, ``frame_number`` and
            ``predicted_label``.
        run_dir: Directory in which the queue folder is created.
        top_n: Maximum number of rows to select.
        queue_name: Name of the queue folder inside ``run_dir``.
        description: Human-readable label used only in printed messages.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        KeyError: If a required column is missing from ``log_df``.
    """
    print(f"\n--- Creating Top-{top_n} Certainty Review Queue for '{description}' ---")

    if log_df.empty:
        print(f"⚠️ The log file for '{description}' is empty. Nothing to review.")
        return

    # --- Step 1: Calculate a "certainty score" ---
    # A higher score means higher confidence and lower entropy. assign()
    # returns a new frame, so the caller's DataFrame does not gain a column.
    scored_df = log_df.assign(certainty_score=log_df['confidence'] - log_df['entropy'])

    # --- Step 2: Sort by the new score and take the top N ---
    review_df = scored_df.sort_values(by='certainty_score', ascending=False).head(top_n)

    print(f"-> Selected the Top {len(review_df)} most certain images for review.")

    # --- Step 3: Copy images into the queue folder ---
    review_folder_path = os.path.join(run_dir, queue_name)
    os.makedirs(review_folder_path, exist_ok=True)

    copied_count = 0
    for source_path in review_df['face_crop_path']:
        # Empty CSV cells load as NaN (a truthy float); only real strings are
        # valid paths, anything else is reported as missing.
        if isinstance(source_path, str) and os.path.exists(source_path):
            try:
                shutil.copy(source_path, review_folder_path)
                copied_count += 1
            except Exception as e:
                print(f"⚠️ Could not copy file {source_path}. Error: {e}")
        else:
            print(f"⚠️ File not found and could not be copied: {source_path}")

    print(f"\n✅ Success! Copied {copied_count} images to: {review_folder_path}")

    # --- Step 4: Create and save the simplified, sortable CSV ---
    if not review_df.empty:
        simplified_df = review_df.sort_values(by='frame_number').reset_index(drop=True)

        # Simplify the path to just the filename; missing paths stay as-is.
        simplified_df['face_crop_path'] = simplified_df['face_crop_path'].apply(
            lambda p: os.path.basename(p) if isinstance(p, str) else p
        )

        # Add blank columns for manual labels and notes.
        simplified_df['actual_label'] = ""
        simplified_df['notes'] = ""

        # Select and reorder columns for the final sheet.
        final_columns = ['frame_number', 'face_crop_path', 'predicted_label', 'actual_label', 'notes', 'confidence', 'entropy', 'certainty_score']
        simplified_df = simplified_df[final_columns]

        # Save the CSV inside the review queue folder, next to the images.
        simplified_csv_path = os.path.join(review_folder_path, "simplified_review_log.csv")
        simplified_df.to_csv(simplified_csv_path, index=False)
        print(f"✅ Created a log for the certainty audit at: {simplified_csv_path}")


In [6]:
# ==============================================================================
# 3. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == '__main__':
    # Collect every "V*" sub-directory of the output root. A missing root is
    # treated as "no runs" so the error message below is printed instead of
    # os.listdir raising FileNotFoundError.
    if os.path.isdir(ANALYSIS_OUTPUT_ROOT):
        all_run_dirs = [
            os.path.join(ANALYSIS_OUTPUT_ROOT, d)
            for d in os.listdir(ANALYSIS_OUTPUT_ROOT)
            if d.startswith("V") and os.path.isdir(os.path.join(ANALYSIS_OUTPUT_ROOT, d))
        ]
    else:
        all_run_dirs = []

    if not all_run_dirs:
        print(f"❌ Error: No run directories found in {ANALYSIS_OUTPUT_ROOT}")
    else:
        # The run with the highest V<number> in its directory name wins.
        latest_run_dir = max(all_run_dirs, key=extract_version_from_path)
        print(f"✅ Automatically analyzing latest run: {os.path.basename(latest_run_dir)}")

        # --- Define paths to both log files ---
        raw_log_path = os.path.join(latest_run_dir, "emotion_log_before_stability_filter.csv")
        stable_log_path = os.path.join(latest_run_dir, "final_stable_emotion_log.csv")

        # --- Process Micro-Expressions from the RAW log ---
        if os.path.exists(raw_log_path):
            raw_log_df = pd.read_csv(raw_log_path)
            certainty_review_queue(
                log_df=raw_log_df,
                run_dir=latest_run_dir,
                top_n=TOP_N_TO_REVIEW,
                queue_name="certainty_micro_expression_review",
                description="Micro-Expressions (from raw log)"
            )
        else:
            print(f"❌ Error: Could not find raw log file: {raw_log_path}")

        # --- Process Stable Emotions from the STABLE log ---
        if os.path.exists(stable_log_path):
            stable_log_df = pd.read_csv(stable_log_path)
            certainty_review_queue(
                log_df=stable_log_df,
                run_dir=latest_run_dir,
                top_n=TOP_N_TO_REVIEW,
                queue_name="certainty_stable_emotion_review",
                description="Stable Emotions (from filtered log)"
            )
        else:
            print(f"❌ Error: Could not find stable log file: {stable_log_path}")

✅ Automatically analyzing latest run: V9_20250725_154347

--- Creating Top-200 Certainty Review Queue for 'Micro-Expressions (from raw log)' ---
-> Selected the Top 200 most certain images for review.

✅ Success! Copied 200 images to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V9_20250725_154347/certainty_micro_expression_review
✅ Created a log for the certainty audit at: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V9_20250725_154347/certainty_micro_expression_review/simplified_review_log.csv

--- Creating Top-200 Certainty Review Queue for 'Stable Emotions (from filtered log)' ---
-> Selected the Top 200 most certain images for review.

✅ Success! Copied 200 images to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V9_20250725_154347/certainty_stable_emotion_review
✅ Created a log for the certainty audit at: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/V9_20250725_154347/certainty_stable_emoti