In [1]:
# V1 - rerunning test to avoid split screen confusing model
    # section #2 - added filter_high_conviction_emotions
    # section #4 - worked in new filtering function,
        # updated vid to StreetQs

In [2]:
import cv2
import face_recognition
from PIL import Image
import os
import glob
from datetime import datetime
from tqdm import tqdm
import torch
import torch.nn.functional as F
from transformers import AutoImageProcessor, AutoModelForImageClassification
import pandas as pd

In [3]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================
# Define the root directory where all analysis outputs will be stored.
# Each run creates its own versioned sub-directory underneath this root.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# unchanged on another machine; consider deriving it from a configurable base.
ANALYSIS_OUTPUT_ROOT = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/vid_inference"
# Define the path to the final, production-ready model
# (loaded later via AutoModelForImageClassification / AutoImageProcessor).
MODEL_PATH = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/sup_training/V29_20250710_082807"
# Create the output root up front; exist_ok=True makes re-running this cell safe.
os.makedirs(ANALYSIS_OUTPUT_ROOT, exist_ok=True)

In [4]:
# ==============================================================================
# 2. UTILITY FUNCTIONS
# ==============================================================================

# Checks for valid image file extensions.
def is_valid_image(filename):
    """Return True when ``filename`` names a JPEG/PNG file that is not a ``._`` sidecar.

    Both checks are applied to the final path component, so a full path such
    as ``some/dir/._photo.jpg`` is rejected just like a bare ``._photo.jpg``.

    Args:
        filename: A bare file name or a path (str or os.PathLike).

    Returns:
        bool: True if the base name ends with .jpg/.jpeg/.png (case-insensitive)
        and does not start with ``._``.
    """
    # Work on the base name: the "._" prefix lives on the file itself, not on
    # the directory part of a path.
    base_name = os.path.basename(filename)
    has_image_extension = base_name.lower().endswith((".jpg", ".jpeg", ".png"))
    return has_image_extension and not base_name.startswith("._")

# Dynamically determines the next version number by scanning a directory.
def get_next_version(base_dir):
    """Return the next unused version tag (``"V<N>"``) for ``base_dir``.

    Sub-directories of ``base_dir`` named ``V<number>_<anything>`` are treated
    as existing versions; the result is one more than the highest number
    found, or ``"V1"`` when there are none (including when ``base_dir`` does
    not exist).

    Args:
        base_dir: Directory that holds the versioned run folders.

    Returns:
        str: ``"V"`` followed by the next version number.
    """
    # Escape base_dir so glob metacharacters in the path itself (e.g. "[" or
    # "*") are matched literally instead of being read as a pattern.
    pattern = os.path.join(glob.escape(base_dir), "V*_*")

    versions = []
    for entry in glob.glob(pattern):
        # Plain files that happen to match the pattern are not versions.
        if not os.path.isdir(entry):
            continue
        name = os.path.basename(entry)
        if not name.startswith("V"):
            continue
        number = name[1:].split("_", 1)[0]
        # isdecimal() only accepts characters int() can parse (isdigit() also
        # accepts e.g. superscript digits, which would make int() raise).
        if number.isdecimal():
            versions.append(int(number))

    return f"V{max(versions, default=0) + 1}"

# Runs the emotion recognition model on a single face image and returns the
# prediction together with the full probability distribution.
def predict_and_display_emotions(face_image, model, processor, device):
    """Classify one face image and return the prediction as a flat dict.

    Despite the name, nothing is printed or displayed; the caller receives
    the results and decides what to do with them.

    Args:
        face_image: Image handed to ``processor(images=...)``.
        model: Callable returning an object with ``.logits``; must expose
            ``model.config.id2label`` mapping class index -> label.
        processor: Callable producing the model inputs; its result must
            support ``.to(device)`` and ``**`` unpacking.
        device: Device the inputs are moved to before inference.

    Returns:
        dict with keys ``predicted_label``, ``confidence`` (highest class
        probability), ``entropy`` (natural-log entropy of the distribution)
        and one ``prob_<label>`` entry per class, all as Python floats/str.
    """
    # Use the processor to prepare the image for the model
    inputs = processor(images=face_image, return_tensors="pt").to(device)

    # Run inference without tracking gradients
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax over the class axis. squeeze(0) drops only the batch axis, so a
    # single-class model still yields a 1-D vector (a bare squeeze() would
    # collapse it to a scalar). Assumes a batch of exactly one image.
    # Move the small vector to the CPU once: every .item() on an accelerator
    # tensor forces a separate device-to-host synchronisation.
    probabilities = F.softmax(logits, dim=1).squeeze(0).cpu()

    # Top prediction
    top_confidence, top_pred_idx = torch.max(probabilities, dim=0)

    # Shannon entropy; the 1e-9 offset keeps log() finite for zero probabilities
    entropy = -torch.sum(probabilities * torch.log(probabilities + 1e-9)).item()

    id2label = model.config.id2label
    results = {
        "predicted_label": id2label[top_pred_idx.item()],
        "confidence": top_confidence.item(),
        "entropy": entropy,
    }
    # Add individual probabilities for each class
    for class_idx, class_prob in enumerate(probabilities.tolist()):
        results[f"prob_{id2label[class_idx]}"] = class_prob

    return results

# Post-processing function: keeps predictions with confidence > 0.85,
# entropy < 0.45 and at least one class probability > 0.95 (defaults).
def filter_high_conviction_emotions(log_df, min_confidence=0.85, max_entropy=0.45, min_class_prob=0.95):
    """Keep only the rows of ``log_df`` the model is highly certain about.

    The filter runs in two steps and prints how many rows survive each one;
    the printed thresholds are taken from the arguments, so the log always
    matches what was actually applied.

    Args:
        log_df: DataFrame with ``confidence`` and ``entropy`` columns plus
            any number of ``prob_*`` columns. It is not modified.
        min_confidence: Rows must have ``confidence`` strictly above this.
        max_entropy: Rows must have ``entropy`` strictly below this.
        min_class_prob: At least one ``prob_*`` value in the row must be
            strictly above this.

    Returns:
        DataFrame containing the surviving rows (original index preserved).
    """
    print("\n--- Filtering for High-Conviction Predictions ---")

    # --- Step 1: Filter by Confidence and Entropy ---
    conviction_mask = (log_df['confidence'] > min_confidence) & (log_df['entropy'] < max_entropy)
    filtered_df = log_df[conviction_mask].copy()
    print(
        f"-> Found {len(filtered_df)} predictions with "
        f"confidence > {min_confidence} and entropy < {max_entropy}."
    )

    # --- Step 2: Further refine by high probability for any single emotion ---
    prob_columns = [col for col in log_df.columns if col.startswith('prob_')]
    high_prob_mask = (filtered_df[prob_columns] > min_class_prob).any(axis=1)

    final_df = filtered_df[high_prob_mask]
    print(
        f"-> Refined to {len(final_df)} predictions with "
        f"a single emotion probability > {min_class_prob}."
    )

    return final_df

In [5]:
# ==============================================================================
# 3. CORE PROCESSING FUNCTION - VIDEO FACE EXTRACTOR
# ==============================================================================

# Reads a video file, detects faces, saves cropped images, runs emotion
# prediction, and logs all results to a list.
def analyze_video_faces(video_path, save_dir, model, processor, device, process_every_n_frames=1):
    """Detect faces in a video, save each crop, and classify its emotion.

    Args:
        video_path: Path of the video file to read.
        save_dir: Directory under which crops are written as
            ``face_crops/frame_<n>/face_<i>.png``.
        model: Passed unchanged to ``predict_and_display_emotions``.
        processor: Passed unchanged to ``predict_and_display_emotions``.
        device: Passed unchanged to ``predict_and_display_emotions``.
        process_every_n_frames: Only frames whose 0-based index is a multiple
            of this value are analysed. Must be >= 1.

    Returns:
        list[dict]: One entry per analysed face with ``timestamp_seconds``
        (None when the container reports no frame rate), ``frame_number``,
        ``face_index``, ``face_crop_path`` and every key returned by
        ``predict_and_display_emotions``. Empty when the file is missing or
        cannot be opened.

    Raises:
        ValueError: If ``process_every_n_frames`` is smaller than 1.
    """
    # Fail fast: 0 would otherwise surface as a ZeroDivisionError in mid-run.
    if process_every_n_frames < 1:
        raise ValueError(
            f"process_every_n_frames must be >= 1, got {process_every_n_frames}"
        )

    if not os.path.exists(video_path):
        print(f"‚ùå Error: Video file not found at {video_path}")
        return []

    all_results_log = []
    video_capture = cv2.VideoCapture(video_path)
    try:
        # The file can exist and still be unreadable (unsupported codec, corrupt data).
        if not video_capture.isOpened():
            print(f"‚ùå Error: Could not open video at {video_path}")
            return []

        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        print(f"‚úÖ Opened video: {os.path.basename(video_path)} ({total_frames} frames at {fps:.2f} fps)")

        crops_root = os.path.join(save_dir, "face_crops")
        frame_count = 0

        # An unknown frame count is reported as a non-positive number; let
        # tqdm fall back to an open-ended counter in that case.
        with tqdm(total=total_frames if total_frames > 0 else None,
                  desc="Processing video frames") as pbar:
            while True:
                ret, frame = video_capture.read()
                if not ret:
                    break

                if frame_count % process_every_n_frames == 0:
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    face_locations = face_recognition.face_locations(rgb_frame)

                    if face_locations:
                        frame_save_path = os.path.join(crops_root, f"frame_{frame_count}")
                        os.makedirs(frame_save_path, exist_ok=True)
                        # Without a frame rate there is no meaningful timestamp.
                        timestamp = frame_count / fps if fps > 0 else None

                        for i, (top, right, bottom, left) in enumerate(face_locations):
                            # Crop straight from the RGB frame instead of
                            # converting every crop from BGR a second time.
                            face_image_arr = rgb_frame[top:bottom, left:right]
                            if face_image_arr.size == 0:
                                # Degenerate box: nothing to save or classify.
                                continue
                            face_image_pil = Image.fromarray(face_image_arr)

                            # Get the dictionary of emotion predictions
                            emotion_results = predict_and_display_emotions(
                                face_image_pil, model, processor, device
                            )

                            # Save the cropped face image
                            face_filename = os.path.join(frame_save_path, f"face_{i}.png")
                            face_image_pil.save(face_filename)

                            # Add frame-specific info to the log
                            all_results_log.append({
                                "timestamp_seconds": timestamp,
                                "frame_number": frame_count,
                                "face_index": i,
                                "face_crop_path": face_filename,
                                **emotion_results,  # Unpack the emotion results into the log
                            })

                frame_count += 1
                pbar.update(1)
    finally:
        # Release the capture even when detection or inference raises.
        video_capture.release()

    print(f"‚úÖ Video processing complete. Found and analyzed {len(all_results_log)} faces.")
    return all_results_log

In [6]:
# ==============================================================================
# 4. MAIN EXTRACTION BLOCK
# ==============================================================================
# NOTE(review): inside a notebook kernel this guard is presumably always true;
# it only matters if the notebook is exported to a module and imported.
if __name__ == '__main__':
    
    # --- Setup Dynamic Save Directory ---
    # Each run writes into its own "<V<N>>_<YYYYmmdd_HHMMSS>" folder under
    # ANALYSIS_OUTPUT_ROOT, so earlier results are never overwritten.
    VERSION = get_next_version(ANALYSIS_OUTPUT_ROOT)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    VERSION_TAG = f"{VERSION}_{timestamp}"
    SAVE_DIR = os.path.join(ANALYSIS_OUTPUT_ROOT, VERSION_TAG)
    os.makedirs(SAVE_DIR, exist_ok=True)
    print(f"üìÅ Created analysis output directory: {SAVE_DIR}")

    # --- Load Model and Set Device ---
    # Model weights and the matching image processor are both read from MODEL_PATH.
    print(f"\n--- Loading model from {MODEL_PATH} ---")
    model = AutoModelForImageClassification.from_pretrained(MODEL_PATH)
    processor = AutoImageProcessor.from_pretrained(MODEL_PATH)
    # Uses the MPS backend when available, otherwise the CPU.
    # NOTE(review): CUDA is never selected here.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print("\nüñ•Ô∏è Using device:", device)
    # Move the model to the chosen device and switch it to evaluation mode.
    model.to(device).eval()

    # --- Run the Analysis ---
    # NOTE(review): absolute, machine-specific input path.
    video_to_process = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/vid_inference/sample_vids/StreetQs.mp4" 
    # process_every_n_frames=1 means every frame goes through face detection.
    analysis_log = analyze_video_faces(
        video_path=video_to_process, 
        save_dir=SAVE_DIR,
        model=model,
        processor=processor,
        device=device,
        process_every_n_frames=1
    )
    
    # --- Save and Filter Results ---
    # analysis_log is a list with one dict per analysed face; an empty list
    # means nothing was logged and no CSV is written.
    if analysis_log:
        log_df = pd.DataFrame(analysis_log)
        csv_path = os.path.join(SAVE_DIR, "emotion_log.csv")
        log_df.to_csv(csv_path, index=False)
        print(f"\n‚úÖ Successfully saved detailed analysis to: {csv_path}")

        # Call filtering function
        refined_df = filter_high_conviction_emotions(log_df)

        # Save the refined results to a new CSV file
        # (skipped silently when no row survives the filter).
        if not refined_df.empty:
            refined_csv_path = os.path.join(SAVE_DIR, "refined_emotion_log.csv")
            refined_df.to_csv(refined_csv_path, index=False)
            print(f"‚úÖ Successfully saved refined analysis to: {refined_csv_path}")

    else:
        print("\n‚ö†Ô∏è No faces were detected, so no log file was created.")

    # The count below is the number of log entries, i.e. analysed faces across
    # all frames, not the number of distinct people.
    print(f"\n--- Summary ---")
    print(f"Total faces analyzed and saved: {len(analysis_log)}")

üìÅ Created analysis output directory: /Users/natalyagrokh/AI/ml_expressions/img_expressions/vid_inference/V2_20250711_114129

--- Loading model from /Users/natalyagrokh/AI/ml_expressions/img_expressions/sup_training/V29_20250710_082807 ---

üñ•Ô∏è Using device: mps
‚úÖ Opened video: StreetQs.mp4 (5657 frames at 30.00 fps)


Processing video frames: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5657/5657 [23:48<00:00,  3.96it/s]


‚úÖ Video processing complete. Found and analyzed 7612 faces.

‚úÖ Successfully saved detailed analysis to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/vid_inference/V2_20250711_114129/emotion_log.csv

--- Filtering for High-Conviction Predictions ---
-> Found 2970 predictions with confidence > 0.8 and entropy < 0.4.
-> Refined to 332 predictions with a single emotion probability > 0.9.
‚úÖ Successfully saved refined analysis to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/vid_inference/V2_20250711_114129/refined_emotion_log.csv

--- Summary ---
Total faces analyzed and saved: 7612
