In [None]:
from IPython import display
from pathlib import Path
import shutil
import yaml

display.clear_output()

# Load config
with open('../config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Prevent ultralytics from tracking activity
!yolo settings sync=False

import ultralytics
ultralytics.checks()

In [None]:
from ultralytics import YOLO
from IPython.display import display, Image
from cleanvision import Imagelab
from tqdm import tqdm

# Phase 3 Full-body & Face Validation

Info: if a person is "cut off," their bounding box will touch the very edge of the image frame.
Check the $y_{min}$ (top) and $y_{max}$ (bottom) of the bounding box. If the top of the box is at pixel 0, the head is likely cut off. If the bottom is at the maximum image height, the feet are cut off.

# Todo
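Key design decision: the initial idea was to use the bbox to check for a full-body image instead of another model; this is superseded by the method comparison below, which chooses pose keypoints (Method B).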

In [None]:
import cv2

## Full-body Detection: Method Comparison & Design Decision

### Problem Statement
Dataset requirement: **"Full-body person crops only (No feet or hands are acceptable)"**

How do we validate that a person is truly full-body and not cut off?

### Method A: Geometry-based Bounding Box Check
**Logic**: If a person is "cut off," their bounding box will touch the very edge of the image.
- Check if `x_min < margin` or `x_max > (width - margin)` → horizontally cut off
- Check if `y_min < margin` or `y_max > (height - margin)` → vertically cut off (head/feet missing)
- **Pros**: Fast, simple, no extra inference needed
- **Cons**: Unreliable with loose bboxes; valid images whose hands/feet are fully in frame can still be rejected

### Method B: Pose Keypoint Validation ✓ **CHOSEN**
**Logic**: Use YOLOv8-Pose to detect actual body keypoints (nose, ankles). A full-body person must have:
- ✅ Nose visible (head is visible)
- ✅ Left ankle visible (left leg visible)
- ✅ Right ankle visible (right leg visible)
- **Pros**: Detects actual body parts, not bbox edges; more robust to bbox variations
- **Cons**: Slower (additional pose inference per image)

### Why Method B?
1. **Accuracy**: The dataset requirement is strict → actual body parts must be validated, not just box position
2. **Robustness**: YOLOv8 bbox can be loose or tight; Method A would fail on loose bboxes
3. **Quality Guarantee**: Ensures only truly full-body images pass through
4. **Trade-off Worth It**: ~2-3x slower inference, but significantly higher confidence in output quality

### Validation Rule
```
is_fullbody = (
    confidence(nose) > 0.5 AND 
    confidence(left_ankle) > 0.5 AND 
    confidence(right_ankle) > 0.5
)
```

**Expected Result**: Stricter filter, fewer false positives, higher dataset quality.

In [None]:
# Load YOLOv8-Pose model for keypoint detection.
# The weights path is relative to the notebook's working directory.
pose_model = YOLO("../model/yolov8n-pose.pt")

def _bbox_iou(box_a, box_b):
    """Return the intersection-over-union of two [x_min, y_min, x_max, y_max] boxes."""
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / union if union > 0 else 0.0


def is_fullbody_person(img, bbox_xyxy, pose_model, keypoint_threshold=0.5):
    """
    Check if person has full body visible using pose keypoints.

    Pose detection runs on the full image, so it may return several people.
    The detection whose box overlaps ``bbox_xyxy`` the most (highest IoU) is
    the one that gets validated, so the verdict refers to the requested
    person rather than to whichever detection happens to come first.

    Args:
        img: image array (BGR)
        bbox_xyxy: [x_min, y_min, x_max, y_max] of the person to validate, in
            the pixel coordinates of ``img``. Pass None to validate the first
            pose detection.
        pose_model: YOLOv8-Pose model
        keypoint_threshold: confidence threshold for keypoints (default: 0.5)

    Returns:
        dict: {'is_fullbody': bool, 'keypoint_confidence': dict}
        ``keypoint_confidence`` maps 'nose', 'left_ankle' and 'right_ankle'
        to floats. It is empty when the pose model found nobody, or nobody
        overlapping ``bbox_xyxy``.
    """
    # Run pose detection on full image
    results = pose_model.predict(source=img, verbose=False)
    result = results[0]

    if not result.keypoints or len(result.keypoints) == 0:
        return {'is_fullbody': False, 'keypoint_confidence': {}}

    # Pick the pose detection that corresponds to the requested person.
    # Index 0 is used when no matching is possible (no bbox given, or the
    # result does not expose one box per keypoint set).
    person_idx = 0
    pose_boxes = getattr(result, 'boxes', None)
    if bbox_xyxy is not None and pose_boxes is not None:
        candidate_boxes = pose_boxes.xyxy.tolist()
        if len(candidate_boxes) == len(result.keypoints):
            target_box = [float(v) for v in bbox_xyxy]
            overlaps = [_bbox_iou(target_box, cand) for cand in candidate_boxes]
            person_idx = max(range(len(overlaps)), key=overlaps.__getitem__)
            if overlaps[person_idx] <= 0.0:
                # No pose detection overlaps the requested person at all.
                return {'is_fullbody': False, 'keypoint_confidence': {}}

    # COCO keypoint indices: 0=nose, 15=left_ankle, 16=right_ankle
    conf = result.keypoints.conf
    if conf is None:
        # No per-keypoint confidences available: treat every keypoint as unseen.
        nose_conf = left_ankle_conf = right_ankle_conf = 0.0
    else:
        nose_conf = conf[person_idx, 0].item()
        left_ankle_conf = conf[person_idx, 15].item()
        right_ankle_conf = conf[person_idx, 16].item()

    # Full-body: must have nose + both ankles with high confidence
    is_fullbody = bool(
        nose_conf > keypoint_threshold
        and left_ankle_conf > keypoint_threshold
        and right_ankle_conf > keypoint_threshold
    )

    return {
        'is_fullbody': is_fullbody,
        'keypoint_confidence': {
            'nose': float(nose_conf),
            'left_ankle': float(left_ankle_conf),
            'right_ankle': float(right_ankle_conf),
        }
    }

In [None]:
# Load first person image for testing
first_img_name = person_image_names[0]
first_img_path = person_output_path / first_img_name
first_img = cv2.imread(str(first_img_path))
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, so fail here with the offending path instead of on the
# `.shape` access below.
if first_img is None:
    raise FileNotFoundError(f"Could not read image: {first_img_path}")
first_entry = all_results[first_img_name]

print(f"Loaded first image for testing: {first_img_name}")
print(f"Image shape: {first_img.shape}")
print(f"Persons in image: {len(first_entry['boxes_xyxy'])}")