# Box-Opening Detection Demo

This notebook demonstrates the box-opening detection pipeline on a single frame or video.

In [None]:
# Import required libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
from src.detect_yolo import YOLODetector
from src.hand_pose import HandPoseEstimator
from src.opening_logic import is_opening_box
from src.utils import draw_bounding_box, draw_keypoint

%matplotlib inline

## 1. Load Models

In [None]:
# Build the object detector and the hand-pose estimator used by the later cells.
yolo_weights = "yolov8n.pt"   # weights file handed to the detector
min_confidence = 0.3          # confidence threshold handed to the detector

detector = YOLODetector(
    model_path=yolo_weights,
    conf_threshold=min_confidence,
)
pose_estimator = HandPoseEstimator()

print("Models loaded successfully!")

## 2. Load Test Image

In [None]:
# Load a test frame
# Replace with your own image path
frame_path = "data/frames/frame_000001.jpg"

frame = cv2.imread(frame_path)
if frame is None:
    # cv2.imread signals failure by returning None rather than raising.
    # Stop here with a clear message: every later cell uses `frame`, and
    # continuing with None would only fail further down with a less helpful
    # error.
    raise OSError(
        f"Could not load image from {frame_path}. "
        "Please provide a valid image path"
    )

# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(12, 8))
plt.imshow(frame_rgb)
plt.title("Original Frame")
plt.axis('off')
plt.show()

print(f"Frame shape: {frame.shape}")

## 3. Run Object Detection

In [None]:
# Run the detector on the loaded frame. The result is used as a mapping from
# class name to a list of detections, each exposing 'confidence' and 'bbox'.
detections = detector.detect(frame)

print("Detections:")
for class_name in detections:
    dets = detections[class_name]
    # One summary line per class, then one line per individual detection.
    print(f"  {class_name}: {len(dets)} detected")
    for det in dets:
        print(f"    - Confidence: {det['confidence']:.2f}, BBox: {det['bbox']}")

## 4. Extract Hand Positions

In [None]:
# Defaults used when no person is found: no bbox and no wrist positions.
wrists = {"left_wrist": None, "right_wrist": None}
person_bbox = None

has_person = "person" in detections and len(detections["person"]) > 0

if not has_person:
    print("No person detected in frame")
else:
    # Only the first "person" detection is used for wrist extraction.
    person_bbox = detections["person"][0]["bbox"]
    wrists = pose_estimator.extract_wrists(frame, person_bbox)

    print("Wrist positions:")
    print(f"  Left wrist: {wrists['left_wrist']}")
    print(f"  Right wrist: {wrists['right_wrist']}")

## 5. Detect Box-Opening Event

In [None]:
def _first_bbox(all_detections, class_name):
    """Return the bbox of the first detection of `class_name`, or None if there is none."""
    if class_name in all_detections and len(all_detections[class_name]) > 0:
        return all_detections[class_name][0]["bbox"]
    return None


# Box and lid boxes for the current frame (None when the class was not detected).
box_bbox = _first_bbox(detections, "box")
curr_lid_bbox = _first_bbox(detections, "lid")

# Decide whether the box is being opened. Only one frame is available in this
# demo, so no previous lid position can be supplied (temporal analysis would
# need the previous frame).
is_opening, confidence = is_opening_box(
    person_bbox=person_bbox,
    box_bbox=box_bbox,
    wrist_positions=wrists,
    prev_lid_bbox=None,
    curr_lid_bbox=curr_lid_bbox,
)

print(f"\nBox Opening Detection:")
print(f"  Is Opening: {is_opening}")
print(f"  Confidence: {confidence:.2f}")

## 6. Visualize Results

In [None]:
# Create visualization
def _is_present(value):
    """Return True when `value` is neither None nor an empty sequence/array.

    Plain truthiness (`if value:`) raises ValueError for a multi-element NumPy
    array, so presence is tested explicitly instead.
    """
    return value is not None and len(value) > 0


# Draw on a copy so the original frame is left untouched.
vis_frame = frame.copy()

# (bbox, label, colour) for each box overlay. The frame stays in OpenCV's
# channel order until the final BGR->RGB conversion below, so the colour
# tuples are presumably BGR — confirm against draw_bounding_box.
bbox_overlays = [
    (person_bbox, "person", (0, 255, 0)),
    (box_bbox, "box", (255, 0, 0)),
    (curr_lid_bbox, "lid", (0, 255, 255)),
]
for overlay_bbox, overlay_label, overlay_color in bbox_overlays:
    if _is_present(overlay_bbox):
        draw_bounding_box(vis_frame, overlay_bbox, label=overlay_label, color=overlay_color)

# Draw wrist keypoints (key in `wrists`, short label, colour).
wrist_overlays = [
    ("left_wrist", "L", (0, 0, 255)),
    ("right_wrist", "R", (255, 0, 255)),
]
for wrist_key, wrist_label, wrist_color in wrist_overlays:
    wrist_point = wrists[wrist_key]
    if _is_present(wrist_point):
        draw_keypoint(vis_frame, wrist_point, label=wrist_label, color=wrist_color)

# Add opening status text
if is_opening:
    cv2.putText(vis_frame, "BOX OPENING DETECTED", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

# Display: convert from OpenCV's BGR order to RGB for matplotlib.
vis_frame_rgb = cv2.cvtColor(vis_frame, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 8))
plt.imshow(vis_frame_rgb)
plt.title(f"Detection Results (Opening: {is_opening}, Confidence: {confidence:.2f})")
plt.axis('off')
plt.show()

## 7. Process Full Video (Optional)

In [None]:
# Set up the end-to-end pipeline for processing a whole video.
from src.pipeline import BoxOpeningPipeline

# Constructor options, gathered in one place so they are easy to tweak.
pipeline_settings = {
    "model_path": "yolov8n.pt",
    "output_dir": "output/demo",
    "visualize": True,
    "conf_threshold": 0.3,
}
pipeline = BoxOpeningPipeline(**pipeline_settings)

# Video to process; the actual run is left commented out.
video_path = "data/videos/test_video.mp4"
# Uncomment to run:
# results_path = pipeline.process_video(video_path, frame_skip=2)
# print(f"Results saved to: {results_path}")

## 8. Analyze Results

In [None]:
# Load and analyze results
#
# Everything below the imports is intentionally commented out. It reads result
# files from "output/demo", the output_dir configured for the pipeline in
# section 7, so it presumably only works after a video has been processed
# there — TODO confirm the file names the pipeline actually writes.
# Uncomment the parts you need once those files exist.
import json
import pandas as pd

# Load JSON results
# results_path = "output/demo/results.json"
# with open(results_path, 'r') as f:
#     results = json.load(f)

# Load CSV results
# (the code below reads the columns 'frame_id', 'box_opening' and 'confidence')
# csv_path = "output/demo/results.csv"
# df = pd.read_csv(csv_path)

# Display summary
# print(f"Total frames: {len(df)}")
# print(f"Opening events: {df['box_opening'].sum()}")
# print(f"Percentage: {100 * df['box_opening'].sum() / len(df):.1f}%")

# Plot confidence over time
# plt.figure(figsize=(12, 4))
# plt.plot(df['frame_id'], df['confidence'])
# plt.xlabel('Frame ID')
# plt.ylabel('Confidence')
# plt.title('Detection Confidence Over Time')
# plt.grid(True)
# plt.show()