In [None]:
#!/usr/bin/env python3
"""
Detect a person waving using:
  1) YOLOv5m for person detection
  2) MobileNetV2+LSTM for waving inference on an 8-frame clip
Uses your MacBook’s built-in webcam instead of RealSense.
"""

import os
# silence TensorFlow deprecation warnings (0 = all messages, 1 = filter INFO, 2 = filter WARNING)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import torch
import tensorflow as tf
import cv2
import numpy as np
from collections import deque

YOLO_MODEL_PATH = 'yolov5m.pt'
WAVE_MODEL_PATH = 'wave_sequence_model_one_epoch.h5'
CONF_THRESHOLD  = 0.5
FRAME_WIDTH     = 640
FRAME_HEIGHT    = 480
CLIP_LENGTH     = 8
ROI_SIZE        = 224

def load_models():
    detector = torch.hub.load(
        'ultralytics/yolov5', 'custom',
        path=YOLO_MODEL_PATH
    )
    detector.conf = CONF_THRESHOLD
    wave_model = tf.keras.models.load_model(WAVE_MODEL_PATH)
    return detector, wave_model

def init_camera():
    cap = cv2.VideoCapture(0, cv2.CAP_ANY)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam")
    return cap

def detect_person_box(detector, frame):
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = detector(img, size=640)
    dets = results.xyxy[0].cpu().numpy()
    persons = [d for d in dets if int(d[5]) == 0 and d[4] >= CONF_THRESHOLD]
    if not persons:
        return None
    x1, y1, x2, y2, conf, cls = max(persons, key=lambda d: d[4])
    return int(x1), int(y1), int(x2), int(y2)

def main():
    person_detector, wave_model = load_models()
    print("Models loaded.")
    cap = init_camera()
    print("Webcam initialized.")

    roi_buffer = deque(maxlen=CLIP_LENGTH)
    preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                continue

            box = detect_person_box(person_detector, frame)
            if box is None:
                roi_buffer.clear()
            else:
                print(f"Detected person at {box}")
                x1, y1, x2, y2 = box
                crop = frame[y1:y2, x1:x2]
                if crop.size == 0:
                    roi_buffer.clear()
                else:
                    roi_buffer.append(
                        cv2.resize(crop, (ROI_SIZE, ROI_SIZE))
                    )

                    if len(roi_buffer) == CLIP_LENGTH:
                        clip = np.stack(roi_buffer, axis=0).astype('float32')
                        clip = preprocess_input(clip)
                        prob = wave_model.predict(clip[None, ...])[0, 0]
                        print(f"Probability of waving: {prob:.2f}")
                        if prob >= 0.5:
                            print("Waving detected!")
                        roi_buffer.clear()

            cv2.imshow('Webcam', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()
        print("Webcam released, exiting.")

if __name__ == '__main__':
    main()

Using cache found in /Users/braeden/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-5-2 Python-3.9.21 torch-2.6.0 CPU

Fusing layers... 
YOLOv5m_v6 summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Models loaded.
Webcam initialized.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Probability of waving: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Probability of waving: 0.01
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/

KeyboardInterrupt: 

In [6]:
!pip3 install pandas

Collecting pandas
  Using cached pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl (11.3 MB)
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2025.2 tzdata-2025.2


In [None]:
!pip3 install -r requirements.txt

Collecting opencv-python (from -r requirements.txt (line 3))
  Using cached opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86
