# Library Import

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import csv
import os
from ultralytics import YOLO

# Mediapipe

In [9]:
# Short aliases for the MediaPipe drawing helpers and the holistic solution
# module; the capture function below refers to both by these names.
mp_drawing, mp_holistic = mp.solutions.drawing_utils, mp.solutions.holistic

In [10]:
def capture_pose(cam, label, output='pose_keypoints.csv'):
    """Record MediaPipe pose landmarks from a video source into a CSV file.

    Frames are read from ``cam`` until the stream closes, a frame cannot be
    read, or the user presses ``q`` in the preview window. For every frame in
    which a pose is detected, one row is appended to ``output``: ``label``
    followed by ``x, y, z, visibility`` for each detected landmark. Frames
    without a detected pose are shown in the preview but produce no row.

    Args:
        cam: Source handed to ``cv2.VideoCapture`` (the notebook passes the
            camera index ``0``).
        label: Value written in the first column of every row.
        output: Path of the CSV file that rows are appended to.

    Raises:
        OSError: If ``output`` cannot be opened for appending.
    """
    cap = cv2.VideoCapture(cam)
    try:
        if not cap.isOpened():
            # Nothing to record; return before touching the output file.
            return

        # The drawing styles never change, so build them once instead of on
        # every frame.
        landmark_style = mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4)
        connection_style = mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)

        # Open the CSV once for the whole session rather than once per frame.
        with mp_holistic.Holistic(min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic, \
                open(output, mode='a', newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream or camera failure: there is no frame to
                    # convert, so stop instead of passing None to cvtColor.
                    break

                # The frame is converted to RGB for detection and marked
                # read-only while the model processes it.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                results = holistic.process(image)

                # Back to a writable BGR image for drawing and display.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                          landmark_style, connection_style)

                # pose_landmarks is None when no pose was found; test for that
                # explicitly instead of hiding every error behind a bare except.
                if results.pose_landmarks is not None:
                    pose = results.pose_landmarks.landmark
                    pose_row = np.array(
                        [[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]
                    ).flatten().tolist()
                    writer.writerow([label] + pose_row)

                cv2.imshow('Raw Webcam Feed', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the preview window, even on error
        # or kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()

In [11]:
# CSV header for the MediaPipe rows: a 'class' column followed by
# x/y/z/v columns numbered 1 through 33.
landmarks = ['class'] + [
    f'{axis}{idx}' for idx in range(1, 34) for axis in ('x', 'y', 'z', 'v')
]
landmarks

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21',
 'x22',
 'y22',
 'z22',
 'v22',
 'x23',
 'y23',
 'z23',
 'v23',
 'x24',
 'y24',
 'z24',
 'v24',
 'x25',
 'y25',
 'z25',
 'v25',
 'x26',
 'y26',
 'z26',
 'v26',
 'x27',
 'y27',
 'z27',
 'v27',
 'x28',
 'y28',
 'z28',
 'v28',
 'x29',
 'y29',
 'z29',
 'v29',
 'x30',
 'y30',
 'z30',
 'v30',
 'x31',
 'y31',
 'z31',
 'v31',
 'x32',
 'y32',
 'z32',
 'v32',
 'x33',
 'y33',
 'z33',
 'v33']

In [12]:
# Start a fresh pose_keypoints.csv (mode 'w' truncates any existing file) and
# write the column names as its first row.
with open('pose_keypoints.csv', mode='w', newline='') as header_file:
    csv.writer(
        header_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
    ).writerow(landmarks)

In [15]:
# Record rows labelled 'suspicious' from camera 0 into the default CSV;
# press 'q' in the preview window to stop.
capture_pose(cam=0, label='suspicious')

In [7]:
# NOTE(review): `result` is never assigned at notebook scope in the cells shown
# here (capture_pose keeps its detections in a local named `results`), so this
# cell raises NameError. It looks like a leftover debugging probe — confirm and
# remove.
result.pose_landmarks

NameError: name 'result' is not defined

# YOLO V11

In [3]:
# Build the YOLO model from the 'yolo11n-pose.pt' weights file. The resulting
# module-level `model` is what capture_pose_yolo calls on each frame.
model = YOLO('yolo11n-pose.pt')

In [10]:
def capture_pose_yolo(cam, label, output='yolo_keypoints.csv'):
    """Record YOLO pose keypoints from a video source into a CSV file.

    Frames are read from ``cam`` until the stream closes, a frame cannot be
    read, or the user presses ``q`` in the preview window. Each frame is run
    through the module-level ``model``; when at least one set of keypoints is
    returned, one row is appended to ``output``: ``label`` followed by the
    flattened ``x, y`` coordinates of the first detection. Frames with no
    detection are shown in the preview but produce no row.

    Args:
        cam: Source handed to ``cv2.VideoCapture`` (the notebook passes the
            camera index ``0``).
        label: Value written in the first column of every row.
        output: Path of the CSV file that rows are appended to.

    Raises:
        OSError: If ``output`` cannot be opened for appending.
    """
    cap = cv2.VideoCapture(cam)
    try:
        if not cap.isOpened():
            # Nothing to record; return before touching the output file.
            return

        # Open the CSV once for the whole session rather than once per frame.
        with open(output, mode='a', newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream or camera failure: no frame to process.
                    break

                # Ultralytics documents numpy inputs as BGR (the OpenCV
                # convention), so the captured frame is passed unchanged; the
                # annotated image from plot() can then be shown directly.
                results = model(frame)

                image = frame
                for result in results:
                    image = result.plot()

                    keypoints = result.keypoints
                    if keypoints is None:
                        continue

                    kp_array = keypoints.xy.cpu().numpy()
                    # With no detection the array has no usable first entry;
                    # skip the frame instead of raising IndexError or writing
                    # a row that contains only the label.
                    if kp_array.shape[0] == 0 or kp_array[0].size == 0:
                        continue

                    # Only the first detection in the frame is recorded.
                    row = [label] + kp_array[0].flatten().tolist()
                    writer.writerow(row)

                cv2.imshow('Raw Webcam Feed', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the preview window, even on error
        # or kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()

In [11]:
num_coords = 17

# CSV header for the YOLO rows: a 'class' column followed by an x/y column
# pair for each of the num_coords keypoints.
landmarks = ['class'] + [
    f'{axis}{idx}' for idx in range(1, num_coords + 1) for axis in ('x', 'y')
]
landmarks

['class',
 'x1',
 'y1',
 'x2',
 'y2',
 'x3',
 'y3',
 'x4',
 'y4',
 'x5',
 'y5',
 'x6',
 'y6',
 'x7',
 'y7',
 'x8',
 'y8',
 'x9',
 'y9',
 'x10',
 'y10',
 'x11',
 'y11',
 'x12',
 'y12',
 'x13',
 'y13',
 'x14',
 'y14',
 'x15',
 'y15',
 'x16',
 'y16',
 'x17',
 'y17']

In [12]:
# Start a fresh yolo_keypoints.csv (mode 'w' truncates any existing file) and
# write the column names as its first row.
with open('yolo_keypoints.csv', mode='w', newline='') as header_file:
    csv.writer(
        header_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
    ).writerow(landmarks)

In [13]:
# Record rows labelled 'normal' from camera 0 into the default CSV;
# press 'q' in the preview window to stop.
capture_pose_yolo(cam=0, label='normal')


0: 480x640 (no detections), 833.8ms
Speed: 58.8ms preprocess, 833.8ms inference, 6.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 463.9ms
Speed: 4.2ms preprocess, 463.9ms inference, 18.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 462.5ms
Speed: 3.4ms preprocess, 462.5ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 422.5ms
Speed: 5.5ms preprocess, 422.5ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 417.7ms
Speed: 3.5ms preprocess, 417.7ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 429.8ms
Speed: 3.6ms preprocess, 429.8ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 425.1ms
Speed: 3.5ms preprocess, 425.1ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 376.6ms
Speed: 3.6ms preprocess, 376.6ms inference, 2.1ms postprocess per

In [14]:
# Record rows labelled 'suspicious' from camera 0 into the default CSV;
# press 'q' in the preview window to stop.
capture_pose_yolo(cam=0, label='suspicious')


0: 480x640 (no detections), 412.2ms
Speed: 3.3ms preprocess, 412.2ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 462.2ms
Speed: 4.1ms preprocess, 462.2ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 483.5ms
Speed: 3.9ms preprocess, 483.5ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 471.6ms
Speed: 4.5ms preprocess, 471.6ms inference, 3.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 562.7ms
Speed: 3.5ms preprocess, 562.7ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 434.6ms
Speed: 2.7ms preprocess, 434.6ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 485.8ms
Speed: 3.7ms preprocess, 485.8ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 477.0ms
Speed: 3.3ms preprocess, 477.0ms inference, 3.1ms postprocess per i