# Step 1: Import Libraries

In [45]:
import math
import os
import time

import cv2 as cv
import gdown
import numpy as np

# Step 2: Verify CUDA

In [46]:
# Show the number of CUDA-enabled devices reported by this OpenCV build.
print(f"CUDA Enabled: {cv.cuda.getCudaEnabledDeviceCount()}")

# Dump the complete build configuration so the CUDA/cuDNN settings of the
# installed OpenCV binary can be inspected.
print("OpenCV Build Info:", cv.getBuildInformation(), sep="\n")

CUDA Enabled: 1
OpenCV Build Info:

  Version control:               unknown

  Extra modules:
    Location (extra):            D:/Programming/OpenCV/OpenCV-Contrib/opencv_contrib/modules
    Version control (extra):     4.11.0-43-geebd8977

  Platform:
    Timestamp:                   2025-05-16T18:34:01Z
    Host:                        Windows 10.0.19045 AMD64
    CMake:                       4.0.2
    CMake generator:             Visual Studio 17 2022
    CMake build tool:            C:/Program Files/Visual studio/MSBuild/Current/Bin/amd64/MSBuild.exe
    MSVC:                        1939
    Configuration:               Release
    Algorithm Hint:              ALGO_HINT_ACCURATE

  CPU/HW features:
    Baseline:                    SSE SSE2 SSE3
      requested:                 SSE3
    Dispatched code generation:  SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX
      SSE4_1 (18 files):         + SSSE3 SSE4_1
      SSE4_2 (2 files):          + SSSE3 SSE4_1 POPCNT SSE4_2
      AVX (9 files):

# Step 3: Download Resources

In [None]:
# Fetch the network definition (.prototxt) and the trained weights
# (.caffemodel) from Google Drive. Files that already exist locally are
# skipped so that "Restart & Run All" does not download them again; delete a
# file to force a fresh download.
MODEL_DOWNLOADS = {
    "pose_deploy_linevec_faster_4_stages.prototxt": "1D3ytIZ-ZMMd5MbvVbf2Sn5oZ1L0aQ9IG",
    "pose_iter_160000.caffemodel": "1f-fCSTg7qFHRVKGIptyPJsgNwRs4XDsK",
}

for model_filename, drive_file_id in MODEL_DOWNLOADS.items():
    if os.path.exists(model_filename):
        print(f"{model_filename} already present, skipping download")
        continue
    gdown.download(id=drive_file_id, output=model_filename, quiet=False)

# Step 4: Load Models

In [47]:
# Paths of the Caffe network description and its trained weights.
protoFile, weightsFile = (
    "pose_deploy_linevec_faster_4_stages.prototxt",
    "pose_iter_160000.caffemodel",
)

# Build the network, then ask OpenCV's DNN module to use the CUDA backend
# and the CUDA target for inference.
net = cv.dnn.readNetFromCaffe(protoFile, weightsFile)
for configure, option in (
    (net.setPreferableBackend, cv.dnn.DNN_BACKEND_CUDA),
    (net.setPreferableTarget, cv.dnn.DNN_TARGET_CUDA),
):
    configure(option)

# Step 5: Set Body Points

In [48]:
# Number of body keypoints used by this notebook (indices 0-14).
nPoints = 15

# Skeleton edges as [from_index, to_index] pairs into the keypoint list.
# Index meaning follows the names used in classify_pose: 0 head, 1 neck,
# 2-4 right arm, 5-7 left arm, 8-10 right leg, 11-13 left leg, 14 center.
POSE_PAIRS = [
    [0, 1],                          # head -> neck
    [1, 2], [2, 3], [3, 4],          # neck -> right shoulder -> elbow -> wrist
    [1, 5], [5, 6], [6, 7],          # neck -> left shoulder -> elbow -> wrist
    [1, 14],                         # neck -> center
    [14, 8], [8, 9], [9, 10],        # center -> right hip -> knee -> ankle
    [14, 11], [11, 12], [12, 13],    # center -> left hip -> knee -> ankle
]

# Step 6: Convert image to blob

In [None]:
# GPU based
def convert_image_to_blob(frame):
    """Turn a frame into the input blob expected by ``net.setInput``.

    Pixel values are scaled by 1/255, the image is resized to 256x256
    without cropping, no mean is subtracted and the channel order is left
    as it is (``swapRB=False``).
    """
    pixel_scale = 1.0/255
    network_input_size = (256, 256)
    mean_to_subtract = (0, 0, 0)
    return cv.dnn.blobFromImage(
        frame,
        pixel_scale,
        network_input_size,
        mean_to_subtract,
        swapRB=False,
        crop=False,
    )

# Step 7: Run Inference (forward pass)

In [None]:
# GPU based
def run_inference():
    """Run a forward pass of ``net`` on its current input and return the result.

    The very first call performs one extra forward pass whose result is
    discarded (a warm-up), and records that on the function object as
    ``run_inference._warmed_up`` so later calls run a single pass.
    """
    needs_warm_up = not hasattr(run_inference, '_warmed_up')
    if needs_warm_up:
        # Throw-away pass; only the second result below is returned.
        net.forward()
        run_inference._warmed_up = True

    result = net.forward()
    return result

# Step 8: Extract points

In [None]:
# CPU based
def extract_keypoints(output, height, width, threshold=0.1, num_points=15):
    """Find the most confident location of each body keypoint.

    Args:
        output: Network output, indexed as ``output[0, i, row, col]``. The
            first ``num_points`` channels are read as one map per keypoint
            (presumably per-keypoint confidence maps - confirm against the
            model in use).
        height: Height of the frame the coordinates are scaled to.
        width: Width of the frame the coordinates are scaled to.
        threshold: Minimum peak value a map must exceed for its keypoint to
            be reported. Without it every keypoint would always get a
            location, even when the body part is not visible.
        num_points: Number of leading channels to read as keypoints.

    Returns:
        A list of ``num_points`` entries, each an ``(x, y)`` tuple of ints in
        frame coordinates, or ``None`` when the peak value of that map does
        not exceed ``threshold``.
    """
    map_height, map_width = output.shape[2], output.shape[3]
    points = []
    for i in range(num_points):
        probMap = output[0, i, :, :]
        # minMaxLoc returns (min_val, max_val, min_loc, max_loc); locations
        # are (x, y), i.e. (column, row).
        _, peak_value, _, max_loc = cv.minMaxLoc(probMap)
        if peak_value > threshold:
            # Scale from map resolution up to frame resolution.
            points.append((int(max_loc[0] * width / map_width),
                           int(max_loc[1] * height / map_height)))
        else:
            # Downstream code (drawing, classification) treats None as
            # "keypoint not detected".
            points.append(None)
    return points

# Step 9: Display Points & Skeleton

In [None]:
# CPU based
def draw_skeleton(frame, points, POSE_PAIRS): 
    """Draw the detected skeleton on a copy of ``frame``.

    Args:
        frame: Image to draw on; it is copied, not modified.
        points: Keypoints indexed by body-part number; each entry is an
            ``(x, y)`` tuple or a falsy value (e.g. ``None``) when missing.
        POSE_PAIRS: Iterable of ``[partA, partB]`` index pairs naming the
            limbs to draw.

    Returns:
        The copy of ``frame`` with a line for every pair whose two keypoints
        are present and a filled circle on both ends of each drawn line.
    """
    image_skeleton = frame.copy()

    for pair in POSE_PAIRS:
        partA, partB = pair
        start, end = points[partA], points[partB]
        if start and end:
            cv.line(image_skeleton, start, end, (255, 255, 0), 2)
            # Mark both endpoints. Marking only the first one would leave
            # joints that never appear first in a pair (wrists, ankles)
            # without a marker.
            for joint in (start, end):
                cv.circle(image_skeleton, joint, 8, (255, 0, 0), thickness=-1, lineType=cv.FILLED)
    return image_skeleton

# Step 10: Classifying

In [None]:
# CPU based
def calculate_angle(line):
    """Return the direction of a line in degrees, or None if a point is missing.

    ``line`` holds two points, ``line[0]`` (start) and ``line[1]`` (end), each
    an ``(x, y)`` pair. The result is the angle of the vector start -> end as
    given by ``atan2(dy, dx)``, with negative angles shifted up by 360.
    """
    start, end = line[0], line[1]
    if not (start and end):
        return None

    (start_x, start_y), (end_x, end_y) = start, end
    degrees = math.degrees(math.atan2(end_y - start_y, end_x - start_x))

    # atan2 yields (-180, 180]; move the negative half up by a full turn.
    return degrees + 360 if degrees < 0 else degrees

In [None]:
def _is_vertical(angle):
    """Return True when ``angle`` (degrees) lies strictly between 70 and 110."""
    return 70 < angle < 110


def _is_horizontal(angle):
    """Return True when ``angle`` (degrees) is within 30 degrees of 0/360 or 180."""
    return (150 < angle < 210) or (330 < angle) or (angle < 30)


def classify_pose(points): # CPU based
    """Classify a body posture from detected keypoints.

    Only six keypoints are used: neck (1), center (14), right hip (8),
    right knee (9), left hip (11) and left knee (12). Angles come from
    ``calculate_angle`` and are measured on the vectors neck -> center and
    hip -> knee, so with pixel coordinates (y assumed to grow downward)
    roughly 90 degrees means "pointing down the image".

    Args:
        points: Sequence of keypoints indexed by body-part number; each entry
            is an ``(x, y)`` pair or ``None`` when the part was not detected.

    Returns:
        ``"standing"`` when torso and both thighs are vertical,
        ``"sitting"`` when the torso is vertical and both thighs horizontal,
        ``"laying"`` when the torso is horizontal, otherwise ``"none"``.
        ``"none"`` is also returned when any of the six keypoints is missing
        or ``points`` is malformed.
    """
    try:
        # each point of the body is (x, y), i.e. (width, height) order
        neck, center = points[1], points[14]
        right_hip, right_knee = points[8], points[9]
        left_hip, left_knee = points[11], points[12]

        torso_angle = calculate_angle((neck, center))
        right_thigh_angle = calculate_angle((right_hip, right_knee))
        left_thigh_angle = calculate_angle((left_hip, left_knee))
    except (LookupError, TypeError, ValueError):
        # Too few points, or entries that are not (x, y) number pairs.
        # Deliberately narrow: KeyboardInterrupt and genuine programming
        # errors must not be silently reported as "none".
        return "none"

    # calculate_angle returns None when either endpoint is missing.
    if torso_angle is None or right_thigh_angle is None or left_thigh_angle is None:
        return "none"

    vertical_torso = _is_vertical(torso_angle)
    horizontal_torso = _is_horizontal(torso_angle)
    vertical_thighs = _is_vertical(right_thigh_angle) and _is_vertical(left_thigh_angle)
    horizontal_thighs = _is_horizontal(right_thigh_angle) and _is_horizontal(left_thigh_angle)

    # Standing gesture
    if vertical_torso and vertical_thighs:
        return "standing"

    # Sitting gesture
    if vertical_torso and horizontal_thighs:
        return "sitting"

    # Laying gesture
    if horizontal_torso:
        return "laying"

    return "none"

# Step 11: Put Everything Together

In [55]:
def detect_gesture(frame, POSE_PAIRS):
    """Run the full pipeline on one frame.

    The frame is converted to a blob and set as the network input, a forward
    pass is run, keypoints are extracted at the frame's own resolution, and
    the skeleton is drawn and classified.

    Returns:
        A ``(frame_with_skeleton, label)`` tuple: an annotated copy of the
        frame and the posture label from ``classify_pose``.
    """
    net.setInput(convert_image_to_blob(frame))
    network_output = run_inference()

    frame_height, frame_width = frame.shape[0], frame.shape[1]
    keypoints = extract_keypoints(network_output, frame_height, frame_width)

    annotated = draw_skeleton(frame, keypoints, POSE_PAIRS)
    return annotated, classify_pose(keypoints)

# Step 12: Run Live Gesture Detection

In [81]:
cap = cv.VideoCapture(0)
# cap = cv.VideoCapture('http://21.118.71.170:8080/video', cv.CAP_FFMPEG)
cap.set(cv.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, 480)

frame_count = 0
skip_frames = 3  # run detection only on every 3rd captured frame

prev_time = time.time()
try:
    if not cap.isOpened():
        # The loop below ends on the first failed read; say why instead of
        # finishing silently.
        print("Could not open the video source")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % skip_frames != 0:
            continue

        # Mirror the image and force the size the overlay positions assume.
        frame = cv.flip(frame, 1)
        frame = cv.resize(frame, (640, 480))
        output_frame, label = detect_gesture(frame, POSE_PAIRS)

        # Rate of processed (not captured) frames. Guard against a zero
        # interval, which a coarse system clock can produce.
        curr_time = time.time()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_time = curr_time

        cv.putText(output_frame, f"FPS: {fps:.1f}", (520, 30), 
                    cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv.putText(output_frame, label, (10, 30),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv.imshow('Gesture Detection', output_frame)

        # Esc (key code 27) stops the loop.
        k = cv.waitKey(5) & 0xFF
        if k == 27:
            break
finally:
    # Always free the window and the camera, including when the cell is
    # interrupted or detection raises; otherwise the device stays locked
    # until the kernel is restarted.
    cv.destroyAllWindows()
    cap.release()