Overlay over live footage

In [1]:
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.framework.formats import landmark_pb2

# Init pose
# Bind the MediaPipe Pose solution module, build a Pose estimator with the
# library's default options, and keep a handle on the landmark drawing helpers.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_draw = mp.solutions.drawing_utils

# Video and webcam
# The clip path is relative, so it is resolved against the current working
# directory. 0 is the capture device index handed to OpenCV
# (presumably the default webcam -- confirm on the target machine).
cap_video = cv2.VideoCapture("enemy_m2.mp4")
cap_webcam = cv2.VideoCapture(0)

#-------------------------------------custom functions---------------------------------------------
def draw_shifted_landmarks(image, landmarks, shift_x_px, image_width, image_height):
    """Draw a pose skeleton on ``image``, moved sideways by ``shift_x_px``.

    A copy of every landmark is made with its normalized x coordinate offset
    by ``shift_x_px / image_width``; y, z and visibility are copied as-is.
    The copies are then rendered with MediaPipe's ``draw_landmarks`` using
    the standard pose connections.

    Args:
        image: Image array that is drawn on in place.
        landmarks: Object exposing a ``landmark`` sequence whose items have
            ``x``, ``y``, ``z`` and ``visibility`` attributes.
        shift_x_px: Horizontal offset in pixels.
        image_width: Width in pixels used to normalize the offset.
        image_height: Accepted for signature symmetry; not used here.
    """
    moved = landmark_pb2.NormalizedLandmarkList()

    for source in landmarks.landmark:
        target = moved.landmark.add()
        # Landmark coordinates are normalized, so the pixel shift is too.
        target.x = source.x + (shift_x_px / image_width)
        target.y = source.y
        target.z = source.z
        target.visibility = source.visibility

    joint_style = mp_draw.DrawingSpec(color=(0,255,0), thickness=2)
    bone_style = mp_draw.DrawingSpec(color=(255,255,255), thickness=2)

    mp_draw.draw_landmarks(
        image,
        moved,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=joint_style,
        connection_drawing_spec=bone_style,
    )
#-------------------------------------------------------------------------------------------------
def draw_body_shapes(image, landmarks, shift_x_px, width, height, color):
    """Draw a simple filled "puppet" of a detected pose onto ``image``.

    The figure consists of a filled circle on the nose, a filled quadrilateral
    spanning both shoulders and both hips, thick lines for the arms and legs,
    and filled circles on the ankles. Everything is drawn in place.

    Args:
        image: Image array that is drawn on in place.
        landmarks: Pose landmarks object exposing a ``landmark`` sequence that
            can be indexed with ``mp_pose.PoseLandmark`` values; each item
            provides normalized ``x`` and ``y`` attributes.
        shift_x_px: Horizontal offset, in pixels, applied to every point.
        width: Width in pixels used to scale normalized x coordinates.
        height: Height in pixels used to scale normalized y coordinates.
        color: Color passed unchanged to every OpenCV drawing call.
    """
    joints = landmarks.landmark

    def to_pixel(name):
        # Same arithmetic as the rest of the file: shift in normalized space,
        # then scale to pixels and truncate to int.
        joint = joints[mp_pose.PoseLandmark[name].value]
        return int((joint.x + shift_x_px / width) * width), int(joint.y * height)

    # Resolve every joint up front so a bad landmark fails before any drawing.
    joint_names = (
        "NOSE",
        "LEFT_SHOULDER", "RIGHT_SHOULDER",
        "LEFT_ELBOW", "RIGHT_ELBOW",
        "LEFT_WRIST", "RIGHT_WRIST",
        "LEFT_HIP", "RIGHT_HIP",
        "LEFT_KNEE", "RIGHT_KNEE",
        "LEFT_ANKLE", "RIGHT_ANKLE",
    )
    pt = {name: to_pixel(name) for name in joint_names}

    # Head
    cv2.circle(image, pt["NOSE"], 20, color, -1)

    # Torso as filled polygon (shoulders, then hips, walking around the outline)
    torso_outline = [
        pt["LEFT_SHOULDER"], pt["RIGHT_SHOULDER"], pt["RIGHT_HIP"], pt["LEFT_HIP"],
    ]
    torso_pts = np.array(torso_outline, np.int32).reshape((-1, 1, 2))
    cv2.fillPoly(image, [torso_pts], color)

    # Arms (thickness 12) first, then legs (thickness 14), as plain lines.
    limbs = (
        ("LEFT_SHOULDER", "LEFT_ELBOW", 12),
        ("LEFT_ELBOW", "LEFT_WRIST", 12),
        ("RIGHT_SHOULDER", "RIGHT_ELBOW", 12),
        ("RIGHT_ELBOW", "RIGHT_WRIST", 12),
        ("LEFT_HIP", "LEFT_KNEE", 14),
        ("LEFT_KNEE", "LEFT_ANKLE", 14),
        ("RIGHT_HIP", "RIGHT_KNEE", 14),
        ("RIGHT_KNEE", "RIGHT_ANKLE", 14),
    )
    for start, end, thickness in limbs:
        cv2.line(image, pt[start], pt[end], color, thickness)

    # Feet as circles
    for ankle in ("LEFT_ANKLE", "RIGHT_ANKLE"):
        cv2.circle(image, pt[ankle], 10, color, -1)

#-----------------------------------------------------------------------------------------------
def get_keypoints_xy(landmarks, shift_x_px, width, height):
    """Return pixel positions of the head and both fists.

    The nose landmark stands in for the head and the wrist landmarks for the
    fists. Normalized coordinates are shifted horizontally by ``shift_x_px``
    pixels, scaled by ``width`` / ``height`` and truncated to ints.

    Returns:
        dict with keys ``"head"``, ``"left_fist"`` and ``"right_fist"``, each
        mapping to an ``(x, y)`` tuple of ints.
    """
    joints = landmarks.landmark
    wanted = (
        ("head", mp_pose.PoseLandmark.NOSE),
        ("left_fist", mp_pose.PoseLandmark.LEFT_WRIST),
        ("right_fist", mp_pose.PoseLandmark.RIGHT_WRIST),
    )

    keypoints = {}
    for label, index in wanted:
        joint = joints[index]
        px = int((joint.x + shift_x_px / width) * width)
        py = int(joint.y * height)
        keypoints[label] = (px, py)
    return keypoints
#-----------------------------------------------------------------------------------------------
def draw_debug_shapes(image, points, color):
    """Mark the tracked keypoints on ``image`` for visual debugging.

    Draws a filled circle of radius 10 at ``points["head"]`` and a filled
    16x16 square centred on ``points["left_fist"]`` and
    ``points["right_fist"]``. ``image`` is modified in place.
    """
    half_side = 8

    # Head marker
    cv2.circle(image, points["head"], 10, color, cv2.FILLED)

    # Fist markers
    for fist in ("left_fist", "right_fist"):
        cx, cy = points[fist]
        top_left = (cx - half_side, cy - half_side)
        bottom_right = (cx + half_side, cy + half_side)
        cv2.rectangle(image, top_left, bottom_right, color, cv2.FILLED)

#-----------------------------------------------------------------------------------------------
def is_hit(p1, p2, threshold=30):
    """Tell whether two points are strictly closer than ``threshold``.

    The distance is the Euclidean norm of ``p1 - p2``; a distance exactly
    equal to ``threshold`` does not count as a hit.
    """
    offset = np.array(p1) - np.array(p2)
    distance = np.linalg.norm(offset)
    return distance < threshold
#-----------------------------------------------------------------------------------------------

# Geometry shared by the game loop: each feed is resized to FRAME_SIZE before
# pose estimation, and both figures are drawn on a CANVAS_WIDTH x CANVAS_HEIGHT
# image. The player figure is pushed right by PLAYER_SHIFT_X pixels.
FRAME_SIZE = (640, 480)
CANVAS_WIDTH, CANVAS_HEIGHT = 1280, 480
PLAYER_SHIFT_X = 400
HIT_TEXT_ORIGIN = (CANVAS_WIDTH // 2 - 50, CANVAS_HEIGHT // 2)
FIST_KEYS = ("left_fist", "right_fist")

# A Pose instance in its default (video) mode tracks landmarks from one frame
# to the next, so interleaving two different feeds through a single instance
# defeats that tracking. The webcam therefore gets its own estimator while the
# pre-recorded clip keeps using `pose`.
pose_cam = mp_pose.Pose()

try:
    while cap_video.isOpened() and cap_webcam.isOpened():
        ret_vid, frame_vid = cap_video.read()
        ret_cam, frame_cam = cap_webcam.read()

        if not ret_cam:
            # Without a webcam frame there is nothing to resize or process.
            print("Webcam frame could not be read; stopping.")
            break

        if not ret_vid:
            # End of the clip: rewind and retry once. If it still cannot be
            # read the file is unusable, so stop instead of spinning forever.
            cap_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret_vid, frame_vid = cap_video.read()
            if not ret_vid:
                print("Video frame could not be read; stopping.")
                break

        # Resize both to same size; only the webcam feed is mirrored.
        frame_vid = cv2.resize(frame_vid, FRAME_SIZE)
        frame_cam = cv2.flip(cv2.resize(frame_cam, FRAME_SIZE), 1)

        # One transparent BGRA layer per fighter.
        layer_enemy = np.zeros((CANVAS_HEIGHT, CANVAS_WIDTH, 4), dtype=np.uint8)
        layer_player = np.zeros((CANVAS_HEIGHT, CANVAS_WIDTH, 4), dtype=np.uint8)

        # Process both frames (MediaPipe expects RGB input).
        results_vid = pose.process(cv2.cvtColor(frame_vid, cv2.COLOR_BGR2RGB))
        results_cam = pose_cam.process(cv2.cvtColor(frame_cam, cv2.COLOR_BGR2RGB))

        points_vid = None
        points_cam = None

        # Draw both in same space
        if results_vid.pose_landmarks:
            draw_body_shapes(layer_enemy, results_vid.pose_landmarks, shift_x_px=0,
                             width=CANVAS_WIDTH, height=CANVAS_HEIGHT, color=(0, 0, 255))
            layer_enemy[:, :, 3] = 255  # Set alpha fully opaque
            points_vid = get_keypoints_xy(results_vid.pose_landmarks, shift_x_px=0,
                                          width=CANVAS_WIDTH, height=CANVAS_HEIGHT)

        if results_cam.pose_landmarks:
            draw_body_shapes(layer_player, results_cam.pose_landmarks, shift_x_px=PLAYER_SHIFT_X,
                             width=CANVAS_WIDTH, height=CANVAS_HEIGHT, color=(255, 0, 0))
            layer_player[:, :, 3] = 255  # Set alpha fully opaque
            points_cam = get_keypoints_xy(results_cam.pose_landmarks, shift_x_px=PLAYER_SHIFT_X,
                                          width=CANVAS_WIDTH, height=CANVAS_HEIGHT)

        # Saturating per-channel add merges the two layers.
        combined = cv2.add(layer_enemy, layer_player)

        # Hits are only checked when both fighters were detected this frame.
        if points_vid is not None and points_cam is not None:
            for fist in FIST_KEYS:
                if is_hit(points_vid[fist], points_cam["head"]):
                    print("Player Hit !")
                    cv2.putText(combined, "Player Hit!", HIT_TEXT_ORIGIN,
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4)

            for fist in FIST_KEYS:
                if is_hit(points_cam[fist], points_vid["head"]):
                    print("Enemy Hit !")
                    cv2.putText(combined, "Enemy Hit!", HIT_TEXT_ORIGIN,
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4)

        # Show result
        cv2.imshow("GAME", combined)
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), ord('Q')):
            break
finally:
    # Always give back the devices, windows and pose graphs, even when a
    # frame raises part-way through the loop.
    pose_cam.close()
    pose.close()
    cap_video.release()
    cap_webcam.release()
    cv2.destroyAllWindows()
