# MediaPipe

In [5]:
import cv2
import mediapipe as mp
import numpy as np

# Short aliases for the MediaPipe solution modules.
# `mp_holistic` is used by extract_keypoints_from_video below.
mp_holistic = mp.solutions.holistic  # For body + face + hands
# NOTE(review): mp_drawing is not referenced in any of the visible cells.
mp_drawing = mp.solutions.drawing_utils

def extract_keypoints_from_video(video_path):
    """
    Extract per-frame hand keypoints from a video with MediaPipe Holistic.

    Each frame is converted from BGR to RGB and passed to the Holistic
    model. The (x, y, z) values of the left-hand landmarks, followed by the
    right-hand landmarks, are collected for every frame in which they are
    detected. Frames with fewer keypoints than the fullest frame are padded
    at the end with [0.0, 0.0, 0.0] rows so the result is rectangular.

    Parameters
    ----------
    video_path : str
        Path handed to cv2.VideoCapture.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_frames, max_keypoints, 3), where max_keypoints is
        the largest number of keypoints found in any single frame. If no
        frame could be read the array is empty.

    Notes
    -----
    NOTE(review): because keypoints are appended left-then-right and padded
    at the end, a frame with only a right hand stores it in the same slots
    a left hand would occupy in another frame. Slot index therefore does not
    identify the hand; confirm whether downstream comparison relies on it.
    """
    cap = cv2.VideoCapture(video_path)
    keypoints_list = []

    try:
        # The context manager closes the Holistic graph even if processing
        # raises, instead of leaving it open for the lifetime of the kernel.
        with mp_holistic.Holistic(static_image_mode=False,
                                  min_detection_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB input; OpenCV decodes frames as BGR.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Marking the buffer read-only lets MediaPipe use it without copying.
                image.flags.writeable = False

                results = holistic.process(image)

                keypoints = []
                for hand in (results.left_hand_landmarks, results.right_hand_landmarks):
                    if hand:
                        keypoints.extend([lmk.x, lmk.y, lmk.z] for lmk in hand.landmark)

                keypoints_list.append(keypoints)
    finally:
        # Always hand the capture device/file back, also on errors.
        cap.release()

    # Pad every frame up to the largest keypoint count seen in any frame.
    max_keypoints = max(map(len, keypoints_list), default=0)
    for frame_keypoints in keypoints_list:
        missing = max_keypoints - len(frame_keypoints)
        frame_keypoints.extend([0.0, 0.0, 0.0] for _ in range(missing))

    return np.array(keypoints_list)  # Now all frames have the same number of keypoints

# Extract hand keypoints for the reference (dataset) clip and the user's clip.
# NOTE(review): both paths are absolute paths on a local Windows drive, so this
# cell only runs on a machine that has these files at these locations.
dataset_video_keypoints = extract_keypoints_from_video(r"D:\Free lancing\Sign language validator\66039.mp4")
user_video_keypoints = extract_keypoints_from_video(r"D:\Free lancing\Sign language validator\test_a.mp4")


In [6]:
# Show (n_frames, n_keypoints, 3) for both clips; the frame counts may differ.
dataset_video_keypoints.shape, user_video_keypoints.shape

((62, 21, 3), (91, 21, 3))

In [7]:
from scipy.spatial.distance import euclidean

def calculate_frame_distance(keypoints1, keypoints2):
    """
    Mean Euclidean keypoint distance for each pair of frames.

    Frames of the two sequences are paired in order, and pairing stops at
    the end of the shorter sequence. Inside a frame pair the keypoints are
    likewise paired by position, and their Euclidean distances are averaged.

    keypoints1, keypoints2: sequences shaped (n_frames, n_keypoints, 3).
    Returns a list holding one mean distance per paired frame.
    """
    def _mean_point_gap(points_a, points_b):
        # Distance between each positionally matched pair of keypoints.
        gaps = [euclidean(pt_a, pt_b) for pt_a, pt_b in zip(points_a, points_b)]
        return np.mean(gaps)

    return [
        _mean_point_gap(frame_a, frame_b)
        for frame_a, frame_b in zip(keypoints1, keypoints2)
    ]

# One mean distance per frame pair; pairing stops at the shorter of the two clips.
distances = calculate_frame_distance(dataset_video_keypoints, user_video_keypoints)


In [8]:
def calculate_similarity_score(distances, reference_distance=None):
    """
    Map per-frame distances to a percentage score.

    Parameters
    ----------
    distances : array-like of float
        Per-frame mean distances (a plain list or a NumPy array).
    reference_distance : float, optional
        Distance that should count as "completely different". When given,
        the score is ``(1 - mean(distances) / reference_distance) * 100``
        clipped to [0, 100], so smaller distances give a higher score.
        When omitted, the original behaviour is kept (see Notes).

    Returns
    -------
    float
        Percentage in [0, 100]. 100.0 is returned when every distance is zero.

    Raises
    ------
    ValueError
        If ``distances`` is empty, or ``reference_distance`` is not positive.

    Notes
    -----
    Without ``reference_distance`` the score is ``mean / max * 100`` of the
    distances themselves. That ratio is normalised by the data it scores, so
    it does not decrease as the videos get more different (any constant
    non-zero distance yields 100). Pass ``reference_distance`` for a score
    that actually reflects similarity.
    """
    # Convert first: comparing a Python list with `== 0` yields a single
    # False, which made the original all-zero guard ineffective for lists.
    dist = np.asarray(distances, dtype=float)
    if dist.size == 0:
        raise ValueError("distances must contain at least one value")

    mean_distance = np.mean(dist)

    if reference_distance is not None:
        if reference_distance <= 0:
            raise ValueError("reference_distance must be positive")
        closeness = 1.0 - mean_distance / reference_distance
        return float(np.clip(closeness, 0.0, 1.0) * 100.0)

    max_distance = np.max(dist)

    # Distances are non-negative, so a zero maximum means all are zero;
    # this also avoids dividing by zero below.
    if max_distance == 0:
        return 100.0

    return (mean_distance / max_distance) * 100

# Score the per-frame distances computed above and report the result with its type.
similarity = calculate_similarity_score(distances)
print(f"Similarity: {similarity:.2f}%, {type(similarity)}")


Similarity: 53.90%, <class 'numpy.float64'>


In [2]:
import cv2
import mediapipe as mp
import numpy as np
import scipy

# Print versions of the libraries this notebook depends on, to record the
# environment the results were produced with.
print(f"OpenCV version: {cv2.__version__}")
print(f"MediaPipe version: {mp.__version__}")
print(f"Numpy version: {np.__version__}")
print(f"SciPy version: {scipy.__version__}")


OpenCV version: 4.9.0
MediaPipe version: 0.10.10
Numpy version: 1.26.4
SciPy version: 1.10.1


# OpenPose

In [None]:
import cv2
import numpy as np
from openpose import pyopenpose as op  # Import OpenPose library

def extract_keypoints_from_video_openpose(video_path):
    """
    Extract per-frame body, face and hand keypoints from a video with OpenPose.

    For every frame the keypoints of the first detected person are collected
    in the order body, face, left hand, right hand. Frames with fewer
    keypoints than the fullest frame are padded at the end with
    [0.0, 0.0, 0.0] rows so the result is rectangular.

    Parameters
    ----------
    video_path : str
        Path handed to cv2.VideoCapture.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_frames, max_keypoints, 3). Empty if no frame
        could be read.

    Notes
    -----
    A new OpenPose wrapper is configured and started on every call, with
    models loaded from the relative folder "models/".
    NOTE(review): some pyopenpose releases require
    ``emplaceAndPop(op.VectorDatum([datum]))`` instead of a plain list;
    confirm against the installed version.
    """
    # Configure OpenPose parameters
    params = {
        "model_folder": "models/",  # Path to OpenPose models
        "face": True,  # Enable face detection
        "hand": True,  # Enable hand detection
    }

    # Initialize OpenPose
    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    def _first_person(detections):
        """Return the first person's keypoints as a list, or [] if none were found."""
        # Guard against None and against empty / non-3-D arrays, where
        # indexing [0] would raise or yield something that is not a
        # (n_parts, 3) block.
        if detections is None or getattr(detections, "ndim", 0) < 3:
            return []
        if detections.shape[0] == 0:
            return []
        return detections[0].tolist()  # Assuming single person

    cap = cv2.VideoCapture(video_path)
    keypoints_list = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Prepare frame for OpenPose
            datum = op.Datum()
            datum.cvInputData = frame
            opWrapper.emplaceAndPop([datum])

            keypoints = []
            keypoints.extend(_first_person(datum.poseKeypoints))
            keypoints.extend(_first_person(datum.faceKeypoints))

            # handKeypoints is indexed as [left, right] in the original code.
            hands = datum.handKeypoints
            if hands is not None:
                for side in (0, 1):
                    keypoints.extend(_first_person(hands[side]))

            keypoints_list.append(keypoints)
    finally:
        # Release the capture even when OpenPose raises mid-video.
        cap.release()

    # Pad every frame up to the largest keypoint count seen in any frame.
    max_keypoints = max(map(len, keypoints_list), default=0)
    for frame_keypoints in keypoints_list:
        missing = max_keypoints - len(frame_keypoints)
        frame_keypoints.extend([0.0, 0.0, 0.0] for _ in range(missing))

    return np.array(keypoints_list)

# Extract keypoints using OpenPose
# (overwrites the variables of the same name produced in the MediaPipe section)
dataset_video_keypoints = extract_keypoints_from_video_openpose(r"D:\Free lancing\Sign language validator\66039.mp4")
user_video_keypoints = extract_keypoints_from_video_openpose(r"D:\Free lancing\Sign language validator\test_a.mp4")

# Continue with frame comparison and similarity calculations...
# calculate_frame_distance and calculate_similarity_score are defined in the
# MediaPipe section's cells, which must have been executed in this kernel first.
distances = calculate_frame_distance(dataset_video_keypoints, user_video_keypoints)
similarity = calculate_similarity_score(distances)
print(f"Similarity: {similarity:.2f}%, {type(similarity)}")


# OpenCV

In [1]:
import cv2
import numpy as np

# Load body pose detection model (you can use COCO or MPI models)
# Both files are given as bare filenames, so they are resolved relative to the
# current working directory; readNetFromCaffe raises cv2.error when the
# prototxt cannot be opened.
# NOTE(review): these filenames look like the OpenPose COCO body model - confirm.
pose_net = cv2.dnn.readNetFromCaffe("pose_deploy_linevec.prototxt", 
                                    "pose_iter_440000.caffemodel")  # Downloaded from OpenCV/OpenPose

# Load face detector model
# The Haar cascade XML is looked up in the directory given by cv2.data.haarcascades.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def extract_keypoints_from_video_opencv(video_path, num_body_parts=18):
    """
    Extract per-frame body keypoints and face centres using OpenCV only.

    Each frame is run through the module-level ``pose_net``; for every body
    part heat map the location of the maximum is scaled back to frame
    coordinates and stored as [x, y, confidence]. The centre of every face
    found by ``face_cascade`` is appended as [cx, cy, 1.0]. Frames with
    fewer entries than the fullest frame are padded at the end with
    [0.0, 0.0, 0.0] rows so the result is rectangular.

    Parameters
    ----------
    video_path : str
        Path handed to cv2.VideoCapture.
    num_body_parts : int or None, optional
        Number of leading output channels to read as body-part heat maps.
        The network output also carries channels that are not body parts
        (background and part-affinity maps in the OpenPose Caffe models),
        so reading every channel would create bogus keypoints. The default
        of 18 matches the COCO body model; use 15 for the MPI model.
        ``None`` reads all channels (the previous behaviour).
        NOTE(review): confirm the value against the model actually loaded.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_frames, max_keypoints, 3). Empty if no frame
        could be read. The third column is a confidence value, not a depth
        coordinate.
    """
    cap = cv2.VideoCapture(video_path)
    keypoints_list = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_height, frame_width = frame.shape[:2]

            # Body keypoints detection
            inp_blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False)
            pose_net.setInput(inp_blob)
            output = pose_net.forward()

            # Only the leading channels are body-part heat maps.
            total_channels = output.shape[1]
            if num_body_parts is None:
                part_count = total_channels
            else:
                part_count = min(num_body_parts, total_channels)

            keypoints = []
            for part in range(part_count):
                heat_map = output[0, part, :, :]
                _, conf, _, point = cv2.minMaxLoc(heat_map)
                # Scale heat-map coordinates back to the original frame size.
                x = (frame_width * point[0]) / output.shape[3]
                y = (frame_height * point[1]) / output.shape[2]
                keypoints.append([x, y, conf])

            # Face detection, simplified to one centre point per face
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)
            for (face_x, face_y, face_w, face_h) in faces:
                keypoints.append([face_x + face_w // 2, face_y + face_h // 2, 1.0])

            keypoints_list.append(keypoints)
    finally:
        # Release the capture even when inference raises mid-video.
        cap.release()

    # Pad every frame up to the largest keypoint count seen in any frame.
    max_keypoints = max(map(len, keypoints_list), default=0)
    for frame_keypoints in keypoints_list:
        missing = max_keypoints - len(frame_keypoints)
        frame_keypoints.extend([0.0, 0.0, 0.0] for _ in range(missing))

    return np.array(keypoints_list)

# Extract keypoints using OpenCV DNN
dataset_video_keypoints = extract_keypoints_from_video_opencv(r"D:\Free lancing\Sign language validator\66039.mp4")
user_video_keypoints = extract_keypoints_from_video_opencv(r"D:\Free lancing\Sign language validator\test_a.mp4")

# Continue with frame comparison and similarity calculations...
# calculate_frame_distance and calculate_similarity_score are defined in the
# MediaPipe section's cells, which must have been executed in this kernel first.
distances = calculate_frame_distance(dataset_video_keypoints, user_video_keypoints)
similarity = calculate_similarity_score(distances)
print(f"Similarity: {similarity:.2f}%, {type(similarity)}")


error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1126: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "pose_deploy_linevec.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'


In [2]:
import sys

# Print Python version (full interpreter build string from sys.version)
print(f"Python version: {sys.version}")

Python version: 3.9.5 (tags/v3.9.5:0a7dcbd, May  3 2021, 17:27:52) [MSC v.1928 64 bit (AMD64)]
