In [14]:
import pandas as pd
import numpy as np
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
import os

In [15]:
# Shortcuts to the MediaPipe solution modules used throughout the notebook.
mp_holistic = mp.solutions.holistic      # holistic solution (face, pose and hand landmarks)
mp_drawing = mp.solutions.drawing_utils  # helpers for drawing landmarks onto an image

# A single Holistic instance shared by every frame-processing function below.
# Both the detection and the tracking confidence thresholds are set to 0.5.
holistic_model = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

In [16]:
def mediapipe_detection(frame, model):
    """Run ``model`` on a single BGR frame.

    Parameters
    ----------
    frame
        Image in OpenCV's BGR channel order.
    model
        Object exposing ``process(rgb_image)`` (here: the Holistic model).

    Returns
    -------
    tuple
        ``(image, results)`` where ``image`` is the frame converted to RGB and
        back to BGR, and ``results`` is whatever ``model.process`` returned.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # the model is fed RGB input
    rgb.flags.writeable = False                   # read-only while the model runs
    results = model.process(rgb)
    rgb.flags.writeable = True                    # writable again for later drawing
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), results

In [17]:
def draw_styled_landmarks(image, results):
    """Draw the left- and right-hand landmarks (with connections) onto ``image``.

    ``image`` is modified in place. Only the two hands are rendered; face-mesh
    and pose overlays are intentionally not drawn.

    Parameters
    ----------
    image
        Image to draw on.
    results
        Detection results exposing ``left_hand_landmarks`` and
        ``right_hand_landmarks``.
    """
    # One entry per hand: (landmarks, landmark style, connection style).
    hand_styles = (
        (
            results.left_hand_landmarks,
            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2),
        ),
        (
            results.right_hand_landmarks,
            mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
        ),
    )
    for hand_landmarks, landmark_spec, connection_spec in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            landmark_spec,
            connection_spec,
        )

In [85]:
def frame_capture(in_video, out_video, wait_ms=1000):
    """Save every frame of ``in_video`` in which at least one hand is detected.

    Each frame is passed through ``mediapipe_detection`` with the shared
    ``holistic_model``. Frames with a left or right hand are written to
    ``out_video`` as ``0.jpg``, ``1.jpg``, ... (numbered in detection order,
    not by frame index). Every frame is also shown in a 400x300 preview window
    labelled with its frame index; pressing ``q`` stops early.

    Parameters
    ----------
    in_video : str
        Path of the video file to read.
    out_video : str
        Directory that receives the JPEG frames; created if it does not exist.
    wait_ms : int, optional
        Delay in milliseconds passed to ``cv2.waitKey`` after each preview
        frame. Defaults to 1000, the value that was previously hard-coded.
    """
    # cv2.imwrite only returns False when the target directory is missing,
    # so make sure it exists up front.
    if out_video:
        os.makedirs(out_video, exist_ok=True)

    cap = cv2.VideoCapture(in_video)
    if not cap.isOpened():
        print(f"Could not open video {in_video}")

    frame_count = 0
    hand_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Extract landmarks (face, pose, hands) for this frame.
            image, results = mediapipe_detection(frame, holistic_model)

            # To overlay the landmarks on the saved frames, call
            # draw_styled_landmarks(image, results) here.

            # Save and report the frame when at least one hand is detected.
            if results.left_hand_landmarks or results.right_hand_landmarks:
                image_path = os.path.join(out_video, "{}.jpg".format(hand_count))
                if not cv2.imwrite(image_path, image):
                    print(f"Could not write {image_path}")
                hand_count += 1
                print(f"Hands detected at {frame_count}")

            # The frame label is drawn after saving so stored frames stay clean.
            cv2.putText(image, f"Frame: {frame_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            resized_frame = cv2.resize(image, (400, 300))
            cv2.imshow('MediaPipe Hands', resized_frame)

            frame_count += 1

            # Preview pause; 'q' aborts the current video.
            if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the preview even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()

# ROI capture: implemented below, but not used in the final project:

In [110]:
def _extract_hand_roi(image, hand_landmarks, size=(250, 250)):
    """Return the grayscale, resized bounding-box crop of one hand, or None if the box is empty."""
    height, width = image.shape[:2]
    xs = [lm.x for lm in hand_landmarks.landmark]
    ys = [lm.y for lm in hand_landmarks.landmark]

    # Landmark coordinates are scaled by the image size to get pixels. Clamp
    # the box to the image: a negative start index would wrap around in NumPy
    # slicing, and an empty crop makes cv2.cvtColor / cv2.resize raise.
    x0 = max(0, int(min(xs) * width))
    x1 = min(width, int(max(xs) * width))
    y0 = max(0, int(min(ys) * height))
    y1 = min(height, int(max(ys) * height))
    if x1 <= x0 or y1 <= y0:
        return None

    roi_gray = cv2.cvtColor(image[y0:y1, x0:x1], cv2.COLOR_BGR2GRAY)
    # Without resizing, every crop would keep the shape of its bounding box.
    return cv2.resize(roi_gray, size)


def roi_capture(in_video, out_video, wait_ms=1000):
    """Save a cropped grayscale image of each detected hand in ``in_video``.

    For every frame the landmark bounding box of each detected hand is cropped,
    converted to grayscale, resized to 250x250 and written to ``out_video`` as
    ``left_<n>.jpg`` / ``right_<n>.jpg``, with a separate counter per hand.
    Hands whose bounding box is empty after clamping to the image are skipped.
    Every frame is also shown in a 400x300 preview window labelled with its
    frame index; pressing ``q`` stops early.

    Parameters
    ----------
    in_video : str
        Path of the video file to read.
    out_video : str
        Directory that receives the JPEG crops; created if it does not exist.
    wait_ms : int, optional
        Delay in milliseconds passed to ``cv2.waitKey`` after each preview
        frame. Defaults to 1000, the value that was previously hard-coded.
    """
    # cv2.imwrite only returns False when the target directory is missing,
    # so make sure it exists up front.
    if out_video:
        os.makedirs(out_video, exist_ok=True)

    cap = cv2.VideoCapture(in_video)
    if not cap.isOpened():
        print(f"Could not open video {in_video}")

    frame_count = 0
    saved = {"left": 0, "right": 0}  # per-hand file counters
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Extract landmarks (face, pose, hands) for this frame.
            image, results = mediapipe_detection(frame, holistic_model)

            # To overlay the landmarks on the frame, call
            # draw_styled_landmarks(image, results) here.

            hands = (
                ("left", results.left_hand_landmarks),
                ("right", results.right_hand_landmarks),
            )
            for side, hand_landmarks in hands:
                if not hand_landmarks:
                    continue
                roi = _extract_hand_roi(image, hand_landmarks)
                if roi is None:
                    continue
                roi_path = os.path.join(out_video, "{}_{}.jpg".format(side, saved[side]))
                if not cv2.imwrite(roi_path, roi):
                    print(f"Could not write {roi_path}")
                saved[side] += 1

            # The frame label is drawn after cropping so the ROIs stay clean.
            cv2.putText(image, f"Frame: {frame_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            resized_frame = cv2.resize(image, (400, 300))
            cv2.imshow('MediaPipe Hands', resized_frame)

            frame_count += 1

            # Preview pause; 'q' aborts the current video.
            if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the preview even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()

In [115]:
# Folder containing the input videos.
video_path = r"D:\Datasets\minor2_dataset2\dummy_att2\145\videos"

In [116]:
# Root output folder; each processed video gets its own sub-folder inside it.
o_p = r"D:\Datasets\minor2_dataset2\hand_frames\help"

In [117]:
# os.listdir returns entries in arbitrary order; sort them so the videos are
# processed (and logged) in the same order on every run.
video_dir = sorted(os.listdir(video_path))

In [118]:
# Extract the hand frames of every entry in the input folder. Each video gets
# its own output sub-folder, named after the video file itself.
for video in video_dir:
    new_video_path = os.path.join(o_p, video)
    os.makedirs(new_video_path, exist_ok=True)

    v_p = os.path.join(video_path, video)
    frame_capture(v_p, new_video_path)
    # Alternative (not used): save cropped hand ROIs instead of full frames.
    # roi_capture(v_p, new_video_path)

Hands detected at 7
Hands detected at 8
Hands detected at 9
Hands detected at 10
Hands detected at 11
Hands detected at 12
Hands detected at 13
Hands detected at 14
Hands detected at 15
Hands detected at 16
Hands detected at 18
Hands detected at 19
Hands detected at 20
Hands detected at 21
Hands detected at 22
Hands detected at 23
Hands detected at 24
Hands detected at 25
Hands detected at 26
Hands detected at 27
Hands detected at 28
Hands detected at 29
Hands detected at 30
Hands detected at 31
Hands detected at 32
Hands detected at 33
Hands detected at 34
Hands detected at 35
Hands detected at 36
Hands detected at 37
Hands detected at 38
Hands detected at 39
Hands detected at 40
Hands detected at 41
Hands detected at 42
Hands detected at 43
Hands detected at 44
Hands detected at 45
Hands detected at 46
Hands detected at 47
Hands detected at 48
Hands detected at 49
Hands detected at 4
Hands detected at 7
Hands detected at 8
Hands detected at 9
Hands detected at 10
Hands detected at 11

Hands detected at 27
Hands detected at 28
Hands detected at 29
Hands detected at 30
Hands detected at 31
Hands detected at 32
Hands detected at 33
Hands detected at 34
Hands detected at 35
Hands detected at 36
Hands detected at 37
Hands detected at 38
Hands detected at 39
Hands detected at 40
Hands detected at 41
Hands detected at 42
Hands detected at 43
Hands detected at 44
Hands detected at 45
Hands detected at 46
Hands detected at 47
Hands detected at 48
Hands detected at 49
Hands detected at 50
Hands detected at 51
Hands detected at 52
Hands detected at 53
Hands detected at 54
Hands detected at 55
Hands detected at 56
Hands detected at 57
Hands detected at 58
Hands detected at 59
Hands detected at 60
Hands detected at 61
Hands detected at 62
Hands detected at 13
Hands detected at 14
Hands detected at 15
Hands detected at 16
Hands detected at 17
Hands detected at 18
Hands detected at 19
Hands detected at 20
Hands detected at 21
Hands detected at 22
Hands detected at 23
Hands detecte