In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import copy
from natsort import natsorted


# Set Up MediaPipe Helper Functions

In [2]:
# Short module aliases used by the helper functions and processing cells below.
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    """Run `model` on one BGR frame.

    Args:
        image: frame in OpenCV's BGR channel order.
        model: object exposing `process(rgb_image)` (a Holistic instance in
            this notebook).

    Returns:
        (bgr_image, results): the frame converted back to BGR, and whatever
        `model.process` returned.
    """
    # The model is fed RGB, so swap the channel order first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only for the duration of inference (perf hint),
    # then restore the flag before handing the data on.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and both hand landmark sets from `results` onto `image`.

    Each body part gets its own landmark / connection colours. Nothing is
    returned; `image` is passed straight to `mp_drawing.draw_landmarks`.

    Args:
        image: BGR frame to draw on.
        results: object exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec

    # (landmark list, connection set, landmark style, connection style),
    # listed in drawing order: face, pose, left hand, right hand.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         # Green channel was 256; an 8-bit colour channel tops out at 255.
         spec(color=(80,255,121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_style, connection_style)

In [6]:
def extract_keypoints(results):
    """Flatten one frame's landmarks into a single 1-D feature vector.

    Layout of the returned array, in order:
        pose        33 landmarks x (x, y, z, visibility)
        face       468 landmarks x (x, y, z)
        left hand   21 landmarks x (x, y, z)
        right hand  21 landmarks x (x, y, z)

    A body part whose landmark attribute on `results` is falsy is replaced by
    a zero block of the size listed above.
    """
    def _flat(landmark_set, fields, missing_size):
        # Zero-fill when the detector produced nothing for this part.
        if not landmark_set:
            return np.zeros(missing_size)
        rows = [[getattr(point, name) for name in fields]
                for point in landmark_set.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flat(results.pose_landmarks, xyz + ('visibility',), 33*4),
        _flat(results.face_landmarks, xyz, 468*3),
        _flat(results.left_hand_landmarks, xyz, 21*3),
        _flat(results.right_hand_landmarks, xyz, 21*3),
    ]
    return np.concatenate(parts)

# Setup Folders for Collection

In [7]:
# --- Locations on disk -------------------------------------------------------
# Extracted JPEG frames, one folder per action / sequence.
# NOTE(review): "Ehanced" is spelled this way in the folder name itself; renaming
# it here would point at a different directory.
DATA_PATH = r'D:\signara\Signara-main\Sign To Text\utils\MP_Ehanced_Data_Videos'
# Per-frame keypoint arrays (.npy), same folder layout as DATA_PATH.
DATA_PATH_KEYPOINTS = r'D:\signara\Signara-main\Sign To Text\utils\MP_Enhanced_Data_KEYPOINTS'
# Source videos to be split into frames.
VIDEO_DATA_PATH = r'D:\Sign language Dataset\videos'

# --- Vocabulary --------------------------------------------------------------
# Sign classes to recognise, in their original declaration order.
actions = np.array([
    'All', 'Boy', 'Girl', 'Book', 'Mobile', 'Car', 'Baby', 'Door', 'End', 'Eat',
    'Drink', 'Father', 'Mother', 'Go', 'Good', 'Bad', 'Food', 'House', 'In', 'Out',
    'Sad', 'Happy', 'Man', 'Woman', 'Stop', 'School', 'New', 'Old', 'Play', 'Room',
    'See', 'Sit', 'Sister', 'Brother', 'Think', 'Work', 'name', 'Yes', 'No', 'Walk',
    'Love', 'Need', 'Respect', 'Money', 'I', 'You', 'Day', 'Ambulance', 'Buy', 'Bread',
])
# Same classes in plain string order (upper-case names sort before 'name').
sorted_actions = list(actions)
sorted_actions.sort()

# --- Dataset dimensions ------------------------------------------------------
no_sequences = 45     # videos recorded per class
sequence_length = 60  # frames used from each video

In [None]:
# Create DataPaths for Videos
# One folder per (action, sequence) pair: DATA_PATH/<action>/<sequence>.
# exist_ok=True keeps the cell safe to re-run, while real failures
# (permissions, missing drive, ...) are raised instead of being silently
# swallowed by a bare `except: pass`.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [9]:
# Create DataPaths for Keypoints
# One folder per (action, sequence) pair: DATA_PATH_KEYPOINTS/<action>/<sequence>.
# exist_ok=True keeps the cell safe to re-run, while real failures
# (permissions, missing drive, ...) are raised instead of being silently
# swallowed by a bare `except: pass`.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH_KEYPOINTS, action, str(sequence)), exist_ok=True)

# Extract frames from recorded Videos

In [10]:
# List the video folder in natural order (e.g. All_2 before All_10),
# unlike the plain lexicographic order of os.listdir / Windows Explorer.
files_in_videos = natsorted(os.listdir(VIDEO_DATA_PATH))

# Keep only real "<Action>_<n>.mp4" files. The single-underscore test alone
# would also accept folders or non-video files that happen to contain one "_".
videos = []
for entry in files_in_videos:
    is_video = (
        len(entry.split('_')) == 2
        and entry.lower().endswith('.mp4')
        and os.path.isfile(os.path.join(VIDEO_DATA_PATH, entry))
    )
    if is_video:
        videos.append(entry)
    else:
        # Show what was skipped so unexpected exclusions are visible
        print(entry)

len(videos)

2250

In [11]:

# Split every source video into numbered JPEG frames:
#     DATA_PATH/<action>/<sequence>/<frame_num>.jpg
#
# The action comes from the file name ("<Action>_<n>.mp4") instead of a running
# "every 45 videos" counter, so a missing or extra video for one action can no
# longer shift all later videos into the wrong action folder (or run past the
# end of the action list).
action_lookup = {str(name).lower(): str(name) for name in actions}
sequences_seen = {}    # action -> number of its videos handled so far
finished_videos = 0    # videos read through to their last frame

for v in videos:
    print(v)

    action = action_lookup.get(v.split('_')[0].lower())
    if action is None:
        print(f'Skipping {v}: file name does not match any entry in actions')
        continue

    # Sequence folders are numbered by order of appearance within the action
    sequence = sequences_seen.get(action, 0)
    sequences_seen[action] = sequence + 1
    frame_dir = os.path.join(DATA_PATH, action, str(sequence))
    os.makedirs(frame_dir, exist_ok=True)

    cap = cv2.VideoCapture(os.path.join(VIDEO_DATA_PATH, v))
    try:
        if not cap.isOpened():
            print("Error opening video stream or file")
            continue

        # Read until the video is exhausted. No cv2.waitKey here: no window is
        # ever shown in this cell, so there is nothing to receive a key press.
        frame_num = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            jpg_path = os.path.join(frame_dir, str(frame_num) + '.jpg')
            # imwrite reports failure through its return value, not an exception
            if not cv2.imwrite(jpg_path, frame):
                print(f'Could not write {jpg_path}')
            frame_num += 1
    finally:
        # Release each capture as soon as its video is done, not just the last one
        cap.release()

    finished_videos += 1
    print(finished_videos)

All_0.mp4
1
All_1.mp4
2
All_2.mp4
3
All_3.mp4
4
All_4.mp4
5
All_5.mp4
6
All_6.mp4
7
All_7.mp4
8
All_8.mp4
9
All_9.mp4
10
All_10.mp4
11
All_11.mp4
12
All_12.mp4
13
All_13.mp4
14
All_14.mp4
15
All_15.mp4
16
All_16.mp4
17
All_17.mp4
18
All_18.mp4
19
All_19.mp4
20
All_20.mp4
21
All_21.mp4
22
All_22.mp4
23
All_23.mp4
24
All_24.mp4
25
All_25.mp4
26
All_26.mp4
27
All_27.mp4
28
All_28.mp4
29
All_29.mp4
30
All_30.mp4
31
All_31.mp4
32
All_32.mp4
33
All_33.mp4
34
All_34.mp4
35
All_35.mp4
36
All_36.mp4
37
All_37.mp4
38
All_38.mp4
39
All_39.mp4
40
All_40.mp4
41
All_41.mp4
42
All_42.mp4
43
All_43.mp4
44
All_44.mp4
45
Ambulance_0.mp4
46
Ambulance_1.mp4
47
Ambulance_2.mp4
48
Ambulance_3.mp4
49
Ambulance_4.mp4
50
Ambulance_5.mp4
51
Ambulance_6.mp4
52
Ambulance_7.mp4
53
Ambulance_8.mp4
54
Ambulance_9.mp4
55
Ambulance_10.mp4
56
Ambulance_11.mp4
57
Ambulance_12.mp4
58
Ambulance_13.mp4
59
Ambulance_14.mp4
60
Ambulance_15.mp4
61
Ambulance_16.mp4
62
Ambulance_17.mp4
63
Ambulance_18.mp4
64
Ambulance_19.mp4
65

In [15]:
# Manual cleanup cell: releases the capture object currently bound to `cap`
# and closes any OpenCV windows. `cap` is not created here, so this cell only
# works after a cell that assigns it has been run.
cap.release()
cv2.destroyAllWindows()

# Extract Keypoints from Saved Images Without Displaying

In [15]:
# Run MediaPipe Holistic over every exported frame and save one keypoint
# vector per frame as
#     DATA_PATH_KEYPOINTS/<action>/<sequence>/<frame_num>.npy
# Frames that do not exist on disk are skipped.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    # Loop through actions
    for action in sorted_actions:
        # Loop through sequences aka videos
        for sequence in range(no_sequences):
            print(f'{action,sequence}')

            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):
                jpg_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num) + str('.jpg'))
                if not os.path.exists(jpg_path):
                    continue

                # cv2.imread returns None (no exception) for a file it cannot
                # decode; skip it rather than crash inside cvtColor mid-run.
                frame = cv2.imread(jpg_path)
                if frame is None:
                    print(f'Skipping unreadable frame: {jpg_path}')
                    continue

                # Make detections (the converted image is not needed here)
                _converted, results = mediapipe_detection(frame, holistic)

                # Export keypoints; np.save appends the ".npy" extension
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH_KEYPOINTS, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

('All', 0)
('All', 1)
('All', 2)
('All', 3)
('All', 4)
('All', 5)
('All', 6)
('All', 7)
('All', 8)
('All', 9)
('All', 10)
('All', 11)
('All', 12)
('All', 13)
('All', 14)
('All', 15)
('All', 16)
('All', 17)
('All', 18)
('All', 19)
('All', 20)
('All', 21)
('All', 22)
('All', 23)
('All', 24)
('All', 25)
('All', 26)
('All', 27)
('All', 28)
('All', 29)
('All', 30)
('All', 31)
('All', 32)
('All', 33)
('All', 34)
('All', 35)
('All', 36)
('All', 37)
('All', 38)
('All', 39)
('All', 40)
('All', 41)
('All', 42)
('All', 43)
('All', 44)
('Ambulance', 0)
('Ambulance', 1)
('Ambulance', 2)
('Ambulance', 3)
('Ambulance', 4)
('Ambulance', 5)
('Ambulance', 6)
('Ambulance', 7)
('Ambulance', 8)
('Ambulance', 9)
('Ambulance', 10)
('Ambulance', 11)
('Ambulance', 12)
('Ambulance', 13)
('Ambulance', 14)
('Ambulance', 15)
('Ambulance', 16)
('Ambulance', 17)
('Ambulance', 18)
('Ambulance', 19)
('Ambulance', 20)
('Ambulance', 21)
('Ambulance', 22)
('Ambulance', 23)
('Ambulance', 24)
('Ambulance', 25)
('Ambulance'

# Extract Keypoints from Saved Images With Displaying

In [12]:
# Same keypoint export as the headless version, but every processed frame is
# shown in an OpenCV window with its landmarks drawn. Press "q" in the window
# to stop the whole run.
stop_requested = False

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    try:
        # Loop through actions
        for action in sorted_actions:
            # Loop through sequences aka videos
            for sequence in range(no_sequences):
                # Loop through video length aka sequence length
                for frame_num in range(sequence_length):
                    jpg_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num) + str('.jpg'))
                    if not os.path.exists(jpg_path):
                        continue

                    # cv2.imread returns None (no exception) for a file it cannot decode
                    frame = cv2.imread(jpg_path)
                    if frame is None:
                        print(f'Skipping unreadable frame: {jpg_path}')
                        continue

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # Banner on the first frame of each sequence only
                    if frame_num == 0:
                        cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    if frame_num == 0:
                        # Hold the first frame slightly longer
                        cv2.waitKey(20)

                    # Export keypoints; np.save appends the ".npy" extension
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH_KEYPOINTS, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully: remember the request so the two outer
                    # loops stop as well (a bare `break` leaves only this loop)
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
    finally:
        # Close the preview window even on KeyboardInterrupt. No capture object
        # is opened in this cell, so there is nothing to release.
        cv2.destroyAllWindows()

KeyboardInterrupt: 

In [13]:

# Close any OpenCV windows that are still open (for example after a run was interrupted).
cv2.destroyAllWindows()