### 1. Import and Install Dependencies

In [1]:
# !pip install tensorflow==2.5 tensorflow-gpu==2.5
# !pip install opencv-python mediapipe sklearn matplotlib

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [3]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [5]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [6]:
def draw_styled_landmarks(image, results):
#     FACEMESH_CONTOURS, FACEMESH_TESSELATION
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic. FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [7]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [8]:
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

NameError: name 'frame' is not defined

# 3. Extract Keypoint Values

In [9]:
len(results.left_hand_landmarks.landmark)

AttributeError: 'NoneType' object has no attribute 'landmark'

In [None]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [None]:
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [10]:
face = np.array([[res.x, res.y, res.z] 
for res in results.face_landmarks.landmark]).flatten() 
if results.face_landmarks:
    a=0
else:
    np.zeros(1404)


In [8]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [None]:
result_test = extract_keypoints(results)

In [None]:
result_test

In [None]:
np.save('0', result_test)

In [None]:
np.load('0.npy')

# 4. Setup Folders for Collection

In [9]:
PSLDataset = os.path.join('Dataset\PSL Dataset') 
GeneralDataset = os.path.join('Dataset\General Dataset') 
PersonalDataset = os.path.join('Dataset\Personal Dataset') 
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect 'Eye','Nose',
# actions = np.array(['Eye'])
# actions = np.array(['Ankle','Blood','Fist','Heart','Jaw','Knuckle',
#                     'Lips','Palm','Skull','Thumb'])
# actions = np.array(['Abdomen', 'Back', 'Body', 'Brain', 'Elbow', 'Finger', 'Gall Bladder', 'Intestine', 'Lungs'])
actions = np.array(['Ankle','Blood','Fist','Heart','Jaw','Knuckle','Lips','Palm','Skull','Thumb',
                   'Abdomen', 'Back', 'Body', 'Brain', 'Elbow', 'Finger', 'Gall Bladder', 'Intestine', 'Lungs'])


# number of augmentations(Techniaues)
augmentation = 1
# augmentation += 1

# Thirty videos worth of data
no_sequences = 0

# Videos are going to be 30 frames in length
sequence_length = 30

# Folder start
start_folder = 0

In [13]:
# for action in actions: 
#     dirmax = np.max(np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int))
#     for sequence in range(1,no_sequences+1):
#         try: 
#             os.makedirs(os.path.join(DATA_PATH, action, str(dirmax+sequence)))
#         except:
#             pass

# finding minimum number of videos of sign
minVideos = 9999
maxVideos = -9999

for action in actions: 
#     counting files of psl
    files=os.path.join(PSLDataset,action) 
    file_list = os.listdir(files)
    no_sequences=len(file_list)*augmentation
#     counting files of general
    files=os.path.join(GeneralDataset,action)
    file_list = os.listdir(files)
    no_sequences+=(len(file_list)*augmentation)
    #     counting files of Personal
    files=os.path.join(PersonalDataset,action)
    file_list = os.listdir(files)
    no_sequences+=(len(file_list)*augmentation)
    print("Number of Videos \t'",action," ' = ",no_sequences)
    if no_sequences<minVideos:
        minVideos=no_sequences
    if no_sequences>maxVideos:
        maxVideos=no_sequences
#     print(action,no_sequences,len(file_list))
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass
        
print("Minimum Nubmber of Videos: ",minVideos)
print("Maximum Nubmber of Videos: ",maxVideos)

Number of Videos 	' Ankle  ' =  33
Number of Videos 	' Blood  ' =  34
Number of Videos 	' Fist  ' =  33
Number of Videos 	' Heart  ' =  40
Number of Videos 	' Jaw  ' =  35
Number of Videos 	' Knuckle  ' =  41
Number of Videos 	' Lips  ' =  35
Number of Videos 	' Palm  ' =  38
Number of Videos 	' Skull  ' =  45
Number of Videos 	' Thumb  ' =  43
Number of Videos 	' Abdomen  ' =  37
Number of Videos 	' Back  ' =  40
Number of Videos 	' Body  ' =  40
Number of Videos 	' Brain  ' =  36
Number of Videos 	' Elbow  ' =  36
Number of Videos 	' Finger  ' =  35
Number of Videos 	' Gall Bladder  ' =  43
Number of Videos 	' Intestine  ' =  36
Number of Videos 	' Lungs  ' =  39
Minimum Nubmber of Videos:  33
Maximum Nubmber of Videos:  45


# 5. Collect Keypoint Values for Training and Testing

# orignal video Data+Augumetations

In [10]:
import imageio
import imgaug as ia
import imgaug.augmenters as iaa
# cap = cv2.VideoCapture(0)
# Set mediapipe model 
def startFolder(action):
    folders=os.path.join('MP_Data',action) 
    folder_list = os.listdir(folders)
    entries = sorted(folder_list, key=lambda x: int(x.split('.')[0]))
    for f in entries:
        data = os.path.join('MP_Data',action,f)
        num = os.listdir(data)
        if len(num)==0:
            return f    

def DataCollectiion(trainDataset,trainType):
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        cap = cv2.VideoCapture('')
        # NEW LOOP
        # Loop through actions
#         actions = ['Ankle']
        for action in actions:
            files=os.path.join(trainDataset,action) 
            file_list = os.listdir(files)

            no_sequences=len(file_list)
            print(action," Videos \t= ",no_sequences)
            
            # Loop through sequences aka videos
            for sequence in range(start_folder, start_folder+no_sequences):
                cap = cv2.VideoCapture(os.path.join(trainDataset, action,str(sequence)+".mp4"))
                print(os.path.join(trainDataset, action,str(sequence)+".mp4"))
#               Calculating frames
                frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                fps = cap.get(cv2.CAP_PROP_FPS)
                duration = frame_count/fps
                duration = (duration/32)*1000
                count = 0
                print("Trainig Type: ",trainType)
                print("Frame Count",frame_count)
                print("fps",fps)
                print("Duration",duration)
                print("Update: ",duration/30)
                print("Frame Time: ",duration)

                # saving folder
                folder = startFolder(action)
                
                # Loop through video length aka sequence length
                for frame_num in range(sequence_length):

                    # Read feed
                    ret, frame = cap.read()
    #                 skipping frames
                    cap.set(cv2.CAP_PROP_POS_MSEC, (count*duration))    
                    # move the time
                    success,image = cap.read()
                    count += 1
#                     print(sequence,ret,frame_num)
    #                 print(frame)

#                   horizontal
                    if(trainType=='horizontal'):
                        hflip = iaa.Fliplr(p=1.0)
                        frame = hflip.augment_image(frame)
#                     rotate left
                    elif (trainType=='left'):
                        rot1 = iaa.Affine(rotate=(-10,-9))
                        frame = rot1.augment_image(frame)
#                     rotate right
                    elif (trainType=='right'):
                        rot1 = iaa.Affine(rotate=(9,10))
                        frame = rot1.augment_image(frame)
                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # NEW Apply wait logic
                    if frame_num == 0: 
                        cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                        # Show to screen
#                         cv2.imshow('OpenCV Feed', image)
#                         cv2.waitKey(1000)
                    else: 
                        cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    #                     Show to screen
#                         cv2.imshow('OpenCV Feed', image)
    #                     cv2.waitKey(round((duration/30)*1000))


                    # NEW Export keypoints
                    keypoints = extract_keypoints(results)
#                     if(trainType=='horizontal'):
#                         npy_path = os.path.join(DATA_PATH, action, str(sequence+no_sequences), str(frame_num))
#                     elif (trainType=='left'):
#                         npy_path = os.path.join(DATA_PATH, action, str(sequence+(no_sequences*2)), str(frame_num))
#                     elif (trainType=='right'):
#                         npy_path = os.path.join(DATA_PATH, action, str(sequence+(no_sequences*3)), str(frame_num))
#                     else:
                    npy_path = os.path.join(DATA_PATH, action, folder, str(frame_num))
                    np.save(npy_path, keypoints)
#                     print("npy Frame: ",npy_path)
                    # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break

        cap.release()
        cv2.destroyAllWindows()

In [11]:
cap.release()
cv2.destroyAllWindows()

# Collecting Data

In [63]:
# PSL Dataset
DataCollectiion(PSLDataset,'')
# DataCollectiion(PSLDataset,'horizontal')
# DataCollectiion(PSLDataset,'left')
# DataCollectiion(PSLDataset,'right')

# Personal Dataset
DataCollectiion(PersonalDataset,'')
# DataCollectiion(PersonalDataset,'horizontal')
# DataCollectiion(PersonalDataset,'left')
# DataCollectiion(PersonalDataset,'right')

# General Dataset
DataCollectiion(GeneralDataset,'')
# DataCollectiion(GeneralDataset,'horizontal')
# DataCollectiion(GeneralDataset,'left')
# DataCollectiion(GeneralDataset,'right')

Abdomen  Videos 	=  0
Back  Videos 	=  0
Body  Videos 	=  0
Brain  Videos 	=  0
Elbow  Videos 	=  0
Finger  Videos 	=  0
Gall Bladder  Videos 	=  0
Intestine  Videos 	=  0
Lungs  Videos 	=  0
Abdomen  Videos 	=  37
Dataset\Personal Dataset\Abdomen\0.mp4
Trainig Type:  
Frame Count 127.0
fps 29.596796547705296
Duration 134.09390416976413
Update:  4.469796805658804
Frame Time:  134.09390416976413
Dataset\Personal Dataset\Abdomen\1.mp4
Trainig Type:  
Frame Count 128.0
fps 30.10458206373182
Duration 132.87013888888887
Update:  4.429004629629629
Frame Time:  132.87013888888887
Dataset\Personal Dataset\Abdomen\2.mp4
Trainig Type:  
Frame Count 113.0
fps 30.103809301100256
Duration 117.30243055555555
Update:  3.910081018518518
Frame Time:  117.30243055555555
Dataset\Personal Dataset\Abdomen\3.mp4
Trainig Type:  
Frame Count 102.0
fps 30.104381532044115
Duration 105.88159722222223
Update:  3.529386574074074
Frame Time:  105.88159722222223
Dataset\Personal Dataset\Abdomen\4.mp4
Trainig Type:  

Dataset\Personal Dataset\Back\7.mp4
Trainig Type:  
Frame Count 89.0
fps 30.104293902095275
Duration 92.38715277777777
Update:  3.079571759259259
Frame Time:  92.38715277777777
Dataset\Personal Dataset\Back\8.mp4
Trainig Type:  
Frame Count 80.0
fps 30.103228989409516
Duration 83.04756944444443
Update:  2.7682523148148146
Frame Time:  83.04756944444443
Dataset\Personal Dataset\Back\9.mp4
Trainig Type:  
Frame Count 140.0
fps 30.024233845889896
Duration 145.71562500000002
Update:  4.8571875
Frame Time:  145.71562500000002
Dataset\Personal Dataset\Back\10.mp4
Trainig Type:  
Frame Count 108.0
fps 29.858265394516156
Duration 113.03402777777777
Update:  3.7678009259259255
Frame Time:  113.03402777777777
Dataset\Personal Dataset\Back\11.mp4
Trainig Type:  
Frame Count 152.0
fps 30.01520507098991
Duration 158.25312499999998
Update:  5.275104166666666
Frame Time:  158.25312499999998
Dataset\Personal Dataset\Back\12.mp4
Trainig Type:  
Frame Count 127.0
fps 29.825119900635123
Duration 133.0673

Dataset\Personal Dataset\Body\15.mp4
Trainig Type:  
Frame Count 88.0
fps 29.93446167103841
Duration 91.8673611111111
Update:  3.06224537037037
Frame Time:  91.8673611111111
Dataset\Personal Dataset\Body\16.mp4
Trainig Type:  
Frame Count 101.0
fps 29.91863710569277
Duration 105.49444444444444
Update:  3.5164814814814815
Frame Time:  105.49444444444444
Dataset\Personal Dataset\Body\17.mp4
Trainig Type:  
Frame Count 116.0
fps 30.0
Duration 120.83333333333333
Update:  4.027777777777778
Frame Time:  120.83333333333333
Dataset\Personal Dataset\Body\18.mp4
Trainig Type:  
Frame Count 131.0
fps 30.0
Duration 136.45833333333331
Update:  4.548611111111111
Frame Time:  136.45833333333331
Dataset\Personal Dataset\Body\19.mp4
Trainig Type:  
Frame Count 99.0
fps 30.0
Duration 103.125
Update:  3.4375
Frame Time:  103.125
Dataset\Personal Dataset\Body\20.mp4
Trainig Type:  
Frame Count 74.0
fps 30.0
Duration 77.08333333333334
Update:  2.5694444444444446
Frame Time:  77.08333333333334
Dataset\Perso

Dataset\Personal Dataset\Brain\22.mp4
Trainig Type:  
Frame Count 118.0
fps 29.994592212107342
Duration 122.93882757010904
Update:  4.097960919003635
Frame Time:  122.93882757010904
Dataset\Personal Dataset\Brain\23.mp4
Trainig Type:  
Frame Count 142.0
fps 29.99554832392987
Duration 147.9386191603588
Update:  4.931287305345293
Frame Time:  147.9386191603588
Dataset\Personal Dataset\Brain\24.mp4
Trainig Type:  
Frame Count 149.0
fps 29.991818023169962
Duration 155.25067524759078
Update:  5.175022508253026
Frame Time:  155.25067524759078
Dataset\Personal Dataset\Brain\25.mp4
Trainig Type:  
Frame Count 165.0
fps 29.994453537006727
Duration 171.90678248691188
Update:  5.730226082897063
Frame Time:  171.90678248691188
Dataset\Personal Dataset\Brain\26.mp4
Trainig Type:  
Frame Count 143.0
fps 29.991516770638096
Duration 149.00046683784055
Update:  4.966682227928018
Frame Time:  149.00046683784055
Dataset\Personal Dataset\Brain\27.mp4
Trainig Type:  
Frame Count 142.0
fps 29.99563371057135

Dataset\Personal Dataset\Elbow\32.mp4
Trainig Type:  
Frame Count 124.0
fps 29.601400350087523
Duration 130.90596911536122
Update:  4.363532303845374
Frame Time:  130.90596911536122
Dataset\Personal Dataset\Elbow\33.mp4
Trainig Type:  
Frame Count 119.0
fps 29.601960190975724
Duration 125.62512671487464
Update:  4.187504223829155
Frame Time:  125.62512671487464
Dataset\Personal Dataset\Elbow\34.mp4
Trainig Type:  
Frame Count 133.0
fps 29.595002669353565
Duration 140.43756124759233
Update:  4.6812520415864105
Frame Time:  140.43756124759233
Dataset\Personal Dataset\Elbow\35.mp4
Trainig Type:  
Frame Count 138.0
fps 29.60107851877623
Duration 145.6872592417382
Update:  4.856241974724607
Frame Time:  145.6872592417382
Finger  Videos 	=  35
Dataset\Personal Dataset\Finger\0.mp4
Trainig Type:  
Frame Count 102.0
fps 29.599513634892773
Duration 107.68758025275395
Update:  3.5895860084251314
Frame Time:  107.68758025275395
Dataset\Personal Dataset\Finger\1.mp4
Trainig Type:  
Frame Count 141

Dataset\Personal Dataset\Gall Bladder\6.mp4
Trainig Type:  
Frame Count 102.0
fps 30.10428280973306
Duration 105.88194444444444
Update:  3.529398148148148
Frame Time:  105.88194444444444
Dataset\Personal Dataset\Gall Bladder\7.mp4
Trainig Type:  
Frame Count 104.0
fps 30.192087479638083
Duration 107.64409722222221
Update:  3.5881365740740736
Frame Time:  107.64409722222221
Dataset\Personal Dataset\Gall Bladder\8.mp4
Trainig Type:  
Frame Count 92.0
fps 29.77892385874432
Duration 96.54479166666667
Update:  3.2181597222222225
Frame Time:  96.54479166666667
Dataset\Personal Dataset\Gall Bladder\9.mp4
Trainig Type:  
Frame Count 144.0
fps 30.10327094337519
Duration 149.48541666666668
Update:  4.982847222222222
Frame Time:  149.48541666666668
Dataset\Personal Dataset\Gall Bladder\10.mp4
Trainig Type:  
Frame Count 141.0
fps 30.10419038943293
Duration 146.36666666666667
Update:  4.8788888888888895
Frame Time:  146.36666666666667
Dataset\Personal Dataset\Gall Bladder\11.mp4
Trainig Type:  
Fr

Dataset\Personal Dataset\Intestine\8.mp4
Trainig Type:  
Frame Count 130.0
fps 30.103509760484126
Duration 134.95104166666667
Update:  4.498368055555556
Frame Time:  134.95104166666667
Dataset\Personal Dataset\Intestine\9.mp4
Trainig Type:  
Frame Count 143.0
fps 29.957403226181885
Duration 149.17013888888889
Update:  4.972337962962963
Frame Time:  149.17013888888889
Dataset\Personal Dataset\Intestine\10.mp4
Trainig Type:  
Frame Count 126.0
fps 29.967812349698473
Duration 131.39097222222222
Update:  4.3796990740740736
Frame Time:  131.39097222222222
Dataset\Personal Dataset\Intestine\11.mp4
Trainig Type:  
Frame Count 109.0
fps 29.9609682798556
Duration 113.68958333333335
Update:  3.789652777777778
Frame Time:  113.68958333333335
Dataset\Personal Dataset\Intestine\12.mp4
Trainig Type:  
Frame Count 114.0
fps 29.91079237362253
Duration 119.10416666666666
Update:  3.9701388888888887
Frame Time:  119.10416666666666
Dataset\Personal Dataset\Intestine\13.mp4
Trainig Type:  
Frame Count 89.

Dataset\Personal Dataset\Lungs\17.mp4
Trainig Type:  
Frame Count 125.0
fps 29.80637777000604
Duration 131.05416666666667
Update:  4.3684722222222225
Frame Time:  131.05416666666667
Dataset\Personal Dataset\Lungs\18.mp4
Trainig Type:  
Frame Count 131.0
fps 30.02414154892993
Duration 136.3486111111111
Update:  4.544953703703703
Frame Time:  136.3486111111111
Dataset\Personal Dataset\Lungs\19.mp4
Trainig Type:  
Frame Count 126.0
fps 29.997222479400055
Duration 131.26215277777777
Update:  4.375405092592592
Frame Time:  131.26215277777777
Dataset\Personal Dataset\Lungs\20.mp4
Trainig Type:  
Frame Count 102.0
fps 29.999882310249795
Duration 106.25041681950049
Update:  3.541680560650016
Frame Time:  106.25041681950049
Dataset\Personal Dataset\Lungs\21.mp4
Trainig Type:  
Frame Count 144.0
fps 29.993582352859466
Duration 150.03209510153724
Update:  5.001069836717908
Frame Time:  150.03209510153724
Dataset\Personal Dataset\Lungs\22.mp4
Trainig Type:  
Frame Count 110.0
fps 29.99690827581817

# 6. Preprocess Data and Create Labels and Features

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [13]:
label_map = {label:num for num, label in enumerate(actions)}
len(label_map)

19

In [14]:
label_map


{'Ankle': 0,
 'Blood': 1,
 'Fist': 2,
 'Heart': 3,
 'Jaw': 4,
 'Knuckle': 5,
 'Lips': 6,
 'Palm': 7,
 'Skull': 8,
 'Thumb': 9,
 'Abdomen': 10,
 'Back': 11,
 'Body': 12,
 'Brain': 13,
 'Elbow': 14,
 'Finger': 15,
 'Gall Bladder': 16,
 'Intestine': 17,
 'Lungs': 18}

In [19]:
sequences, labels = [], []
for action in actions:
#     for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
    for sequence in range(30):  
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
#             print((os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num))))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [35]:
np.array(sequences).shape

(570, 30, 1662)

In [36]:
np.array(labels).shape

(570,)

In [37]:
X = np.array(sequences)

In [38]:
X.shape

(570, 30, 1662)

In [39]:
y = to_categorical(labels).astype(int)

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [41]:
y_test.shape

(29, 19)

# 7. Build and Train LSTM Neural Network

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import os

In [17]:
# !nvidia-smi
import subprocess

result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
output = result.stdout.decode('utf-8')

print(output)

Wed Jun 14 14:24:44 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.98                 Driver Version: 535.98       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   50C    P3              13W /  35W |      0MiB /  4096MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

In [18]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [19]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='tanh'))
model.add(LSTM(64, return_sequences=False, activation='tanh'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [20]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [31]:
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

KeyboardInterrupt: 

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 64)            442112    
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           98816     
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 19)                627       
Total params: 597,203
Trainable params: 597,203
Non-trainable params: 0
__________________________________________________

# 8. Make Predictions

In [42]:
res = model.predict(X_test)

In [43]:
actions[np.argmax(res[0])]

'Blood'

In [44]:
actions[np.argmax(y_test[0])]

'Blood'

# 9. Save Weights

In [36]:
model.save('Final.h5')
# model.save('mouth.h5')

In [37]:
# del model

In [21]:
model.load_weights('Final.h5')
# model.load_weights('mouth.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [22]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [23]:
yhat = model.predict(X_test)

NameError: name 'X_test' is not defined

In [None]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [24]:
multilabel_confusion_matrix(ytrue, yhat)

NameError: name 'ytrue' is not defined

In [25]:
accuracy_score(ytrue, yhat)

NameError: name 'ytrue' is not defined

# Testing Through Video

In [26]:
def test_Video(video):
    def printFind(pred):
        for n in pred:
            print(actions[n])
        print('---------')

    # 1. New detection variables
    sequence = []
    sentence = []
    predictions = []
    threshold = 0.5
    print(actions)
    # testing on video
#     video = os.path.join(GeneralDataset,'Fist','9.mp4')
    cap = cv2.VideoCapture(video)

    #   Calculating frames
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)
    duration = frame_count/fps
    duration = (duration/35)*1000
    count = 0
    print("Frame Count",frame_count)
    print("fps",fps)
    print("Duration",duration)
    print("Update: ",duration/35)
    print("Frame Time: ",duration)

    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
    #         testin--------------
    #         hflip = iaa.Fliplr(p=1.0)
    #         frame = hflip.augment_image(frame)
    #         ------------------
    #       skipping frames
            cap.set(cv2.CAP_PROP_POS_MSEC, (count*duration))    
            # move the time
            success,image = cap.read()
            count += 1
            if success==False:
                break
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
    #         print(results)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                printFind(predictions)
    #             print(predictions)
                print("Max: ",actions[np.argmax(res)])
                predictions.append(np.argmax(res))
    # #             predictions.append(np.argmax(res))
    #             predictions.append(res.argsort()[-3:][::-1])
    #             print(actions[res[0]],actions[res[1]],actions[res[2]])


            #3. Viz logic
                if np.unique(predictions[-10:])[0]==np.argmax(res): 
                    if res[np.argmax(res)] > threshold: 

                        if len(sentence) > 0: 
                            if actions[np.argmax(res)] != sentence[-1]:
                                sentence.append(actions[np.argmax(res)])
                        else:
                            sentence.append(actions[np.argmax(res)])

                if len(sentence) > 5: 
                    sentence = sentence[-5:]

                # Viz probabilities
    #             image = prob_viz(res, actions, image, colors)

            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30), 
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()

In [27]:
# testing on video
video = os.path.join(PersonalDataset,'Ankle','10.mp4')

test_Video(video)

['Ankle' 'Blood' 'Fist' 'Heart' 'Jaw' 'Knuckle' 'Lips' 'Palm' 'Skull'
 'Thumb' 'Abdomen' 'Back' 'Body' 'Brain' 'Elbow' 'Finger' 'Gall Bladder'
 'Intestine' 'Lungs']
Frame Count 97.0
fps 29.59452450446969
Duration 93.64666666666666
Update:  2.6756190476190476
Frame Time:  93.64666666666666
---------
Max:  Ankle
Ankle
---------
Max:  Ankle
Ankle
Ankle
---------
Max:  Palm
Ankle
Ankle
Palm
---------
Max:  Palm
Ankle
Ankle
Palm
Palm
---------
Max:  Palm
Ankle
Ankle
Palm
Palm
Palm
---------
Max:  Palm


# 11. Test in Real Time

In [52]:
from scipy import stats

In [53]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [54]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

TypeError: only size-1 arrays can be converted to Python scalars

<Figure size 1800x1800 with 0 Axes>

In [59]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
#         print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
#             image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

Lips
Jaw
Jaw
Jaw
Jaw
Lips
Lips
Blood
Intestine
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Abdomen
Lungs
Elbow
Elbow
Elbow
Elbow
Finger
Finger
Finger
Gall Bladder
Gall Bladder
Gall Bladder
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Finger
Finger
Finger
Lungs
Lungs
Abdomen
Lungs
Lungs
Lungs
Lungs
Lungs
Finger
Finger
Elbow
Gall Bladder
Lungs
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Elbow
Skull
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Skull
Skull
Skull
Skull
Skull
Skull
Skull
Skull
Skull
Blood
Blood
Blood
Blood
Blood
Blood
Blood
Blood
Finger
Finger
Finger
Ankle
Ankle
Ankle
Ankle
Skull
Skull
Skull
Skull
Skull
Heart
Heart
Heart
Skull
Skull
Skull
Skull
Lungs
Lungs
Lungs
Lungs
Lungs
Intestine
Gall Bladder
Finger
Finger
Finger
Finger
Fist
Fist
Fist
Fist
Fist
Fist
Fist
Skull
Fist
Fist
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankle
Ankl