In [1]:
# Import and install dependencies.
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib



In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [3]:
# We will setup Media Pipe Holistic

#This is mediapipe holistic model which makes the detections.
mp_holistic = mp.solutions.holistic

#This is mediapipe holistic drawing utilities which will draw the detection.
mp_drawing = mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #This is color coversion from BGR to RGB.
    image.flags.writeable = False  #image is no longer writeable.
    results = model.process(image) #Make prediction.
    image.flags.writeable = True  #Image is now writeable.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) #This is color conversion from RGB to BGR.
    return image, results

#### first we will access webcam using cv2 </br>
#### This is the method for accessing the web cam we will use this method to collect dataset for our model to train</br>
cap = cv2.VideoCapture(0)</br>
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        
        #capturig frames from the webcam
        ret, frame = cap.read()

        #between read and rendering we want to make our prediction.
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        #Draw ladmarks in real time
        enhanced_landmarks(image, results)

        #Showing frames on the screen.
        cv2.imshow('OpenCV Feed', image)

        #Breaking gracefully from the loop.
        #This line specifies that when we want to break out of loop we need to press 'q' on the keyboard.
        if cv2.waitKey(10) & 0XFFF == ord('q'):
            break;   
    cap.release()
    #This line of code closes alll the windows.
    cv2.destroyAllWindows()

#### Then on above that layer we will apply mediapipe <br>

In [6]:
def draw_landmarks(image, results):
    #Draw Face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                             mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             )
    #Draw pose Connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             )
    #Draw left Hand Connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             )
    #Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245, 117, 76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             )

In [7]:
#This method is for extracting keypoints from our media pipe detection for further recognition.

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [40]:
#path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data')

In [41]:
#Actions that we try to detect
#First we will train our model for digits
actions = np.array(['0','1','2','3','4','5','6','7','8','9'])
#Thirty videos worth of data
no_sequences = 30

#videos are going to be 30 frames in length
sequence_length = 30

In [42]:
#Actions that we try to detect
#First we will train our model for digits
#actions = np.array(['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',])
#actions = np.array(['Hello', 'India', 'Sign', 'Language','Bye','Again','I','You','Man','Woman','He','She','Deaf','Hearing'])

In [46]:
try:
    os.rmdir(DATA_PATH)
except OSError as e:
    print("Error: %s : %s" % (DATA_PATH, e.strerror))

Error: MP_Data : The system cannot find the file specified


In [47]:
#This traverse through every actions or labels and inside that action there will be thirty videos, each of thirty frames and 
#Creates the folder or directory for the same
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, actions , str(sequence)))
        except:
            pass

In [44]:
pwd

'C:\\Users\\Aastha\\Documents\\GitHub\\Real-Time-SignLanguageDetection'

In [28]:
#This method is for collecting keyframes or dataset for the acitons or lables specified in the actions variable above

cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    #Loop through actions
    for action in actions:
        #Loop through each videos
        for sequence in range(no_sequences):
            #Loop through video length
            for frame_num in range(sequence_length):
                #capturig frames from the webcam
                ret, frame = cap.read()

                #between read and rendering we want to make our prediction.
                image, results = mediapipe_detection(frame, holistic)
                print(results)

                #Draw ladmarks in real time
                draw_landmarks(image, results)

                #Apply wait Logic.
                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 1, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                #Now we will extract keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)


                #Showing frames on the screen.
                cv2.imshow('OpenCV Feed', image)

                #Breaking gracefully from the loop.
                #This line specifies that when we want to break out of loop we need to press 'q' on the keyboard.
                if cv2.waitKey(10) & 0XFFF == ord('q'):
                    break;  
                    
    cap.release()
    #This line of code closes alll the windows.
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>


FileNotFoundError: [Errno 2] No such file or directory: 'Data\\0\\0\\0.npy'

In [29]:
cap.release()
#This line of code closes alll the windows.
cv2.destroyAllWindows()

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical


label_map = {label:num for num, label in enumerate(actions)}
label_map


#Now we will bring our data together and start structuring
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
        
        
np.array(sequences).shape

np.array(labels).shape

x = np.array(sequences)
x.shape

y = to_categorical(labels).astype(int)
y

#Now we will do training and testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.05)


x_train.shape

x_test.shape


y_train.shape

y_test.shape


#Build the LSTM model

# For this purpose we are going to use tensorflow and keras
#first of all we have to import some dependencies that is 
#Sequencial model
#LSTM model
#Dense layer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard #This gonna allow some logins in tesorflow


#Now we will create a log directory and setup our tensorflow callbacks
#log directory allows us to monitor our neural network training
log_dir = os.path.join('logs')
tb_callback = TensorBoard(log_dir=log_dir)


#Now we will build our neural network architecture
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))


#Now we will compile our model and fit it
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])


model.fit(x_train, y_train, epochs=850, callbacks=[tb_callback])



#Now Testing our model
res = model.predict(x_test)
actions[np.argmax(res[30])]
actions[np.argmax(y_test[30])]