In [1]:
%config Completer.use_jedi = False

In [2]:
import cv2
import numpy as np
import os
import tensorflow as tf
from matplotlib import  pyplot as plt 
import time
import mediapipe as mp

# To Capture Keypoints using Holistics

In [3]:
# Holistics Model
mp_holistics = mp.solutions.holistic

# To draw Keypoints
mp_drawing = mp.solutions.drawing_utils

In [4]:
#  function to capture the keypoints from a video
def media_pipe_detection(image, model):
    #converting color from default opencv's BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable =False
    # Making Predictions
    results = model.process(image)
    image.flags.writeable =True
    #converting back to RGB to opencv's Default BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [5]:
# function to draw landmarks
def draw_landmarks(image, results):
  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistics.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(80,22,76), thickness=2, circle_radius= 4),
                              mp_drawing.DrawingSpec(color=(80,44,250), thickness=2, circle_radius= 2)
                             )
    
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistics.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(80,117,66), thickness=2, circle_radius= 4),
                              mp_drawing.DrawingSpec(color=(80,66,230), thickness=2, circle_radius= 2)
                             )

In [7]:
# defining a function to extract all the keypoints( left hand, right hand) which is in 2d arrray to 1d array.
# In case if there is no key points then we will be returning array of zeros.
def extract_keypoints(results):
  
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    
    return np.concatenate([lh, rh])

In [8]:
# setting up folder for collections

DATA_PATH = os.path.join("CUSTOM_ISL_DATA")

# actions we are detecting
actions = np.array(["hi", "thanks", "iloveyou", "sorry"])

# 50 videos to train
no_sequences = 50

# videos are going to be in frames of 30
sequence_length = 30

# start_folder =50

In [9]:
for action in actions: 
    for sequence in range(1, no_sequences+1):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass
 
 # to make more than 50
# for action in actions: 
#     for sequence in range(1+start_folder, no_sequences+start_folder+1):
#         try: 
#             os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
#         except:
#             pass

In [10]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistics.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(1, no_sequences+1):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = media_pipe_detection(frame, holistic)
#                 print(results)

                # Draw landmarks
                draw_landmarks(image, results)
                
                # Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(2000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

In [10]:
# create labels and features
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [11]:
label_map = {label:num for num, label in enumerate(actions)}

In [12]:
sequences, labels = [], []
for action in actions:
    for sequence in range(1, no_sequences+1):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [13]:
# preprocessing our data

X = np.array(sequences)
y = to_categorical(labels).astype(int)

In [14]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [15]:
# libraries to build model
import datetime

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [16]:
# build model

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [17]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [19]:
tb_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/", histogram_freq=1)
model.fit(x_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000

KeyboardInterrupt: 

In [18]:
%load_ext tensorboard


In [19]:
# to reload tensorboard
%reload_ext tensorboard

In [20]:
# to open tensorboard
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 14928), started 2 days, 1:06:07 ago. (Use '!kill 14928' to kill it.)

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            48896     
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 4)                 132       
                                                                 
Total params: 199,332
Trainable params: 199,332
Non-trainable params: 0
_________________________________________________________________


In [20]:
res = model.predict(x_test)

In [21]:
correct = 0
total = 0
for i in range(len(res)):
    if actions[np.argmax(res[i])] == actions[np.argmax(y_test[i])]: correct+=1
    total+=1

In [22]:
correct

20

In [23]:
total

20

In [24]:
loss, acc = model.evaluate(x_test,y_test);
print("Accuracy: {:.2f}%".format(acc*100));
print("Loss: {:.4f}".format(loss));

Accuracy: 100.00%
Loss: 0.0000


In [25]:
#  Evaluate model using confusion matrix
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [26]:
ypreds = model.predict(x_test)

ytrue = np.argmax(y_test, axis=1).tolist()
ypreds = np.argmax(ypreds, axis=1).tolist()

multilabel_confusion_matrix(ytrue, ypreds)

array([[[16,  0],
        [ 0,  4]],

       [[15,  0],
        [ 0,  5]],

       [[12,  0],
        [ 0,  8]],

       [[17,  0],
        [ 0,  3]]], dtype=int64)

In [29]:
# accuracy score
accuracy_score(ytrue, ypreds)

1.0

In [27]:
# tf.keras.models.save_model(model,'Final_Model.hdf5')

In [33]:
# plt.figure(figsize=(18,18))
# plt.imshow(cv2.cvtColor(prob_viz(res, actions, image, colors),cv2.COLOR_BGR2RGB ))

# Deploy

In [32]:
import pickle

pickle.dump(model, open("ISL_Prediction_Model.pkl","wb"))

INFO:tensorflow:Assets written to: ram://6d43754e-f4a0-4d02-98bf-34e667f1da30/assets


