In [16]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import make_folder as mkf
import numpy as np
import os
import pandas 

label_map = {action:num for num,action in enumerate(mkf.actions)}
print(label_map)

{'Hello': 0, 'Thanks': 1, 'ILoveYou': 2}


In [17]:
sequences, labels = [], []
for action in mkf.actions:
    for sequence in np.array(os.listdir(os.path.join(mkf.DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(mkf.sequence_length):
            res = np.load(os.path.join(mkf.DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [18]:
np.array(sequences).shape
np.array(labels).shape
x = np.array(sequences)
y = to_categorical(labels, dtype ="uint8")
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
print(X_train.shape,X_test.shape,y)

(81, 30, 1662) (9, 30, 1662) [[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [27]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard

log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(mkf.actions.shape[0], activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.fit(X_train, y_train, epochs=300, callbacks=[tb_callback])


ValueError: Input 0 of layer "lstm_20" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 64)

In [15]:
import joblib

# Train your machine learning model

# Save the model to a file
joblib.dump(model, 'signLanguageModel.pkl')

INFO:tensorflow:Assets written to: ram://b87a3d51-3c87-42bc-9e89-aa240af96527/assets


['signLanguageModel.pkl']

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            442112    
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 3)                 99        
                                                                 
Total params: 477,315
Trainable params: 477,315
Non-trainable params: 0
_________________________________________________________________


In [6]:

res = model.predict(X_test)
model.save('action.h5')
model.load_weights('action.h5')



In [26]:
import cv2 as cv
import holistic_tracking.holisticTrackingModule as htm

detector = htm.holisticDetector()
vid = cv.VideoCapture(0)
input_frames = []     #input frames from camera consisting of 30 frames each     
predictions = []

while True:
    isTrue, img = vid.read()
    img = cv.flip(img, 1)
    img, results = detector.find_body(img)
    keypoints = detector.extract_keypoints(results)
    input_frames.append(keypoints)
    
    if len(input_frames) == 30:
        cv.waitKey(1000)
        res = model.predict(np.expand_dims(input_frames, axis=0))[0]
        print(mkf.actions[np.argmax(res)])
        predictions.append(mkf.actions[np.argmax(res)])
        input_frames = []
    if predictions:
        prediction_text = " ".join(predictions[-1])
    else:
        prediction_text = " "
    cv.putText(img, f'The word you said is {prediction_text}', (10,200), cv.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv.LINE_AA)
    cv.imshow('OpenCV Feed', img)

    if cv.waitKey(10) & 0xFF == ord("q"):   #press q to close video
        break

vid.release()
cv.destroyAllWindows()

Hello
Hello
Thanks
Thanks
Hello
Hello
ILoveYou
ILoveYou
ILoveYou
ILoveYou
ILoveYou
