In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import numpy as np
import os

In [5]:
# Class labels; a label's position in this array becomes its integer class id.
# Alternative (Burmese) label set kept for reference:
# words = np.array(['မင်္ဂလာပါ', 'မှန်တယ်', 'မှားတယ်'])
words = np.array(['hello', 'like', 'dislike'])
# Dataset root; the loading cell reads `<data_path>/<word>/<folder>/<frame>` files with np.load.
data_path = 'dataset/custom_action_numpy/'

In [6]:
# Lookup table from label text to its index in `words`: {label: class_id}.
label_map = dict(zip(words, range(len(words))))
label_map

{'hello': 0, 'like': 1, 'dislike': 2}

In [7]:
def _natural_key(name):
    """Sort key that orders purely numeric names by value ('2.npy' before '10.npy').

    Non-numeric names sort after all numeric ones, alphabetically.
    """
    stem = os.path.splitext(name)[0]
    return (0, int(stem), name) if stem.isdecimal() else (1, 0, name)


# Build one list of per-frame arrays per recording folder, plus its class id.
sequences, labels = [], []
for word in words:
    word_dir = os.path.join(data_path, word)
    # os.listdir() returns entries in arbitrary order, so sort explicitly:
    # frame files (presumably named by frame number) must be stacked in a
    # consistent order, and a stable folder order keeps the seeded
    # train/test split reproducible across machines.
    for folder in sorted(os.listdir(word_dir), key=_natural_key):
        folder_dir = os.path.join(word_dir, folder)
        window = [
            np.load(os.path.join(folder_dir, frame_name))
            for frame_name in sorted(os.listdir(folder_dir), key=_natural_key)
        ]
        sequences.append(window)
        labels.append(label_map[word])

# Sanity check: show the first frame of the first sequence (once, not once per word).
print(sequences[0][0])

[ 2.28984773e-01  8.10803056e-01  1.95418949e-07  2.61330903e-01
  7.83403575e-01 -1.76140200e-02  2.82689095e-01  7.36656308e-01
 -2.34143231e-02  2.97368139e-01  6.95093393e-01 -2.81446688e-02
  3.09840888e-01  6.61885500e-01 -3.24332453e-02  2.64420360e-01
  6.72364593e-01 -8.08202103e-03  2.70259291e-01  6.22971296e-01
 -1.78625453e-02  2.72367954e-01  5.92124879e-01 -2.74306014e-02
  2.72769004e-01  5.64623833e-01 -3.50511372e-02  2.39358813e-01
  6.65728629e-01 -7.81143038e-03  2.37609029e-01  6.09877825e-01
 -1.67676359e-02  2.35355899e-01  5.74432254e-01 -2.61197500e-02
  2.32645184e-01  5.45185566e-01 -3.31241377e-02  2.15512514e-01
  6.73201084e-01 -1.04862638e-02  2.11190388e-01  6.18108749e-01
 -2.15685591e-02  2.07038388e-01  5.83333015e-01 -3.10849454e-02
  2.03536272e-01  5.52533090e-01 -3.75616662e-02  1.92780703e-01
  6.91028059e-01 -1.48967551e-02  1.84277683e-01  6.49842560e-01
 -2.71457341e-02  1.77010879e-01  6.23318076e-01 -3.41652669e-02
  1.70872211e-01  5.97257

## Data Understanding

In [104]:
# sequences, labels = [], []

# a = 0
# for frame_num in os.listdir(f'{data_path}/like/311'):
#     if a <= 10:
#         res = np.load(os.path.join(f'{data_path}/like/311/{frame_num}'))
#         sequences.append(res)
#         labels.append(label_map[word])
# #         print(window)
#     a += 1
# #     sequences.append(window)
# #     print(sequences)



In [107]:
# a = 0
# b = 1
# print(f'Joint 0')
# for i in sequences[2]:
#     print(i, end=" ")
#     a += 1
#     if a == 3:
#         print(f'\n\nJoint {b}')
# #         print()
# #         print()
#         a = 0
#         b+=1

In [108]:
# a = 0
# b = 1
# print(f'Joint 0')
# for i in sequences[9]:
#     print(i, end=" ")
#     a += 1
#     if a == 3:
#         print(f'\n\nJoint {b}')
# #         print()
# #         print()
#         a = 0
#         b+=1

In [109]:
# a = 0
# b = 0
# # print(f'Joint 0')
# for i in sequences[0]:
#     if b%4 == 0:
#         print(i, end=" ")
#     a += 1
#     if a == 3:
#         if b%4 == 0:
#             print(f'\nJoint {b}\n\n')
# #         print()
# #         print()
#         a = 0
#         b+=1

In [110]:
# for i, val in enumerate(sequences[0]):
#     if (i+1)%3==0:
#         val = 0
#         print(f'z {val}')
#         print()
#     else:
#         print(val)

In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Force every sequence to exactly 90 frames (the same length as the
# input_shape=(90, 63) given to the first LSTM layer): shorter sequences are
# padded at the end (padding='post'); longer ones are truncated on the side
# chosen by Keras' default `truncating` argument, which is not set here.
x = pad_sequences(sequences, maxlen=90, padding='post', dtype='float32')

In [9]:
# Make sure `x` is a NumPy array before splitting.
# NOTE(review): pad_sequences presumably already returns an ndarray, which
# would make this an extra copy — confirm and drop if so.
x = np.array(x)

In [13]:
# Expected layout: (number of sequences, frames per sequence, values per frame).
x.shape

(3600, 90, 63)

In [14]:
# One-hot encode the integer class ids collected in `labels`, stored as ints.
y = to_categorical(labels).astype(int)

In [15]:
# Display the one-hot label matrix (one row per sequence, one column per class).
y

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       ...,
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1]])

In [16]:
# Hold out 10% of all samples as the test set (shuffled, seeded for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    shuffle=True,
    random_state=1,
)

In [17]:
# Carve a validation set out of the remaining training data: 10% of the 90%
# left after the test split, i.e. 9% of all samples.
x_actual_train, x_validation, y_actual_train, y_validation = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    shuffle=True,
    random_state=1,
)

In [18]:
# Check the size of the held-out test labels: (test samples, classes).
print(y_test.shape)

(360, 3)


## Build and Train LSTM Neural Network

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

In [25]:
# Callbacks passed to model.fit(). Only TensorBoard logging (to ./logs) is
# active; the EarlyStopping and ModelCheckpoint entries are disabled.
my_callbacks = [
#     EarlyStopping(patience=20),
#     ModelCheckpoint(filepath='models/alex/alex_best_model_1', save_best_only = True),
    TensorBoard(log_dir='./logs')
]

In [26]:
# Training hyper-parameters consumed by model.fit().
batch_size = 64  # samples per gradient update
epochs = 200  # upper bound on training epochs

In [27]:
# Stacked-LSTM classifier: three recurrent layers reduce each (90, 63)
# keypoint sequence to a 64-value vector, then a narrowing stack of
# Dense + Dropout blocks feeds a softmax with one unit per entry in `words`.
# NOTE(review): activation='relu' overrides the LSTM layers' default
# activation — confirm this is intentional.
model = Sequential([
    # Recurrent feature extractor.
    LSTM(256, return_sequences=True, activation='relu', input_shape=(90,63)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dropout(0.2),

    # Fully connected head, each layer followed by 20% dropout.
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),

    # Class probabilities.
    Dense(words.shape[0], activation='softmax'),
])



In [28]:
# Adam optimizer with categorical cross-entropy (targets are one-hot, output
# is softmax); accuracy is the metric reported during training.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
# Train on the training split and evaluate on the validation split every epoch.
model.fit(
    x_actual_train,
    y_actual_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_validation, y_validation),
    callbacks=my_callbacks,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200

KeyboardInterrupt: 

In [19]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 90, 256)           327680    
_________________________________________________________________
lstm_1 (LSTM)                (None, 90, 128)           197120    
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2

In [20]:
# Model outputs for every test sample (one score per class), used for spot checks.
res = model.predict(x_test)

In [21]:
# Predicted label for test sample 1: the class with the highest score.
words[np.argmax(res[1])]

'dislike'

In [22]:
# Ground-truth label for test sample 1, decoded from its one-hot row.
words[np.argmax(y_test[1])]

'like'

In [27]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [25]:
# Model outputs for every test sample (one score per class).
y_pred = model.predict(x_test)

NameError: name 'x_test' is not defined

In [29]:
# Convert one-hot targets and model outputs to integer class ids.
# Predictions are recomputed from the model here (instead of overwriting a
# previously stored `y_pred` with its own argmax) so the cell gives the same
# result however many times, and in whatever order, it is run.
ytrue = np.argmax(y_test, axis=1).tolist()
y_pred = np.argmax(model.predict(x_test), axis=1).tolist()

In [30]:
# One 2x2 confusion matrix per class, laid out as [[TN, FP], [FN, TP]].
multilabel_confusion_matrix(ytrue, y_pred)

array([[[62,  1],
        [10, 17]],

       [[57, 10],
        [ 6, 17]],

       [[45,  5],
        [ 0, 40]]], dtype=int64)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)

# Headline metrics as percentages; precision, recall and F1 are macro-averaged
# (unweighted mean over classes). `ytrue` / `y_pred` are lists of class ids.
print('Accuracy Score Macro', accuracy_score(ytrue, y_pred) * 100, '%')
print('Precision Score Macro', precision_score(ytrue, y_pred, average='macro') * 100, '%')
print('Recall_Score Macro', recall_score(ytrue, y_pred, average='macro') * 100, '%')
print('F1_Score Macro', f1_score(ytrue, y_pred, average='macro') * 100, '%')

# Per-class precision / recall / F1 / support table.
print(classification_report(ytrue, y_pred))

In [None]:
# This cell already relies on seaborn / matplotlib / confusion_matrix; import
# them here so it runs on a fresh kernel.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Heat map of the confusion matrix (rows: true class, columns: predicted class).
sns.heatmap(confusion_matrix(ytrue, y_pred), annot=True, fmt=".0f")
plt.title("LSTM Confusion Matrix", fontsize=18, color="red");

In [31]:
# Fraction of test samples whose predicted class id equals the true class id.
accuracy_score(ytrue, y_pred)

0.8222222222222222

In [32]:
# Persist the trained model to disk under models/alex/.
model.save('models/alex/alex_val_70_acc_score_82')

INFO:tensorflow:Assets written to: models/alex/alex_val_70_acc_score_82\assets


In [9]:
from scipy import stats

In [10]:
# Default bar colours, one per class.
colors = [(245,117,16), (71,240,16), (255, 0, 0)]
def prob_viz(res, words, input_frame, colors):
    """Draw one filled probability bar with its label per class.

    Args:
        res: iterable of per-class scores; each bar is int(score * 100) pixels wide.
        words: labels, indexed in step with `res`.
        input_frame: image to annotate; it is copied, never modified.
        colors: bar colours; reused cyclically when there are more classes
            than colours (previously this raised IndexError).

    Returns:
        The annotated copy of `input_frame`.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40  # every class gets its own 40-pixel row
        bar_color = colors[num % len(colors)]
        cv.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv.putText(output_frame, words[num], (0, top + 25), cv.FONT_HERSHEY_DUPLEX, 1, (255,255,255), 2, cv.LINE_AA)

    return output_frame

## Real Time Testing

In [1]:
import cv2 as cv
import numpy as np
import os
import mediapipe as mp
from glob import glob
from tensorflow.keras.models import load_model

In [2]:
# Class labels used to decode the model's predicted class index.
# NOTE(review): the order must match the class order the loaded model was
# trained with — confirm.
# Alternative (Burmese) label set kept for reference (it was previously
# assigned and then immediately overwritten by the line below):
# words = np.array(['မင်္ဂလာပါ', 'မှန်တယ်', 'မှားတယ်'])
words = np.array(['hello', 'like', 'dislike'])

In [19]:
# Load the trained classifier from an HDF5 file given by a relative path.
# NOTE(review): this file name differs from the path passed to model.save()
# earlier in the notebook — confirm which checkpoint this is.
model = load_model('MSLR_alex_best_performance.h5')



In [20]:
# Short aliases for the MediaPipe solution modules used by the helpers below.
mp_drawing_styles = mp.solutions.drawing_styles  # default landmark / connection styles
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_hands = mp.solutions.hands  # provides Hands and HAND_CONNECTIONS

In [21]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    The frame is converted to RGB for `model.process`, flagged read-only for
    the duration of that call, and converted back to BGR afterwards.

    Returns:
        (bgr_image, results): the round-tripped BGR image and whatever
        `model.process` returned.
    """
    rgb_frame = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    rgb_frame.flags.writeable = False  # lock the buffer while the model reads it
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True   # unlock before converting back
    return cv.cvtColor(rgb_frame, cv.COLOR_RGB2BGR), results

In [22]:
def draw_landmarks(image, results):
    """Draw the landmarks and connections of every detected hand onto `image`.

    Uses MediaPipe's default hand landmark and connection styles. Does
    nothing when `results.multi_hand_landmarks` is None or empty, so it is
    safe to call on frames where no hand was detected.
    """
    for hand_landmarks in results.multi_hand_landmarks or []:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style(),
        )

In [23]:
def extract_keypoints(results):
    """Flatten the first detected hand into a 1-D array of x, y, z values.

    Returns the landmarks of `results.multi_hand_landmarks[0]` as
    [x0, y0, z0, x1, y1, z1, ...]. When no hand is present, returns a
    zero vector of length 21 * 3 instead.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return np.zeros(21*3)

    coords = [[point.x, point.y, point.z] for point in detected_hands[0].landmark]
    return np.array(coords).flatten()

In [24]:
# 1. New detection variables
sequence = []      # rolling window of per-frame keypoint vectors
sentence = []      # recently recognised words shown in the top banner
predictions = []   # class ids predicted for the most recent windows
threshold = 0.5    # minimum winning score required to emit a word

# Number of frames fed to the model per prediction. Taken from the loaded
# model's input shape so the window always matches what the network expects;
# falls back to the previous hard-coded 30 when the time dimension is not a
# fixed integer.
try:
    seq_len = int(model.input_shape[1])
except (AttributeError, IndexError, TypeError, ValueError):
    seq_len = 30

cap = cv.VideoCapture(0)
try:
    # NOTE(review): static_image_mode=True presumably makes MediaPipe treat
    # every frame as an unrelated still image; for a live stream False may be
    # the better fit — confirm against how the training keypoints were made.
    with mp_hands.Hands( static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as hands:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unplugged / stream ended):
                # stop instead of crashing inside cv.resize(None, ...).
                break

            img_resize = cv.resize(frame, (640, 480))
            # Make detections
            image, results = mediapipe_detection(img_resize, hands)

            if results.multi_hand_landmarks:
                # Draw landmarks
                draw_landmarks(image, results)

                # 2. Prediction logic: keep only the newest `seq_len` frames.
                keypoints = extract_keypoints(results)
                sequence.append(keypoints)
                sequence = sequence[-seq_len:]

                if len(sequence) == seq_len:
                    res = model.predict(np.expand_dims(sequence, axis=0))[0]
                    best = int(np.argmax(res))
                    predictions.append(best)
                    # Only the last 10 predictions are ever inspected, so
                    # do not let the history grow without bound.
                    predictions = predictions[-10:]

                    # 3. Viz logic: accept the prediction only when it is the
                    # most frequent class among the recent predictions
                    # (np.unique(...)[0] would compare against the smallest
                    # class id instead) and its score clears the threshold.
                    if np.bincount(predictions).argmax() == best:
                        if res[best] > threshold:
                            # Avoid repeating the same word back to back.
                            if not sentence or words[best] != sentence[-1]:
                                sentence.append(words[best])

                    # Keep the banner to the five most recent words.
                    if len(sentence) > 5:
                        sentence = sentence[-5:]

                    # Viz probabilities
                    image = prob_viz(res, words, image, colors)

                # Banner with the recognised words.
                cv.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
                cv.putText(image, ' '.join(sentence), (3,30),
                           cv.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv.LINE_AA)

            # Show to screen
            cv.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close windows, even if an error occurred.
    cap.release()
    cv.destroyAllWindows()

In [18]:

    # Manual cleanup: release the camera handle and close all OpenCV windows,
    # e.g. after the capture loop was interrupted before its own cleanup ran.
    cap.release()
    cv.destroyAllWindows()