## Prasyarat

- Python versi 3.8
- Tensorflow dan Tensorflow-GPU (optional) harus memiliki versi yang sama
- CUDNN sesuaikan versi dengan yang diminta tensorflow (optional)

## Install Library

Catatan: `tensorflow-gpu` dan `cudnn` bersifat opsional. Jika perangkat memiliki GPU yang mendukung CUDA, install kedua library tersebut. Pastikan juga versi `tensorflow-gpu`, CUDA, dan cuDNN sesuai dengan kombinasi yang diminta oleh TensorFlow. Cek tabel kompatibilitasnya di [sini](https://www.tensorflow.org/install/source_windows#gpu).

In [None]:
# !pip install opencv-python mediapipe scikit-learn matplotlib tensorflow tensorflow-gpu cudnn

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
# Alias for MediaPipe's holistic solution module; the cells below use its
# Holistic model class and its HAND_CONNECTIONS constant.
mp_holistic = mp.solutions.holistic
# Alias for MediaPipe's drawing helpers (draw_landmarks, DrawingSpec).
mp_drawing = mp.solutions.drawing_utils

## Fungsi-Fungsi untuk deteksi keypoint

In [3]:
def mediapipe_detection(image, model):
    """Run ``model`` on one BGR frame and return the frame with the results.

    Args:
        image: Frame in OpenCV's BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being processed).
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            MediaPipe ``Holistic`` instance.

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call.
    # NOTE(review): presumably this lets MediaPipe avoid copying it - confirm.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [4]:
def draw_styled_landmarks(image, results):
    """Draw the left- and right-hand landmarks from ``results`` onto ``image``.

    Nothing is returned; callers display ``image`` after this call, so the
    drawing is expected to happen on the array that was passed in.

    Only the hands are drawn. Face-mesh and pose drawing are intentionally
    left out; they could be added as extra ``draw_landmarks`` calls using
    ``mp_holistic.FACEMESH_TESSELATION`` / ``mp_holistic.POSE_CONNECTIONS``.

    Args:
        image: Frame to draw on.
        results: Detection output exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # One entry per hand: (landmarks, landmark-point color, connection color).
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )

    for hand_landmarks, point_color, line_color in hand_styles:
        point_spec = mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4)
        line_spec = mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            point_spec,
            line_spec,
        )

## Testing camera

Catatan: Cell ini boleh dijalankan ataupun tidak

In [22]:
# Quick webcam check: stream frames from camera 0, run the holistic model on
# each one and show the annotated feed until `q` is pressed.
cap = cv2.VideoCapture(0)

# Optional: force a capture resolution.
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; passing the missing frame on to
                # cv2.cvtColor would raise, so stop cleanly instead.
                print('Failed to read a frame from the camera; stopping.')
                break

            # make detections
            image, results = mediapipe_detection(frame, holistic)
            # Debug output: prints the results object once per frame.
            print(results)

            # draw landmarks
            draw_styled_landmarks(image, results)

            # show to screen
            cv2.imshow('OpenCV Feed', image)

            # break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when the
    # loop above raised.
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

## Fungsi untuk ekstrak keypoint

In [23]:
def extract_keypoints(results):
    """Flatten both hands' landmarks from ``results`` into one 1-D vector.

    Each hand contributes the ``x, y, z`` of every entry in its ``.landmark``
    sequence, in order. A hand that was not detected (falsy attribute) is
    replaced by ``21 * 3`` zeros. The left-hand values come first, followed by
    the right-hand values. Pose and face landmarks are not included.

    Args:
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        ``numpy.ndarray``: concatenation ``[left hand, right hand]``.
    """
    def _flatten_hand(hand):
        # Missing hand -> zero placeholder so the vector keeps a fixed layout.
        if not hand:
            return np.zeros(21 * 3)
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coords).flatten()

    left = _flatten_hand(results.left_hand_landmarks)
    right = _flatten_hand(results.right_hand_landmarks)
    return np.concatenate([left, right])

## Collecting data

- actions = gerakan yang ingin dideteksi
- no_sequences = setiap gerakan akan memiliki sejumlah N video
- sequence_length = setiap video memiliki sejumlah N frame

In [5]:
# Root folder of the collected keypoint files (relative to the notebook).
DATA_PATH = 'dataset_hands'

# Gestures to detect. The full alphabet would be:
# actions = np.array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
#                     'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
#                     'w', 'x', 'y', 'z'])
actions = np.array(['a', 'b'])

# Number of videos (sequences) recorded per gesture.
no_sequences = 30

# Number of frames in each video.
sequence_length = 30

In [42]:
# Create one folder per (action, sequence) pair: DATA_PATH/<action>/<sequence>/
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps re-running this cell harmless, while real
        # failures (e.g. permission errors) are still raised instead of being
        # silently swallowed.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

## Catatan

Untuk menghentikan pengambilan gambar, tekan tombol `q` pada keyboard, buat cell baru dengan kode 
```python 
cap.release()
cv2.destroyAllWindows()
```
Lalu jalankan

In [43]:
# Record the dataset: for every action, capture `no_sequences` videos of
# `sequence_length` frames each and save the hand keypoints of every frame to
# DATA_PATH/<action>/<sequence>/<frame_num>.npy. Press `q` to stop early.
from itertools import product

cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # The three nested loops are flattened into a single loop (same
        # iteration order) so that one `break` stops the whole collection
        # rather than just the current video.
        for action, sequence, frame_num in product(actions, range(no_sequences), range(sequence_length)):
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop rather than crash in cvtColor.
                print('Failed to read a frame from the camera; stopping collection.')
                break

            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            first_frame = frame_num == 0
            if first_frame:
                cv2.putText(image, 'STARTING COLLECTION', (120,200),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
            cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
            # Show to screen
            cv2.imshow('OpenCV Feed', image)
            if first_frame:
                # Short pause at the start of each video so the user can get
                # into position.
                cv2.waitKey(500)

            keypoints = extract_keypoints(results)
            # np.save appends the ".npy" extension to this path.
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
            np.save(npy_path, keypoints)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when the
    # loop above raised.
    cap.release()
    cv2.destroyAllWindows()

In [15]:
# Manual cleanup: free the camera and close any OpenCV windows left open
# after a capture cell was interrupted.
cap.release()
cv2.destroyAllWindows()

## Preprocessing atau labeling

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

## One-hot encoding pada label

Catatan: Agar label lebih mudah dipahami mesin, lakukan step ini

In [7]:
# Map every action name to its integer class index (its position in `actions`).
label_map = {name: index for index, name in enumerate(actions)}

In [8]:
# Display the action -> class index mapping.
label_map

{'a': 0, 'b': 1}

In [47]:
# Sanity check: inspect the saved keypoints of the first frame of the first
# 'a' video.
np.load('dataset_hands/a/0/0.npy')

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.])

In [9]:
# Load the saved keypoints back from disk: one window (list of per-frame
# arrays) per recorded video, plus the matching integer class label.
sequences, labels = [], []

for action in actions:
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [10]:
# Stack all windows into one feature array; a later cell's recorded output
# shows its shape as (60, 30, 126).
X = np.array(sequences)
# One-hot encode the integer class labels and cast the result to int.
y = to_categorical(labels).astype(int)

In [11]:
# Display the feature array.
X

array([[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        ...,
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          2.59288698e-01,  5.20450115e-01, -6.40110970e-02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          2.79161900e-01,  5.15743136e-01, -4.36698869e-02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          2.90257275e-01,  5.17177701e-01, -4.79285344e-02]],

       [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          3.04308087e-01,  5.21711707e-01, -2.66366098e-02],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          3.38724673e-01,  5.35540044e

## Splitting dataset

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # test_size affects the resulting accuracy

In [13]:
# Display the shape of the full feature array.
X.shape

(60, 30, 126)

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

## Build and train model

Catatan: Callback `myCallback` digunakan untuk menghentikan proses training jika tingkat akurasi sudah sesuai dengan kriteria yang sudah ditentukan sebelumnya

In [15]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once both the training and validation targets are met.

    Training is stopped at the end of the first epoch where ``loss`` and
    ``val_loss`` are below 0.3 and ``categorical_accuracy`` and
    ``val_categorical_accuracy`` are above 0.9.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when the targets are met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric name to value for this epoch. May be
                ``None`` or lack some of the watched metrics (for example when
                no validation data is given); the check is then skipped
                instead of raising ``TypeError`` on a ``None`` comparison.
        """
        logs = logs or {}
        loss = logs.get('loss')
        accuracy = logs.get('categorical_accuracy')
        val_loss = logs.get('val_loss')
        val_accuracy = logs.get('val_categorical_accuracy')

        # The stop criterion cannot be evaluated without all four metrics.
        if None in (loss, accuracy, val_loss, val_accuracy):
            return

        if loss < 0.3 and accuracy > 0.9 and val_loss < 0.3 and val_accuracy > 0.9:
            print("\nTarget akurasi tercapai")
            self.model.stop_training = True

In [16]:
# Sequence classifier: three stacked LSTM layers followed by two dense layers
# and a softmax output with one unit per action. The input shape is
# (30, 126), matching the last two dimensions of X shown above.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 126)),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer returns only its final output (no sequence).
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [17]:
# The 'categorical_accuracy' metric name (and its 'val_' counterpart) is the
# key that myCallback reads from the epoch logs.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [18]:
# Print the layer-by-layer summary of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            48896     
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 2)                 66        
                                                                 
Total params: 203,426
Trainable params: 203,426
Non-trai

In [19]:
# Train with the early-stop callback. Note that the test split is also used
# as the validation data here.
callback = myCallback()
model.fit(X_train, y_train, epochs=2000, callbacks=[callback], validation_data=(X_test, y_test)) # Adjust the number of epochs as needed
# Re-check the results; make sure the model is neither overfitting nor underfitting
# Current state: overfitting, because there is too little data, which makes the training accuracy high but the testing accuracy low

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Target akurasi tercapai


<keras.callbacks.History at 0x2c9be4a1d50>

## Save model

In [20]:
model.save('hand.h5') # the file name can be changed

## Catatan kaki

- Untuk melakukan testing model, buka file `testing.ipynb` lalu jalankan
- Jika ingin menambahkan dataset, buka file `refine_data.ipynb` lalu jalankan