In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import keras_tuner as kt
from sklearn import model_selection
import tensorflow as tf
import sqlite3
import os
import json
from collections import Counter
from tensorflow.keras.layers import Dense, Dropout, LSTM, InputLayer, Bidirectional
from tensorflow.keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

Using TensorFlow backend


In [2]:
IMAGE_SIZE = (256, 256)
SEQUENCE_LENGTH = 25
POSE_START_INDEX = 11
POSE_END_INDEX = 25

IMG_DIR = 'test_img'
RECORDINGS_DIR = 'recordings'
LOADED_VIDEOS_DIR = 'loaded_videos'

DB_NAME = 'slr.db'
TABLE_NAME = 'sign_videos'

DETECTOR_MODEL_NAME = 'pose_landmarker.task'

In [53]:
class CVTuner(kt.engine.tuner.Tuner):
  
    def run_trial(
            self, 
            trial, 
            x: np.array, 
            y: np.array, 
            kfolds: int = 5,
            batch_size: int = 32, 
            epochs: int = 1,
            callbacks: list = None,
            workers: int = -1,
            use_multiprocessing: bool = True):

        cv = model_selection.KFold(kfolds)
        val_losses = []
        val_accuracies = []

        for train_indices, test_indices in cv.split(x):

            x_train, x_test = x[train_indices], x[test_indices]
            y_train, y_test = y[train_indices], y[test_indices]

            model = self.hypermodel.build(trial.hyperparameters)
            model.fit(
                x_train, 
                y_train, 
                batch_size=batch_size, 
                epochs=epochs,
                callbacks=callbacks,
                workers=workers,
                use_multiprocessing=use_multiprocessing
            )
            
            val_losses.append(model.evaluate(x_test, y_test))

            test_preds = model.predict(x_test)
            val_accuracies.append(
                np.sum(np.argmax(test_preds, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
            )

        self.oracle.update_trial(
            trial.trial_id, 
            {
                'val_loss': np.mean(val_losses), 
                'val_accuracy': np.mean(val_accuracies),
                'val_accuracy_std': np.std(val_accuracies),
            }
        )
        # self.save_model(trial.trial_id, model)

In [54]:
def build_model(hp, input_shape: tuple = (24, 56), units: int = 5):

    lstm_cells = hp.Int('lstm_cells', min_value=8, max_value=32, step=8)
    lstm_layers = hp.Int('lstm_layers', min_value=1, max_value=2, step=1)
    learning_rate = hp.Choice('learning_rate', values=[5e-5, 7.5e-5, 1e-4, 2.5e-4, 5e-4, 7.5e-4, 1e-3])

    model = tf.keras.Sequential()
    model.add(InputLayer(input_shape=input_shape))

    for i in range(lstm_layers):

        model.add(Bidirectional(
            LSTM(
                units=lstm_cells, 
                return_sequences=True if i + 1 != lstm_layers else False, 
                activation='relu', 
                recurrent_dropout=0.25
            )
        ))

    model.add(Dense(units, activation='softmax'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )

    return model

### Connect to db

In [5]:
conn = sqlite3.connect(os.path.join("..", DB_NAME))
cursor = conn.cursor()

### Get saved videos

In [6]:
cursor.execute('SELECT name, video_array, true_label FROM sign_videos where verified = 1')
res = cursor.fetchall()

In [7]:
X = np.array([np.frombuffer(v, dtype=np.float16).reshape(SEQUENCE_LENGTH - 1, -1) for name, v, true_label in res])
Y = [label for _, _, label in res]

In [8]:
counter = Counter(Y)
counter

Counter({'hello': 67, 'thank you': 64, 'please': 63, 'what': 63, 'love': 62})

In [9]:
mapping_classes = {
    "hello": 0,
    "love": 1,
    "please": 2,
    "thank you": 3,
    "what": 4,
}

Y = [mapping_classes.get(y) for y in Y]

In [10]:
Y = to_categorical(Y)

In [60]:
EPOCHS = 20
ES_PATIENCE = 80
LR_REDUCE = 40
BATCH_SIZE = 50
VAL_BATCH_SIZE = 50

In [61]:
tuner = CVTuner(
    
  hypermodel=build_model,
  oracle=kt.oracles.BayesianOptimizationOracle(
    objective='val_loss',
    max_trials=40),
  directory='kt-test',
  project_name='kt-test-1')

In [62]:
callbacks = [EarlyStopping(monitor='loss', patience=ES_PATIENCE, restore_best_weights=True),
             ReduceLROnPlateau(monitor='loss', factor=0.5, patience=LR_REDUCE)]

In [63]:
tuner.search(
    X, 
    Y, 
    batch_size=BATCH_SIZE, 
    epochs=EPOCHS,
    callbacks=callbacks,
    workers=-1,
    use_multiprocessing=True
)

Trial 7 Complete [00h 00m 25s]
val_loss: 0.9606221273541451

Best val_loss So Far: 0.8919920518994331
Total elapsed time: 00h 04m 11s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
16                |8                 |lstm_cells
2                 |2                 |lstm_layers
0.00075           |7.5e-05           |learning_rate

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20

KeyboardInterrupt: 

In [66]:
X.shape

(319, 24, 56)

In [191]:
X_t = X.reshape(X.shape[0], X.shape[1], -1, 4)
X_t = X_t[:, :, :, :-1]
X_t = X_t.reshape(X_t.shape[0], X_t.shape[1], -1)
X_t  = X_t[:, :, :-6]

In [192]:
x_lgbm = X_t.reshape(X.shape[0], -1)
y_lgbm = np.argmax(Y, axis=1)

In [204]:
from sklearn.model_selection import train_test_split

X_train_lgbm, X_test_lgbm, y_train_lgbm, y_test_lgbm = train_test_split(
    x_lgbm, 
    y_lgbm, 
    test_size=0.15, 
    random_state=42, 
    shuffle=True
)


In [235]:
import lightgbm as lgb

lgbm = lgb.LGBMClassifier(
    n_estimators=500,
    learning_rate=0.0001,
    num_leaves=18,
    colsample_bytree=0.8,
    reg_alpha=0.1,
    max_depth=10,
    verbose=1,
    objective='multiclass')

lgbm_classifier = lgbm.fit(
    X_train_lgbm, 
    y_train_lgbm, 
    eval_set=[(X_test_lgbm, y_test_lgbm)]
)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006833 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64693
[LightGBM] [Info] Number of data points in the train set: 271, number of used features: 864
[LightGBM] [Info] Start training from score -1.524581
[LightGBM] [Info] Start training from score -1.613135
[LightGBM] [Info] Start training from score -1.594786
[LightGBM] [Info] Start training from score -1.751971
[LightGBM] [Info] Start training from score -1.576767


In [236]:
preds_lgbm = lgbm_classifier.predict(x_lgbm)
train_preds_lgbm = lgbm_classifier.predict(X_train_lgbm)
test_preds_lgbm = lgbm_classifier.predict(X_test_lgbm)

In [237]:
sum(y_lgbm == preds_lgbm) / len(y_lgbm)

0.5360501567398119

In [238]:
sum(y_train_lgbm == train_preds_lgbm) / len(y_train_lgbm)

0.5940959409594095

In [239]:
sum(y_test_lgbm == test_preds_lgbm) / len(y_test_lgbm)

0.20833333333333334

In [240]:
x_lgbm[0].shape

(864,)

In [241]:
with open(os.path.join('..', 'config', 'pose_keys.json'), 'r') as f:
    pose_keys = json.load(f)


In [242]:
pose_keys = {k: v.replace(' ', '_') for k, v in pose_keys.items()}


In [244]:
f_names = np.array([[[f"x_{pose_keys.get(str(i + 11))}_step{j}", f"y_{pose_keys.get(str(i + 11))}_step{j}", f"z_{pose_keys.get(str(i + 11))}_step{j}"] for i in range(12)] for j in range(1, 25)])

In [245]:
f_names = f_names.flatten()

In [246]:
f_imp_dict = {i: f for i, f in enumerate(lgbm_classifier.feature_importances_)}

In [247]:
sorted_f_imp_dict = dict(sorted(f_imp_dict.items(), key=lambda item: item[1], reverse=True))
total = sum(sorted_f_imp_dict.values())
normalized_dict = {str(k): round(v / total, 3) for k, v in sorted_f_imp_dict.items()}
print(normalized_dict)


{'172': 0.031, '528': 0.027, '156': 0.025, '25': 0.025, '65': 0.025, '715': 0.025, '622': 0.025, '312': 0.022, '562': 0.022, '289': 0.022, '604': 0.021, '280': 0.021, '216': 0.02, '387': 0.019, '118': 0.018, '406': 0.016, '624': 0.016, '121': 0.016, '658': 0.015, '340': 0.015, '580': 0.014, '249': 0.012, '676': 0.012, '574': 0.012, '758': 0.011, '556': 0.011, '391': 0.011, '441': 0.01, '450': 0.01, '586': 0.009, '358': 0.009, '364': 0.009, '495': 0.009, '447': 0.009, '825': 0.008, '778': 0.008, '478': 0.007, '366': 0.007, '681': 0.007, '733': 0.007, '745': 0.007, '266': 0.006, '190': 0.006, '133': 0.006, '657': 0.006, '568': 0.006, '160': 0.006, '645': 0.006, '162': 0.006, '736': 0.006, '516': 0.005, '242': 0.005, '784': 0.005, '694': 0.005, '19': 0.005, '45': 0.005, '388': 0.005, '300': 0.005, '381': 0.005, '267': 0.004, '217': 0.004, '369': 0.004, '177': 0.004, '204': 0.004, '709': 0.004, '228': 0.004, '729': 0.004, '337': 0.004, '326': 0.004, '718': 0.004, '385': 0.004, '618': 0.004

In [248]:
normalized_dict

{'172': 0.031,
 '528': 0.027,
 '156': 0.025,
 '25': 0.025,
 '65': 0.025,
 '715': 0.025,
 '622': 0.025,
 '312': 0.022,
 '562': 0.022,
 '289': 0.022,
 '604': 0.021,
 '280': 0.021,
 '216': 0.02,
 '387': 0.019,
 '118': 0.018,
 '406': 0.016,
 '624': 0.016,
 '121': 0.016,
 '658': 0.015,
 '340': 0.015,
 '580': 0.014,
 '249': 0.012,
 '676': 0.012,
 '574': 0.012,
 '758': 0.011,
 '556': 0.011,
 '391': 0.011,
 '441': 0.01,
 '450': 0.01,
 '586': 0.009,
 '358': 0.009,
 '364': 0.009,
 '495': 0.009,
 '447': 0.009,
 '825': 0.008,
 '778': 0.008,
 '478': 0.007,
 '366': 0.007,
 '681': 0.007,
 '733': 0.007,
 '745': 0.007,
 '266': 0.006,
 '190': 0.006,
 '133': 0.006,
 '657': 0.006,
 '568': 0.006,
 '160': 0.006,
 '645': 0.006,
 '162': 0.006,
 '736': 0.006,
 '516': 0.005,
 '242': 0.005,
 '784': 0.005,
 '694': 0.005,
 '19': 0.005,
 '45': 0.005,
 '388': 0.005,
 '300': 0.005,
 '381': 0.005,
 '267': 0.004,
 '217': 0.004,
 '369': 0.004,
 '177': 0.004,
 '204': 0.004,
 '709': 0.004,
 '228': 0.004,
 '729': 0.004,
 '

In [249]:
for k, v in normalized_dict.items():
    if v > 0:
        print(f"{f_names[int(k)]}: {v}")


y_right_index_step5: 0.031
x_left_index_step15: 0.027
x_left_wrist_step5: 0.025
y_left_index_step1: 0.025
z_right_index_step2: 0.025
y_left_thumb_step20: 0.025
y_right_elbow_step18: 0.025
x_left_index_step9: 0.022
y_right_pinky_step16: 0.022
y_left_shoulder_step9: 0.022
y_right_index_step17: 0.021
y_right_index_step8: 0.021
x_left_shoulder_step7: 0.02
x_right_index_step11: 0.019
y_right_elbow_step4: 0.018
y_right_elbow_step12: 0.016
x_left_wrist_step18: 0.016
y_left_wrist_step4: 0.016
y_right_elbow_step19: 0.015
y_right_wrist_step10: 0.015
y_right_shoulder_step17: 0.014
x_right_thumb_step7: 0.012
y_right_index_step19: 0.012
y_right_thumb_step16: 0.012
z_left_shoulder_step22: 0.011
y_right_wrist_step16: 0.011
y_left_thumb_step11: 0.011
x_right_elbow_step13: 0.01
x_left_pinky_step13: 0.01
y_right_elbow_step17: 0.009
y_right_thumb_step10: 0.009
y_right_shoulder_step11: 0.009
x_right_index_step14: 0.009
x_right_wrist_step13: 0.009
x_right_thumb_step23: 0.008
y_right_pinky_step22: 0.008
y_r