# Basic dynamic model

In [82]:
# Folder that the loading cell below reads the dynamic-gesture recordings from.
# (A previous value, '../backend/data/dynamic_dataset', was assigned here and
# immediately overwritten, so it never had any effect.)
DATA_PATH = '../backend/dynamic_signs/frames'


In [83]:
# Load the dynamic-gesture recordings found under DATA_PATH.
from sign.training.landmark_extraction.MediaPiper import MediaPiper

mp = MediaPiper()
# `gestures` is iterated below; each item is read through `.label` and `.results`.
gestures = mp.process_dynamic_gestures_from_folder(DATA_PATH)

I0000 00:00:1710424071.765103   54620 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1710424071.766065  107750 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.3.2-1pop0~1704238321~22.04~36f1d0e), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)


In [84]:
# Sanity check: for every loaded gesture, show its label and the number of entries in `results`.
[(gesture.label, f"Length: {len(gesture.results)}") for gesture in gestures]

[('J', 'Length: 13'), ('Z', 'Length: 13')]

In [85]:
import numpy as np
from sign.trajectory import TrajectoryBuilder, trajectory

bob = TrajectoryBuilder()

# Gesture label -> one trajectory per recorded sequence of that gesture.
gesture_trajector_map: dict[str, list[trajectory]] = {}

for gesture in gestures:
    gesture_trajector_map[gesture.label] = []
    for sequence in gesture.results:
        # Skip frames whose `multi_hand_landmarks` is None (the later cells in this
        # notebook apply the same guard); np.array(None) would otherwise slip a
        # 0-d object array into the sequence handed to the trajectory builder.
        new_sequence = [
            np.array(image_mp_res.multi_hand_landmarks)
            for image_mp_res in sequence
            if image_mp_res.multi_hand_landmarks is not None
        ]
        # Reduce the sequence to key frames, then turn those into a trajectory.
        new_sequence = bob.extract_keyframes_sample(new_sequence)
        sequence_as_np_array = np.array(new_sequence)
        sequence_trajectory = bob.make_trajectory(sequence_as_np_array)

        gesture_trajector_map[gesture.label].append(sequence_trajectory)


print(gesture_trajector_map)

🔥🔥 TrajectoryBuilder is now running in BERTRAM_MODE 🔥🔥
{'J': [trajectory(directions=[trajectory_element(x=<direction.DOWN: -1>, y=<direction.UP: 1>, z=<direction.UP: 1>), trajectory_element(x=<direction.STATIONARY: 0>, y=<direction.DOWN: -1>, z=<direction.UP: 1>)]), trajectory(directions=[trajectory_element(x=<direction.UP: 1>, y=<direction.DOWN: -1>, z=<direction.DOWN: -1>), trajectory_element(x=<direction.DOWN: -1>, y=<direction.DOWN: -1>, z=<direction.UP: 1>)]), trajectory(directions=[trajectory_element(x=<direction.DOWN: -1>, y=<direction.UP: 1>, z=<direction.DOWN: -1>), trajectory_element(x=<direction.DOWN: -1>, y=<direction.DOWN: -1>, z=<direction.UP: 1>)]), trajectory(directions=[trajectory_element(x=<direction.UP: 1>, y=<direction.UP: 1>, z=<direction.DOWN: -1>), trajectory_element(x=<direction.DOWN: -1>, y=<direction.DOWN: -1>, z=<direction.UP: 1>)]), trajectory(directions=[trajectory_element(x=<direction.DOWN: -1>, y=<direction.UP: 1>, z=<direction.STATIONARY: 0>), trajectory

In [86]:
# Build the feature matrix X (flattened x/y/z direction values, one row per
# trajectory) and the label vector y.
#
# Every row must have the same length for X to be a rectangular array, so all
# trajectories are cut to the shortest length found across *all* labels.
# Slicing is used instead of popping so the trajectories stored in
# `gesture_trajector_map` are left untouched, and no local is called `min` or
# `trajectory`, which would shadow the builtin / the imported class.
all_trajectories = [
    label_trajectory
    for label_trajectories in gesture_trajector_map.values()
    for label_trajectory in label_trajectories
]
common_len = min((len(t.directions) for t in all_trajectories), default=0)

X = []
y = []

for label, label_trajectories in gesture_trajector_map.items():
    for label_trajectory in label_trajectories:
        xyz = []
        for te in label_trajectory.directions[:common_len]:
            xyz.extend((te.x.value, te.y.value, te.z.value))
        X.append(np.array(xyz))
        y.append(label)

X = np.array(X)
y = np.array(y)
X

array([[-1,  1,  1,  0, -1,  1],
       [ 1, -1, -1, -1, -1,  1],
       [-1,  1, -1, -1, -1,  1],
       [ 1,  1, -1, -1, -1,  1],
       [-1,  1,  0,  0, -1,  1],
       [-1,  1, -1, -1, -1,  1],
       [ 0,  1, -1, -1, -1,  1],
       [ 1,  1, -1, -1, -1,  1],
       [-1,  1, -1, -1, -1,  1],
       [ 1,  1, -1, -1, -1,  1],
       [ 1,  1, -1, -1, -1,  1],
       [-1,  1, -1,  0, -1,  1],
       [-1,  0,  0,  0, -1,  1],
       [ 1,  1,  1, -1,  1,  0],
       [-1,  1, -1,  1,  1,  1],
       [ 1,  1,  0,  1,  1,  0],
       [ 1,  1,  0,  1,  1,  0],
       [ 1,  0,  0, -1,  1, -1],
       [ 1,  1,  0,  1,  1,  0],
       [-1,  1,  0, -1,  1,  0],
       [ 1, -1,  0,  0,  0,  1],
       [ 1,  1,  0, -1,  1,  0],
       [ 1,  1,  1,  1,  1, -1],
       [ 1,  0, -1,  1,  1,  0],
       [ 1,  1,  0,  1,  1,  0],
       [ 1,  1,  1, -1,  1, -1]])

In [87]:
# NOTE(review): LinearRegression is imported but not used in the cells visible here.
from sklearn.linear_model import LinearRegression, LogisticRegression

# Fit a logistic-regression classifier on the flattened direction vectors (X)
# and their gesture labels (y) built in the previous cell.
model = LogisticRegression()
model.fit(X,y)

In [88]:
# Set to True to write the fitted trajectory-only model to 'dynamic_model.joblib'.
# Kept off by default so that re-running the notebook does not overwrite the file.
SAVE_MODEL = False

if SAVE_MODEL:
    from joblib import dump
    dump(model, 'dynamic_model.joblib')

### Basic Gesture Recognizer but with landmarks :eyes:

The idea is to keep the "basic" trajectory, in which all landmarks are reduced to their mean x, y, z values.
In addition, we attach the preprocessed landmarks of each frame to this trajectory.
- That is, we only attach the preprocessed landmarks that correspond to the chosen key frames.
- The preprocessing step is EXACTLY the same as for static images.
  - This is also why the z-values are dropped in the following steps: the preprocessing methods we already use discard them, and there is no other reason.

In [89]:
from typing import Tuple
import random
import numpy as np
from sign.training.landmark_extraction.MediaPiper import DynamicGesture
from sign.trajectory import TrajectoryBuilder, trajectory
from sign.landmarks import pre_process_landmark, calc_landmark_list

def training_trajectories_with_normalized_landmarks(gestures:list[DynamicGesture]) -> dict[str, list[np.ndarray]]:
    """Build one feature vector per recorded sequence, grouped by gesture label.

    For every sequence, frames whose ``multi_hand_landmarks`` is ``None`` are
    dropped. Key frames are then picked as: the first frame,
    ``TrajectoryBuilder().target_len - 2`` frames sampled from the middle with a
    fixed seed, and the last frame. The feature vector is the flattened x/y/z
    direction values of the key-frame trajectory, followed by the preprocessed
    landmarks of every key frame.

    Args:
        gestures: Gestures exposing ``label`` and ``results``; ``results`` is
            iterated as sequences of per-frame results that carry
            ``multi_hand_landmarks``.

    Returns:
        Mapping from gesture label to the list of 1-D feature arrays, one per
        sequence of that gesture.

    Raises:
        ValueError: If a sequence has no usable frame, or too few frames to
            sample the requested number of key frames.
    """
    bob = TrajectoryBuilder()

    gesture_trajector_map: dict[str, list[np.ndarray]] = {}

    for gesture in gestures:
        gesture_trajector_map[gesture.label] = []
        for sequence in gesture.results:
            # (raw landmarks, preprocessed landmarks) for every frame with a detection.
            new_sequence : list[Tuple] = []
            for image_mp_res in sequence:
                if image_mp_res.multi_hand_landmarks is None:
                    continue
                hand_landmarks = np.array(image_mp_res.multi_hand_landmarks)
                flatmark = np.array(
                    pre_process_landmark(
                        calc_landmark_list(image_mp_res.multi_hand_landmarks)))
                new_sequence.append((hand_landmarks, flatmark))

            # NOTE(review): key frames are sampled here instead of being chosen
            # with bob.extract_keyframes_sample, which the trajectory-only cell uses.
            middle = new_sequence[1:-1]
            n_middle = bob.target_len - 2
            if not new_sequence or not 0 <= n_middle <= len(middle):
                raise ValueError(
                    f"Sequence for gesture {gesture.label!r} has {len(new_sequence)} "
                    f"usable frame(s); cannot pick {bob.target_len} key frames."
                )

            # A private generator seeded with 42 draws exactly what
            # `random.seed(42); random.sample(...)` would, without resetting the
            # process-wide RNG as a side effect on every sequence.
            # NOTE(review): random.sample returns the middle frames in draw order,
            # not chronological order. The prediction helper samples the same way,
            # so any reordering must be applied to both — confirm intent.
            rng = random.Random(42)
            res = [new_sequence[0], *rng.sample(middle, n_middle), new_sequence[-1]]

            hand_landmark_seq = [ hand_landmark for hand_landmark, _ in res ]
            flatmarks = [ flatmark for _, flatmark in res ] #without Z values
            sequence_trajectory = bob.make_trajectory(np.array(hand_landmark_seq))

            direction_values = np.array([
                axis.value
                for trajectory_elm in sequence_trajectory.directions
                for axis in (trajectory_elm.x, trajectory_elm.y, trajectory_elm.z)
            ])

            # One concatenate instead of a chain of np.append calls (same result,
            # without re-copying the growing array for every key frame).
            features = np.concatenate(
                [direction_values, *(np.ravel(flatmark) for flatmark in flatmarks)])

            gesture_trajector_map[gesture.label].append(features)

    return gesture_trajector_map

def extract_training_data_and_labels_from_dynamic_gesture_map(gesture_map: dict[str, list[np.ndarray]]) -> Tuple[list[np.ndarray], list[str]]:
    """Flatten a label -> samples mapping into parallel sample and label lists.

    Samples keep the mapping's iteration order; the label of the sample at
    index ``i`` is found at index ``i`` of the returned label list.

    Args:
        gesture_map: Mapping from gesture label to that label's samples.

    Returns:
        A ``(samples, labels)`` pair of equally long lists.
    """
    labelled_samples = [
        (sample, label)
        for label, samples in gesture_map.items()
        for sample in samples
    ]
    trajectories_and_landmarks: list[np.ndarray] = [sample for sample, _ in labelled_samples]
    labels: list[str] = [label for _, label in labelled_samples]
    return trajectories_and_landmarks, labels

In [90]:
# The mapping is not called `map`: that would shadow the builtin for the rest of
# the kernel session.
gesture_feature_map = training_trajectories_with_normalized_landmarks(gestures)
train_data, train_labels = extract_training_data_and_labels_from_dynamic_gesture_map(gesture_feature_map)
# Both lists must have the same length (one label per feature vector).
len(train_data), len(train_labels)

🔥🔥 TrajectoryBuilder is now running in BERTRAM_MODE 🔥🔥


(26, 26)

In [122]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Logistic regression on the combined trajectory + landmark feature vectors.
# LogisticRegression comes from the import in an earlier cell of this notebook.
model_logistic = LogisticRegression()
model_logistic.fit(train_data, train_labels)

# Second model on the same data: standardise the features, then fit an SVM
# with a degree-6 polynomial kernel.
model_svm = make_pipeline(StandardScaler(),
                          SVC(kernel="poly", degree=6, coef0=1))
model_svm.fit(train_data, train_labels)

In [123]:
def sample_tuple_hand_landmarks_normalized(tuple_list:list[Tuple], target_len=None):
    """Pick key frames from a sequence and split them into two parallel lists.

    The key frames are the first frame, ``target_len - 2`` frames sampled from
    the middle with a fixed seed (42), and the last frame.

    Args:
        tuple_list: ``(hand_landmarks, flatmark)`` pairs, one per frame.
        target_len: Total number of key frames to return. Defaults to the
            ``target_len`` of the notebook-level ``bob`` TrajectoryBuilder.

    Returns:
        ``(hand_landmark_seq, flatmarks)``: the first and second tuple members
        of the chosen key frames, in the same order.

    Raises:
        IndexError: If ``tuple_list`` is empty.
        ValueError: If there are too few middle frames to sample from.
    """
    if target_len is None:
        target_len = bob.target_len

    # A private generator seeded with 42 draws exactly what
    # `random.seed(42); random.sample(...)` would, without resetting the
    # process-wide RNG on every call.
    # NOTE(review): random.sample returns the middle frames in draw order, not
    # chronological order. The training function samples the same way, so any
    # reordering must be applied to both — confirm intent.
    rng = random.Random(42)
    res = [tuple_list[0]]
    res.extend(rng.sample(tuple_list[1:-1], target_len - 2))
    res.append(tuple_list[-1])

    hand_landmark_seq = [ hand_landmark for hand_landmark, _ in res ]
    flatmarks = [ flatmark for _, flatmark in res ] #without Z values
    return hand_landmark_seq, flatmarks

def mp_landmarks_to_dynamic_prediction_ready(landmarks):
    """Turn one sequence of per-frame landmarks into a single feature vector.

    Each frame is paired with its preprocessed landmarks, key frames are picked
    with ``sample_tuple_hand_landmarks_normalized``, and the result is the
    flattened x/y/z direction values of the key-frame trajectory followed by
    the preprocessed landmarks of every key frame.

    Args:
        landmarks: Per-frame landmark objects of one sequence.

    Returns:
        1-D numpy array: direction values first, then the flattened landmarks.
    """
    frame_pairs = [
        (np.array(frame_landmarks),
         pre_process_landmark(calc_landmark_list(frame_landmarks)))
        for frame_landmarks in landmarks
    ]

    key_hand_landmarks, key_flatmarks = sample_tuple_hand_landmarks_normalized(frame_pairs)

    # Uses the notebook-level TrajectoryBuilder `bob`.
    gesture_trajectory = bob.make_trajectory(np.array(key_hand_landmarks))

    direction_values = np.array([
        component.value
        for step in gesture_trajectory.directions
        for component in (step.x, step.y, step.z)
    ])

    # Equivalent to appending each flattened landmark set to the direction values in turn.
    return np.concatenate(
        [direction_values, *(np.ravel(flat_landmark) for flat_landmark in key_flatmarks)])
    

In [138]:
# Load the test recordings with the same extraction as the training data.
TEST_PATH = '../backend/dynamic_signs/test'

test_data = mp.process_dynamic_gestures_from_folder(TEST_PATH)
# One ground-truth label per test *sequence* (not per gesture): predictions are
# made per sequence, so the two lists stay aligned even when a gesture has more
# than one test recording.
labels_test = [result.label for result in test_data for _ in result.results]
labels_test

['J', 'Z']

In [139]:
# One feature vector per test sequence, built from the frames in which
# landmarks were detected (frames with `multi_hand_landmarks` of None are dropped).
prediction_material = [
    mp_landmarks_to_dynamic_prediction_ready(
        [mp_result.multi_hand_landmarks
         for mp_result in sequence
         if mp_result.multi_hand_landmarks is not None]
    )
    for dynamic_gesture in test_data
    for sequence in dynamic_gesture.results
]

print("Logistis predictions: ", model_logistic.predict(prediction_material))
print("Support vector machine: ", model_svm.predict(prediction_material))

Logistis predictions:  ['J' 'Z']
Support vector machine:  ['J' 'Z']


##### Metrics for Dynamic Gesture recogniser

In [142]:
from sklearn.metrics import classification_report
# classification_report takes (y_true, y_pred): ground-truth labels first,
# predictions second. With the arguments swapped, precision and recall are
# reported against the wrong reference.
cr = classification_report(labels_test, model_svm.predict(prediction_material))
print(cr)

              precision    recall  f1-score   support

           J       1.00      1.00      1.00         1
           Z       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

