In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# 1. Load Data
with open('finger_sequences.json', 'r') as json_file:
    data = json.load(json_file)

# The JSON maps each gesture label (e.g. "CallHarisVai", "Noise", etc.) to a list of
# recordings; every recording is a list of [finger_id, time] pairs. The three lists
# below are kept parallel: entry i of each describes the same recording.
finger_sequences = []
time_sequences = []
labels = []

for gesture, sequences in data.items():
    for seq in sequences:
        # Parse pair by pair (finger ID as int, time as float), then split the columns
        steps = [(int(pair[0]), float(pair[1])) for pair in seq]
        finger_sequences.append([finger for finger, _ in steps])
        time_sequences.append([duration for _, duration in steps])
        labels.append(gesture)

# 2. Compute global mean duration for padding
# Pool every time value from every recording, then average once.
all_times = []
for seq_times in time_sequences:
    all_times.extend(seq_times)
mean_duration = np.mean(all_times)

# 3. Define a finger map for label-specific padding
# Keys must match the gesture labels found in the JSON file; labels missing from this
# map fall back to the padding function's default token.
finger_map = {
    "CallHarisVai": 2,
    "Noise": 1,
    "LikeMe": 3,
    "ComeClose": 8,
}

# 4. Custom padding function returning separate arrays for fingers and times
def custom_pad_sequences(finger_seqs, time_seqs, labels, maxlen, padding='post',
                         pad_finger_map=None, pad_time=None):
    """Pad or truncate paired finger/time sequences to exactly ``maxlen`` steps.

    Args:
        finger_seqs: List of finger-ID sequences (one list of ints per sample).
        time_seqs: List of time sequences, parallel to ``finger_seqs``.
        labels: One label per sample; used to look up the finger pad token.
        maxlen: Target length of every returned row.
        padding: ``'pre'`` pads at the front; any other value pads at the end.
            Sequences longer than ``maxlen`` always keep their first ``maxlen`` steps.
        pad_finger_map: Optional ``{label: pad_token}`` mapping. Defaults to the
            notebook-level ``finger_map``. Labels missing from the map pad with -1.
        pad_time: Optional value used to pad time sequences. Defaults to the
            notebook-level ``mean_duration``.

    Returns:
        Tuple ``(fingers, times)`` of arrays with shape ``(n_samples, maxlen)`` and
        dtypes ``int32`` / ``float32`` respectively.

    Raises:
        ValueError: If the three inputs differ in length, or a sample's finger and
            time sequences differ in length.

    NOTE: the finger pad token is chosen from the sample's label, so padded positions
    carry information about the target class. At inference time the true label is
    not known, so the caller has to pick one.
    """
    # Resolve the defaults lazily so the function also works outside the notebook
    # when both overrides are supplied.
    if pad_finger_map is None:
        pad_finger_map = finger_map
    if pad_time is None:
        pad_time = mean_duration

    if not (len(finger_seqs) == len(time_seqs) == len(labels)):
        raise ValueError(
            "finger_seqs, time_seqs and labels must have the same length, got "
            f"{len(finger_seqs)}, {len(time_seqs)} and {len(labels)}"
        )

    padded_fingers = []
    padded_times = []
    for index, (f_seq, t_seq, label) in enumerate(zip(finger_seqs, time_seqs, labels)):
        f_seq, t_seq = list(f_seq), list(t_seq)
        if len(f_seq) != len(t_seq):
            # Unequal lengths would yield ragged rows and an obscure NumPy error below.
            raise ValueError(
                f"sample {index}: finger sequence has {len(f_seq)} steps "
                f"but time sequence has {len(t_seq)}"
            )

        # Use label-specific pad value; default is -1
        pad_finger = pad_finger_map.get(label, -1)
        pad_count = maxlen - len(f_seq)
        if pad_count > 0:
            finger_fill = [pad_finger] * pad_count
            time_fill = [pad_time] * pad_count
            if padding == 'pre':
                f_seq, t_seq = finger_fill + f_seq, time_fill + t_seq
            else:  # 'post'
                f_seq, t_seq = f_seq + finger_fill, t_seq + time_fill
        else:
            f_seq, t_seq = f_seq[:maxlen], t_seq[:maxlen]

        padded_fingers.append(f_seq)
        padded_times.append(t_seq)

    # The reshape is a no-op for non-empty input; it keeps the (n, maxlen) shape
    # when there are zero samples instead of collapsing to shape (0,).
    n_samples = len(padded_fingers)
    return (
        np.array(padded_fingers, dtype=np.int32).reshape(n_samples, maxlen),
        np.array(padded_times, dtype=np.float32).reshape(n_samples, maxlen),
    )

# 5. Determine maximum sequence length and apply custom padding
max_len = max((len(seq) for seq in finger_sequences), default=0)
if max_len == 0:
    # Fail early with a clear message instead of an obscure error further down.
    raise ValueError("No non-empty gesture sequences were loaded; cannot build or train the model.")
padded_fingers, padded_times = custom_pad_sequences(finger_sequences, time_sequences, labels, maxlen=max_len)

# 6. Encode labels to one-hot vectors
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)
onehot_labels = tf.keras.utils.to_categorical(encoded_labels, num_classes=num_classes)

# 7. Build the TensorFlow Model
# Seed Python, NumPy and TensorFlow so weight initialisation and shuffling are repeatable.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

embedding_dim = 8
# The Embedding layer only accepts indices in [0, vocab_size). Padding injects extra
# tokens (label-specific values, or -1 for labels without a mapping), so the vocabulary
# must be sized from the PADDED array, not only from the raw finger IDs.
max_finger = max((max(seq) for seq in finger_sequences if seq), default=0)
if int(padded_fingers.min()) < 0:
    raise ValueError(
        "Padded finger sequences contain negative tokens, which are not valid Embedding "
        "indices. Make sure every gesture label has a non-negative pad value."
    )
vocab_size = max(max_finger, int(padded_fingers.max())) + 1

# Define inputs
finger_input = tf.keras.Input(shape=(max_len,), name='finger_input')
time_input = tf.keras.Input(shape=(max_len,), name='time_input')

# Note: Set mask_zero=False because our custom padding tokens are not zero.
x1 = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=False)(finger_input)
# Reshape time input to have a channel dimension
x2 = tf.keras.layers.Reshape((max_len, 1))(time_input)
# Concatenate the embedded finger data with the time data
x = tf.keras.layers.Concatenate(axis=-1)([x1, x2])

# Process the sequence with an LSTM
x = tf.keras.layers.LSTM(64)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
output = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=[finger_input, time_input], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# 8. Train the Model
# `validation_split` takes the LAST fraction of the arrays, before any shuffling. The
# samples were appended gesture by gesture, so without this shuffle the validation set
# would contain only the last-loaded gesture(s) and validation accuracy would be
# meaningless. Shuffle into new arrays so the originals keep their order.
shuffle_order = np.random.default_rng(SEED).permutation(len(labels))
train_fingers = padded_fingers[shuffle_order]
train_times = padded_times[shuffle_order]
train_targets = onehot_labels[shuffle_order]

history = model.fit(
    [train_fingers, train_times],
    train_targets,
    epochs=20,
    batch_size=32,
    validation_split=0.2
)



Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 483ms/step - accuracy: 0.0833 - loss: 1.3882 - val_accuracy: 0.1250 - val_loss: 1.3823
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.6042 - loss: 1.3754 - val_accuracy: 0.0000e+00 - val_loss: 1.3812
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - accuracy: 0.6562 - loss: 1.3636 - val_accuracy: 0.0000e+00 - val_loss: 1.3740
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.9062 - loss: 1.3464 - val_accuracy: 0.0000e+00 - val_loss: 1.3653
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.8854 - loss: 1.3262 - val_accuracy: 0.0000e+00 - val_loss: 1.3535
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.9062 - loss: 1.2982 - val_accuracy: 0.0000e+00 - val_loss: 1.3376
Epoch 7/20
[1m2/2[0m 

In [None]:
# 9. Prediction Example
# A new gesture recording, given as [finger_id, time] pairs
new_sequence = [
    ["1", 0.5],
    ["2", -0.3],
    ["3", 1.2]
]
# Split the pairs into a finger-ID column (ints) and a time column (floats)
new_finger_seq = [int(finger_id) for finger_id, _ in new_sequence]
new_time_seq = [float(duration) for _, duration in new_sequence]

# The padding function picks its finger pad token from a label, so a label has to be
# supplied even though the true gesture is unknown here; "CallHarisVai" is assumed.
# NOTE(review): the assumed label determines the pad tokens the model sees.
assumed_label = "CallHarisVai"
new_padded_fingers, new_padded_times = custom_pad_sequences(
    [new_finger_seq], [new_time_seq], [assumed_label], maxlen=max_len
)

# Run the single padded sample through the model and map the top class back to its name
prediction = model.predict([new_padded_fingers, new_padded_times])
best_class_index = np.argmax(prediction)
predicted_class = label_encoder.inverse_transform([best_class_index])
print("Predicted Gesture:", predicted_class[0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 659ms/step
Predicted Gesture: CallHarisVai
