In [None]:
import numpy as np
import glob
import os

def merge_npz_to_single_npy(data_dir='data', data_out='Combined_holistic_landmarks_data.npy', labels_out='Combined_holistic_landmarks_labels.npy'):
    npz_files = sorted(glob.glob(os.path.join(data_dir, "holistic_landmarks_*.npz")))

    all_data = []
    all_labels = []

    print(f"Found {len(npz_files)} files to process...\n")

    for i, file in enumerate(npz_files):
        try:
            npz = np.load(file, allow_pickle=True)
            data_batch = np.array(npz['X'], dtype=object)
            labels_batch = np.array(npz['y'])

            print(f"[{i+1}/{len(npz_files)}] Loaded {file} — data shape: {data_batch.shape}, label shape: {labels_batch.shape}")

            all_data.append(data_batch)
            all_labels.append(labels_batch)

        except Exception as e:
            print(f"❌ Error loading file {file}: {type(e).__name__}: {e}")
            break

    print("\n📦 Loaded all files (except failed ones).")
    for i, d in enumerate(all_data):
        print(f" - Batch {i}: shape={d.shape}, dtype={d.dtype}")

    if not all_data or not all_labels:
        print("No valid data found. Exiting.")
        return

    # Combine all batches
    try:
        full_data = np.concatenate(all_data, axis=0)
        full_labels = np.concatenate(all_labels, axis=0)
        print("🔗 Data concatenated successfully.")
    except Exception as e:
        print(f"Concatenation failed: {e}")
        return

    # Save once, in proper .npy format
    np.save(data_out, full_data)
    np.save(labels_out, full_labels)

    print("\n✅ Done saving combined files!")
    print(f"📐 Final Landmarks shape: {full_data.shape}")
    print(f"🏷️ Final Labels shape: {full_labels.shape}")

# Run it
merge_npz_to_single_npy()


Found 43 files to process...

[1/43] Loaded data/holistic_landmarks_Adjectives_1of8.npz — data shape: (5991,), label shape: (5991,)
[2/43] Loaded data/holistic_landmarks_Adjectives_2of8.npz — data shape: (6596,), label shape: (6596,)
[3/43] Loaded data/holistic_landmarks_Adjectives_3of8.npz — data shape: (6636,), label shape: (6636,)
[4/43] Loaded data/holistic_landmarks_Adjectives_4of8.npz — data shape: (5617,), label shape: (5617,)
[5/43] Loaded data/holistic_landmarks_Adjectives_5of8.npz — data shape: (6042,), label shape: (6042,)
[6/43] Loaded data/holistic_landmarks_Adjectives_6of8.npz — data shape: (5848,), label shape: (5848,)
[7/43] Loaded data/holistic_landmarks_Adjectives_7of8.npz — data shape: (5833,), label shape: (5833,)
[8/43] Loaded data/holistic_landmarks_Adjectives_8of8.npz — data shape: (3861,), label shape: (3861,)
[9/43] Loaded data/holistic_landmarks_Animals_1of2.npz — data shape: (7269,), label shape: (7269,)
[10/43] Loaded data/holistic_landmarks_Animals_2of2.npz

In [1]:
import numpy as np

# Replace with the exact filename if needed
filename = "data/holistic_landmarks_Home_2of4.npz"

try:
    npz = np.load(filename, allow_pickle=True)
    print("Loaded successfully.")
    print("Keys in file:", npz.files)
    
    X = npz['X']
    y = npz['y']
    print("X shape:", X.shape)
    print("y shape:", y.shape)

except Exception as e:
    print(f"❌ Failed to load {filename}")
    print(f"   {type(e).__name__}: {e}")


Loaded successfully.
Keys in file: ['X', 'y']
X shape: (3332,)
y shape: (3332,)


In [None]:
import numpy as np

def load_appended_npy(file_path):
    arrays = []
    with open(file_path, 'rb') as f:
        while True:
            try:
                arrays.append(np.load(f, allow_pickle=True))
            except (ValueError, EOFError):  # Catch both ValueError & EOFError
                break
    return np.concatenate(arrays, axis=0)

x = load_appended_npy('Combined_holistic_landmarks_data.npy')
y = load_appended_npy('Combined_holistic_landmarks_labels.npy')

np.save("final_landmarks.npy", x)
np.save("final_labels.npy", y)

print(f"Landmarks shape: {x.shape}")
print(f"Labels shape: {y.shape}")

In [15]:
import matplotlib.pyplot as plt
from collections import Counter

# Count the occurrences of each label
label_counts = Counter(y)

# Get the unique labels and their counts
labels, counts = zip(*label_counts.items())

# Plotting the label distribution
plt.figure(figsize=(10, 6))
plt.bar(labels, counts, color='skyblue')
plt.xlabel('Labels')
plt.ylabel('Count')
plt.title('Distribution of Labels in the Dataset')
plt.xticks(rotation=45)
plt.show()


# Alternatively, you can print the counts
print("Label Distribution:")
for label, count in label_counts.items():
    print(f'Label: {label}, Count: {count}')

TypeError: unhashable type: 'numpy.ndarray'

In [None]:
# Check for missing items in landmarks
missing_landmarks = []
for i, landmark in enumerate(x):
    if isinstance(landmark, np.ndarray):
        if np.any(np.isnan(landmark)):
            missing_landmarks.append(i)

# Check for missing labels (None or empty strings)
missing_labels = [i for i, label in enumerate(y) if label is None or label == '']

# Count missing items
num_missing_landmarks = len(missing_landmarks)
num_missing_labels = len(missing_labels)

# Print the results
print(f'Total missing landmarks: {num_missing_landmarks}')
print(f'Total missing labels: {num_missing_labels}')

# Optionally, find the indices of the missing items
print(f'Indices of missing landmarks: {missing_landmarks}')
print(f'Indices of missing labels: {missing_labels}')

## Data processing

In [None]:
# Checking the shape of each landmark in x to find inconsistencies
landmark_shapes = [np.array(landmark).shape for landmark in x if isinstance(landmark, list)]

# Identify the unique shapes
unique_shapes = set(landmark_shapes)
print(f"Unique shapes found: {unique_shapes}")

# Fixing inconsistent landmarks by padding or truncating
fixed_landmarks = []
for landmark in x:
    if isinstance(landmark, list):
        landmark_array = np.array(landmark)
        if landmark_array.shape[0] < 543:
            # Pad with zeros if less than 543
            padded = np.pad(landmark_array, ((0, 543 - landmark_array.shape[0]), (0, 0)), mode='constant')
            fixed_landmarks.append(padded)
        elif landmark_array.shape[0] > 543:
            # Truncate if greater than 543
            truncated = landmark_array[:543]
            fixed_landmarks.append(truncated)
        else:
            # Append as is if it has the correct shape
            fixed_landmarks.append(landmark_array)

# Convert the list of fixed landmarks to a numpy array
x_fixed = np.array(fixed_landmarks)
print(f"Fixed landmarks shape: {x_fixed.shape}")

In [None]:
from sklearn.preprocessing import StandardScaler
import joblib

# Normalize the data
scaler = StandardScaler()
landmarks_data_scaled = scaler.fit_transform(x_fixed.reshape(-1, x_fixed.shape[-1]))  # Flatten for scaling
landmarks_data_scaled = landmarks_data_scaled.reshape(x_fixed.shape)  # Reshape back to original
print(f'Scaled landmarks shape: {landmarks_data_scaled.shape}')

# Save the scaler to a file
joblib.dump(scaler, '30Labels_scaler_filename.pkl')  

In [None]:
import matplotlib.pyplot as plt

# Check for the sequences with the highest and lowest values
for i in range(5):
    plt.plot(landmarks_data_scaled[i].flatten())
    plt.title(f"Sample {i} - Sequence")
    plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(landmarks_data_scaled, y, test_size=0.2, random_state=42)

# Print the shapes of the split data
print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

## Artifically Creating Data using SMOTE and then Train and Test Split

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority classes using SMOTE (optional)
smote = SMOTE()
x_train_resampled, y_train_resampled = smote.fit_resample(x_train.reshape((x_train.shape[0], -1)), y_train)
x_train_resampled = x_train_resampled.reshape((x_train_resampled.shape[0], x_train.shape[1], x_train.shape[2]))

# Print the shapes of the split data
print(f"x_train shape: {x_train_resampled.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_train shape: {y_train_resampled.shape}")
print(f"y_test shape: {y_test.shape}")

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Count the occurrences of each label
label_counts = Counter(y_train_resampled)

# Get the unique labels and their counts
labels, counts = zip(*label_counts.items())

# Plotting the label distribution
plt.figure(figsize=(10, 6))
plt.bar(labels, counts, color='skyblue')
plt.xlabel('Labels')
plt.ylabel('Count')
plt.title('Distribution of Labels in the Dataset')
plt.xticks(rotation=45)
plt.show()

# Alternatively, you can print the counts
print("Label Distribution of Y Training:")
for label, count in label_counts.items():
    print(f'Label: {label}, Count: {count}')

In [None]:
from sklearn.preprocessing import LabelEncoder

# Initialize label encoder
label_encoder = LabelEncoder()

# Fit the label encoder on y_train (since y_train contains all possible labels)
y_train_encoded = label_encoder.fit_transform(y_train_resampled)
y_test_encoded = label_encoder.transform(y_test)  # Apply the same transformation to y_test

# Check the encoded labels
print(f"Encoded y_train: {y_train_encoded[:5]}")  # Print first 5 encoded labels for reference
print(f"Encoded y_test: {y_test_encoded[:5]}")

# Also, let's map the classes for reference
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print(f"Label Mapping: {label_mapping}")

In [None]:
from collections import Counter

# Check the distribution of the encoded labels
print(f"y_train_encoded distribution: {Counter(y_train_encoded)}")
print(f"y_test_encoded distribution: {Counter(y_test_encoded)}")

In [None]:
print(f"x_train shape: {x_train_resampled.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_train shape: {y_train_encoded.shape}")
print(f"y_test shape: {y_test_encoded.shape}")

In [None]:
num_classes = len(label_mapping)

## Model Training

In [None]:
from keras.models import Model
from keras.layers import (Input, Bidirectional, LSTM, BatchNormalization, Dropout, 
                          Attention, Flatten, Dense, LeakyReLU, Conv1D, MaxPooling1D)
from keras.optimizers import Adam
from keras.regularizers import l2
input_shape = (x_train_resampled.shape[1], x_train_resampled.shape[2])
input_layer = Input(shape=input_shape)

# First Bidirectional LSTM layer
lstm_out1 = Bidirectional(LSTM(256, return_sequences=True))(input_layer)
lstm_out1 = BatchNormalization()(lstm_out1)
lstm_out1 = Dropout(0.2)(lstm_out1)  # Reduce dropout

# Second Bidirectional LSTM layer
lstm_out2 = Bidirectional(LSTM(256, return_sequences=True))(lstm_out1)
lstm_out2 = BatchNormalization()(lstm_out2)
# Optionally, remove dropout here if needed

# Attention layer
attention_out = Attention()([lstm_out2, lstm_out2])

# Flatten the output of attention
flattened = Flatten()(attention_out)

# Dense layers with reduced L2 regularization
dense1 = Dense(128, kernel_regularizer=l2(0.001))(flattened)
dense1 = LeakyReLU()(dense1)
dense1 = BatchNormalization()(dense1)
dense1 = Dropout(0.3)(dense1)

dense2 = Dense(64, kernel_regularizer=l2(0.001))(dense1)
dense2 = LeakyReLU()(dense2)
dense2 = BatchNormalization()(dense2)
dense2 = Dropout(0.3)(dense2)

# Output layer for classification
output_layer = Dense(num_classes, activation='softmax')(dense2)

# Create and compile the model
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Train the model
history = model.fit(
    x_train_resampled,
    y_train_encoded,
    validation_data=(x_test, y_test_encoded),
    epochs=5,
    batch_size=64,
    callbacks = [early_stopping, reduce_lr]
)

if early_stopping.stopped_epoch > 0:
    print(f"Training stopped early at epoch: {early_stopping.stopped_epoch + 1}")
    print(f"Weights restored from epoch: {early_stopping.best_epoch + 1}")
else:
    print("Training completed without early stopping.")


In [None]:
# Extract training and validation accuracy from the history
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

# Extract training and validation loss from the history (optional)
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# Print the final training and validation accuracy in the desired format
print(f"Final Training Accuracy: {training_accuracy[-1]:.4f}")
print(f"Final Validation Accuracy: {validation_accuracy[-1]:.4f}")

In [None]:
# Extract training and validation accuracy from the history
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

# Extract training and validation loss from the history (optional)
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# Print the final training and validation accuracy in the desired format
print(f"Final Training Accuracy: {training_accuracy[-1]:.4f}")
print(f"Final Validation Accuracy: {validation_accuracy[-1]:.4f}")  

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Get predictions from the trained model (using the model, not history)
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class labels

# Calculate the confusion matrix
cm = confusion_matrix(y_test_encoded, y_pred_classes)

# Create a figure with a high resolution (e.g., 1080x1920 pixels)
plt.figure(figsize=(19.2, 10.8))  # 1920/100 and 1080/100 for a 100 DPI image

# Display confusion matrix with a larger axis
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.unique(y_train_encoded))
disp.plot(cmap=plt.cm.Blues, ax=plt.gca())  # Plot on the current axis with the desired size

# Customize the plot title
plt.title("30 Labels Test Confusion Matrix")

# Save the figure in high resolution (1080x1920)
plt.savefig("confusion_matrix_1080x1920.png", dpi=100, bbox_inches='tight')  # You can increase DPI for higher quality
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Get predictions
y_pred = model.predict(x_train_resampled)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class labels

# Calculate the confusion matrix
cm = confusion_matrix(y_train_encoded, y_pred_classes)

# Create a figure with a high resolution (e.g., 1080x1920 pixels)
plt.figure(figsize=(19.2, 10.8))  # 1920/100 and 1080/100 for a 100 DPI image

# Display confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.unique(y_train_encoded))
disp.plot(cmap=plt.cm.Blues, ax=plt.gca())  # Plot on the current larger axis

# Customize the plot
plt.title("30 Labels + LSTM Training Confusion Matrix")

# Save the figure in high resolution (1080x1920)
plt.savefig("Train confusion_matrix_1080x1920.png", dpi=100, bbox_inches='tight')  # You can increase DPI for higher quality
plt.show()


In [None]:
# Assuming validation_accuracy is a single numeric value
model_name = '30 `labels' + f"{validation_accuracy[-1]:.4f}"
model.save(f"Model_{model_name}.h5")  # Save with .h5 extension
print(f"Model {model_name} was saved.")

In [2]:
from tensorflow.keras.models import load_model

loaded_model = load_model('Model_30_Labels 0.9501.h5')

E0000 00:00:1744140890.641495    4032 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1744140890.645047    4032 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [4]:
loss, accuracy = loaded_model.evaluate(x_test, y_test_encoded, verbose=0)

print(f'Model Loss: {loss:.4f}')
print(f'Model Accuracy: {accuracy:.4f}')

NameError: name 'x_test' is not defined

In [3]:
import tensorflow as tf

#model = tf.keras.models.load_model(f"{model_name}.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)

# Fix for TensorListReserve / dynamic RNN ops
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS
]
converter._experimental_lower_tensor_list_ops = False

tflite_model = converter.convert()

with open("model.tflite", "wb") as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpdud0e29h/assets


INFO:tensorflow:Assets written to: /tmp/tmpdud0e29h/assets


Saved artifact at '/tmp/tmpdud0e29h'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 543, 3), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 25), dtype=tf.float32, name=None)
Captures:
  138870207184416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870207182304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870207172624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870207177728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870207178784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870207180896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870188759296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870188760000: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870188754720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870188757008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138870188610432: Te

W0000 00:00:1744140904.732972    4032 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1744140904.733024    4032 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-04-09 01:05:04.734312: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpdud0e29h
2025-04-09 01:05:04.740404: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-04-09 01:05:04.740436: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpdud0e29h
I0000 00:00:1744140904.786863    4032 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled
2025-04-09 01:05:04.789695: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-04-09 01:05:05.210574: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpdud0e29h
2025-04-09 01:05:05.248342: I tensorflow/cc/saved_model/loader.cc:471] SavedModel 

In [4]:
import joblib
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

NameError: name 'scaler' is not defined