In [2]:
import numpy as np
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Paths
landmarks_dir = 'D:/code/Mini/pro v2/landmarks_output'  # Adjust if needed
adjectives = ['1. loud', '2. quiet', '3. happy', '4. sad', '5. Beautiful', '6. Ugly', '7. Deaf', '8. Blind']

# Load data
all_data = []
labels = []

for label_idx, adjective in enumerate(adjectives):
    adjective_path = os.path.join(landmarks_dir, adjective)
    if not os.path.exists(adjective_path):
        print(f"Skipping {adjective_path} - directory does not exist.")
        continue
    
    for landmark_file in os.listdir(adjective_path):
        if not landmark_file.endswith('.npy'):
            continue
        
        file_path = os.path.join(adjective_path, landmark_file)
        data = np.load(file_path)
        all_data.append(data)
        labels.append(label_idx)
        print(f"Loaded {file_path}: {data.shape}")

labels = np.array(labels)
print(f"Total videos loaded: {len(all_data)}")
print(f"Labels shape: {labels.shape}")

# Debug shapes
print(f"Number of videos in all_data: {len(all_data)}")
if len(all_data) > 0:
    print(f"Shape of first video in all_data: {all_data[0].shape}")
else:
    print("Error: all_data is empty. Check the landmarks_dir path and files.")
    exit()

# Update max_len
max_len = 80
padded_data = pad_sequences(all_data, maxlen=max_len, padding='post', truncating='post', dtype='float32')
print(f"Shape of padded_data after padding: {padded_data.shape}")
print(f"Total size of padded_data: {np.prod(padded_data.shape)}")

# Normalize the data
num_videos = padded_data.shape[0]
print(f"num_videos: {num_videos}")

padded_data_reshaped = padded_data.reshape(-1, 258)
print(f"Shape of padded_data_reshaped: {padded_data_reshaped.shape}")
print(f"Total size of padded_data_reshaped: {np.prod(padded_data_reshaped.shape)}")

mean = np.mean(padded_data_reshaped, axis=0)
std = np.std(padded_data_reshaped, axis=0)
std[std == 0] = 1  # Avoid division by zero
padded_data_reshaped = (padded_data_reshaped - mean) / std

# Reshape back
padded_data = padded_data_reshaped.reshape(num_videos, max_len, 258)
print(f"Shape of padded_data after reshaping: {padded_data.shape}")

# Data augmentation
augmented_data = []
augmented_labels = []

for i in range(len(padded_data)):
    original_sequence = padded_data[i]
    label = labels[i]
    
    # Add the original sequence
    augmented_data.append(original_sequence)
    augmented_labels.append(label)
    
    # Augmentation 1: Add noise
    noise = np.random.normal(0, 0.05, size=original_sequence.shape)
    augmented_sequence = original_sequence + noise
    augmented_data.append(augmented_sequence)
    augmented_labels.append(label)
    
    # Augmentation 2: Scale the coordinates (x, y, z)
    scale_factor = np.random.uniform(0.9, 1.1)  # Scale between 90% and 110%
    scaled_sequence = original_sequence * scale_factor
    augmented_data.append(scaled_sequence)
    augmented_labels.append(label)
    
    # Augmentation 3: Shift the coordinates (x, y, z)
    shift = np.random.uniform(-0.05, 0.05, size=3)  # Shift x, y, z
    # Create a shift array matching the 258 features
    shift_full = np.zeros(258)
    # Pose landmarks (132 features: 33 points × 4 [x, y, z, visibility])
    for i in range(33):
        shift_full[i*4:i*4+3] = shift  # Shift x, y, z
        shift_full[i*4+3] = 0  # No shift for visibility
    # Left hand landmarks (63 features: 21 points × 3 [x, y, z])
    for i in range(21):
        shift_full[132 + i*3:132 + i*3+3] = shift
    # Right hand landmarks (63 features: 21 points × 3 [x, y, z])
    for i in range(21):
        shift_full[195 + i*3:195 + i*3+3] = shift
    
    # Reshape shift to (1, 258) to broadcast across all frames
    shift_full = shift_full.reshape(1, 258)
    shifted_sequence = original_sequence + shift_full
    augmented_data.append(shifted_sequence)
    augmented_labels.append(label)

padded_data = np.array(augmented_data)
labels = np.array(augmented_labels)
print(f"Augmented data shape: {padded_data.shape}")
print(f"Augmented labels shape: {labels.shape}")

# Oversample underrepresented classes
target_count = 42
balanced_data = []
balanced_labels = []

for class_idx in range(8):
    class_indices = np.where(labels == class_idx)[0]
    class_data = padded_data[class_indices]
    class_labels = labels[class_indices]
    
    current_count = len(class_data)
    if current_count < target_count * 2:
        num_to_add = (target_count * 2) - current_count
        indices_to_add = np.random.choice(len(class_data), num_to_add, replace=True)
        additional_data = class_data[indices_to_add]
        additional_labels = class_labels[indices_to_add]
        class_data = np.concatenate([class_data, additional_data])
        class_labels = np.concatenate([class_labels, additional_labels])
    
    balanced_data.append(class_data)
    balanced_labels.append(class_labels)

padded_data = np.concatenate(balanced_data, axis=0)
labels = np.concatenate(balanced_labels, axis=0)
def moving_average(data, window_size=3):
    smoothed_data = np.copy(data)
    for i in range(data.shape[0]):  # Iterate over videos
        for j in range(data.shape[2]):  # Iterate over features
            # Apply moving average to each feature across the temporal dimension
            smoothed_data[i, :, j] = np.convolve(data[i, :, j], np.ones(window_size)/window_size, mode='same')
    return smoothed_data

padded_data = moving_average(padded_data, window_size=3)
print("Applied moving average smoothing to padded_data.")
# Shuffle the data
perm = np.random.permutation(len(padded_data))
padded_data = padded_data[perm]
labels = labels[perm]

print(f"Balanced data shape: {padded_data.shape}")
print(f"Balanced labels shape: {labels.shape}")

# Verify new class distribution
unique, counts = np.unique(labels, return_counts=True)
print("New class distribution:", dict(zip(unique, counts)))

# Print frame lengths
frame_lengths = [data.shape[0] for data in all_data]
print(f"Frame lengths: min={min(frame_lengths)}, max={max(frame_lengths)}, avg={np.mean(frame_lengths):.1f}")

Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5177 - Copy_landmarks.npy: (56, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5177_landmarks.npy: (56, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5178 - Copy_landmarks.npy: (64, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5178_landmarks.npy: (64, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5179 - Copy_landmarks.npy: (66, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5179_landmarks.npy: (66, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5257 - Copy_landmarks.npy: (52, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5257_landmarks.npy: (52, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5258 - Copy_landmarks.npy: (76, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5258_landmarks.npy: (76, 258)
Loaded D:/code/Mini/pro v2/landmarks_output\1. loud\MVI_5259 - Copy_landmarks.npy: (64, 258)
Loaded D:/co

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Input, BatchNormalization, Masking
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    padded_data, labels, test_size=0.2, random_state=42, stratify=labels
)

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Define a slightly deeper model
model = Sequential([
    Input(shape=(80, 258)),

    # Mask padded values
    Masking(mask_value=0.0),

    # Conv1D layers
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),  # Increased filters
    BatchNormalization(),
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    Dropout(0.5),

    # LSTM layers
    LSTM(64, return_sequences=True),  # Increased units
    Dropout(0.5),
    LSTM(32, return_sequences=False),
    Dropout(0.5),

    # Dense layers with regularization
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(16, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(8, activation='softmax')
])

# Compile with a slightly higher learning rate
optimizer = Adam(learning_rate=0.0002)  # Increased from 0.0001
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Print model summary
model.summary()

# Train with early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=16,
    callbacks=[early_stopping],
    class_weight=class_weight_dict,
    verbose=1
)

# Evaluate
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_accuracy:.4f}")

# Save the model
model.save('isl_model_v8.keras')
print("Model saved as 'isl_model_v8.keras'.")



Epoch 1/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - accuracy: 0.1666 - loss: 2.6064 - val_accuracy: 0.1302 - val_loss: 2.5678
Epoch 2/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1461 - loss: 2.5432 - val_accuracy: 0.1771 - val_loss: 2.5287
Epoch 3/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1615 - loss: 2.5211 - val_accuracy: 0.1875 - val_loss: 2.5085
Epoch 4/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.1922 - loss: 2.4924 - val_accuracy: 0.2135 - val_loss: 2.4667
Epoch 5/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1700 - loss: 2.4566 - val_accuracy: 0.2708 - val_loss: 2.4131
Epoch 6/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.2529 - loss: 2.4110 - val_accuracy: 0.2500 - val_loss: 2.3718
Epoch 7/100
[1m48/48[0m [