In [3]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see, then report how many.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


Num GPUs Available:  0


Preprocessing the Data

In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

def load_data(data_dir):
    """Load one frame sequence per label directory found under ``data_dir``.

    Every sub-directory of ``data_dir`` is treated as a label. The ``.npy``
    files inside it are loaded in natural (numeric-aware) filename order and
    stacked into a single sequence for that label. Entries of ``data_dir``
    that are not directories, and label directories without ``.npy`` files,
    are skipped.

    Args:
        data_dir: Path of the directory whose sub-directories are named by label.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the per-label sequences
        and ``y`` holds the matching label names, in sorted label order.

    Raises:
        ValueError: If the frames of one label, or the sequences of different
            labels, do not share a single shape and therefore cannot be stacked.
    """
    import re  # local import: only the natural-sort key below needs it

    def _natural_key(name):
        # Digit runs compare as integers so "frame_10.npy" sorts after
        # "frame_2.npy"; the raw name is a tie-breaker that keeps the order
        # deterministic when two names differ only in zero padding.
        parts = re.split(r'(\d+)', name)
        return [int(part) if i % 2 else part for i, part in enumerate(parts)], name

    X = []
    y = []

    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the label folders are not labels; listing
            # them as directories would raise NotADirectoryError.
            continue

        frame_files = sorted(
            (name for name in os.listdir(label_dir) if name.endswith('.npy')),
            key=_natural_key,
        )
        frames = [np.load(os.path.join(label_dir, name)) for name in frame_files]

        if frames:
            if len({frame.shape for frame in frames}) > 1:
                raise ValueError(f"Frames in '{label_dir}' do not all have the same shape.")
            X.append(np.array(frames))
            y.append(label)

    # Fail with a clear message instead of building a ragged array.
    sequence_shapes = {sequence.shape for sequence in X}
    if len(sequence_shapes) > 1:
        raise ValueError(
            "Label sequences have different shapes "
            f"({sorted(sequence_shapes)}); pad or trim them to a common length."
        )

    return np.array(X), np.array(y)

# Load and preprocess the data
data_dir = 'data'
X, y = load_data(data_dir)

# Ensure there are enough samples to split
if len(X) < 2:
    raise ValueError("Not enough samples to split into training and testing sets.")

# NOTE(review): load_data returns one sequence per label directory, so each
# class contributes a single sample and any class placed in the test split is
# absent from the training split. Confirm this matches the intended data layout.

# Normalize the data
# Scale by the global maximum; skip the division when the maximum is 0 so that
# all-zero data does not turn into NaN.
max_value = np.max(X)
if max_value != 0:
    X = X / max_value

# Conv2D wrapped in TimeDistributed consumes 5-D input
# (samples, frames, height, width, channels); give 2-D frames a channel axis.
if X.ndim == 4:
    X = X[..., np.newaxis]

# Encode labels
lb = LabelBinarizer()
y = lb.fit_transform(y)
if len(lb.classes_) == 2 and y.shape[1] == 1:
    # With exactly two classes LabelBinarizer emits a single 0/1 column; a
    # softmax over one unit is constant, so expand to two one-hot columns
    # ordered like lb.classes_.
    y = np.hstack([1 - y, y])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Labels shape: {y_train.shape}")


Normalize, Encode, and Split the Data

In [None]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

def load_data(data_dir):
    """Load one frame sequence per label directory found under ``data_dir``.

    This cell re-defines ``load_data``; once it runs, it shadows the definition
    from the preprocessing cell earlier in the notebook.

    Every sub-directory of ``data_dir`` is treated as a label. The ``.npy``
    files inside it are loaded in natural (numeric-aware) filename order and
    stacked into a single sequence for that label. Entries of ``data_dir``
    that are not directories, and label directories without ``.npy`` files,
    are skipped.

    Args:
        data_dir: Path of the directory whose sub-directories are named by label.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the per-label sequences
        and ``y`` holds the matching label names, in sorted label order.

    Raises:
        ValueError: If the frames of one label, or the sequences of different
            labels, do not share a single shape and therefore cannot be stacked.
    """
    import re  # local import: only the natural-sort key below needs it

    def _natural_key(name):
        # Digit runs compare as integers so "frame_10.npy" sorts after
        # "frame_2.npy"; the raw name is a tie-breaker that keeps the order
        # deterministic when two names differ only in zero padding.
        parts = re.split(r'(\d+)', name)
        return [int(part) if i % 2 else part for i, part in enumerate(parts)], name

    X = []
    y = []

    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the label folders are not labels; listing
            # them as directories would raise NotADirectoryError.
            continue

        frame_files = sorted(
            (name for name in os.listdir(label_dir) if name.endswith('.npy')),
            key=_natural_key,
        )
        frames = [np.load(os.path.join(label_dir, name)) for name in frame_files]

        if frames:
            if len({frame.shape for frame in frames}) > 1:
                raise ValueError(f"Frames in '{label_dir}' do not all have the same shape.")
            X.append(np.array(frames))
            y.append(label)

    # Fail with a clear message instead of building a ragged array.
    sequence_shapes = {sequence.shape for sequence in X}
    if len(sequence_shapes) > 1:
        raise ValueError(
            "Label sequences have different shapes "
            f"({sorted(sequence_shapes)}); pad or trim them to a common length."
        )

    return np.array(X), np.array(y)

# Load and preprocess the data
# NOTE(review): this cell repeats the preprocessing cell earlier in the
# notebook and overwrites its results; consider keeping a single copy.
data_dir = 'data'
X, y = load_data(data_dir)

# Ensure there are enough samples to split
if len(X) < 2:
    raise ValueError("Not enough samples to split into training and testing sets.")

# NOTE(review): load_data returns one sequence per label directory, so each
# class contributes a single sample and any class placed in the test split is
# absent from the training split. Confirm this matches the intended data layout.

# Normalize the data
# Scale by the global maximum; skip the division when the maximum is 0 so that
# all-zero data does not turn into NaN.
max_value = np.max(X)
if max_value != 0:
    X = X / max_value

# Conv2D wrapped in TimeDistributed consumes 5-D input
# (samples, frames, height, width, channels); give 2-D frames a channel axis.
if X.ndim == 4:
    X = X[..., np.newaxis]

# Encode labels
lb = LabelBinarizer()
y = lb.fit_transform(y)
if len(lb.classes_) == 2 and y.shape[1] == 1:
    # With exactly two classes LabelBinarizer emits a single 0/1 column; a
    # softmax over one unit is constant, so expand to two one-hot columns
    # ordered like lb.classes_.
    y = np.hstack([1 - y, y])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Labels shape: {y_train.shape}")


Defining the Model

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, TimeDistributed, Flatten, LSTM, Dense, Dropout

# Define the model
# Per-frame channel count: read from the data when X_train carries a channel
# axis (samples, frames, height, width, channels); 4-D data is treated as
# single-channel, matching the previous hard-coded value of 1.
n_channels = X_train.shape[4] if X_train.ndim == 5 else 1

model = Sequential([
    # Three Conv2D + MaxPooling2D stages applied to every frame independently;
    # the first input_shape entry is None so the number of frames stays open.
    TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(None, X_train.shape[2], X_train.shape[3], n_channels)),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Conv2D(128, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D((2, 2))),
    # Flatten each frame's feature maps into one vector per time step.
    TimeDistributed(Flatten()),
    # The first LSTM returns the full sequence so the second LSTM can consume it;
    # the second returns only its final output.
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    LSTM(128),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    # One softmax output per column of the encoded labels.
    Dense(y_train.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()


Training the Model

In [None]:
# Train the model
# Fit on the training split; the held-out split is passed as validation data
# so its loss and accuracy are reported after every epoch.
fit_options = dict(
    epochs=50,
    batch_size=64,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)


Evaluating the Model

In [None]:
# Evaluate the model
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Accuracy: {accuracy * 100:.2f}%')


 Save the Model

In [None]:
# Write the trained model to an HDF5 file in the working directory.
model_path = 'complex_sign_language_model.h5'
model.save(model_path)
