In [6]:
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, LSTM, Dense, TimeDistributed, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split

# Preprocessing configuration
max_sequence_length = 15  # Frames (and label characters) kept per sample
img_size = (64, 64)  # Size every frame is resized to when loaded
output_frames_path = "output_frames"  # Directory scanned for per-video frame folders

# Character-level tokenizer: each character of the vocabulary string below
# gets its own integer index.
tokenizer = Tokenizer(char_level=True, lower=True, filters="")
tokenizer.fit_on_texts(["abcdefghijklmnopqrstuvwxyz' "])

# Load preprocessed data using generator
def data_generator(batch_size=16):
    """Yield endless batches of frame sequences and one-hot character labels.

    Every sub-directory of ``output_frames_path`` is treated as one video.
    Its ``.jpg`` files, taken in sorted filename order, are the frames; the
    part of the directory name before the first underscore, lower-cased, is
    the label text.

    Args:
        batch_size: Number of directory entries consumed per batch. A batch
            may contain fewer videos because non-directories and folders
            without ``.jpg`` frames are skipped, and the last slice of a pass
            can be short.

    Yields:
        Tuple ``(X, y)`` where ``X`` is a float32 array of shape
        ``(videos, max_sequence_length, img_size[0], img_size[1], 1)`` with
        pixel values divided by 255, and ``y`` is the one-hot encoding of the
        label characters with shape
        ``(videos, max_sequence_length, len(tokenizer.word_index) + 1)``.
        Both are padded/truncated at the end of the sequence axis.

    Raises:
        RuntimeError: If a complete pass over the directory produces no batch
            at all (nothing usable to train on).
    """
    video_folders = os.listdir(output_frames_path)
    # +1 because pad_sequences fills with index 0, which needs its own class.
    num_classes = len(tokenizer.word_index) + 1

    while True:
        np.random.shuffle(video_folders)
        yielded_any = False

        for start in range(0, len(video_folders), batch_size):
            X, y = [], []
            for video_folder in video_folders[start:start + batch_size]:
                folder_path = os.path.join(output_frames_path, video_folder)
                if not os.path.isdir(folder_path):
                    continue

                frames = [
                    img_to_array(
                        load_img(
                            os.path.join(folder_path, frame_file),
                            color_mode="grayscale",
                            target_size=img_size,
                        )
                    ) / 255.0  # Normalize
                    for frame_file in sorted(os.listdir(folder_path))
                    if frame_file.endswith(".jpg")
                ]
                if not frames:
                    continue  # Skip if no valid frames

                X.append(np.array(frames))
                y.append(video_folder.split("_")[0].lower())  # Label from folder name

            if not X:
                continue  # Skip empty batches

            X_padded = pad_sequences(X, maxlen=max_sequence_length, dtype="float32", padding="post", truncating="post")
            y_encoded = tokenizer.texts_to_sequences(y)
            y_padded = pad_sequences(y_encoded, maxlen=max_sequence_length, dtype="int32", padding="post", truncating="post")
            y_categorical = to_categorical(y_padded, num_classes=num_classes)

            # img_to_array already returns (H, W, 1) for grayscale images, so
            # the batch is normally 5-D here. Adding another axis
            # unconditionally produced a 6-D array that does not match the
            # model's declared input shape; only add the channel axis when it
            # is genuinely missing.
            if X_padded.ndim == 4:
                X_padded = np.expand_dims(X_padded, axis=-1)

            yielded_any = True
            yield X_padded, y_categorical

        if not yielded_any:
            # Without this guard the outer loop would spin forever and the
            # consumer's next() call would never return.
            raise RuntimeError(
                f"No video folders with .jpg frames found in {output_frames_path!r}"
            )

# Model Definition
def build_model():
    """Build and compile the per-frame CNN + bidirectional LSTM classifier.

    The network takes a sequence of ``max_sequence_length`` single-channel
    frames of size ``img_size``, applies the same small CNN to every frame,
    runs two bidirectional LSTM layers over the per-frame features, and emits
    a softmax over ``len(tokenizer.word_index) + 1`` classes for every time
    step.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy,
        Adam with learning rate 0.0005, and the accuracy metric.
    """
    # One class per tokenizer index plus one for index 0.
    num_classes = len(tokenizer.word_index) + 1

    model = Sequential([
        # Declare the input explicitly instead of passing input_shape= to the
        # first wrapped layer, which newer Keras versions warn about.
        keras.Input(shape=(max_sequence_length, img_size[0], img_size[1], 1)),

        TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(BatchNormalization()),

        TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(BatchNormalization()),

        TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same')),
        TimeDistributed(GlobalAveragePooling2D()),  # Replaces Flatten (lowers complexity)
        TimeDistributed(BatchNormalization()),

        # return_sequences=True keeps one output per frame for the
        # per-time-step classifier below.
        Bidirectional(LSTM(128, return_sequences=True, dropout=0.3)),
        Bidirectional(LSTM(128, return_sequences=True, dropout=0.3)),

        TimeDistributed(Dense(num_classes, activation="softmax"))
    ])

    model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.0005), metrics=["accuracy"])
    return model

# Model Training
batch_size = 16
train_gen = data_generator(batch_size)
model = build_model()
# data_generator never terminates, so steps_per_epoch is what bounds an epoch:
# 100 batches are drawn per epoch, for 20 epochs.
history = model.fit(train_gen, steps_per_epoch=100, epochs=20)

# Save model
model.save("optimized_lip_reading_model.h5")

# Evaluate Model Accuracy
# NOTE(review): this second generator reads the same output_frames_path
# folders the model was just trained on; no held-out split is made anywhere
# in this cell, so the number printed below is not a measure of
# generalization. train_test_split is imported but unused - TODO confirm
# whether a split was intended.
test_gen = data_generator(batch_size)
test_loss, test_acc = model.evaluate(test_gen, steps=10)
print(f"\n📌 **Test Accuracy:** {test_acc * 100:.2f}%")

# Predict Sample Words
def predict_samples(num_samples=5):
    """Print the labelled and the predicted character string for a few videos.

    Draws ``num_samples`` batches of one directory entry each from a fresh
    ``data_generator``, runs the global ``model`` on each, and prints the
    decoded ground truth next to the decoded prediction.

    Args:
        num_samples: How many samples to draw and print.
    """
    def _decode(per_step_scores):
        # Take the highest-scoring class index at every time step; indices
        # with no entry in tokenizer.index_word contribute an empty string.
        pieces = []
        for step_scores in per_step_scores:
            pieces.append(tokenizer.index_word.get(np.argmax(step_scores), ""))
        return "".join(pieces)

    sample_stream = data_generator(batch_size=1)  # One directory entry per batch
    shown = 0
    while shown < num_samples:
        X_test, y_test = next(sample_stream)
        y_pred = model.predict(X_test)

        # Convert predictions to text
        pred_word = _decode(y_pred[0])
        actual_word = _decode(y_test[0])

        print(f"✅ **Actual:** {actual_word}  |  🔥 **Predicted:** {pred_word}")
        shown += 1

# Print 5 sample predictions (each drawn from the output_frames_path folders
# via data_generator, using the model trained above)
predict_samples(5)


Epoch 1/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 78s 647ms/step - accuracy: 0.6334 - loss: 1.6801
Epoch 2/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 67s 667ms/step - accuracy: 0.7204 - loss: 0.8696
Epoch 3/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 68s 679ms/step - accuracy: 0.7226 - loss: 0.7908
Epoch 4/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 66s 655ms/step - accuracy: 0.7327 - loss: 0.7257
Epoch 5/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 66s 661ms/step - accuracy: 0.7339 - loss: 0.6897
Epoch 6/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 66s 662ms/step - accuracy: 0.7407 - loss: 0.6589
Epoch 7/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 66s 664ms/step - accuracy: 0.7454 - loss: 0.6440
Epoch 8/20
100/100 ━━━━━━━━━━━━━━━━━━━━ 67s 667ms/step - accuracy: 0.7533 - loss: 0.6262
Epoch 9/20
[1m1



10/10 ━━━━━━━━━━━━━━━━━━━━ 4s 241ms/step - accuracy: 0.7982 - loss: 0.5173

📌 **Test Accuracy:** 78.96%
1/1 ━━━━━━━━━━━━━━━━━━━━ 2s 2s/step
✅ **Actual:** pracn  |  🔥 **Predicted:** pbann
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step
✅ **Actual:** sgwqzp  |  🔥 **Predicted:** pbbbp
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step
✅ **Actual:** srihzn  |  🔥 **Predicted:** srinn
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step
✅ **Actual:** bgwia  |  🔥 **Predicted:** bbwma
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step
✅ **Actual:** bbbsn  |  🔥 **Predicted:** bbbnn


In [5]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer

# Preprocessing configuration (must match the values used for training)
max_sequence_length = 15  # Frames (and label characters) kept per sample
img_size = (64, 64)  # Size every frame is resized to when loaded
output_frames_path = "output_frames"  # Directory scanned for per-video frame folders

# Rebuild the character-level tokenizer from the same vocabulary string so the
# character-to-index mapping is the same as during training.
tokenizer = Tokenizer(char_level=True, lower=True, filters="")
tokenizer.fit_on_texts(["abcdefghijklmnopqrstuvwxyz' "])

# Function to load test data
def load_test_data():
    """Load up to 100 entries of ``output_frames_path`` as evaluation arrays.

    Every sub-directory among the first 100 directory entries is treated as
    one video: its ``.jpg`` files, in sorted filename order, are the frames,
    and the part of the directory name before the first underscore,
    lower-cased, is the label text. Non-directories and folders without any
    ``.jpg`` frame are skipped.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float32 array of shape
        ``(videos, max_sequence_length, img_size[0], img_size[1], 1)`` with
        pixel values divided by 255, and ``y`` is the one-hot encoding of the
        label characters with shape
        ``(videos, max_sequence_length, len(tokenizer.word_index) + 1)``.

    Raises:
        ValueError: If none of the inspected entries yields any frame.
    """
    X_test, y_test = [], []
    # os.listdir returns entries in arbitrary order, so this picks "some" 100
    # entries rather than a fixed, reproducible subset (adjust as needed).
    test_folders = os.listdir(output_frames_path)[:100]

    for video_folder in test_folders:
        folder_path = os.path.join(output_frames_path, video_folder)
        if not os.path.isdir(folder_path):
            continue

        frames = [
            img_to_array(
                load_img(
                    os.path.join(folder_path, frame_file),
                    color_mode="grayscale",
                    target_size=img_size,
                )
            ) / 255.0  # Normalize
            for frame_file in sorted(os.listdir(folder_path))
            if frame_file.endswith(".jpg")
        ]
        if not frames:
            # A folder without frames would otherwise become an all-zero
            # sample that still carries a real label.
            continue

        X_test.append(np.array(frames))
        y_test.append(video_folder.split("_")[0].lower())  # Label from folder name

    if not X_test:
        raise ValueError(
            f"No video folders with .jpg frames found in {output_frames_path!r}"
        )

    X_padded = pad_sequences(X_test, maxlen=max_sequence_length, dtype="float32", padding="post", truncating="post")
    y_encoded = tokenizer.texts_to_sequences(y_test)
    y_padded = pad_sequences(y_encoded, maxlen=max_sequence_length, dtype="int32", padding="post", truncating="post")

    # img_to_array already returns (H, W, 1) for grayscale images, so the
    # batch is normally 5-D here; only add the channel axis when it is
    # genuinely missing instead of producing a 6-D array.
    if X_padded.ndim == 4:
        X_padded = np.expand_dims(X_padded, axis=-1)

    # +1 because pad_sequences fills with index 0, which needs its own class.
    num_classes = len(tokenizer.word_index) + 1
    return X_padded, tf.keras.utils.to_categorical(y_padded, num_classes=num_classes)

# Load test data
# NOTE(review): load_test_data reads from output_frames_path, the same
# directory the training cell draws its batches from, so these samples are
# presumably part of the training data - confirm before reporting the result
# as a held-out test score.
X_test, y_test_categorical = load_test_data()

# Load trained model (the file written by model.save in the training cell)
model = tf.keras.models.load_model("optimized_lip_reading_model.h5")

# Evaluate model; with the model compiled with metrics=["accuracy"] this
# returns the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(X_test, y_test_categorical, verbose=0)

# Print accuracy as a percentage
print(f"\nTest Accuracy: {test_acc * 100:.2f}%")





Test Accuracy: 75.60%
