In [83]:
import cv2
import os

In [84]:
# Destination root for extracted frames (one sub-folder per video is created under it).
output_dir = "frames"
# Source folder that is scanned for video files.
video_dir = "videos"



In [85]:
# Extract every frame of each supported video in `video_dir` into
# `output_dir/<video name>/frame_NNNN.jpg`.
if not os.path.isdir(video_dir):
    # Fail early with the resolved path: the bare os.listdir() error only echoes
    # the relative name, which hides a wrong working directory.
    raise FileNotFoundError(
        f"Video directory not found: '{video_dir}' (resolved to '{os.path.abspath(video_dir)}')"
    )

os.makedirs(output_dir, exist_ok=True)

# Sorted so the processing order (and the log) is the same on every run.
for video_file in sorted(os.listdir(video_dir)):
    video_path = os.path.join(video_dir, video_file)
    if not video_file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
        print(f"Skipping unsupported file: {video_file}")
        continue

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_file}")
            continue

        # Create the per-video folder only once the video is known to be readable,
        # so unreadable files do not leave empty folders behind.
        video_name = os.path.splitext(video_file)[0]
        video_frames_dir = os.path.join(output_dir, video_name)
        os.makedirs(video_frames_dir, exist_ok=True)

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream (or decode failure)
            frame_filename = os.path.join(video_frames_dir, f"frame_{frame_idx:04d}.jpg")
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(frame_filename, frame):
                print(f"Failed to write frame: {frame_filename}")
                break
            frame_idx += 1
    finally:
        # Always free the decoder handle, including on the `continue` above
        # and on errors or interrupts.
        cap.release()

  




FileNotFoundError: [WinError 3] The system cannot find the path specified: 'videos'

In [None]:
import os
import numpy as np
import cv2

def load_clips(data_dir, clip_length=16, frame_size=(64,64)):
    """Load frame folders from disk and cut them into fixed-length clips.

    Expected layout: ``data_dir/<class>/<video>/<frame images>``. If a video
    folder contains sub-folders, the frames are read from the first one in
    sorted order instead.

    Args:
        data_dir: Root directory whose sub-directories are the classes.
        clip_length: Number of consecutive frames per clip (must be >= 1).
        frame_size: Target size passed to ``cv2.resize`` as ``(width, height)``.

    Returns:
        ``(clips, labels)`` as NumPy arrays. Each clip stacks ``clip_length``
        resized frames. ``labels[i]`` is the index of the clip's class in the
        sorted list of class directories. Trailing frames that do not fill a
        whole clip are dropped.

    Raises:
        ValueError: If ``clip_length`` is smaller than 1.
    """
    if clip_length < 1:
        raise ValueError(f"clip_length must be a positive integer, got {clip_length}")

    clips = []
    labels = []

    # Only directories are classes. Filtering before enumerate() keeps the label
    # indices contiguous even when stray files sit next to the class folders.
    class_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(data_dir, class_name)

        # Each sub-directory of a class holds the frames of one video.
        # Sorted so the clip order is reproducible across runs and platforms.
        for video_folder in sorted(os.listdir(class_dir)):
            video_path = os.path.join(class_dir, video_folder)
            if not os.path.isdir(video_path):
                continue

            # Some videos keep their frames one level deeper; use the first
            # nested folder in sorted order so the choice is deterministic.
            nested_dirs = sorted(
                d for d in os.listdir(video_path)
                if os.path.isdir(os.path.join(video_path, d))
            )
            if nested_dirs:
                video_path = os.path.join(video_path, nested_dirs[0])

            video_frames = []
            for frame_file in sorted(os.listdir(video_path)):  # keep temporal order
                frame = cv2.imread(os.path.join(video_path, frame_file))
                if frame is None:  # not a readable image
                    continue
                video_frames.append(cv2.resize(frame, frame_size))

            # Non-overlapping windows of clip_length frames.
            for start in range(0, len(video_frames) - clip_length + 1, clip_length):
                clips.append(np.array(video_frames[start:start + clip_length]))
                labels.append(label)

    return np.array(clips), np.array(labels)

# Example usage: read the whole dataset and report what came back.
data_dir = "video_to_frames"  # Root directory containing classXXX folders
X_data, y_data = load_clips(data_dir)

print("Number of clips:", len(X_data))
# Index the first clip only when there is one, otherwise print a placeholder.
print("Clip shape:", "No clips loaded" if len(X_data) == 0 else X_data[0].shape)
print("Labels shape:", y_data.shape)


Number of clips: 2485
Clip shape: (16, 64, 64, 3)
Labels shape: (2485,)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout

# Build the 3D CNN one layer at a time.
# The output width is the number of entries found directly under data_dir.
class_names = os.listdir(data_dir)
class_names.sort()

model = Sequential()
# Two Conv3D + pooling stages over (frames, height, width, channels) clips
model.add(Conv3D(32, (3, 3, 3), activation='relu', input_shape=(16, 64, 64, 3)))
model.add(MaxPooling3D((2, 2, 2)))
model.add(Conv3D(64, (3, 3, 3), activation='relu'))
model.add(MaxPooling3D((2, 2, 2)))
# Classifier head
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(class_names), activation='softmax'))  # Output layer

# Integer labels are used with this model, hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview
model.summary()

In [None]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

data_dir = "video_to_frames"  # Root directory containing classXXX folders

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable (the splits below are already pinned by random_state).
tf.keras.utils.set_random_seed(100)

x, y = load_clips(data_dir)
if len(x) == 0:
    # Without this guard the failure would surface later as an obscure
    # shape or split error.
    raise ValueError(f"No clips were loaded from '{data_dir}'; nothing to train on.")

# Convert once to float32 and scale pixels to [0, 1] in place. Normalising
# before the split avoids holding several full-size copies of the dataset.
x = x.astype(np.float32)
x /= 255.0

# 80% train, 10% validation, 10% test
x_train, x_remaining, y_train, y_remaining = train_test_split(x, y, test_size=0.2, random_state=100)
x_val, x_test, y_val, y_test = train_test_split(x_remaining, y_remaining, test_size=0.5, random_state=100)

# Convert labels to one-hot encoding.
# The class count is derived from the labels so it cannot drift from the dataset.
num_classes = int(y.max()) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Clip geometry comes from the loaded data: (frames, height, width, channels)
depth, height, width, channels = x.shape[1:]

# Define the 3D CNN (replaces any `model` defined in an earlier cell)
model = tf.keras.Sequential([
    tf.keras.layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=(depth, height, width, channels)),
    tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2)),
    tf.keras.layers.Conv3D(64, kernel_size=(3, 3, 3), activation='relu'),
    tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model (one-hot labels -> categorical cross-entropy)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=10,  # Adjust based on your needs
    batch_size=8,  # Adjust based on available GPU memory
    verbose=1
)

# Evaluate the model on the held-out test split
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")



AttributeError: module 'ml_dtypes' has no attribute 'float8_e3m4'
Epoch 1/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 221ms/step - accuracy: 0.0564 - loss: 3.6550 - val_accuracy: 0.2460 - val_loss: 2.6012
Epoch 2/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 174ms/step - accuracy: 0.2816 - loss: 2.5434 - val_accuracy: 0.5927 - val_loss: 1.4530
Epoch 3/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 177ms/step - accuracy: 0.5106 - loss: 1.6677 - val_accuracy: 0.6492 - val_loss: 1.1905
Epoch 4/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 172ms/step - accuracy: 0.5969 - loss: 1.3127 - val_accuracy: 0.7177 - val_loss: 0.8504
Epoch 5/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 189ms/step - accuracy: 0.6983 - loss: 0.9939 - val_accuracy: 0.7742 - val_loss: 0.6929
Epoch 6/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 192ms/step - accuracy: 0.7469 -

In [None]:
# Persist the trained network as an HDF5 file, then read it straight back
# to confirm that the saved file can be loaded.
model.save(filepath="sign_language_model.h5")
loaded_model = tf.keras.models.load_model(filepath="sign_language_model.h5")




In [None]:
import cv2
import os
import numpy as np
import tensorflow as tf
from collections import deque

# Load the trained model
model = tf.keras.models.load_model("sign_language_model.h5")

# Parameters
depth = 16  # Number of frames per clip
height = 64
width = 64
channels = 3
frames_dir = "webcam_frames"  # Directory to store captured frames
os.makedirs(frames_dir, exist_ok=True)

# Remove frames written by an earlier run. Numbering restarts at 0 each time,
# so a shorter capture would otherwise leave stale frames that the later
# prediction and annotation steps pick up from the same folder.
for stale_name in os.listdir(frames_dir):
    if stale_name.startswith("frame_") and stale_name.endswith(".jpg"):
        os.remove(os.path.join(frames_dir, stale_name))

# One label per model output unit, so every argmax index has a label.
# NOTE(review): labels are generated as class000, class001, ... - confirm this
# matches the dataset's folder names for each training label index.
class_labels = {idx: f"class{str(idx).zfill(3)}" for idx in range(model.output_shape[-1])}

# Step 1: Capture video and save frames
print("Press 'q' to stop capturing frames...")
cap = cv2.VideoCapture(0)
frame_count = 0
frames_queue = deque(maxlen=depth)  # not used by this capture step; kept for later cells

try:
    if not cap.isOpened():
        print("Error: could not open webcam (device 0).")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Save frame to the specified directory
            frame_path = os.path.join(frames_dir, f"frame_{frame_count:04d}.jpg")
            cv2.imwrite(frame_path, frame)
            frame_count += 1

            # Display the webcam feed
            cv2.imshow("Webcam Feed", frame)

            # Exit capturing loop on pressing 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Runs on KeyboardInterrupt / kernel interrupt too, so the camera and the
    # preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()
print(f"Captured {frame_count} frames to '{frames_dir}'.")

# Step 2: Load frames for prediction
def load_frames_for_prediction(frames_dir, frame_size=(width, height)):
    """
    Load frames from the directory and preprocess them for model prediction.

    Args:
        frames_dir: Directory whose files are read in sorted filename order.
        frame_size: Target size handed to ``cv2.resize``, which expects
            ``(width, height)``. The default is built from the module-level
            ``width`` and ``height`` at definition time.

    Returns:
        NumPy array of the readable frames, resized and scaled to [0, 1] as
        float32. Files that ``cv2.imread`` cannot decode are skipped.
    """
    frames = []
    for frame_file in sorted(os.listdir(frames_dir)):  # Ensure frames are in correct order
        frame = cv2.imread(os.path.join(frames_dir, frame_file))
        if frame is None:
            continue
        resized = cv2.resize(frame, frame_size)
        # float32 halves the memory of the default float64 result of `/ 255.0`.
        frames.append(resized.astype(np.float32) / 255.0)
    return np.array(frames)

# Load frames and group them into clips of 'depth' size
frames = load_frames_for_prediction(frames_dir)
num_clips = len(frames) // depth
clips = [frames[i * depth:(i + 1) * depth] for i in range(num_clips)]

# Step 3: Predict signs from frames
predicted_labels = []
if clips:
    # One batched call instead of one predict() per clip.
    # Batch shape: (num_clips, depth, height, width, channels)
    batch_predictions = model.predict(np.stack(clips))
    for clip_idx, predictions in enumerate(batch_predictions):
        predicted_class = int(np.argmax(predictions))  # Get the class index
        predicted_label = class_labels[predicted_class]  # Get the corresponding label
        predicted_labels.append(predicted_label)
        print(f"Clip {clip_idx + 1}: Predicted - {predicted_label}")

# Step 4: Annotate frames with predictions
annotated_dir = "annotated_frames"
os.makedirs(annotated_dir, exist_ok=True)

# Remove annotated frames from an earlier run so they cannot end up in the video.
for stale_name in os.listdir(annotated_dir):
    if stale_name.startswith("frame_") and stale_name.endswith(".jpg"):
        os.remove(os.path.join(annotated_dir, stale_name))

frame_size = None  # (width, height) of the first annotated frame
# Only frames that belong to a complete, predicted clip are annotated.
frame_files = sorted(os.listdir(frames_dir))[:len(predicted_labels) * depth]
for i, frame_file in enumerate(frame_files):
    frame = cv2.imread(os.path.join(frames_dir, frame_file))
    if frame is None:
        # putText / imwrite would raise on None
        print(f"Skipping unreadable frame: {frame_file}")
        continue
    predicted_label = predicted_labels[i // depth]
    cv2.putText(frame, f"Predicted: {predicted_label}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    annotated_path = os.path.join(annotated_dir, f"frame_{i:04d}.jpg")
    cv2.imwrite(annotated_path, frame)
    if frame_size is None:
        frame_size = (frame.shape[1], frame.shape[0])

print(f"Annotated frames saved to '{annotated_dir}'.")

# Optional: Combine annotated frames into a video
output_video_path = "output_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4
fps = 30  # Set frames per second

if frame_size is None:
    # No frame was annotated, so there is no size to open the writer with.
    print("No annotated frames available; skipping video export.")
else:
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)
    try:
        for frame_file in sorted(os.listdir(annotated_dir)):
            frame = cv2.imread(os.path.join(annotated_dir, frame_file))
            if frame is not None:
                video_writer.write(frame)
    finally:
        # Release the writer even if reading or writing fails part-way.
        video_writer.release()
    print(f"Annotated video saved to '{output_video_path}'.")




Press 'q' to stop capturing frames...


KeyboardInterrupt: 

In [None]:
# Re-compile whichever `model` is currently bound in the kernel for
# one-hot (categorical) targets, tracking accuracy only.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)


In [88]:
from tensorflow.keras.metrics import Precision, Recall

# Re-compile so that precision and recall are reported alongside accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=[
        'accuracy',
        Precision(name='precision'),
        Recall(name='recall'),
    ],
)

# Fit on the training split, validating after every epoch
history = model.fit(
    x_train,
    y_train,
    batch_size=8,  # Adjust based on available GPU memory
    epochs=10,  # Adjust based on your needs
    validation_data=(x_val, y_val),
    verbose=1,
)

# Score the held-out test split; values come back as loss followed by the
# metrics in the order they were given to compile()
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(x_test, y_test, verbose=1)
print(
    f"Test Loss: {test_loss}",
    f"Test Accuracy: {test_accuracy}",
    f"Test Precision: {test_precision}",
    f"Test Recall: {test_recall}",
    sep="\n",
)


Epoch 1/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 337ms/step - accuracy: 0.8680 - loss: 0.3872 - precision: 0.9040 - recall: 0.8456 - val_accuracy: 0.8871 - val_loss: 0.3364 - val_precision: 0.9076 - val_recall: 0.8710
Epoch 2/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 272ms/step - accuracy: 0.8829 - loss: 0.3396 - precision: 0.9153 - recall: 0.8595 - val_accuracy: 0.9073 - val_loss: 0.3413 - val_precision: 0.9310 - val_recall: 0.8710
Epoch 3/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 282ms/step - accuracy: 0.9007 - loss: 0.3181 - precision: 0.9286 - recall: 0.8711 - val_accuracy: 0.8992 - val_loss: 0.3452 - val_precision: 0.9234 - val_recall: 0.8750
Epoch 4/10
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 273ms/step - accuracy: 0.9010 - loss: 0.3319 - precision: 0.9285 - recall: 0.8631 - val_accuracy: 0.8911 - val_loss: 0.3246 - val_precision: 0.8963 - val_recall: 0.8710
Epoch 5/10


In [None]:
import cv2
import os
import numpy as np
import tensorflow as tf
from collections import deque

# Load the trained model
model = tf.keras.models.load_model("sign_language_model.h5")
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Parameters
depth = 16  # Frames per clip
height, width, channels = 64, 64, 3
frames_queue = deque(maxlen=depth)  # sliding window of the most recent frames
# One label per model output unit, so every argmax index has a label.
class_labels = {idx: f"class{str(idx).zfill(3)}" for idx in range(model.output_shape[-1])}

# Start video capture
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame.")
            break

        # Preprocess frame: resize (cv2 takes (width, height)) and scale to [0, 1].
        # float32 matches the dtype the network was trained on.
        resized_frame = cv2.resize(frame, (width, height))
        normalized_frame = resized_frame.astype(np.float32) / 255.0
        frames_queue.append(normalized_frame)

        # Predict once the window holds a full clip
        if len(frames_queue) == depth:
            # (1, depth, height, width, channels)
            input_clip = np.expand_dims(np.array(frames_queue), axis=0)

            try:
                # verbose=0: a progress bar per frame would flood the cell output.
                predictions = model.predict(input_clip, verbose=0)
                predicted_class = int(np.argmax(predictions[0]))
                predicted_label = class_labels[predicted_class]

                # Display prediction
                cv2.putText(frame, f"Predicted: {predicted_label}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            except Exception as e:
                # Best effort: keep the preview running if one prediction fails.
                print(f"Prediction error: {e}")

        # Display the frame
        cv2.imshow("Sign Language Recognition", frame)

        # Quit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Runs on KeyboardInterrupt / kernel interrupt too, so the camera and the
    # preview window are always released.
    cap.release()
    cv2.destroyAllWindows()




KeyboardInterrupt: 