In [1]:
import os
import cv2
import json
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.model_selection import train_test_split


In [2]:
# Location of the WLASL v0.3 annotation file.
json_path = r"D:\Samvad_Setu_final\datasets\WLASL\WLASL_v0.3.json"

# JSON text is UTF-8; naming the encoding keeps the read independent of the
# platform's locale code page (which is what open() falls back to otherwise).
with open(json_path, "r", encoding="utf-8") as f:
    wlasl_meta = json.load(f)

print("Total JSON entries:", len(wlasl_meta))


Total JSON entries: 2000


In [3]:
# Two lookup tables derived from the annotation list:
#   class_to_idx : gloss string -> integer class id (assigned in first-seen order)
#   video_labels : video id     -> {"subset": ..., "label": class id}
class_to_idx = {}
video_labels = {}

for entry in wlasl_meta:
    gloss = entry["gloss"]

    # The first sighting of a gloss claims the next free id; any later sighting
    # gets back the id that is already stored.
    class_id = class_to_idx.setdefault(gloss, len(class_to_idx))

    for instance in entry["instances"]:
        video_labels[instance["video_id"]] = {
            "subset": instance.get("subset", "unknown"),  # default if missing
            "label": class_id,
        }

# Next unused class id, i.e. the number of distinct glosses seen.
idx_counter = len(class_to_idx)

print("Total classes:", len(class_to_idx))
print("Total mapped videos:", len(video_labels))


Total classes: 2000
Total mapped videos: 21083


In [4]:
# Folder holding the WLASL clips as flat "<video_id>.mp4" files.
VIDEO_PATH = r"D:\Samvad_Setu_final\datasets\WLASL\videos"

def load_frames(path, max_frames=16, resize=(112,112)):
    """Sample ``max_frames`` evenly spaced RGB frames from a video file.

    Args:
        path: Path of the video, handed to ``cv2.VideoCapture``.
        max_frames: Number of frames to sample across the whole clip.
        resize: ``(width, height)`` passed to ``cv2.resize`` for every frame.

    Returns:
        ``np.ndarray`` stacking the sampled frames in temporal order
        (presumably ``(max_frames, height, width, 3)`` for colour video), or
        ``None`` when the clip reports no frames or any sampled frame cannot
        be read, so callers can skip the clip.
    """
    cap = cv2.VideoCapture(path)
    try:
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total <= 0:
            # Unopenable file or missing frame-count metadata: nothing to sample.
            return None

        # sample frames uniformly
        ids = np.linspace(0, total-1, max_frames).astype(int)

        frames = []
        for i in ids:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
            ret, frame = cap.read()
            if not ret:
                # One miss already means fewer than max_frames frames, which
                # is reported as None; stop seeking through the rest.
                return None
            frame = cv2.resize(frame, resize)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Release the handle on every path, including decode/resize errors.
        cap.release()

    return np.array(frames)


In [None]:
# Decode every labelled clip into memory, grouped by its annotated subset.
X_train, y_train = [], []
X_val, y_val = [], []
X_test, y_test = [], []

# Explicit routing table. Clips whose subset is anything else (including the
# "unknown" default used when the annotation has no subset) are skipped rather
# than silently landing in the test set.
split_buckets = {
    "train": (X_train, y_train),
    "val": (X_val, y_val),
    "test": (X_test, y_test),
}

all_videos = glob(os.path.join(VIDEO_PATH, "*.mp4"))
print("Total video files in folder:", len(all_videos))

for vid in all_videos:
    vidname = os.path.splitext(os.path.basename(vid))[0]  # "00335"

    if vidname not in video_labels:
        continue

    info = video_labels[vidname]
    subset = info["subset"]
    label = info["label"]

    bucket = split_buckets.get(subset)
    if bucket is None:
        continue  # unrecognised subset: do not contaminate the test split

    frames = load_frames(vid)
    if frames is None:
        continue

    bucket_frames, bucket_labels = bucket
    bucket_frames.append(frames)
    bucket_labels.append(label)


Total video files in folder: 9659


In [None]:
import os
from glob import glob


In [None]:
# Build (path, label) lists per split; frames are decoded later, on demand.
video_list_train = []
video_list_val = []
video_list_test = []

# Explicit routing table. Clips whose subset is anything else (including the
# "unknown" default used when the annotation has no subset) are counted and
# skipped rather than silently landing in the test list.
video_lists_by_subset = {
    "train": video_list_train,
    "val": video_list_val,
    "test": video_list_test,
}
skipped_unknown_subset = 0

all_videos = glob(os.path.join(VIDEO_PATH, "*.mp4"))
print("Total video files in folder:", len(all_videos))

for vid in all_videos:
    vidname = os.path.splitext(os.path.basename(vid))[0]

    if vidname not in video_labels:
        continue

    subset = video_labels[vidname]["subset"]
    label = video_labels[vidname]["label"]

    target_list = video_lists_by_subset.get(subset)
    if target_list is None:
        skipped_unknown_subset += 1
        continue
    target_list.append((vid, label))

print(
    "Train:", len(video_list_train),
    "Val:", len(video_list_val),
    "Test:", len(video_list_test)
)
if skipped_unknown_subset:
    print("Skipped (unrecognised subset):", skipped_unknown_subset)


In [None]:
import tensorflow as tf
import numpy as np
import cv2

class WLASLGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that decodes video clips from disk one batch at a time.

    Each item is ``(X, y)``: ``X`` stacks one float32 clip per video, scaled to
    ``[0, 1]``, and ``y`` holds the matching integer labels.

    Args:
        video_list: Iterable of ``(video_path, label)`` pairs.
        batch_size: Number of clips per batch.
        max_frames: Frames sampled (evenly spaced) from every clip.
        shuffle: Reshuffle the clip order at construction and after each epoch.
        frame_size: ``(width, height)`` every frame is resized to.
    """

    def __init__(self, video_list, batch_size=4, max_frames=16, shuffle=True,
                 frame_size=(112, 112)):
        # Newer Keras versions expect the base initialiser to run.
        super().__init__()
        # Private copy: shuffling must not reorder the caller's list.
        self.video_list = list(video_list)
        self.batch_size = batch_size
        self.max_frames = max_frames
        self.shuffle = shuffle
        self.frame_size = tuple(frame_size)
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        # Ceiling division so the trailing clips are not dropped, which matters
        # most when the generator is used for evaluation.
        return (len(self.video_list) + self.batch_size - 1) // self.batch_size

    def on_epoch_end(self):
        """Reshuffle the clip order when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.video_list)

    def load_video(self, path):
        """Return one clip as a float32 array scaled to ``[0, 1]``.

        Frames that fail to decode are replaced by repeating the last good
        frame at the end of the clip. A video with no readable frame yields an
        all-zero clip (and a printed warning) instead of raising.
        """
        width, height = self.frame_size
        frames = []
        cap = cv2.VideoCapture(path)
        try:
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            ids = np.linspace(0, max(total-1, 0), self.max_frames).astype(int)

            for i in ids:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
                ret, frame = cap.read()
                if ret:
                    frame = cv2.resize(frame, (width, height))
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
        finally:
            # Release the handle even if decoding or resizing raised.
            cap.release()

        if not frames:
            # Nothing decodable: there is no "last frame" to pad with.
            print("WLASLGenerator: no readable frames in", path)
            return np.zeros((self.max_frames, height, width, 3), dtype=np.float32)

        if len(frames) < self.max_frames:
            frames += [frames[-1]] * (self.max_frames - len(frames))

        # float32 halves the batch memory compared with the float64 a plain
        # division of the uint8 frames would produce.
        return np.array(frames, dtype=np.float32) / 255.0

    def __getitem__(self, idx):
        """Decode and return batch number ``idx`` as ``(X, y)`` arrays."""
        batch = self.video_list[idx*self.batch_size : (idx+1)*self.batch_size]

        X = []
        y = []

        for path, label in batch:
            X.append(self.load_video(path))
            y.append(label)

        return np.array(X), np.array(y)


In [None]:
# All three splits share the same batch geometry; only the test generator is
# told not to shuffle.
GENERATOR_OPTIONS = {"batch_size": 4, "max_frames": 16}

train_gen = WLASLGenerator(video_list_train, **GENERATOR_OPTIONS)
val_gen = WLASLGenerator(video_list_val, **GENERATOR_OPTIONS)
test_gen = WLASLGenerator(video_list_test, shuffle=False, **GENERATOR_OPTIONS)


In [None]:
# Train from the disk-streaming generators.
# NOTE(review): `model` is only built in a later cell, so on a fresh kernel
# this cell fails unless that cell has been run first.
streaming_fit_options = {"validation_data": val_gen, "epochs": 20}
history = model.fit(train_gen, **streaming_fit_options)


In [None]:
# The split containers are plain Python lists of uint8 frame stacks, and
# `list / float` raises TypeError. Stack each into a float32 array first, then
# scale pixel values to [0, 1].
X_train = np.asarray(X_train, dtype="float32") / 255.0
X_val   = np.asarray(X_val, dtype="float32") / 255.0
X_test  = np.asarray(X_test, dtype="float32") / 255.0


In [None]:
# 3D-CNN over clips of 16 RGB frames at 112x112, one softmax unit per gloss.
NUM_CLASSES = len(class_to_idx)

# Three conv -> batch-norm -> pool stages. The first pool leaves the time axis
# untouched (pool size 1 along frames); the later two halve every axis.
feature_extractor = [
    Conv3D(filters=32, kernel_size=(3,3,3), padding='same', activation='relu',
           input_shape=(16,112,112,3)),
    BatchNormalization(),
    MaxPooling3D(pool_size=(1,2,2)),

    Conv3D(filters=64, kernel_size=(3,3,3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2,2,2)),

    Conv3D(filters=128, kernel_size=(3,3,3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2,2,2)),
]

# Dense classifier on the flattened feature volume.
classifier_head = [
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=NUM_CLASSES, activation='softmax'),
]

model = Sequential(feature_extractor + classifier_head)

# Sparse loss: labels are integer class ids, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Labels are collected as plain Python lists of ints, which Keras does not
# reliably accept as targets; hand it integer arrays instead.
history = model.fit(
    X_train, np.asarray(y_train),
    validation_data=(X_val, np.asarray(y_val)),
    epochs=20,
    batch_size=8
)


In [None]:
def load_video_frames(video_path, max_frames=16, resize=(112,112)):
    """Sample up to ``max_frames`` evenly spaced RGB frames from a video file.

    Args:
        video_path: Path of the video, handed to ``cv2.VideoCapture``.
        max_frames: Number of frame positions sampled across the clip.
        resize: ``(width, height)`` passed to ``cv2.resize`` for every frame.

    Returns:
        ``np.ndarray`` stacking the frames that could be read, in temporal
        order. It may hold fewer than ``max_frames`` frames (zero for an
        unreadable file), so callers should check ``shape[0]``.
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # Unopenable file or missing frame count: return an empty clip
            # that still has the usual 4-D layout.
            return np.empty((0, resize[1], resize[0], 3), dtype=np.uint8)

        frame_ids = np.linspace(0, total_frames-1, max_frames).astype(int)

        for fid in frame_ids:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()
            if ret:
                frame = cv2.resize(frame, resize)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
    finally:
        # Release the handle on every path, including decode/resize errors.
        cap.release()

    return np.array(frames)


In [None]:
# Load a folder-per-class dataset: every sub-directory of DATASET_PATH is one
# class, and every .mp4 inside it is one sample of that class.
# NOTE(review): DATASET_PATH is not defined in any visible cell; it must be
# set before this cell runs.
videos = []
labels = []

# Keep directories only, so stray files in the dataset root neither count as
# classes nor leave unused indices in the label space.
label_folders = sorted(
    entry for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)

print("Total classes found:", len(label_folders))

for idx, label in enumerate(label_folders):
    folder_path = os.path.join(DATASET_PATH, label)

    for vid in glob(os.path.join(folder_path, "*.mp4")):
        frames = load_video_frames(vid)
        # Only complete 16-frame clips can be stacked into one array.
        if frames.shape[0] == 16:
            videos.append(frames)
            labels.append(idx)

videos = np.array(videos)
labels = np.array(labels)

print("Total videos loaded:", videos.shape)
print("Labels shape:", labels.shape)


In [None]:
import os

# Sanity check of the video folder: does it exist, what is in it, and how many
# video files sit anywhere beneath it?
root = r"D:\Samvad_Setu_final\datasets\WLASL\videos"

print("Exists:", os.path.exists(root))
print("\nItems inside root folder:")
items = os.listdir(root)
print(len(items))
print(items[:20])  # show first 20 items

# Count files with a video extension anywhere under root (case-insensitive).
VIDEO_SUFFIXES = ('.mp4', '.mov', '.avi', '.mkv')
count_videos = sum(
    1
    for _dirpath, _dirnames, filenames in os.walk(root)
    for filename in filenames
    if filename.lower().endswith(VIDEO_SUFFIXES)
)

print("\nTotal video files found:", count_videos)


In [None]:
# Scale pixels to [0, 1] as float32. Guarded on the raw uint8 dtype so that
# re-running this cell does not divide already-normalised data by 255 again.
if videos.dtype == np.uint8:
    videos = videos.astype("float32") / 255.0



In [None]:
# 80 / 10 / 10 split: hold out 20% first, then cut that hold-out in half.
SPLIT_SEED = 42

X_train, X_temp, y_train, y_temp = train_test_split(
    videos, labels, test_size=0.2, random_state=SPLIT_SEED
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=SPLIT_SEED
)

for split_name, split_array in (("Train:", X_train), ("Val:", X_val), ("Test:", X_test)):
    print(split_name, split_array.shape)


In [None]:
# 3D-CNN over clips of 16 RGB frames at 112x112, one softmax unit per entry of
# label_folders. Built layer by layer.
model = Sequential()

# Stage 1: spatial-only pooling keeps all 16 time steps.
model.add(Conv3D(filters=32, kernel_size=(3,3,3), padding='same',
                 activation='relu', input_shape=(16,112,112,3)))
model.add(BatchNormalization())
model.add(MaxPooling3D(pool_size=(1,2,2)))

# Stages 2 and 3: halve time and space together.
model.add(Conv3D(filters=64, kernel_size=(3,3,3), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling3D(pool_size=(2,2,2)))

model.add(Conv3D(filters=128, kernel_size=(3,3,3), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling3D(pool_size=(2,2,2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=len(label_folders), activation='softmax'))

# Sparse loss: labels are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


In [None]:
# Stop after 5 epochs without a validation-accuracy gain and roll back to the
# best weights seen.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_accuracy',
)

# Halve the learning rate after 2 epochs without a validation-loss gain.
lr_reduce = ReduceLROnPlateau(
    factor=0.5,
    patience=2,
    monitor='val_loss',
)


In [None]:
# Train on the in-memory split with early stopping and learning-rate decay.
training_callbacks = [early_stop, lr_reduce]

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=25,
    validation_data=(X_val, y_val),
    callbacks=training_callbacks,
)


In [None]:
# Score the held-out split; the model was compiled with one metric, so
# evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(x=X_test, y=y_test)
test_loss, test_acc = evaluation
print("Test Accuracy:", test_acc)


In [None]:
# Spot-check: classify one randomly chosen test clip and compare with its label.
idx = np.random.randint(0, len(X_test))
sample = X_test[idx]

# A one-element slice keeps the leading batch axis the model expects.
pred = model.predict(X_test[idx:idx + 1])
pred_class = np.argmax(pred)
true_class = y_test[idx]

print("Predicted Label:", label_folders[pred_class])
print("Actual Label:", label_folders[true_class])


In [None]:
# Show the first 8 frames of the sampled clip on a 2 x 4 grid.
GRID_ROWS, GRID_COLS = 2, 4

plt.figure(figsize=(15,4))
for slot in range(1, GRID_ROWS * GRID_COLS + 1):
    plt.subplot(GRID_ROWS, GRID_COLS, slot)
    plt.imshow(sample[slot - 1])
    plt.axis("off")
plt.show()


In [None]:
# Persist the trained network (HDF5 file, written to the working directory).
MODEL_OUTPUT_PATH = "wlasl_sign_model_3dcnn.h5"
model.save(MODEL_OUTPUT_PATH)
print("Model saved.")


In [None]:
import cv2

def capture_and_predict(num_frames=16, frame_size=(112, 112), camera_index=0):
    """Record a short clip from a camera and print the predicted sign.

    Grabs ``num_frames`` consecutive frames from the capture device, previews
    each one in a window, and feeds the resized RGB clip (scaled to [0, 1]) to
    the global ``model``. Pressing ``q`` in the preview window stops recording
    early.

    Args:
        num_frames: Number of frames the clip must contain before predicting.
        frame_size: ``(width, height)`` each frame is resized to.
        camera_index: Device index handed to ``cv2.VideoCapture``.

    Prints either ``Predicted Sign: <label>`` (label looked up in the global
    ``label_folders``) or ``Not enough frames captured.``.
    """
    cap = cv2.VideoCapture(camera_index)
    frames = []

    try:
        while len(frames) < num_frames:
            ret, frame = cap.read()
            if not ret:
                break
            f = cv2.resize(frame, frame_size)
            f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
            frames.append(f)
            # Preview the original (un-resized) frame while recording.
            cv2.imshow("Recording Frames...", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the preview window even if a frame
        # operation raised; otherwise the device stays locked.
        cap.release()
        cv2.destroyAllWindows()

    if len(frames) == num_frames:
        clip = np.array(frames).astype("float32")/255.0
        pred = model.predict(clip[np.newaxis,...])
        cls = np.argmax(pred)
        print("Predicted Sign:", label_folders[cls])
    else:
        print("Not enough frames captured.")

# Run one capture-and-classify pass (opens capture device 0 and a preview window).
capture_and_predict()
