In [None]:
# Fetch the `risangbaskoro/wlasl-processed` dataset archive with the Kaggle CLI.
# NOTE(review): presumably requires Kaggle API credentials to be configured in this runtime - confirm.
!kaggle datasets download -d risangbaskoro/wlasl-processed
# Unpack the archive into /content; later cells read /content/WLASL_v0.3.json and /content/videos/.
!unzip /content/wlasl-processed.zip -d /content

In [1]:
import json
# Parse the WLASL metadata file; later cells slice `data` and read each
# entry's 'gloss' and 'instances' fields.
with open('/content/WLASL_v0.3.json', 'r') as metadata_file:
    data = json.load(metadata_file)

In [None]:
!pip install mediapipe

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Alias for MediaPipe's holistic solution module; also used for POSE_CONNECTIONS.
mp_holistic = mp.solutions.holistic

# Single shared Holistic instance used for every frame of every video.
# static_image_mode=False treats the incoming frames as a video stream.
holistic_model = mp_holistic.Holistic(
    static_image_mode=False,
    min_detection_confidence=0.5,
)

def extract_landmarks_and_draw(video_path, max_frames=25, frame_size=(640, 640)):
    """Read a video and return its first frames with the pose skeleton drawn on them.

    Each frame is passed through the module-level ``holistic_model``. When pose
    landmarks are found, a red dot is drawn on every landmark and a green line
    on every ``mp_holistic.POSE_CONNECTIONS`` pair. Only ``results.pose_landmarks``
    is drawn. The (possibly annotated) BGR frame is then resized to ``frame_size``.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        max_frames: Maximum number of frames to read (default 25).
        frame_size: ``(width, height)`` handed to ``cv2.resize`` (default 640x640).

    Returns:
        ``None`` if the video cannot be opened or yields no frames; otherwise a
        one-element list ``[frames]`` where ``frames`` is the list of resized
        frames. The extra nesting lets callers ``extend`` a list of clips.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None

    annotated_frames = []
    try:
        while len(annotated_frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe is fed RGB, while the frame read by OpenCV is BGR.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic_model.process(image_rgb)

            if results.pose_landmarks:
                height, width = frame.shape[:2]
                # Landmark x/y are scaled by the frame width/height to get pixel positions.
                points = [
                    (int(landmark.x * width), int(landmark.y * height))
                    for landmark in results.pose_landmarks.landmark
                ]
                for point in points:
                    cv2.circle(frame, point, 5, (0, 0, 255), 5)
                for start_index, end_index in mp_holistic.POSE_CONNECTIONS:
                    cv2.line(frame, points[start_index], points[end_index], (0, 255, 0), 1)

            annotated_frames.append(cv2.resize(frame, frame_size))
    finally:
        # Always free the capture handle, even if decoding or inference raises.
        cap.release()

    # A video that opens but yields no frames would otherwise produce an empty
    # clip that cannot be padded or stacked later; report it like an unreadable file.
    if not annotated_frames:
        return None
    return [annotated_frames]

# Parallel accumulators: train_gloss[k] is the label of the clip train_instances[k].
train_gloss = []
train_instances = []
# Number of clips collected so far for each gloss.
class_instances_count = {}

# Only the glosses at positions 5 and 6 of the metadata are used.
for gloss_entry in data[5:7]:
    gloss = gloss_entry['gloss']
    class_instances_count.setdefault(gloss, 0)

    for instance in gloss_entry['instances']:
        # Keep at most 10 usable clips per gloss.
        if class_instances_count[gloss] >= 10:
            break

        clip = extract_landmarks_and_draw('/content/videos/' + instance['video_id'] + '.mp4')
        # None means the clip could not be used; skip it without counting it.
        if clip is not None:
            train_gloss.append(gloss)
            train_instances.extend(clip)
            class_instances_count[gloss] += 1


In [None]:
import matplotlib.pyplot as plt

# Render every frame of every collected clip in its own figure.
for video_idx, clip_frames in enumerate(train_instances):
    for frame_idx, bgr_frame in enumerate(clip_frames):
        plt.figure(figsize=(10, 10))
        # Frames come from OpenCV in BGR order; reverse the channel axis for matplotlib.
        rgb_frame = bgr_frame[..., ::-1]
        plt.imshow(rgb_frame)
        plt.title(f"Frame {frame_idx+1} of video {train_gloss[video_idx]}")
        plt.axis('off')
        plt.show()


In [12]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Augmented clips and their labels, kept index-aligned with each other.
augmented_vids = []
augmented_gloss = []

# Two augmenters: one random zoom, one random height shift.
Image_data_generator1 = ImageDataGenerator(zoom_range=0.3)
Image_data_generator2 = ImageDataGenerator(height_shift_range=0.3)

for index, vids in enumerate(train_instances):
    zoomed_frames, shifted_frames = [], []

    # random_transform is called once per frame, so every frame of a clip
    # draws its own transform rather than sharing one per clip.
    for frames in vids:
        zoomed_frames.append(Image_data_generator1.random_transform(frames))
        shifted_frames.append(Image_data_generator2.random_transform(frames))

    # Each source clip contributes two augmented clips carrying its label.
    augmented_vids.extend([zoomed_frames, shifted_frames])
    augmented_gloss.extend([train_gloss[index]] * 2)

# Final pool: original clips first, augmented clips after, labels in the same order.
train_data = train_instances + augmented_vids
train_labels = train_gloss + augmented_gloss

In [None]:
# Total number of clips in the training pool (original plus augmented).
len(train_data)

In [None]:
# Number of labels; should match len(train_data) since the two lists are built in parallel.
len(train_labels)

In [None]:
import tensorflow as tf

# One tensor per clip, built from that clip's list of frames.
train_vids = [tf.convert_to_tensor(clip) for clip in train_data]


In [None]:
# Number of per-clip tensors built from train_data.
len(train_vids)

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Map gloss strings to integer ids, then expand the ids into one-hot rows.
label_encoder = LabelEncoder()
label_encoder.fit(train_labels)
train_gloss_numerical = label_encoder.transform(train_labels)

# to_categorical infers the number of columns from the largest id present.
train_gloss_one_hot = to_categorical(train_gloss_numerical)


In [None]:
import numpy as np

# Every clip must have this many frames before the clips can be stacked.
num_frames = 25

# Take the per-frame shape from the data instead of hard-coding it: the frames
# are produced at 640x640, so padding with 224x224 zeros cannot be concatenated.
frame_shape = next(
    (np.shape(clip)[1:] for clip in train_data if len(clip) > 0),
    None,
)

padded_clips = []
for clip in train_data:
    frames = np.asarray(clip)
    # Give a clip with no frames the common frame shape so it can still be padded.
    if frames.shape[0] == 0 and frame_shape is not None:
        frames = frames.reshape((0,) + tuple(frame_shape))

    missing = num_frames - frames.shape[0]
    if missing > 0:
        # Append all missing zero frames in one concatenate instead of one
        # np.append per frame, which re-copies the clip each time.
        padding = np.zeros((missing,) + frames.shape[1:], dtype=frames.dtype)
        frames = np.concatenate([frames, padding], axis=0)
    padded_clips.append(frames)

# Stack to (clips, frames, height, width, channels) and scale pixels to [0, 1].
# float32 halves the memory of the default float64 result.
# NOTE: re-running this cell on its own rescales already-normalised data;
# re-run from the cell that builds train_data instead.
train_data = np.asarray(padded_clips, dtype=np.float32) / 255.0


In [None]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the clips for evaluation, shuffled with a fixed seed.
# NOTE(review): train_data holds each source clip together with its augmented
# copies, so a random split can put variants of one clip on both sides - confirm
# this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    train_data,
    train_gloss_one_hot,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)


In [None]:

batch_size = 1


def _make_dataset(features, labels):
    """Wrap (features, labels) in a shuffled tf.data pipeline batched by `batch_size`."""
    slices = tf.data.Dataset.from_tensor_slices((features, labels))
    return slices.shuffle(buffer_size=1024).batch(batch_size)


# Both splits go through the same shuffle-then-batch pipeline.
train_dataset = _make_dataset(X_train, y_train)
test_dataset = _make_dataset(X_test, y_test)


In [None]:
from tensorflow.keras import layers, Model

# Derive the tensor shapes from the prepared data rather than hard-coding them.
# The one-hot labels have one column per encoded gloss, and a fixed 10-unit
# output fails in categorical_crossentropy whenever that width is not 10.
input_shape = X_train.shape[1:]   # (frames, height, width, channels)
num_classes = y_train.shape[1]

inputs = layers.Input(shape=input_shape)

# Three Conv3D + MaxPooling3D stages with 32, 64 and 128 filters.
x = inputs
for filters in (32, 64, 128):
    x = layers.Conv3D(filters, (3, 3, 3), activation='relu')(x)
    x = layers.MaxPooling3D((2, 2, 2))(x)

# Classification head: flatten, one hidden layer with dropout, softmax over the glosses.
x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)
model = Model(inputs, outputs)

model.summary()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# The held-out split is used as validation data during training.
history = model.fit(train_dataset, epochs=50, validation_data=test_dataset)
