In [8]:
import os
import cv2
import json
from multiprocessing import Pool
import multiprocessing

In [9]:
# Load the annotation file. The encoding is given explicitly because JSON is
# UTF-8 by specification, while open() would otherwise fall back to the
# platform's locale encoding (e.g. a legacy code page on Windows).
with open('WLASL_v0.3_filtered.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Directory holding the "<video_id>.mp4" files. It can be overridden with the
# WLASL_VIDEO_DIR environment variable so the notebook runs on other machines;
# the default is the original hard-coded location.
video_dir = os.environ.get('WLASL_VIDEO_DIR', 'C:/Users/DELL/null_class/sign_langu/videos')

def extract_frames(video_info):
    """Decode and crop the frames of one video instance.

    Args:
        video_info: 6-tuple ``(video_id, video_path, bbox, start_frame,
            end_frame, fps)``. ``bbox`` is unpacked as ``(x, y, w, h)``.
            ``end_frame == -1`` means "read until the video ends". ``fps`` is
            accepted so the tuple layout stays unchanged, but it is not used.

    Returns:
        ``(video_id, frames)`` where ``frames`` is a list of cropped frames in
        decode order. The list is empty when the video cannot be opened or
        yields no frames; crops with zero area are dropped.
    """
    video_id, video_path, bbox, start_frame, end_frame, _fps = video_info
    # The crop window is the same for every frame, so unpack it once.
    # NOTE(review): the public WLASL README describes bbox as
    # (xmin, ymin, xmax, ymax) and frame_start as 1-based - confirm that the
    # filtered JSON really uses (x, y, w, h) and 0-based indices as assumed here.
    x, y, w, h = bbox
    frames = []

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return video_id, frames

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        current_frame = start_frame
        while end_frame == -1 or current_frame <= end_frame:
            ret, frame = cap.read()
            if not ret:
                break
            crop = frame[y:y+h, x:x+w]
            # A window outside the image produces an empty array that would
            # make a later cv2.resize fail, so it is skipped here.
            if crop.size:
                # Copy so the list holds only the crop and the full decoded
                # frame can be freed (a slice is a view onto the whole frame).
                frames.append(crop.copy())
            current_frame += 1
    finally:
        # Always free the decoder, even if a read or crop raises.
        cap.release()

    return video_id, frames

# Flatten the annotations into one work item per video instance, laid out as
# (video_id, video_path, bbox, start_frame, end_frame, fps) - the tuple shape
# that extract_frames unpacks.
video_infos = []
for entry in data:
    video_infos.extend(
        (
            inst['video_id'],
            os.path.join(video_dir, f"{inst['video_id']}.mp4"),
            inst['bbox'],
            inst['frame_start'],
            inst['frame_end'],
            inst['fps'],
        )
        for inst in entry['instances']
    )

def process_results(result):
    """Record one ``(video_id, frames)`` pair in the module-level ``video_frames`` dict.

    An existing entry for the same video id is overwritten.
    """
    key, clip = result
    video_frames.update({key: clip})

if __name__ == '__main__':
    # Imported here so this cell brings its own dependency into scope.
    from multiprocessing.pool import ThreadPool

    video_frames = {}
    # Threads are used instead of worker processes for two reasons:
    #  * a process pool must be able to import the worker function from
    #    __main__, which does not work for functions defined in a notebook
    #    cell when processes are spawned (the default on Windows);
    #  * every decoded frame would otherwise be pickled back to the parent.
    # NOTE(review): this relies on OpenCV releasing the GIL while decoding -
    # confirm that throughput is acceptable on the target machine.
    with ThreadPool(multiprocessing.cpu_count()) as pool:
        for result in pool.imap_unordered(extract_frames, video_infos):
            process_results(result)


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

# VGG16 with ImageNet weights and without its classification head; it is the
# per-frame feature extractor called by extract_features.
vgg_model = VGG16(include_top=False, weights='imagenet')

def extract_features(frames):
    """Compute ``vgg_model`` features for every frame of one clip.

    Args:
        frames: iterable of image arrays in OpenCV's BGR channel order (the
            frames in this notebook come from ``cv2.VideoCapture``). ``None``
            entries and zero-area arrays are skipped.

    Returns:
        ``np.ndarray`` with one entry per usable frame along axis 0. Axis 1
        has length 1, which keeps the layout produced by the previous
        one-frame-at-a-time implementation; the remaining axes are whatever
        ``vgg_model`` outputs. With no usable frame the result is an empty
        array of shape ``(0,)``.
    """
    batch = []
    for frame in frames:
        if frame is None or frame.size == 0:
            # cv2.resize raises on empty input, so skip unusable frames.
            continue
        frame = cv2.resize(frame, (224, 224))
        # preprocess_input for VGG16 expects RGB and converts to BGR itself,
        # so a BGR frame from OpenCV has to be turned into RGB first.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        batch.append(frame)

    if not batch:
        return np.array([])

    # Cast to a fresh float array so preprocess_input works on our own buffer.
    batch = preprocess_input(np.stack(batch).astype(np.float32))
    # One predict call for the whole clip instead of one call per frame.
    features = vgg_model.predict(batch, verbose=0)
    # Restore the singleton axis the per-frame implementation produced.
    return np.expand_dims(features, axis=1)

# Map every video id to the feature array extract_features returns for its frames.
video_features = {
    clip_id: extract_features(clip_frames)
    for clip_id, clip_frames in video_frames.items()
}

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Flatten

from sklearn.preprocessing import LabelEncoder

# --- Build one (feature sequence, label) pair per usable video instance -----
sequences = []
labels = []

for item in data:
    for instance in item['instances']:
        video_id = instance['video_id']
        # Prefer a label stored on the instance and fall back to the enclosing
        # entry.
        # NOTE(review): the public WLASL layout keeps 'gloss' on the entry,
        # not on the instance - confirm against the filtered file.
        gloss = instance.get('gloss', item.get('gloss'))
        features = video_features.get(video_id)
        # Skip instances without a label or without any extracted frame.
        if gloss is None or features is None or np.size(features) == 0:
            continue
        features = np.asarray(features, dtype=np.float32)
        # Average over every axis between the frame axis and the channel axis
        # so each frame becomes a single vector: an LSTM consumes
        # (timesteps, features), not a stack of feature maps.
        features = features.reshape(features.shape[0], -1, features.shape[-1]).mean(axis=1)
        sequences.append(features)
        labels.append(gloss)

if not sequences:
    raise ValueError('No usable video features found; cannot build a training set.')

# Clips differ in length, so they cannot be stacked directly. Zero-pad every
# sequence at the end up to the longest clip.
max_len = max(len(seq) for seq in sequences)
X_train = np.zeros((len(sequences), max_len, sequences[0].shape[1]), dtype=np.float32)
for row, seq in enumerate(sequences):
    X_train[row, :len(seq)] = seq
y_train = np.array(labels)

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# --- Sequence classifier ----------------------------------------------------
model = Sequential()
# input_shape is (timesteps, features_per_frame); the batch axis is implicit.
model.add(LSTM(256, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train_encoded, epochs=10, batch_size=32)