In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json 
import cv2
import os
import keras.utils

In [2]:
with open("dataset/WLASL_v0.3.json") as f:
    data = json.loads(f.read())

In [3]:
def get_video_ids(word):
    results = []
    for i in data:
        if(i['gloss'] == word):
            for j in i['instances']:
                results.append(j['video_id'])
    return results

In [4]:
words = np.array(["book","drink","computer","before","chair","go","clothes","who","candy","cousin","deaf","fine","help","no","thin"])
words_summary = []
for word in words:
    word_summary = [word,len(get_video_ids(word))]
    print(word_summary)

['book', 40]
['drink', 35]
['computer', 30]
['before', 26]
['chair', 26]
['go', 26]
['clothes', 25]
['who', 25]
['candy', 24]
['cousin', 23]
['deaf', 23]
['fine', 22]
['help', 22]
['no', 22]
['thin', 22]


In [5]:
def frames_count(videoPath):
    
    #Empty list to store the frames of the video
    frames = [] 
    
    #Reading the video 
    video_reader = cv2.VideoCapture(videoPath)
    
    #Store the total number of frames in the video
    count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    
    return count
    
    

In [6]:
video_dir = './dataset/videos'

# Data map to store the video ids of the videos of the words present in the dataset
data_mp = dict()
for word in words:
    data_mp[word] = []

# Store the min frames count
min_frames_count = 1000000

for word in words:

    # Get video ids for of all the videos of a word
    video_ids = get_video_ids(word)

    for video_id in video_ids:
        videoPath = f"{video_dir}/{video_id}.mp4"
        video_reader = cv2.VideoCapture(videoPath)
        count  = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        if (count!=0):
            data_mp[word].append(video_id)
            min_frames_count  = min(min_frames_count,count)

print("Minimum no. of frames: ",min_frames_count)


Minimum no. of frames:  24


In [8]:
SEQ_LENGTH = min_frames_count

def extract_frames(video_path):
    
    frames = []
    
    # open the video file
    cap = cv2.VideoCapture(video_path)
    
    # get the number of frames in the video
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Calculate the skip size
    skip_size = max(int(num_frames//SEQ_LENGTH),1)
    
    # loop through the frames of the video
    for i in range(SEQ_LENGTH):
        
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * skip_size)
        ret, frame = cap.read()
        if not ret:
            break
        
        #resizing the frames
        frame = cv2.resize(frame, (256, 256))
        
        #normalize the frame
        normalized_frame = frame/255
        
        frames.append(normalized_frame)
        
    # release the video file
    cap.release()
    
    return frames

In [9]:
training_data = [] # [frames_list1,frames_list2,frames_list3, .....] 
training_labels = [] #[word1, word1, word2, ....]

label_map = {label:num for num, label in enumerate(words)}

for word in data_mp:
    
    word_video_ids = data_mp[word]
    
    for vid_id in word_video_ids:
        # Create the video path
        video_path = f"{video_dir}/{vid_id}.mp4"
        
        # Extract frames of the video and store it in the list
        frames_list = extract_frames(video_path)
        
        if(len(frames_list) == SEQ_LENGTH):
            training_data.append(frames_list)
            training_labels.append(label_map[word])

training_data = np.asarray(training_data)
training_labels = np.asarray(training_labels)
        

24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 24
24 2

In [11]:
print(training_data)

[[[[[0.37254902 0.38039216 0.38823529]
    [0.37254902 0.38039216 0.38823529]
    [0.36862745 0.37647059 0.38431373]
    ...
    [0.40392157 0.38823529 0.4       ]
    [0.4        0.38431373 0.39607843]
    [0.4        0.38823529 0.39215686]]

   [[0.37254902 0.38039216 0.38823529]
    [0.36862745 0.37647059 0.38431373]
    [0.36862745 0.37647059 0.38431373]
    ...
    [0.41568627 0.38431373 0.4       ]
    [0.41960784 0.38823529 0.4       ]
    [0.40784314 0.38039216 0.38823529]]

   [[0.37254902 0.38039216 0.38823529]
    [0.36862745 0.37647059 0.38431373]
    [0.36862745 0.37647059 0.38431373]
    ...
    [0.41960784 0.38431373 0.4       ]
    [0.41568627 0.38039216 0.39607843]
    [0.41176471 0.38039216 0.38823529]]

   ...

   [[0.30196078 0.29019608 0.29411765]
    [0.29411765 0.29411765 0.29411765]
    [0.27843137 0.28627451 0.29411765]
    ...
    [0.41960784 0.39607843 0.40784314]
    [0.42352941 0.4        0.41176471]
    [0.42745098 0.40392157 0.41568627]]

   [[0.29411765 

In [19]:
import tensorflow as tf
from keras import utils as np_utils 

In [23]:
from keras.layers import TimeDistributed

In [20]:
one_hot_encoded_labels = tf.keras.utils.to_categorical(training_labels)

In [14]:
# np.save("training_data.npy",training_data)
# np.save("training_labels.npy",training_labels)

In [26]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, LSTM, MaxPooling2D,TimeDistributed,Dropout

In [27]:
model = Sequential()

model.add(TimeDistributed(Conv2D(16,(3,3),padding="same",activation="relu"),input_shape = (SEQ_LENGTH,256,256,3)))
model.add(TimeDistributed(MaxPooling2D((4,4))))
model.add(TimeDistributed(Dropout(0.25)))

model.add(TimeDistributed(Conv2D(32,(3,3),padding="same",activation="relu"),input_shape = (SEQ_LENGTH,256,256,3)))
model.add(TimeDistributed(MaxPooling2D((4,4))))
model.add(TimeDistributed(Dropout(0.25)))

model.add(TimeDistributed(Conv2D(64,(3,3),padding="same",activation="relu"),input_shape = (SEQ_LENGTH,256,256,3)))
model.add(TimeDistributed(MaxPooling2D((2,2))))
model.add(TimeDistributed(Dropout(0.25)))

model.add(TimeDistributed(Conv2D(64,(3,3),padding="same",activation="relu"),input_shape = (SEQ_LENGTH,256,256,3)))
model.add(TimeDistributed(MaxPooling2D((2,2))))

model.add(TimeDistributed(Flatten()))

model.add(LSTM(32))

model.add(Dense(15,activation="softmax"))

model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_2 (TimeDis  (None, 24, 256, 256, 16)  448      
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 24, 64, 64, 16)   0         
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, 24, 64, 64, 16)   0         
 tributed)                                                       
                                                                 
 time_distributed_5 (TimeDis  (None, 24, 64, 64, 32)   4640      
 tributed)                                                       
                                                                 
 time_distributed_6 (TimeDis  (None, 24, 16, 16, 32)  