In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import cv2

# Tensorflow
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


# 1) Load Data #

In [38]:
# Main data path (relative to the notebook's working directory, not absolute)
main_path = '../data/'

# Read JSON file into a DataFrame with unprocessed instance col
wlas_df = pd.read_json(main_path + 'WLASL_v0.3.json')


In [39]:
def get_videos_ids(json_list):
    """
    Collect the ids of the instance videos that are present on disk.

    Input: instance json list (every entry provides a 'video_id' key)
    Output: list of the video ids for which '<main_path>videos/<video_id>.mp4' exists
    """
    candidate_ids = (ins['video_id'] for ins in json_list)
    return [
        candidate
        for candidate in candidate_ids
        if os.path.exists(f'{main_path}videos/{candidate}.mp4')
    ]


In [40]:
def get_json_features(json_list):
    """
    Collect the id and url of every instance whose video file is present on disk.

    input: instance json list (every entry provides 'video_id' and 'url' keys)
    output: tuple (videos_ids, videos_urls) of two equally long lists, restricted
            to the instances for which '<main_path>videos/<video_id>.mp4' exists
    """
    # Both keys are read for every instance before the file check is made
    id_url_pairs = ((ins['video_id'], ins['url']) for ins in json_list)
    available = [
        (vid, url)
        for vid, url in id_url_pairs
        if os.path.exists(f'{main_path}videos/{vid}.mp4')
    ]
    videos_ids = [vid for vid, _ in available]
    videos_urls = [url for _, url in available]
    return videos_ids, videos_urls


In [41]:
# Open JSON file (read only).
# The encoding is passed explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's default encoding.
with open(main_path+'WLASL_v0.3.json', 'r', encoding='utf-8') as data_file:
    json_data = data_file.read()

# Parse the raw text into plain Python objects
instance_json = json.loads(json_data)


In [42]:
# Get available video ids for all rows in wlas_df and add to new col 'videos_ids'
wlas_df['videos_ids'] = wlas_df['instances'].apply(get_videos_ids)
# Display the DataFrame including the new column
wlas_df


Unnamed: 0,gloss,instances,videos_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"
...,...,...,...
1995,washington,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62393, 62394, 62395, 62396, 62398]"
1996,waterfall,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62488, 62489, 62490, 62492, 62493]"
1997,weigh,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62782, 62783, 62785]"
1998,wheelchair,"[{'bbox': [415, 86, 1811, 1080], 'fps': 25, 'f...","[63044, 63046, 63047, 63050]"


In [43]:
# Create separate DataFrame for available information in each instance:
# one row per available video, holding its word (gloss), video id and url.
#
# The rows are collected in a plain list and the DataFrame is built once at the
# end; concatenating onto a growing DataFrame inside the loop copies all earlier
# rows on every iteration. The columns are also accessed by name instead of by
# position inside the row Series.
feature_rows = []

for gloss, instances in zip(wlas_df['gloss'], wlas_df['instances']):
    # Extract ids and urls of the videos available for this word
    ids, urls = get_json_features(instances)
    # One record per (id, url) pair, each repeating the word (e.g. 6 * book)
    feature_rows.extend((gloss, video_id, url) for video_id, url in zip(ids, urls))

features_df = pd.DataFrame(feature_rows, columns=['word', 'video_id', 'url'])

# Renaming index col to index
features_df.index.name = 'index'
features_df


Unnamed: 0_level_0,word,video_id,url
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,book,69241,http://aslbricks.org/New/ASL-Videos/book.mp4
1,book,07069,https://signstock.blob.core.windows.net/signsc...
2,book,07068,https://s3-us-west-1.amazonaws.com/files.start...
3,book,07070,https://media.asldeafined.com/vocabulary/14666...
4,book,07099,http://www.aslsearch.com/signs/videos/book.mp4
...,...,...,...
11975,wheelchair,63047,https://www.signingsavvy.com/signs/mp4/5/5233.mp4
11976,wheelchair,63050,http://www.aslsearch.com/signs/videos/wheelcha...
11977,whistle,63186,https://media.spreadthesign.com/video/mp4/13/9...
11978,whistle,63188,https://www.signingsavvy.com/signs/mp4/9/9961.mp4


# 2) Define 20 target classes #

In [44]:
# These 20 words were selected based on the amount of samples available.
# NOTE: the one-hot encoding of the target further down is created with
# num_classes=20, so the length of this list has to stay in sync with it.

selected_words = [
    'like', 'work', 'play', 'take', 'call',
    'go', 'study', 'give', 'write', 'yesterday',
    'far', 'hot', 'cold', 'good', 'bad',
    'computer', 'apple', 'doctor', 'family', 'dog'
]


In [45]:
# Keep only the rows belonging to the selected words. The explicit copy makes
# selected_df an independent DataFrame, so columns added to it later are written
# to the frame itself rather than to a slice of features_df
# (which pandas reports as SettingWithCopyWarning).
selected_df = features_df[features_df['word'].isin(selected_words)].copy()


In [46]:
# Look up the frame count of every selected video and store it in 'video_length'.
# The counts are gathered in a list and attached in one step, instead of writing
# through a boolean mask once per video.
video_lengths = []

for video_id in selected_df['video_id']:
    video_path = f'{main_path}videos/{video_id}.mp4'
    if not os.path.exists(video_path):
        # No file on disk: leave the length undefined for this row
        video_lengths.append(np.nan)
        continue

    cap = cv2.VideoCapture(video_path)
    try:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always free the capture handle, one is opened per video
        cap.release()
    video_lengths.append(length)

# Float column so that NaN can mark videos without a file; assign() returns a new
# DataFrame, so nothing is written into a slice of features_df.
selected_df = selected_df.assign(video_length=np.array(video_lengths, dtype=float))
selected_df = selected_df.reset_index(drop=True)


# 3) Defining the Input/Features: X #

In [52]:
# Frame sampling parameters
frames_per_video = 10
# Passed to cv2.resize as dsize, i.e. (width, height)
target_size = (150, 150)
# Initialize array of desired shape: one entry per row of selected_df (instead of
# a hard-coded row count). Image arrays are (height, width, channels), hence the
# reversed target_size. Zero-filled so rows that are never written stay defined.
X = np.zeros((len(selected_df), frames_per_video, *target_size[::-1], 3), dtype=np.uint8)

# Function to perform frame sampling
def sample_frames(video_path, frames_per_video, total_frames):
    """
    Randomly sample distinct frames from a video, resized and converted to RGB.

    Frame indices are drawn with np.random (global random state) from the range
    [0, total_frames - 5) and the video is then read sequentially, keeping the
    frames at the drawn indices. Frames are resized to the module-level
    `target_size`.

    Input:
        video_path: path of the video file to read
        frames_per_video: number of distinct frames to sample
        total_frames: frame count of the video
    Output: list of RGB frames in ascending frame order. The list is shorter than
            frames_per_video if the video stops delivering frames early.
    Raises: ValueError if total_frames - 5 is smaller than frames_per_video (or
            total_frames is NaN), because that many distinct indices cannot be
            drawn and the sampling loop would never finish.
    """
    index_limit = total_frames - 5

    # Written as `not >=` so that a NaN total_frames is rejected as well
    if frames_per_video > 0 and not index_limit >= frames_per_video:
        raise ValueError(
            f'Cannot sample {frames_per_video} distinct frames from {video_path}: '
            f'total_frames={total_frames} leaves too few candidate frames'
        )

    # Redraw until all sampled indices are distinct
    frame_indices = []
    while len(set(frame_indices)) != frames_per_video:
        frame_indices = sorted(np.random.uniform(0, index_limit, frames_per_video).astype(int))
    wanted_indices = {int(index) for index in frame_indices}

    frames = []
    frame_counter = 0
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()

            # No more frames could be read
            if not ret:
                break

            if frame_counter in wanted_indices:
                # Resize frame to required size
                frame = cv2.resize(frame, target_size)
                # CV2 output BGR -> converting to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Append to list of frames
                frames.append(frame_rgb)

            frame_counter += 1

            # Stop reading as soon as every requested frame was collected
            if len(frames) == frames_per_video:
                break

    finally:
        cap.release()

    return frames


In [54]:
# Fixed seed so the randomly sampled frame indices are reproducible
np.random.seed(10)

# enumerate() gives the position in X directly, independent of the DataFrame index
for i, (video_id, total_frames) in enumerate(
    zip(selected_df['video_id'], selected_df['video_length'])
):
    # Same video location as used by the loading cells
    video_path = f'{main_path}videos/{video_id}.mp4'

    sampled_frames = sample_frames(video_path, frames_per_video, total_frames)

    # A video that stops early yields fewer frames; name the video instead of
    # failing with a shape mismatch on assignment
    if len(sampled_frames) != frames_per_video:
        raise ValueError(
            f'Video {video_id}: expected {frames_per_video} frames, '
            f'got {len(sampled_frames)}'
        )

    # Assign sampled frames to results array
    X[i] = np.array(sampled_frames)


[h264 @ 0x559d642c45c0] Invalid NAL unit size (745 > 472).
[h264 @ 0x559d642c45c0] Error splitting the input into NAL units.
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x559d63ad9600] stream 1, offset 0x3b468: partial file


In [91]:
# Report whether X has the expected (videos, frames, height, width, channels) shape
print(
    f'✅ X has been initialized with Shape {X.shape}!'
    if X.shape == (219, 10, 150, 150, 3)
    else '❌ X has not been initialized properly!'
)


✅ X has been initialized with Shape (219, 10, 150, 150, 3)!


# 4) Defining the Output/Target: y #

In [94]:
# Encode every word as an integer class id
label_encoder = LabelEncoder()

selected_df['encoded_word'] = label_encoder.fit_transform(selected_df['word'])
# One-hot encode the class ids. The number of classes is taken from the list of
# selected words instead of repeating its length as a literal; to_categorical is
# the function already imported at the top of the notebook.
y_cat = to_categorical(selected_df['encoded_word'], num_classes=len(selected_words))


In [95]:
# Report whether y_cat has the expected (videos, classes) shape
print(
    f'✅ y has been initialized with Shape {y_cat.shape}!'
    if y_cat.shape == (219, 20)
    else '❌ y has not been initialized properly!'
)


✅ y has been initialized with Shape (219, 20)!
