In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import cv2


In [2]:
# Absolute main path
main_path = '../data/'

# Read JSON file into a DataFrame with unprocessed instance col
wlas_df = pd.read_json(main_path + 'WLASL_v0.3.json')

print(wlas_df.head())
print(wlas_df.shape)


      gloss                                          instances
0      book  [{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...
1     drink  [{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...
2  computer  [{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...
3    before  [{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...
4     chair  [{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...
(2000, 2)


In [3]:
def get_videos_ids(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids of the current instance

    Input: instance json list
    Output: list of videos_ids
    """
    videos_list = []
    for ins in json_list:
        video_id = ins['video_id']
        if os.path.exists(f'{main_path}videos/{video_id}.mp4'):
            videos_list.append(video_id)
    return videos_list


In [4]:
def get_json_features(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids and url or any other featrue of the current instance

    input: instance json list
    output: list of videos_ids
    """
    videos_ids = []
    videos_urls = []
    for ins in json_list:
        video_id = ins['video_id']
        video_url = ins['url']
        if os.path.exists(f'{main_path}videos/{video_id}.mp4'):
            videos_ids.append(video_id)
            videos_urls.append(video_url)
    return videos_ids, videos_urls


In [5]:
# Open JSON file (read only)
with open(main_path+'WLASL_v0.3.json', 'r') as data_file:
    json_data = data_file.read()

instance_json = json.loads(json_data)


In [6]:
# Get available video ids for all rows in wlas_df and add to new col 'videos_id'
wlas_df['videos_ids'] = wlas_df['instances'].apply(get_videos_ids)
wlas_df.head()


Unnamed: 0,gloss,instances,videos_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"


In [7]:
# Create separate DataFrame for available information in each instance
features_df = pd.DataFrame(columns=['word', 'video_id', 'url'])

for row in wlas_df.iterrows():
    # Extract ids and urls for each row
    ids, urls = get_json_features(row[1][1])
    # Initialize a list matching the length (n) of found ids containing the word
    word = [row[1][0]] * len(ids)
    # Using zip to create new df with:
    # n * word in gloss col (e.g. 6 * book)
    # Unique id and url in ids and url col respectively
    df = pd.DataFrame(list(zip(word, ids, urls)), columns = features_df.columns)
    # Append temporary df to feature_df
    features_df = pd.concat([features_df, df], ignore_index=True)

# Renaming index col to index
features_df.index.name = 'index'
features_df.head()


Unnamed: 0_level_0,word,video_id,url
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,book,69241,http://aslbricks.org/New/ASL-Videos/book.mp4
1,book,7069,https://signstock.blob.core.windows.net/signsc...
2,book,7068,https://s3-us-west-1.amazonaws.com/files.start...
3,book,7070,https://media.asldeafined.com/vocabulary/14666...
4,book,7099,http://www.aslsearch.com/signs/videos/book.mp4


In [8]:
selected_words = [
    'like',
    'work',
    'play',
    'take',
    'call',
    'go',
    'study',
    'give',
    'write',
    'yesterday',
    'far',
    'hot',
    'cold',
    'good',
    'bad',
    'computer',
    'apple',
    'doctor',
    'family',
    'dog'
]


In [9]:
selected_df = features_df[features_df['word'].isin(selected_words)]
selected_df


Unnamed: 0_level_0,word,video_id,url
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
21,computer,12328,https://www.signingsavvy.com/signs/mp4/6/6326.mp4
22,computer,12312,https://s3-us-west-1.amazonaws.com/files.start...
23,computer,12311,https://media.spreadthesign.com/video/mp4/13/5...
24,computer,12338,http://www.aslsearch.com/signs/videos/computer...
25,computer,12313,https://s3-us-west-1.amazonaws.com/files.start...
...,...,...,...
1510,good,25076,http://www.aslsearch.com/signs/videos/good.mp4
1511,good,25067,https://s3-us-west-1.amazonaws.com/files.start...
1512,good,25068,https://signstock.blob.core.windows.net/signsc...
1513,good,25069,https://signstock.blob.core.windows.net/signsc...


In [10]:
for video_id in selected_df['video_id']:
    if os.path.exists(f'{main_path}videos/{video_id}.mp4'):
        cap = cv2.VideoCapture(f'{main_path}videos/{video_id}.mp4')
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        selected_df.loc[selected_df['video_id'] == video_id, ['video_length']] = int(length)
    pass

selected_df = selected_df.reset_index(drop=True)
selected_df


Unnamed: 0,word,video_id,url,video_length
0,computer,12328,https://www.signingsavvy.com/signs/mp4/6/6326.mp4,88.0
1,computer,12312,https://s3-us-west-1.amazonaws.com/files.start...,101.0
2,computer,12311,https://media.spreadthesign.com/video/mp4/13/5...,72.0
3,computer,12338,http://www.aslsearch.com/signs/videos/computer...,107.0
4,computer,12313,https://s3-us-west-1.amazonaws.com/files.start...,81.0
...,...,...,...,...
214,good,25076,http://www.aslsearch.com/signs/videos/good.mp4,86.0
215,good,25067,https://s3-us-west-1.amazonaws.com/files.start...,74.0
216,good,25068,https://signstock.blob.core.windows.net/signsc...,60.0
217,good,25069,https://signstock.blob.core.windows.net/signsc...,21.0


In [11]:
selected_df.describe()


Unnamed: 0,video_length
count,219.0
mean,65.228311
std,23.40691
min,20.0
25%,46.0
50%,64.0
75%,81.0
max,148.0


In [43]:
frame_indices = []

while len(set(frame_indices)) != frames_per_video:
        frame_indices = sorted(np.random.uniform(0, total_frames, frames_per_video).astype(int))

# frame_indices = sorted(np.random.uniform(82, 10, replace=False))


print(frame_indices)
print(len(frame_indices))


[0, 7, 9, 17, 30, 33, 34, 43, 47, 48]
10


In [67]:
# Frame sampling parameters
frames_per_video = 10
target_size = (150, 150)
# Initialize empty array of desired shape
results = np.empty((219, frames_per_video, *target_size, 3), dtype=np.uint8)

# Function to perform frame sampling
def sample_frames(video_path, frames_per_video, total_frames):
    frames = []
    cap = cv2.VideoCapture(video_path)

    frame_indices = []

    while len(set(frame_indices)) != frames_per_video:
        frame_indices = sorted(np.random.uniform(0, total_frames-5, frames_per_video).astype(int))

    frame_counter = 0

    print(frame_indices, total_frames)

    try:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                break

            if frame_counter in frame_indices:
                # Resize frame to required size
                frame = cv2.resize(frame, target_size)
                # CV2 output BGR -> converting to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Append to list of frames
                frames.append(frame_rgb)

            frame_counter += 1

            if len(frames) == frames_per_video:
                break

    finally:
        cap.release()

    return frames


In [33]:
word = selected_df['word'][0]
video_id = selected_df['video_id'][0]
url = selected_df['url'][0]
total_frames = selected_df['video_length'][0]

video_path = f'../data/videos/{video_id}.mp4'

sampled_frames = sample_frames(video_path, frames_per_video, total_frames)
sampled_frames


0
0
0
0
0
0
0
0
0
if condition met, frame numer 1 added
1
1
1
1
1
1
1
1
1
1
1
1
1
1
if condition met, frame numer 2 added
2
2
2
2
2
if condition met, frame numer 3 added
3
3
3
3
3
3
3
3
3
if condition met, frame numer 4 added
4
4
4
4
4
4
4
4
4
4
4
4
4
4
if condition met, frame numer 5 added
5
5
if condition met, frame numer 6 added
6
6
6
6
6
6
6
6
6
6
6
6
6
if condition met, frame numer 7 added
7
7
7
7
7
7
if condition met, frame numer 8 added
8
8
8
8
8
8
if condition met, frame numer 9 added
9
9
9
9
9
9
9
9
if condition met, frame numer 10 added
10
breaking loop


[array([[[255, 255, 255],
         [255, 255, 255],
         [255, 255, 255],
         ...,
         [255, 255, 255],
         [255, 255, 255],
         [255, 255, 255]],
 
        [[255, 255, 255],
         [255, 255, 255],
         [255, 255, 255],
         ...,
         [255, 255, 255],
         [255, 255, 255],
         [255, 255, 255]],
 
        [[255, 255, 255],
         [255, 255, 255],
         [255, 255, 255],
         ...,
         [255, 255, 255],
         [255, 255, 255],
         [255, 255, 255]],
 
        ...,
 
        [[255, 255, 251],
         [255, 255, 251],
         [255, 252, 255],
         ...,
         [255, 255, 248],
         [255, 254, 254],
         [255, 255, 255]],
 
        [[255, 254, 255],
         [255, 254, 255],
         [255, 253, 255],
         ...,
         [255, 255, 255],
         [255, 255, 255],
         [255, 255, 255]],
 
        [[255, 254, 255],
         [255, 254, 255],
         [255, 253, 255],
         ...,
         [253, 253, 253],
  

In [45]:
temp_array = np.empty((219, 10, 150, 150, 3))
temp_array.shape


(219, 10, 150, 150, 3)

In [68]:
# sampled_frames = []
# # Iterate through the DataFrame and perform frame sampling
# for index, row in selected_df.iterrows():
#     video_id = row['video_id']
#     total_frames = row['video_length']

#     video_path = f'../data/videos/{video_id}.mp4'

#     sampled_frames.append(sample_frames(video_path, frames_per_video, total_frames))


for i, row in selected_df.iterrows():
    video_id = row['video_id']
    total_frames = row['video_length']
    video_path = f'../data/videos/{video_id}.mp4'

    sampled_frames = sample_frames(video_path, frames_per_video, total_frames)
    print(np.array(sampled_frames).shape)
    # Assign sampled frames to results array
    results[i] = np.array(sampled_frames)


[4, 10, 22, 27, 46, 50, 53, 67, 72, 78] 88.0
(10, 150, 150, 3)
[2, 15, 25, 30, 32, 43, 51, 68, 70, 74] 101.0
(10, 150, 150, 3)
[5, 8, 11, 14, 19, 22, 24, 26, 30, 44] 72.0
(10, 150, 150, 3)
[1, 12, 16, 40, 64, 69, 70, 72, 76, 83] 107.0
(10, 150, 150, 3)
[3, 21, 23, 29, 44, 49, 52, 56, 63, 72] 81.0
(10, 150, 150, 3)
[0, 5, 6, 7, 10, 14, 18, 26, 31, 35] 43.0
(10, 150, 150, 3)
[9, 11, 17, 20, 28, 32, 41, 44, 45, 49] 57.0
(10, 150, 150, 3)
[0, 2, 4, 10, 11, 16, 21, 30, 31, 34] 43.0
(10, 150, 150, 3)
[1, 4, 11, 14, 24, 25, 28, 31, 33, 47] 53.0
(10, 150, 150, 3)
[4, 7, 9, 22, 24, 27, 35, 36, 38, 39] 45.0
(10, 150, 150, 3)
[5, 7, 9, 13, 19, 23, 26, 28, 31, 33] 40.0
(10, 150, 150, 3)
[9, 11, 13, 52, 54, 61, 64, 86, 94, 96] 102.0
(10, 150, 150, 3)
[4, 9, 17, 20, 22, 28, 33, 36, 38, 56] 73.0
(10, 150, 150, 3)
[8, 12, 20, 21, 23, 26, 29, 34, 35, 37] 46.0
(10, 150, 150, 3)
[1, 10, 11, 18, 25, 33, 38, 43, 45, 57] 67.0
(10, 150, 150, 3)
[9, 16, 17, 21, 23, 26, 34, 41, 46, 54] 61.0
(10, 150, 150, 3)
[

[h264 @ 0x55ca01bc2580] Invalid NAL unit size (745 > 472).
[h264 @ 0x55ca01bc2580] Error splitting the input into NAL units.
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55c9fe50f280] stream 1, offset 0x3b468: partial file
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55c9fe50f280] stream 1, offset 0x3b7d3: partial file


ValueError: could not broadcast input array from shape (9,150,150,3) into shape (10,150,150,3)

In [38]:
results.shape


(219,)

In [31]:
sampled_frames


array([list([array([[[255, 255, 255],
               [255, 255, 255],
               [255, 255, 255],
               ...,
               [255, 255, 255],
               [255, 255, 255],
               [255, 255, 255]],

              [[255, 255, 255],
               [255, 255, 255],
               [255, 255, 255],
               ...,
               [255, 255, 255],
               [255, 255, 255],
               [255, 255, 255]],

              [[255, 255, 255],
               [255, 255, 255],
               [255, 255, 255],
               ...,
               [255, 255, 255],
               [255, 255, 255],
               [255, 255, 255]],

              ...,

              [[255, 255, 251],
               [255, 255, 251],
               [255, 252, 255],
               ...,
               [255, 255, 248],
               [255, 254, 254],
               [255, 255, 255]],

              [[255, 254, 255],
               [255, 254, 255],
               [255, 253, 255],
               ...,
  