In [1]:
import os
# Changing the working directory
os.chdir('..')
os.getcwd()

'/home/javiermunoz/TFM_DSBD'

In [2]:
from config import DATA_PATH, VIDEOS_PATH
import pandas as pd
from sklearn.model_selection import train_test_split
from config import SPLITS
from numpy.random import default_rng
import shutil
import pathlib

## 0. Data Organization

In [3]:
# Load the labels data
labels = pd.read_csv('./data/labels.csv')
labels

Unnamed: 0,gloss_id,gloss,hand
0,1,Opaque,R
1,2,Red,R
2,3,Green,R
3,4,Yellow,R
4,5,Bright,R
...,...,...,...
59,60,Copy,B
60,61,Run,B
61,62,Realize,R
62,63,Give,B


In [4]:
list_of_videos = os.listdir(VIDEOS_PATH)
print(f"The number of videos in the dataset is: {len(list_of_videos)}")

The number of videos in the dataset is: 3200


In [5]:
# Create a metadata dictionary
metadata_dict = {
    "video_id": [],
    "gloss_id": [],
    "interpreter_id": [],
    "take_id": []
}

for video_id in list_of_videos:
    str_splits = video_id.split("_")
    gloss_id, interpreter_id, take_id = int(str_splits[0]), str_splits[1], str_splits[2].split(".")[0]
    metadata_dict["gloss_id"].append(gloss_id)
    metadata_dict["interpreter_id"].append(interpreter_id)
    metadata_dict["take_id"].append(take_id)
    metadata_dict["video_id"].append(video_id)

metadata_df = pd.DataFrame(metadata_dict)
metadata_df

Unnamed: 0,video_id,gloss_id,interpreter_id,take_id
0,039_008_005.mp4,39,008,005
1,045_005_001.mp4,45,005,001
2,015_007_003.mp4,15,007,003
3,032_008_003.mp4,32,008,003
4,049_010_004.mp4,49,010,004
...,...,...,...,...
3195,025_007_005.mp4,25,007,005
3196,053_001_002.mp4,53,001,002
3197,021_002_004.mp4,21,002,004
3198,001_010_003.mp4,1,010,003


In [6]:
# Join both dictionaries
full_metadata_df = metadata_df.merge(labels, on='gloss_id')
full_metadata_df

Unnamed: 0,video_id,gloss_id,interpreter_id,take_id,gloss,hand
0,039_008_005.mp4,39,008,005,Name,R
1,039_005_004.mp4,39,005,004,Name,R
2,039_009_004.mp4,39,009,004,Name,R
3,039_010_002.mp4,39,010,002,Name,R
4,039_006_005.mp4,39,006,005,Name,R
...,...,...,...,...,...,...
3195,040_003_004.mp4,40,003,004,Patience,R
3196,040_002_001.mp4,40,002,001,Patience,R
3197,040_006_001.mp4,40,006,001,Patience,R
3198,040_008_005.mp4,40,008,005,Patience,R


In [7]:
# Check that there are no nulls
full_metadata_df.isnull().any(), full_metadata_df.isna().any()

(video_id          False
 gloss_id          False
 interpreter_id    False
 take_id           False
 gloss             False
 hand              False
 dtype: bool,
 video_id          False
 gloss_id          False
 interpreter_id    False
 take_id           False
 gloss             False
 hand              False
 dtype: bool)

In [8]:
full_metadata_df[full_metadata_df.isnull().any(axis=1)]

Unnamed: 0,video_id,gloss_id,interpreter_id,take_id,gloss,hand


In [9]:
metadata_4_train, metadata_4_test = train_test_split(full_metadata_df, test_size=0.2, shuffle=True)
metadata_4_val, metadata_4_test = train_test_split(metadata_4_test, test_size=0.4, shuffle=True)

In [10]:
metadata_4_train["sp"] = 'train'
metadata_4_val["sp"] = 'val'
metadata_4_test["sp"] = 'test'
full_metadata_df = pd.concat([metadata_4_train, metadata_4_val, metadata_4_test])
full_metadata_df.reset_index()
full_metadata_df

Unnamed: 0,video_id,gloss_id,interpreter_id,take_id,gloss,hand,sp
3056,056_003_001.mp4,56,003,001,Help,B,train
61,045_009_004.mp4,45,009,004,Barbecue,B,train
672,050_002_003.mp4,50,002,003,Accept,B,train
379,034_002_002.mp4,34,002,002,Map,B,train
3024,020_005_005.mp4,20,005,005,Sweet milk,R,train
...,...,...,...,...,...,...,...
2040,047_004_001.mp4,47,004,001,Chewing-gum,R,test
2616,054_005_003.mp4,54,005,003,To land,B,test
298,004_007_002.mp4,4,007,002,Yellow,R,test
772,001_009_003.mp4,1,009,003,Opaque,R,test


In [11]:
# we take a subset of classes from the whole dataset
rng = default_rng()
subset_size = 10
subset_labels = rng.choice(len(labels), size=subset_size, replace=False)

subset_metadata_df = full_metadata_df[full_metadata_df['gloss_id'].isin(subset_labels)]
subset_metadata_df.reset_index()
subset_metadata_df

Unnamed: 0,video_id,gloss_id,interpreter_id,take_id,gloss,hand,sp
379,034_002_002.mp4,34,002,002,Map,B,train
2864,037_009_004.mp4,37,009,004,Ship,R,train
172,032_001_002.mp4,32,001,002,Photo,B,train
158,032_001_001.mp4,32,001,001,Photo,B,train
2,039_009_004.mp4,39,009,004,Name,R,train
...,...,...,...,...,...,...,...
165,032_003_005.mp4,32,003,005,Photo,B,test
1048,041_008_003.mp4,41,008,003,Perfume,R,test
1006,041_004_004.mp4,41,004,004,Perfume,R,test
16,039_004_001.mp4,39,004,001,Name,R,test


In [12]:
CREATE_SUBSET = True
erase_original = False
subset = f"subset_{subset_size}_lsa_64"

In [13]:
if CREATE_SUBSET:
    # iterate through rows of the dataset
    for index, row in subset_metadata_df.iterrows():
        source = os.path.join(VIDEOS_PATH, row['video_id'])
        destination = os.path.join(DATA_PATH, subset, row['sp'], row['gloss'])

        # create the dataset structure /data/videos/<train|test|val>/gloss
        if not os.path.exists(destination): 
            os.makedirs(destination)

        # and now, we copy from /data/videos to /data/subset_{subset_size}_lsa_64/<train|test|val>/gloss
        if os.path.exists(source):
            shutil.copy(source, destination)

        if erase_original:
            os.remove(source)

## 1. Data Augmentation

In [15]:
import random
import cv2
import numpy as np
import vidaug.augmentors as va 
from PIL import Image
from tqdm import tqdm
from augment_dataset import video_loader, data_transformer, from_PIL_to_opencv
MAX_AUG = 1
DATA_AUG = False
random.seed()

In [16]:
to_aument = random.randint(0, MAX_AUG)
subset_path = os.path.join(DATA_PATH, subset)

In [17]:
if DATA_AUG:
    for sp in SPLITS:
        for gloss in tqdm(os.listdir(os.path.join(subset_path, sp))):
            for video_name in os.listdir(os.path.join(subset_path, sp, gloss)):
                for i in range(to_aument):
                    video_path = os.path.join(subset_path, sp, gloss, video_name)
                    # get metadata from the video and encoding the output
                    cap = cv2.VideoCapture(video_path)
                    fps = cap.get(cv2.CAP_PROP_FPS)
                    fourCC = cv2.VideoWriter_fourcc("m", "p", "4", "v")
                    # retrieve frames as PIL images
                    frames = video_loader(video_path)
                    # get frame width and height
                    frame_width, frame_height = frames[0].size

                    vid_name = video_name.split(".")[0]
                    aug_vid_name = f"{vid_name}_aug{i}.mp4"
                    aug_frames = data_transformer(frames, frame_height, frame_width, crop_factor=0.2)
                    new_frame_width, new_frame_height = aug_frames[0].size
                    cv2_frames = from_PIL_to_opencv(aug_frames)  
                    path_out = os.path.join(subset_path, sp, gloss, aug_vid_name)
                    out = cv2.VideoWriter(path_out, fourCC, fps, (new_frame_width, new_frame_height))

                    for frame in cv2_frames:
                        out.write(frame)

                    out.release()
                    cap.release()