# Imports

In [9]:
import os
import json
import math
from pathlib import Path

import numpy as np
import pandas as pd
import albumentations as A
import cv2
from tqdm.notebook import tqdm
import re
# from torchvision.io import read_video

In [10]:
import warnings
warnings.filterwarnings('ignore', message=r'.*ReplayMode.*')

# Dataset

In [11]:
# Default number of frames kept per video by uniform_temporal_subsample.
FRAMES_PER_VIDEO = 16
# Target size in pixels: max_size of the LongestMaxSize transform and the
# side of the square resize performed in video2rgb.
RESIZE_SHAPE = 256
# NOTE(review): not referenced anywhere in the visible code — presumably a
# random seed; confirm it is still needed.
SEED = 54

def read_video(video_path):
    """Decode every frame of a video file into a single RGB array.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        ``np.array`` built from the list of decoded frames, each converted
        from OpenCV's BGR order to RGB. When the file cannot be opened or
        contains no readable frame the list is empty, so the result is an
        empty 1-D array; callers can detect that case by checking that the
        result does not have 4 dimensions.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure: stop reading.
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Always free the decoder handle; without this every call leaks a
        # capture object, which adds up when iterating a whole dataset.
        cap.release()

    return np.array(frames)

# Shared albumentations pipeline applied to each frame by apply() and
# video2rgb(). It is called as transform(image=frame)['image'].
# NOTE(review): LongestMaxSize presumably rescales the image so its longer
# side equals RESIZE_SHAPE while keeping the aspect ratio — confirm against
# the albumentations documentation.
transform = A.Compose([
     A.LongestMaxSize(max_size=RESIZE_SHAPE),
])

def video2rgb(filename, out_dir):
    """Write every frame of a video into ``out_dir`` as resized JPEG files.

    Frames are saved as ``img_0000.jpg``, ``img_0001.jpg``, ... in decoding
    order. Each frame is resized to ``RESIZE_SHAPE`` x ``RESIZE_SHAPE`` and
    then passed through the module-level ``transform`` before being written.

    Args:
        filename: Path of the video file, handed to ``cv2.VideoCapture``.
        out_dir: Existing directory that receives the frame images. It is
            removed again (``os.rmdir``) when not a single frame could be
            read from the video.
    """
    file_template = 'img_{0:04d}.jpg'
    reader = cv2.VideoCapture(filename)

    count = 0
    try:
        success, frame = reader.read()  # read first frame
        while success:
            # No BGR->RGB conversion here: the frame goes straight back to
            # cv2.imwrite, which expects OpenCV's native BGR channel order.
            frame = cv2.resize(frame, (RESIZE_SHAPE, RESIZE_SHAPE))
            frame = transform(image=frame)['image']
            out_filepath = os.path.join(out_dir, file_template.format(count))
            cv2.imwrite(out_filepath, frame)
            success, frame = reader.read()
            count += 1
    finally:
        # Release the decoder even if resizing or writing raised; the
        # capture handle was previously never freed.
        reader.release()

    if count == 0:
        # Unreadable video: do not leave an empty frame folder behind.
        os.rmdir(out_dir)
        
def uniform_temporal_subsample(data, n_samples=FRAMES_PER_VIDEO):
    """Select ``n_samples`` evenly spaced entries along the first axis.

    Args:
        data: Array indexed along its first axis (must support indexing
            with an integer index array, e.g. a NumPy array of frames).
        n_samples: How many entries to pick; defaults to FRAMES_PER_VIDEO.

    Returns:
        ``data`` indexed with ``n_samples`` positions spread uniformly from
        the first to the last entry. Positions repeat when ``data`` holds
        fewer than ``n_samples`` entries.
    """
    last_index = len(data) - 1
    # Evenly spaced fractional positions, rounded to the nearest valid index.
    positions = np.linspace(0, last_index, n_samples)
    picked = np.round(positions).astype(np.int32)
    return data[picked]

def apply(video):
    """Subsample a video in time and run ``transform`` on each kept frame.

    Args:
        video: Array of frames, passed to ``uniform_temporal_subsample``
            with its default number of samples.

    Returns:
        ``np.array`` of the transformed frames, in the sampled order.
    """
    sampled = uniform_temporal_subsample(video)
    return np.array([transform(image=frame)['image'] for frame in sampled])
    

In [18]:
from multiprocessing import Pool, Process, Manager

class AutslDataset():
    """Index-based access to the videos of one AUTSL split, plus exporters.

    The split is described by a header-less CSV file
    ``<dir_path>/<ds_type><csv_postfix_file_name>`` with two columns: the
    sample file name (without the ``_color.mp4`` suffix) and its class id.
    The videos themselves are read from ``<dir_path>/<ds_type>/``.
    """

    def __init__(self, ds_type, dir_path='data/autsl', csv_postfix_file_name="_labels.csv"):
        """Load the label table of a split.

        Args:
            ds_type: Split name (e.g. ``'test'``); used both as the CSV
                prefix and as the video sub-directory name.
            dir_path: Root directory of the dataset.
            csv_postfix_file_name: Suffix appended to ``ds_type`` to form
                the label CSV file name.
        """
        self.dir_path = dir_path
        self.ds_type = ds_type

        self.df = pd.read_csv(os.path.join(dir_path, ds_type+csv_postfix_file_name), header=None)
        self.df.columns = ['filename', 'cls']
        # Class ids are taken to be 0-based, so the count is max id + 1.
        self.num_classes = int(self.df['cls'].max() + 1)

    def __len__(self):
        """Return the number of samples listed in the label table."""
        return len(self.df)

    def __getitem__(self, ind):
        """Return ``(video, cls)`` for the sample at position ``ind``.

        When the video cannot be decoded into a 4-D frame array, its path
        is printed and a randomly chosen other sample is returned instead
        (both the video and the label then belong to that other sample).
        """
        sample = self.df.iloc[ind]
        video_path = os.path.join(self.dir_path, self.ds_type, sample["filename"] + '_color.mp4')

        video = read_video(video_path)
        if len(video.shape) != 4:
            # Unreadable / empty video: report it and fall back to a random
            # sample so that iteration can continue.
            print(video_path)
            ind = np.random.randint(len(self))
            return self[ind]

        return video, sample['cls']

    def handle_video(self, idx, out_dir_path='data/autsl_frames'):
        """Export the frames of sample ``idx`` as JPEG images.

        Frames are written by ``video2rgb`` into
        ``<out_dir_path>/<ds_type>/<cls>/<idx>/``. Any exception is caught
        and printed so that one broken video does not abort a batch run.

        Args:
            idx: Position of the sample in the label table.
            out_dir_path: Root directory of the exported frame dataset.
        """
        try:
            sample = self.df.iloc[idx]
            cls = sample['cls']
            video_path = os.path.join(self.dir_path, self.ds_type, sample["filename"] + '_color.mp4')

            out_video_folder = os.path.join(out_dir_path, self.ds_type, str(cls), str(idx))
            Path(out_video_folder).mkdir(parents=True, exist_ok=True)
            video2rgb(video_path, out_video_folder)
        except Exception as e:
            # Deliberate best-effort: report and move on to the next video.
            print(str(e))

    def create_np_ds(self, dir_path='data/autsl_np16'):
        """Save every sample as a subsampled, transformed ``.npy`` array.

        Each array is stored at ``<dir_path>/<ds_type>/<cls>/<idx>.npy``.
        Note that for an unreadable video ``self[idx]`` substitutes a random
        sample, so the file saved under ``idx`` then holds that other
        sample's frames, in that other sample's class directory.

        Args:
            dir_path: Root directory of the exported array dataset.
        """
        ds_path = os.path.join(dir_path, self.ds_type)
        Path(ds_path).mkdir(parents=True, exist_ok=True)
        for idx in tqdm(range(len(self))):
            video, cls = self[idx]
            cls_dir_path = os.path.join(ds_path, str(cls))
            Path(cls_dir_path).mkdir(parents=True, exist_ok=True)
            np.save(os.path.join(cls_dir_path, str(idx)), apply(video))

    def create_frames_ds(self, dir_path='data/autsl_frames'):
        """Export the frames of every sample using 8 worker processes.

        Args:
            dir_path: Root directory of the exported frame dataset; it is
                forwarded to ``handle_video`` as ``out_dir_path``.
        """
        from functools import partial

        # Forward the requested output root to the workers; it used to be
        # ignored, so frames always landed in handle_video's default path.
        worker = partial(self.handle_video, out_dir_path=dir_path)
        n_videos = len(self)
        # The context manager shuts the pool down once all work is consumed.
        with Pool(8) as pool:
            # Wrap the lazy result iterator (not the input list) so the
            # progress bar advances as videos actually finish.
            for _ in tqdm(pool.imap(worker, range(n_videos)), total=n_videos):
                pass

# Dataset over the 'test' split, read from the default 'data/autsl' root.
ds = AutslDataset('test')

In [19]:
# Sanity check: shape of the decoded frame array of sample 97.
ds[97][0].shape

(84, 512, 512, 3)

In [20]:
# Export the whole split as subsampled .npy arrays (default output root).
# Paths printed during the run are videos that could not be decoded.
ds.create_np_ds()

  0%|          | 0/3742 [00:00<?, ?it/s]

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x5641ce834700] moov atom not found


data/autsl/test/signer6_sample185_color.mp4


In [3]:
def create_annotations_file(dir):
    """Write ``annotations.txt`` describing a frame dataset directory.

    ``dir`` is expected to contain one sub-directory per class, each holding
    one sub-directory per video with that video's frame files. One line is
    written per video folder::

        <cls>/<vid_folder> 0 <number of files in the folder - 1> <cls>

    Classes and video folders are processed in sorted (lexicographic) name
    order. Hidden entries (names starting with ``.``) and anything that is
    not a directory are skipped at both levels, and video folders without
    any file are skipped because they would yield an end index of -1.

    Args:
        dir: Root directory of the frame dataset; ``annotations.txt`` is
            created (or overwritten) directly inside it.
    """
    with open(os.path.join(dir, 'annotations.txt'), 'w') as f:
        for cls in sorted(os.listdir(dir)):
            cls_path = os.path.join(dir, cls)
            # Skips the annotations file being written, dotfiles and any
            # other stray file next to the class folders.
            if cls == 'annotations.txt' or cls.startswith('.') or not os.path.isdir(cls_path):
                continue
            for vid_folder in sorted(os.listdir(cls_path)):
                vid_folder_path = os.path.join(cls_path, vid_folder)
                # Same filtering inside a class folder: a stray file here
                # used to crash the listing below.
                if vid_folder.startswith('.') or not os.path.isdir(vid_folder_path):
                    continue
                n_frames = len(os.listdir(vid_folder_path))
                if n_frames == 0:
                    continue
                f.write(f'{os.path.join(cls, vid_folder)} 0 {n_frames - 1} {cls}\n')

In [6]:
# Generate annotations.txt for the exported frames of the train split.
create_annotations_file('data/autsl_frames/train')