In [None]:
import glob
import os
import re
import shutil

import cv2
import numpy as np
import pandas as pd

In [None]:
label_path = '/kaggle/input/dcsass-dataset/DCSASS Dataset/Labels'
folder_path = '/kaggle/input/dcsass-dataset/DCSASS Dataset/*' # Paths to data

# Every entry under the dataset root except the Labels directory.
# Filtering (instead of list.remove) avoids a ValueError when the Labels
# path is not among the glob results, e.g. when the dataset is not mounted.
folder_paths = sorted(
    (
        path for path in glob.glob(folder_path)
        if os.path.normpath(path) != os.path.normpath(label_path)
    ),
    key=lambda x: x.split('.')[0],
)

if not folder_paths:
    print("Warning: no data folders found under", folder_path)

# A dedicated loop name keeps `folder_path` holding the glob pattern
for data_folder in folder_paths:
    print(data_folder)

In [None]:
# Output root for the processed data; an already existing one is reused
try:
    os.mkdir('processed-data')
except FileExistsError:
    pass

os.makedirs('processed-data/Labels', exist_ok = True)
csv_path = '/kaggle/input/dcsass-dataset/DCSASS Dataset/Labels/*'
csv_paths = glob.glob(csv_path)
csv_paths.sort(key=lambda x: x.split('.')[0])

# Mirrors every matched label file into the output Labels directory
for csv_path in csv_paths:
    shutil.copy(csv_path, 'processed-data/Labels')
    print("Copied file from", csv_path )

In [None]:
def resize(image, size):
    """Return `image` resized by cv2.resize.

    `size` is passed straight through as the second positional argument of
    cv2.resize; the input image is not modified by this wrapper.
    """
    return cv2.resize(image, size)

def extract_frames(video_path, size):
    """Decode every frame of a video into a list of RGB float32 images.

    Each frame read from the capture is converted from BGR to RGB, resized
    with `resize(img, size)`, and scaled from the 0-255 range to 0-1.

    Parameters
    ----------
    video_path
        Path handed to cv2.VideoCapture.
    size
        Target size forwarded unchanged to `resize`.

    Returns
    -------
    list
        One float32 array per decoded frame, in read order. The list is
        empty when no frame could be read.
    """
    frames = []

    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, img = cap.read()
            if not ret: # No further frame could be read
                break
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Converts from BGR to RGB
            resized = resize(img, size)
            # Fixed 1/255 scaling (assumes 8-bit frames). Per-frame min-max
            # normalisation would stretch every frame to its own range, so
            # brightness would not be comparable between frames and a
            # uniform frame would lose its value entirely.
            frames.append(np.asarray(resized, dtype=np.float32) / 255.0)
    finally:
        cap.release() # Frees the capture even if a frame fails to process
    return frames

In [None]:
target_size = (128, 128)


def _natural_key(path):
    """Sort key ordering numbers in a file name numerically ('clip_2' before 'clip_10')."""
    pieces = re.split(r'(\d+)', os.path.basename(path))
    # re.split with a capturing group alternates text / digits, so odd indices are numbers
    return [int(piece) if index % 2 else piece.lower() for index, piece in enumerate(pieces)]


for folder_path in folder_paths:
    # Creates new directories for processed data
    data_type = os.path.basename(folder_path)
    output_path = os.path.join('processed-data', data_type)
    os.makedirs(output_path, exist_ok = True)

    # Each entry of a data folder is treated as a directory holding the clips of one video
    video_folders = sorted(glob.glob(folder_path + '/*'), key=_natural_key)

    for video in video_folders:
        # Sorting by file name keeps the clips in order; a key built from the
        # full path is identical for every clip when the folder name contains a dot.
        video_paths = sorted(glob.glob(video + '/*'), key=_natural_key)

        clips = []
        for video_path in video_paths:
            frames = extract_frames(video_path, target_size)
            if frames: # Clips that yield no frames are left out
                clips.append(np.asarray(frames, dtype=np.float32))

        if not clips:
            # Nothing to save; np.concatenate would raise on an empty list
            print("Skipping", video, "- no frames could be read")
            continue

        # Joins all clips along the time axis -> (T, H, W, C).
        # Stacking and then taking index 0 kept only one clip and failed
        # whenever clips had different frame counts.
        extracted_frames = np.concatenate(clips, axis = 0)

        video_name = os.path.basename(video)
        file_name = video_name.split('.')[0]
        np.save(os.path.join(output_path, file_name), extracted_frames) # Saves numpy array

    print("Done with", folder_path)