In [None]:
import os
import cv2
import numpy as np
from tqdm.notebook import tqdm

In [None]:
def resize_video(video_path, target_size=(224, 224)):
    """Read every frame of a video file and resize each one.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        target_size: Size tuple passed straight to ``cv2.resize`` as ``dsize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        ``(frames, fps)`` where ``frames`` is the list of resized frames in
        read order and ``fps`` is the value reported by ``cv2.CAP_PROP_FPS``.
        ``(None, None)`` is returned when the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return None, None

        fps = cap.get(cv2.CAP_PROP_FPS)
        frames = []

        while True:
            ret, frame = cap.read()
            if not ret:  # end of stream (or a decode failure)
                break
            frames.append(cv2.resize(frame, target_size))
    finally:
        # Always free the capture handle, even if resizing raises or the
        # file could not be opened.
        cap.release()

    return frames, fps

In [None]:
def normalize_fps(frames, fps, target_fps=30):
    """Resample a frame list so it spans the same duration at ``target_fps``.

    Frames are selected by index on an evenly spaced grid (indices are
    truncated to integers); no pixel interpolation is performed, so frames
    are repeated when up-sampling and dropped when down-sampling.

    Args:
        frames: Sequence of frames recorded at ``fps``.
        fps: Source frame rate. A missing, zero, negative or NaN value is
            treated as "unknown" and the frames are returned unchanged.
        target_fps: Desired frame rate.

    Returns:
        The original ``frames`` object when no resampling is needed or
        possible, otherwise a new list of resampled frames (never empty
        for a non-empty input).
    """
    if fps == target_fps:
        return frames

    num_frames = len(frames)
    # cap.get(CAP_PROP_FPS) can report 0 when the rate is unknown; dividing
    # by it would raise ZeroDivisionError. ``not fps > 0`` also catches NaN.
    if num_frames == 0 or not fps or not fps > 0:
        return frames

    duration = num_frames / fps
    # Keep at least one frame: very short clips would otherwise truncate to
    # zero frames and break every later stage.
    target_num_frames = max(1, int(duration * target_fps))

    frame_indices = np.linspace(0, num_frames - 1, target_num_frames, dtype=int)
    return [frames[i] for i in frame_indices]

In [None]:
def apply_brightness_normalization(frames):
    """Equalize frame contrast with CLAHE on the grayscale image.

    Each frame is converted from BGR to grayscale, passed through CLAHE
    (clip limit 2.0, 8x8 tile grid) and expanded back to three identical
    channels. Colour information is therefore discarded.

    Args:
        frames: Iterable of 3-channel frames (treated as BGR).

    Returns:
        A list of new 3-channel frames; the input frames are left untouched.
    """
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    normalized_frames = []
    for frame in frames:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        equalized = clahe.apply(gray)
        # GRAY2BGR replicates the single channel into a freshly allocated
        # 3-channel image, so the caller's frame is not overwritten.
        normalized_frames.append(cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR))
    return normalized_frames

In [None]:
def calculate_frame_difference(frame1, frame2):
    """Score how much two frames differ.

    Both frames are converted from BGR to grayscale, their absolute
    difference is binarised (pixels above 30 become 255, the rest 0) and the
    resulting mask is summed.

    Returns:
        The sum of the binary mask, i.e. 255 for every pixel that changed by
        more than the threshold.
    """
    gray_pair = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (frame1, frame2)]
    delta = cv2.absdiff(*gray_pair)
    binary_mask = cv2.threshold(delta, 30, 255, cv2.THRESH_BINARY)[1]
    return np.sum(binary_mask)


def select_motion_videos(frames, motion_threshold=50000):
    """Tell whether a clip contains more motion than ``motion_threshold``.

    The motion score is the sum of ``calculate_frame_difference`` over every
    pair of consecutive frames; clips with fewer than two frames score 0.

    Returns:
        True when the accumulated score is strictly greater than
        ``motion_threshold``.
    """
    consecutive_pairs = zip(frames, frames[1:])
    motion_score = sum(
        calculate_frame_difference(earlier, later)
        for earlier, later in consecutive_pairs
    )
    return motion_score > motion_threshold

In [None]:
def normalize_video_duration(frames, target_length=30):
    """Resample a frame list to exactly ``target_length`` frames.

    Indices are taken from an evenly spaced grid over the clip (truncated to
    integers), so shorter clips get repeated frames and longer clips get
    frames dropped. The first and last frames are always kept when
    ``target_length`` is at least 2.

    Args:
        frames: Sequence of frames.
        target_length: Number of frames the result must contain.

    Returns:
        ``frames`` itself when it already has the requested length,
        otherwise a new list of ``target_length`` frames.

    Raises:
        ValueError: If ``frames`` is empty and ``target_length`` is not 0;
            there is nothing to sample from.
    """
    num_frames = len(frames)
    if num_frames == target_length:
        return frames
    if num_frames == 0:
        # Without this guard the index grid would run from 0 to -1 and fail
        # with an opaque IndexError.
        raise ValueError("cannot normalize the duration of an empty frame list")

    # The same index grid handles both up-sampling and down-sampling.
    frame_indices = np.linspace(0, num_frames - 1, target_length, dtype=int)
    return [frames[i] for i in frame_indices]

In [None]:
def save_video(frames, output_path, target_fps=30):
    """Write frames to ``output_path`` as an ``mp4v``-encoded video.

    The output resolution is taken from the shape of the first frame.

    Args:
        frames: Non-empty sequence of 3-dimensional frames
            (height, width, channels).
        output_path: Destination file path.
        target_fps: Frame rate stored in the output file.

    Raises:
        ValueError: If ``frames`` is empty.
        IOError: If OpenCV cannot open a writer for ``output_path``.
    """
    if len(frames) == 0:
        raise ValueError(f"No frames to write to {output_path}")

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    # VideoWriter expects the frame size as (width, height).
    out = cv2.VideoWriter(output_path, fourcc, target_fps, (width, height))
    try:
        # A writer that failed to open silently drops every frame, so
        # surface the failure instead of producing no file.
        if not out.isOpened():
            raise IOError(f"Could not open video writer for {output_path}")
        for frame in frames:
            out.write(frame)
    finally:
        out.release()


def _process_single_video(video_path, output_path, target_size, target_fps, target_length):
    """Run one video through resize -> FPS -> brightness -> duration -> save.

    Returns True when the processed video was written, False when the input
    could not be opened.
    """
    frames, fps = resize_video(video_path, target_size)
    if frames is None:
        return False

    frames = normalize_fps(frames, fps, target_fps)
    frames = apply_brightness_normalization(frames)
    # NOTE: motion-based selection (select_motion_videos) is currently not
    # applied at this point, so no video is filtered out for lack of motion.
    frames = normalize_video_duration(frames, target_length)
    save_video(frames, output_path, target_fps)
    return True


def preprocess_pipeline(
    input_root,
    output_root,
    target_size=(224, 224),
    target_fps=30,
    target_length=30,
    motion_threshold=50000,
):
    """Preprocess every video under ``input_root`` into ``output_root``.

    Expects the layout ``<input_root>/<split>/<label>/<video>`` with
    ``split`` being ``train`` or ``test``, and mirrors it under
    ``output_root``. Each output file is named ``<stem>_processed.mp4``.

    Args:
        input_root: Root folder containing the ``train``/``test`` folders.
        output_root: Root folder that receives the processed videos.
        target_size: Frame size passed to ``resize_video``.
        target_fps: Frame rate used for resampling and for the saved file.
        target_length: Number of frames in every saved video.
        motion_threshold: Kept for interface compatibility; it has no effect
            while motion-based selection is disabled.

    A split folder that does not exist is reported and skipped. Errors while
    processing an individual video are printed and do not stop the run.
    """
    for folder in ["train", "test"]:  # Process both train and test folders
        input_folder = os.path.join(input_root, folder)
        if not os.path.isdir(input_folder):
            # A missing split should not abort the other one.
            print(f"Skipping missing split folder: {input_folder}")
            continue

        output_folder = os.path.join(output_root, folder)
        os.makedirs(output_folder, exist_ok=True)

        # Sub-folders correspond to labels; sorting makes runs deterministic
        # because os.listdir returns entries in arbitrary order.
        for label in sorted(os.listdir(input_folder)):
            label_input_folder = os.path.join(input_folder, label)
            if not os.path.isdir(label_input_folder):  # Skip non-folder entries
                continue

            label_output_folder = os.path.join(output_folder, label)
            os.makedirs(label_output_folder, exist_ok=True)

            for file in tqdm(
                sorted(os.listdir(label_input_folder)),
                desc=f"Processing {folder}/{label} Videos",
                unit="video", leave=False
            ):
                video_path = os.path.join(label_input_folder, file)
                output_path = os.path.join(
                    label_output_folder,
                    f"{os.path.splitext(file)[0]}_processed.mp4",
                )

                try:
                    _process_single_video(
                        video_path, output_path, target_size, target_fps, target_length
                    )
                except Exception as e:
                    # Best effort: report the failure and move on to the next video.
                    print(f"Error processing {video_path}: {e}")


In [None]:
# Root of the input dataset; the pipeline looks for train/ and test/ below it
input_folder = "data_splits_muted/"
# Root that receives the processed videos, mirroring the split/label layout
output_folder = "data_processed/"
preprocess_pipeline(input_root=input_folder, output_root=output_folder)

Processing train/adik Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/anak Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/besar Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/buka Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/buruk Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/dengar Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/gembira Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/guru Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/haus Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/ibu Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/jalan Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/keluarga Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/kertas Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/kucing Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/lapar Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/lihat Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/maaf Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/main Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/makan Videos:   0%|          | 0/7 [00:00<?, ?video/s]

Processing train/marah Videos:   0%|          | 0/9 [00:00<?, ?video/s]

Processing train/minum Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/nama Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/orang Videos:   0%|          | 0/7 [00:00<?, ?video/s]

Processing train/panggil Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/rumah Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/sedikit Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/selamat Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/senyum Videos:   0%|          | 0/8 [00:00<?, ?video/s]

Processing train/teman Videos:   0%|          | 0/7 [00:00<?, ?video/s]

Processing train/tidur Videos:   0%|          | 0/7 [00:00<?, ?video/s]

Processing test/adik Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/anak Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/besar Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/buka Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/buruk Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/dengar Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/gembira Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/guru Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/haus Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/ibu Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/jalan Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/keluarga Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/kertas Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/kucing Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/lapar Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/lihat Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/maaf Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/main Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/makan Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/marah Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/minum Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/nama Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/orang Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/panggil Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/rumah Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/sedikit Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/selamat Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/senyum Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/teman Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing test/tidur Videos:   0%|          | 0/2 [00:00<?, ?video/s]