In [2]:
import cv2
import numpy as np
import os
import dlib
from imutils import face_utils

# Initialize dlib's face detector (HOG-based) and the facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('external_models/shape_predictor_68_face_landmarks.dat')

# Augmentation functions
def augment_brightness(frame, factor=1.5):
    """Adjust brightness by multiplying pixel values."""
    return np.clip(frame * factor, 0, 255).astype(np.uint8)

def augment_contrast(frame, factor=1.5):
    """Adjust contrast by scaling pixel values around the mean."""
    mean_intensity = np.mean(frame)
    return np.clip((1 - factor) * mean_intensity + factor * frame, 0, 255).astype(np.uint8)

def augment_rotation(frame, angle=15):
    """Rotate the image by a given angle in degrees."""
    (h, w) = frame.shape[:2]
    center = (w // 2, h // 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(frame, matrix, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

def augment_flip(frame):
    """Flip the frame horizontally."""
    return cv2.flip(frame, 1)

def augment_gaussian_noise(frame, mean=0, stddev=10):
    """Add Gaussian noise to the frame."""
    noise = np.random.normal(mean, stddev, frame.shape).astype(np.uint8)
    return np.clip(frame + noise, 0, 255).astype(np.uint8)

# Function to preprocess individual frames
def preprocess_frame(frame, apply_augmentations=None):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray_frame)
    if len(faces) > 0:
        face = faces[0]
        landmarks = predictor(gray_frame, face)
        landmarks = face_utils.shape_to_np(landmarks)
        (x, y, w, h) = cv2.boundingRect(landmarks[48:68])
        roi = gray_frame[y:y+h, x:x+w]  # Extract lip region

        # Apply augmentations on the lip region (ROI) if specified
        if apply_augmentations:
            for aug in apply_augmentations:
                roi = aug(roi)

        # Resize the lip region to 64x64 for consistency in model input size
        resized_frame = cv2.resize(roi, (64, 64))
    else:
        # If no face is detected, resize the entire frame to 64x64 and augment if required
        resized_frame = cv2.resize(gray_frame, (64, 64))
        if apply_augmentations:
            for aug in apply_augmentations:
                resized_frame = aug(resized_frame)

    # Normalize and add a channel dimension
    normalized_frame = resized_frame[..., np.newaxis] / 255.0
    return normalized_frame

# Function to adjust video length to exactly 60 frames
def adjust_video_length(frames, target_length=60):
    length = len(frames)
    if length > target_length:
        return frames[:target_length]
    pad_width = target_length - length
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    return np.pad(frames, ((left_pad, right_pad), (0, 0), (0, 0), (0, 0)), mode='reflect')[:target_length]

# Function to apply augmentations to the video
def apply_augmentations(frames, video_name, output_folder, processed_files, total_files):
    augmentation_combinations = {
        '': [],  # Original
        '_aug_brightness': [augment_brightness],
        '_aug_contrast': [augment_contrast],
        '_aug_rotation': [augment_rotation],
        '_aug_flip': [augment_flip],
        '_aug_gaussian': [augment_gaussian_noise],
        '_aug_brightness_contrast': [augment_brightness, augment_contrast],
        '_aug_brightness_rotation': [augment_brightness, augment_rotation],
        '_aug_brightness_flip': [augment_brightness, augment_flip],
        '_aug_brightness_gaussian': [augment_brightness, augment_gaussian_noise],
        '_aug_contrast_rotation': [augment_contrast, augment_rotation],
        '_aug_contrast_flip': [augment_contrast, augment_flip],
        '_aug_contrast_gaussian': [augment_contrast, augment_gaussian_noise],
        '_aug_rotation_flip': [augment_rotation, augment_flip],
        '_aug_rotation_gaussian': [augment_rotation, augment_gaussian_noise],
        '_aug_flip_gaussian': [augment_flip, augment_gaussian_noise],
        '_aug_brightness_contrast_rotation_flip_gaussian': [
            augment_brightness, augment_contrast, augment_rotation, augment_flip, augment_gaussian_noise
        ],
    }

    for suffix, aug_list in augmentation_combinations.items():
        augmented_frames = [preprocess_frame(frame, apply_augmentations=aug_list) for frame in frames]
        adjusted_frames = adjust_video_length(augmented_frames)
        
        # Remove repetition in output file name
        output_filename = f"{video_name}{suffix}.npy"
        output_path = os.path.join(output_folder, output_filename)
        
        np.save(output_path, np.array(adjusted_frames, dtype=np.float16))
        processed_files += 1
        print(f"Saved: {output_path} ({processed_files}/{total_files} total files saved)")

    return processed_files

# Function to preprocess all videos in a folder
def preprocess_videos(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    total_videos = sum([len(files) for _, _, files in os.walk(input_folder)])
    total_files = total_videos * 17  # 17 different versions per video (original + 16 augmentations)
    processed_files = 0

    for session_folder in os.listdir(input_folder):
        session_path = os.path.join(input_folder, session_folder)
        if os.path.isdir(session_path):
            for video_file in os.listdir(session_path):
                video_path = os.path.join(session_path, video_file)
                cap = cv2.VideoCapture(video_path)
                frames = []
                
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frames.append(frame)
                
                cap.release()

                # Use only the `video_file` name, removing any repetitive parts in the final output filename
                video_name = os.path.splitext(video_file)[0]  # Removes the file extension (.avi)

                try:
                    processed_files = apply_augmentations(frames, video_name, output_folder, processed_files, total_files)
                except Exception as e:
                    print(f"Error processing {video_name}: {e}")

# Paths to your data folders
input_folder = 'data'
output_folder = 'preprocessed_data'

# Preprocess the videos and apply augmentations
preprocess_videos(input_folder, output_folder)


Saved: preprocessed_data\Speaker1_Session1_apple_1.npy (1/85000 total files saved)
Saved: preprocessed_data\Speaker1_Session1_apple_1_aug_brightness.npy (2/85000 total files saved)
Saved: preprocessed_data\Speaker1_Session1_apple_1_aug_contrast.npy (3/85000 total files saved)
Saved: preprocessed_data\Speaker1_Session1_apple_1_aug_rotation.npy (4/85000 total files saved)
Saved: preprocessed_data\Speaker1_Session1_apple_1_aug_flip.npy (5/85000 total files saved)



KeyboardInterrupt



In [2]:
# Import necessary libraries
import cv2
import numpy as np
import os
import dlib
from imutils import face_utils

# Initialize dlib's face detector (HOG-based) and the facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('external_models/shape_predictor_68_face_landmarks.dat')

# Initialize a dictionary to keep track of the frame counts for each video
frame_counts_per_video = {}

# Function to preprocess individual frames
def preprocess_frame(frame):
    # Convert the frame to grayscale (since color isn't needed for lip reading)
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = detector(gray_frame)

    # Check if any face is detected
    if len(faces) > 0:
        # Take the first detected face (assume it's the most relevant one)
        face = faces[0]

        # Get facial landmarks (68 points) using the predictor
        landmarks = predictor(gray_frame, face)
        landmarks = face_utils.shape_to_np(landmarks)

        # Extract the bounding box of the lip region (landmarks 48 to 67)
        (x, y, w, h) = cv2.boundingRect(landmarks[48:68])
        roi = gray_frame[y:y+h, x:x+w]  # Region of Interest (lips area)

        # Resize the lip region to 64x64 (for consistency in model input size)
        resized_frame = cv2.resize(roi, (64, 64))
    else:
        # If no face is detected, resize the entire frame to 64x64
        resized_frame = cv2.resize(gray_frame, (64, 64))

    # Add a channel dimension and normalize pixel values to [0, 1]
    normalized_frame = resized_frame[..., np.newaxis] / 255.0

    # Return the processed frame
    return normalized_frame

# Function to adjust video length to exactly 60 frames using truncation or mirror padding
def adjust_video_length(frames, target_length=60):
    length = len(frames)

    if length > target_length:
        return frames[:target_length]  # Truncate if longer than target length

    # Calculate padding needed
    pad_width = target_length - length
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad

    # Create mirror padded video
    padded_video = np.pad(frames, ((left_pad, right_pad), (0, 0), (0, 0), (0, 0)), mode='reflect')

    return padded_video[:target_length]  # Ensure exactly target_length frames

# Function to preprocess all videos in a folder
def preprocess_videos(input_folder, output_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Loop through each session folder
    for session_folder in os.listdir(input_folder):
        session_path = os.path.join(input_folder, session_folder)
        if os.path.isdir(session_path):
            for video_file in os.listdir(session_path):
                video_path = os.path.join(session_path, video_file)
                cap = cv2.VideoCapture(video_path)

                frames = []  # List to store all the frames in the current video

                # Loop through each frame of the video
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # Preprocess each frame using the defined preprocess_frame function
                    processed_frame = preprocess_frame(frame)
                    frames.append(processed_frame)

                cap.release()  # Release the video capture

                # Adjust video length to exactly 60 frames using truncation or mirror padding
                adjusted_frames = adjust_video_length(frames)

                # Convert adjusted_frames list to a NumPy array before saving
                adjusted_frames_array = np.array(adjusted_frames)

                # Create proper output file name based on the session and video
                output_file_name = f"{session_folder}_{video_file}".replace('.avi', '')  # Remove any extensions
                output_video_path = os.path.join(output_folder, f"{output_file_name}.npy")

                # Replace or create new file with the proper naming convention
                if os.path.exists(output_video_path):
                    print(f"Replacing existing file: {output_video_path}")
                else:
                    print(f"Saving new preprocessed video: {output_video_path}")

                # Save the preprocessed video as a .npy file with float16 data type
                np.save(output_video_path, adjusted_frames_array.astype(np.float16))

                # Store frame counts for summary
                frame_counts_per_video[output_file_name] = {
                    'total': len(frames),
                    'processed': len(adjusted_frames_array)
                }

    print("\nSummary of frame counts for each video:")
    for video_file, counts in frame_counts_per_video.items():
        print(f"{video_file}: Total Frames = {counts['total']}, Processed Frames = {counts['processed']}")

# Paths to your data folders - adjust these paths as necessary!
input_folder = 'test_data'           # Path where your input videos are stored.
output_folder = 'preprocessed_test_data'  # Path where you want to save preprocessed videos.

# Preprocess the videos and adjust them to exactly 60 frames each
preprocess_videos(input_folder, output_folder)

Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_1.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_10.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_11.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_12.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_13.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_14.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_15.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_16.npy
Replacing existing file: preprocessed_test_data\TestSpeaker1_Session1_cat_TestSpeaker1_Session1_cat_17.npy
Replacing existing file: preprocessed_

In [6]:
import cv2
import numpy as np
import os
import dlib
import cupy as cp
import tensorflow as tf

# Enable GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Initialize face detector and predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('external_models/shape_predictor_68_face_landmarks.dat')

# GPU-accelerated augmentation functions
def augment_brightness(frame_gpu, factor=1.1):
    return cp.clip(frame_gpu * factor, 0, 1)  # Clip to 0-1 range

def augment_brightness_strong(frame_gpu, factor=1.3):
    return cp.clip(frame_gpu * factor, 0, 1)  # Clip to 0-1 range

def augment_brightness_negative(frame_gpu, factor=0.7):
    return cp.clip(frame_gpu * factor, 0, 1)  # Clip to 0-1 range

def augment_contrast(frame_gpu, factor=1.1):
    mean_intensity = cp.mean(frame_gpu)
    return cp.clip((1 - factor) * mean_intensity + factor * frame_gpu, 0, 1)  # Clip to 0-1 range

def augment_contrast_strong(frame_gpu, factor=1.3):
    mean_intensity = cp.mean(frame_gpu)
    return cp.clip((1 - factor) * mean_intensity + factor * frame_gpu, 0, 1)  # Clip to 0-1 range

def augment_contrast_negative(frame_gpu, factor=0.7):
    mean_intensity = cp.mean(frame_gpu)
    return cp.clip((1 - factor) * mean_intensity + factor * frame_gpu, 0, 1)  # Clip to 0-1 range

def augment_rotation(frame_gpu, angle=10):
    k = int(abs(angle) / 90)
    return cp.rot90(frame_gpu, k=k) if angle >= 0 else cp.rot90(frame_gpu, k=-k)

def augment_rotation_strong(frame_gpu, angle=20):
    k = int(abs(angle) / 90)
    return cp.rot90(frame_gpu, k=k) if angle >= 0 else cp.rot90(frame_gpu, k=-k)

def augment_rotation_negative(frame_gpu, angle=-10):
    k = int(abs(angle) / 90)
    return cp.rot90(frame_gpu, k=k) if angle >= 0 else cp.rot90(frame_gpu, k=-k)

def augment_flip(frame_gpu):
    return cp.fliplr(frame_gpu)

def augment_gaussian_noise(frame_gpu, mean=0, stddev=0.005):
    noise = cp.random.normal(mean, stddev, frame_gpu.shape).astype(cp.float32)
    return cp.clip(frame_gpu + noise, 0, 1)  # Clip to 0-1 range

def augment_gaussian_noise_strong(frame_gpu, mean=0, stddev=0.01):
    noise = cp.random.normal(mean, stddev, frame_gpu.shape).astype(cp.float32)
    return cp.clip(frame_gpu + noise, 0, 1)  # Clip to 0-1 range

def augment_gaussian_noise_negative(frame_gpu, mean=0, stddev=0.003):
    noise = cp.random.normal(mean, stddev, frame_gpu.shape).astype(cp.float32)
    return cp.clip(frame_gpu + noise, 0, 1)  # Clip to 0-1 range

def preprocess_frame(frame):
    # Convert to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect face
    faces = detector(gray_frame)
    
    if len(faces) > 0:
        face = faces[0]
        landmarks = predictor(gray_frame, face)
        landmarks = np.array([[p.x, p.y] for p in landmarks.parts()])[48:68]  # Lip region
        x, y, w, h = cv2.boundingRect(landmarks)
        roi = gray_frame[y:y+h, x:x+w]
    else:
        roi = gray_frame
    
    # Resize to 128x128
    resized_frame = cv2.resize(roi, (128, 128))
    
    # Move to GPU and normalize
    frame_gpu = cp.asarray(resized_frame, dtype=cp.float32)
    frame_gpu = frame_gpu / 255.0  # Normalize to 0-1 range
    
    return frame_gpu

def adjust_video_length(frames_list, target_length=60):
    # Convert list of CuPy arrays to a single stacked array
    frames = cp.stack(frames_list)
    length = len(frames)
    
    if length > target_length:
        return cp.asnumpy(frames[:target_length])
    
    # Move to CPU for padding operation
    frames_cpu = cp.asnumpy(frames)
    pad_width = target_length - length
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    
    padded = np.pad(frames_cpu, ((left_pad, right_pad), (0, 0), (0, 0)), mode='reflect')
    return padded[:target_length]

def process_video(video_path, output_folder, video_name):
    print(f"Processing {video_name}")
    
    # Read video frames
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    
    # Define augmentation combinations
    augmentation_combinations = [
        ('no_aug', []),
        ('brightness', [augment_brightness]),
        ('brightness_strong', [augment_brightness_strong]),
        ('brightness_negative', [augment_brightness_negative]),
        ('contrast', [augment_contrast]),
        ('contrast_strong', [augment_contrast_strong]), 
        ('contrast_negative', [augment_contrast_negative]),
        ('rotation', [augment_rotation]),
        ('rotation_strong', [augment_rotation_strong]),
        ('rotation_negative', [augment_rotation_negative]),
        ('flip', [augment_flip]),
        ('gaussian', [augment_gaussian_noise]),
        ('gaussian_strong', [augment_gaussian_noise_strong]),
        ('gaussian_negative', [augment_gaussian_noise_negative])
    ]
    
    # Process frames
    processed_frames = [preprocess_frame(frame) for frame in frames]
    
    for aug_level, aug_list in augmentation_combinations:
        augmented_frames = []
        
        for frame_gpu in processed_frames:
            current_frame = cp.copy(frame_gpu)
            
            # Apply augmentations
            for aug_func in aug_list:
                current_frame = aug_func(current_frame)
            
            augmented_frames.append(current_frame)
        
        # Adjust to 60 frames and save
        adjusted_frames = adjust_video_length(augmented_frames)
        output_path = os.path.join(output_folder, f"{video_name}_{aug_level}.npy")
        np.save(output_path, adjusted_frames.astype(np.float16))
        print(f"Saved: {output_path}")
        
        # Free GPU memory
        cp.get_default_memory_pool().free_all_blocks()

def preprocess_videos(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    total_videos = sum([len(files) for _, _, files in os.walk(input_folder)])
    processed = 0
    
    for session_folder in os.listdir(input_folder):
        session_path = os.path.join(input_folder, session_folder)
        if os.path.isdir(session_path):
            for video_file in os.listdir(session_path):
                video_path = os.path.join(session_path, video_file)
                video_name = os.path.splitext(video_file)[0]
                
                try:
                    process_video(video_path, output_folder, video_name)
                    processed += 1
                    print(f"Progress: {processed}/{total_videos} videos processed")
                except Exception as e:
                    print(f"Error processing {video_name}: {e}")
                    
                # Free GPU memory after each video
                cp.get_default_memory_pool().free_all_blocks()

if __name__ == "__main__":
    input_folder = 'test_data'
    output_folder = 'preprocessed_test_data'
    
    # Clear any existing GPU memory
    cp.get_default_memory_pool().free_all_blocks()
    
    preprocess_videos(input_folder, output_folder)

Processing TestSpeaker1_Session1_apple_1
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_no_aug.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_brightness.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_brightness_strong.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_brightness_negative.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_contrast.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_contrast_strong.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_contrast_negative.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_rotation.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_rotation_strong.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_rotation_negative.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_flip.npy
Saved: preprocessed_test_data\TestSpeaker1_Session1_apple_1_gaussian.npy
Saved: preprocessed_test_data\TestS

In [None]:
import cv2
import numpy as np
import os
import dlib
import cupy as cp
import tensorflow as tf

# Enable GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Initialize face detector and predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('external_models/shape_predictor_68_face_landmarks.dat')

# GPU-accelerated augmentation functions
def augment_brightness(frame_gpu, factor=1.5):
    return cp.clip(frame_gpu * factor, 0, 1)  # Clip to 0-1 range

def augment_contrast(frame_gpu, factor=1.5):
    mean_intensity = cp.mean(frame_gpu)
    return cp.clip((1 - factor) * mean_intensity + factor * frame_gpu, 0, 1)  # Clip to 0-1 range

def augment_rotation(frame_gpu, angle=15):
    k = int(abs(angle) / 90)
    return cp.rot90(frame_gpu, k=k) if angle >= 0 else cp.rot90(frame_gpu, k=-k)

def augment_flip(frame_gpu):
    return cp.fliplr(frame_gpu)

def augment_gaussian_noise(frame_gpu, mean=0, stddev=0.01):  # Significantly reduced noise
    noise = cp.random.normal(mean, stddev, frame_gpu.shape).astype(cp.float32)
    return cp.clip(frame_gpu + noise, 0, 1)  # Clip to 0-1 range

def preprocess_frame(frame):
    # Convert to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect face
    faces = detector(gray_frame)
    
    if len(faces) > 0:
        face = faces[0]
        landmarks = predictor(gray_frame, face)
        landmarks = np.array([[p.x, p.y] for p in landmarks.parts()])[48:68]  # Lip region
        x, y, w, h = cv2.boundingRect(landmarks)
        roi = gray_frame[y:y+h, x:x+w]
    else:
        roi = gray_frame
    
    # Resize to 64x64
    resized_frame = cv2.resize(roi, (64, 64))
    
    # Move to GPU and normalize
    frame_gpu = cp.asarray(resized_frame, dtype=cp.float32)
    frame_gpu = frame_gpu / 255.0  # Normalize to 0-1 range
    
    return frame_gpu

def adjust_video_length(frames_list, target_length=60):
    # Convert list of CuPy arrays to a single stacked array
    frames = cp.stack(frames_list)
    length = len(frames)
    
    if length > target_length:
        return cp.asnumpy(frames[:target_length])
    
    # Move to CPU for padding operation
    frames_cpu = cp.asnumpy(frames)
    pad_width = target_length - length
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    
    padded = np.pad(frames_cpu, ((left_pad, right_pad), (0, 0), (0, 0)), mode='reflect')
    return padded[:target_length]

def process_video(video_path, output_folder, video_name):
    print(f"Processing {video_name}")
    
    # Read video frames
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    
    # Define augmentation combinations
    augmentation_combinations = {
        '': [],
        '_aug_brightness': [augment_brightness],
        '_aug_contrast': [augment_contrast],
        '_aug_rotation': [augment_rotation],
        '_aug_flip': [augment_flip],
        '_aug_gaussian': [augment_gaussian_noise],
        '_aug_brightness_contrast': [augment_brightness, augment_contrast],
        '_aug_brightness_rotation': [augment_brightness, augment_rotation],
        '_aug_brightness_flip': [augment_brightness, augment_flip],
        '_aug_brightness_gaussian': [augment_brightness, augment_gaussian_noise],
        '_aug_contrast_rotation': [augment_contrast, augment_rotation],
        '_aug_contrast_flip': [augment_contrast, augment_flip],
        '_aug_contrast_gaussian': [augment_contrast, augment_gaussian_noise],
        '_aug_rotation_flip': [augment_rotation, augment_flip],
        '_aug_rotation_gaussian': [augment_rotation, augment_gaussian_noise],
        '_aug_flip_gaussian': [augment_flip, augment_gaussian_noise],
        '_aug_brightness_contrast_rotation_flip_gaussian': [
            augment_brightness, augment_contrast, augment_rotation, augment_flip, augment_gaussian_noise
        ]
    }
    
    # Process frames
    processed_frames = [preprocess_frame(frame) for frame in frames]
    
    for suffix, aug_list in augmentation_combinations.items():
        augmented_frames = []
        
        for frame_gpu in processed_frames:
            current_frame = cp.copy(frame_gpu)
            
            # Apply augmentations
            for aug_func in aug_list:
                current_frame = aug_func(current_frame)
            
            augmented_frames.append(current_frame)
        
        # Adjust to 60 frames and save
        adjusted_frames = adjust_video_length(augmented_frames)
        output_path = os.path.join(output_folder, f"{video_name}{suffix}.npy")
        np.save(output_path, adjusted_frames.astype(np.float16))
        print(f"Saved: {output_path}")
        
        # Free GPU memory
        cp.get_default_memory_pool().free_all_blocks()

def preprocess_videos(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    total_videos = sum([len(files) for _, _, files in os.walk(input_folder)])
    processed = 0
    
    for session_folder in os.listdir(input_folder):
        session_path = os.path.join(input_folder, session_folder)
        if os.path.isdir(session_path):
            for video_file in os.listdir(session_path):
                video_path = os.path.join(session_path, video_file)
                video_name = os.path.splitext(video_file)[0]
                
                try:
                    process_video(video_path, output_folder, video_name)
                    processed += 1
                    print(f"Progress: {processed}/{total_videos} videos processed")
                except Exception as e:
                    print(f"Error processing {video_name}: {e}")
                    
                # Free GPU memory after each video
                cp.get_default_memory_pool().free_all_blocks()

if __name__ == "__main__":
    input_folder = 'data'
    output_folder = 'preprocessed_data'
    
    # Clear any existing GPU memory
    cp.get_default_memory_pool().free_all_blocks()
    
    preprocess_videos(input_folder, output_folder)