In [None]:
# Install the third-party packages listed below into the running kernel's
# environment (%pip targets the kernel's environment rather than the shell's).
# NOTE(review): no versions are pinned, so a fresh install may pull different
# releases than the ones this notebook was developed with — consider pinning.
%pip install numpy torch torchvision opencv-python matplotlib scikit-learn pandas albumentations PyYAML tqdm seaborn

# Data Preprocessing for Video Object Detection & Segmentation

This notebook covers the data preprocessing steps for preparing video data to train the YOLOv5 and U-Net models. It applies data augmentation techniques — random cropping and color jitter — to every frame of the raw videos and saves the augmented frames as JPEG images.

In [None]:
import os
import cv2
import numpy as np
import random
from glob import glob
from torchvision import transforms

# Dataset locations (relative paths, resolved against the current working directory)
raw_data_path = '../data/raw/'                # input videos are read from here
annotation_path = '../data/annotations/'      # annotation files
processed_data_path = '../data/processed/'    # augmented frames are written here

# Ensure the output directory is present before anything is saved into it;
# exist_ok=True keeps re-running this cell harmless.
os.makedirs(name=processed_data_path, exist_ok=True)

# Data augmentation functions
def random_crop(image, crop_size):
    """Return a randomly positioned window of ``image``.

    Args:
        image: Array whose first two axes are height and width, e.g. an
            ``(H, W, C)`` colour frame or an ``(H, W)`` grayscale frame.
        crop_size: ``(crop_height, crop_width)`` in pixels.

    Returns:
        ``image[top:top + crop_height, left:left + crop_width]`` for a
        uniformly drawn ``top``/``left``; any trailing axes are kept whole.
        The result is a slice of the input, not an independent copy.

    Raises:
        ValueError: If ``crop_size`` is not positive or is larger than the
            image in either dimension.
    """
    # Only the two leading axes are needed, so 2-D (grayscale) inputs work too.
    h, w = image.shape[:2]
    ch, cw = crop_size
    if ch <= 0 or cw <= 0:
        raise ValueError(f'crop_size must be positive, got {(ch, cw)}')
    if ch > h or cw > w:
        # Without this check random.randint fails with an opaque
        # "empty range" error that does not mention the image at all.
        raise ValueError(
            f'crop_size {(ch, cw)} does not fit inside an image of size {(h, w)}'
        )
    # randint is inclusive on both ends, so h - ch is the last valid offset.
    top = random.randint(0, h - ch)
    left = random.randint(0, w - cw)
    return image[top:top + ch, left:left + cw]

def color_jitter(image, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1):
    """Randomly perturb brightness, contrast, saturation and hue of an image.

    Args:
        image: A PIL image or tensor (handled directly by
            ``transforms.ColorJitter``), or a NumPy array. Three-channel
            arrays are treated as OpenCV-style BGR ``uint8`` frames; other
            arrays are handed to ``transforms.ToPILImage`` without any
            channel reordering.
        brightness, contrast, saturation, hue: Jitter strengths, forwarded
            unchanged to ``transforms.ColorJitter``.

    Returns:
        The jittered image in the same kind of container as the input:
        PIL image / tensor inputs return whatever ``ColorJitter`` returns,
        NumPy inputs return a new NumPy array (BGR again for 3-channel input).
    """
    transform = transforms.ColorJitter(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)
    if not isinstance(image, np.ndarray):
        # PIL images and tensors are what ColorJitter supports natively.
        return transform(image)

    # ColorJitter rejects raw ndarrays, and it interprets channels as RGB
    # while OpenCV frames are BGR — so reorder, wrap in a PIL image, jitter,
    # and undo both steps afterwards.
    is_bgr = image.ndim == 3 and image.shape[2] == 3
    rgb = image[:, :, ::-1] if is_bgr else image
    pil_image = transforms.ToPILImage()(np.ascontiguousarray(rgb))
    jittered = np.array(transform(pil_image))
    if is_bgr:
        # ascontiguousarray: the reversed view has negative strides.
        jittered = np.ascontiguousarray(jittered[:, :, ::-1])
    return jittered

# Load raw video files (sorted: glob order is arbitrary, and the frame
# numbering below should be reproducible from run to run)
video_files = sorted(glob(os.path.join(raw_data_path, '*.mp4')))
if not video_files:
    print('Warning: no .mp4 files found in', raw_data_path)

crop_size = (256, 256)  # (height, width) of every saved frame

# One counter shared by all videos. Restarting it for each video would make
# every video write frame_0.jpg, frame_1.jpg, ... on top of the previous
# video's output.
frame_count = 0
skipped_frames = 0

# Process each video file
for video_file in video_files:
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print('Warning: could not open video, skipping:', video_file)
            continue

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read error): move on to the next video.
                break

            # A crop window cannot be larger than the frame it is cut from.
            if frame.shape[0] < crop_size[0] or frame.shape[1] < crop_size[1]:
                skipped_frames += 1
                continue

            # Apply random crop
            cropped_frame = random_crop(frame, crop_size)

            # Apply color jitter. ColorJitter works on PIL images / tensors
            # and interprets channels as RGB, whereas OpenCV yields BGR
            # ndarrays — convert on the way in and back on the way out so
            # cv2.imwrite receives a BGR ndarray again.
            rgb_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
            jittered_image = color_jitter(transforms.ToPILImage()(rgb_frame))
            jittered_frame = cv2.cvtColor(np.array(jittered_image), cv2.COLOR_RGB2BGR)

            # Save processed frame
            processed_frame_path = os.path.join(processed_data_path, f'frame_{frame_count}.jpg')
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(processed_frame_path, jittered_frame):
                raise OSError(f'Failed to write frame to {processed_frame_path}')
            frame_count += 1
    finally:
        # Release the capture handle even if processing raised.
        cap.release()

if skipped_frames:
    print(f'Warning: skipped {skipped_frames} frame(s) smaller than crop size {crop_size}')
print(f'Saved {frame_count} frame(s) from {len(video_files)} video file(s).')
print('Data preprocessing completed. Processed frames saved to:', processed_data_path)