# Video -> Images + Annotations

- Takes a video and a model.pt as the inputs
- Saves all frames as images
- Saves the images + annotations in a separate directory for frames where the model detected an object

### Library Imports

In [1]:
import cv2
import os
from tqdm import tqdm
from ultralytics import YOLO


### Set Variables

In [2]:
# Path of the input video that frames are extracted from
video_path = ''
# Path of the model weights file (e.g. model.pt) loaded via ultralytics.YOLO
model_path = ''
# Directory that receives every extracted frame as frame_XXXXX.jpg
frames_folder = ''
# Directory that receives only the frames on which the model detected an object
filtered_folder = ''
# Passed to the model as `conf=` and also used as the minimum confidence a
# detection needs for its frame to be kept
confidence_threshold = 0.7
# Passed to the model as `iou=` (presumably the NMS IoU threshold -- confirm
# against the Ultralytics documentation)
iou_threshold = 0.4

### Extract Frames

In [None]:
# Create the output directories if they do not exist yet
os.makedirs(frames_folder, exist_ok=True)
os.makedirs(filtered_folder, exist_ok=True)

# Step 1: Extract frames from video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently reporting "Extracted 0 frames"
    raise IOError(f"Could not open video '{video_path}'")

# The reported frame count may be unreliable (0 or negative) for some sources;
# in that case let tqdm run without a known total.
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_id = 0

print("[INFO] Extracting frames from video...")
try:
    with tqdm(total=total_frames if total_frames > 0 else None,
              desc="Extracting Frames") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read error): stop extracting
                break
            frame_path = os.path.join(frames_folder, f"frame_{frame_id:05d}.jpg")
            # cv2.imwrite signals failure through its return value, not an exception
            if not cv2.imwrite(frame_path, frame):
                raise IOError(f"Could not write frame to '{frame_path}'")
            frame_id += 1
            pbar.update(1)
finally:
    # Always free the capture handle, even if writing a frame failed
    cap.release()

print(f"[INFO] Extracted {frame_id} frames to '{frames_folder}'.")

### Run the Model + Filter Frames + Save Annotations

In [None]:
import shutil

# Step 2: Load model
print("[INFO] Loading YOLO model...")
model = YOLO(model_path)

# Step 3: Run detection on each frame and filter
print("[INFO] Running detection and filtering frames...")
# Only feed image files to the model; the folder may contain other entries
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
frame_files = sorted(
    f for f in os.listdir(frames_folder) if f.lower().endswith(image_extensions)
)
filtered_count = 0

for file_name in tqdm(frame_files, desc="Filtering Frames"):
    img_path = os.path.join(frames_folder, file_name)

    results = model(img_path, conf=confidence_threshold, iou=iou_threshold)
    detections = results[0].boxes

    if detections is None or len(detections) == 0:
        continue

    # Keep only detections that reach the confidence threshold
    kept = [
        (int(cls_id), box)
        for cls_id, conf, box in zip(
            detections.cls.tolist(),
            detections.conf.tolist(),
            detections.xywhn.tolist(),
        )
        if conf >= confidence_threshold
    ]
    if not kept:
        continue

    # Copy the original file byte-for-byte instead of decoding and re-encoding
    # it, which would recompress the JPEG a second time.
    save_path = os.path.join(filtered_folder, file_name)
    if os.path.abspath(save_path) != os.path.abspath(img_path):
        shutil.copy2(img_path, save_path)

    # Save the annotations next to the image, one detection per line:
    # "<class_id> <x_center> <y_center> <width> <height>" with normalized values
    label_path = os.path.join(
        filtered_folder, os.path.splitext(file_name)[0] + '.txt'
    )
    with open(label_path, 'w') as label_file:
        for cls_id, (x_center, y_center, width, height) in kept:
            label_file.write(
                f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
            )

    filtered_count += 1

print(f"\nDone! {filtered_count} filtered frames saved to '{filtered_folder}'.")


# Crop + Center Images

- Finds the largest bounding box
- Crops around the bounding box, leaving a specified % of margin
- Pads the image so that the margin is consistent wherever necessary

### Library Imports

In [2]:
import os
import cv2
import shutil
import numpy as np
from pathlib import Path

### Functions

In [1]:
def read_yolo_label(label_path):
    """Read a YOLO label file and return its bounding boxes.

    Each non-blank line must hold five whitespace-separated numbers:
    ``class_id x_center y_center width height``.

    Args:
        label_path: Path of the label text file.

    Returns:
        A list of dicts with the keys ``class_id`` (int), ``x_center``,
        ``y_center``, ``width`` and ``height`` (floats), in file order.

    Raises:
        ValueError: If a non-blank line does not contain exactly five
            numeric fields.
    """
    bboxes = []
    with open(label_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no annotation
                continue
            if len(fields) != 5:
                raise ValueError(
                    f"{label_path}:{line_number}: expected 5 fields, got {len(fields)}"
                )
            class_id, x_center, y_center, width, height = map(float, fields)
            bboxes.append({
                'class_id': int(class_id),
                'x_center': x_center,
                'y_center': y_center,
                'width': width,
                'height': height
            })
    return bboxes

def find_largest_bbox(bboxes):
    """Return the box whose width * height is greatest, or None if there are no boxes.

    When several boxes share the largest area, the first of them is returned.
    """
    if bboxes:
        index_of_largest = np.argmax(
            [box['width'] * box['height'] for box in bboxes]
        )
        return bboxes[index_of_largest]
    return None

def denormalize_bbox(bbox, img_width, img_height):
    """Turn a normalized centre/size box into pixel corner coordinates.

    Returns the tuple ``(x_min, y_min, x_max, y_max)``; values are not
    rounded or clamped to the image.
    """
    centre_x = bbox['x_center'] * img_width
    centre_y = bbox['y_center'] * img_height
    half_w = bbox['width'] * img_width / 2
    half_h = bbox['height'] * img_height / 2
    return (
        centre_x - half_w,
        centre_y - half_h,
        centre_x + half_w,
        centre_y + half_h,
    )

def calculate_crop_coords(x_min, y_min, x_max, y_max, img_width, img_height, padding_ratio=0.1):
    """Grow a pixel box by a margin and return integer crop coordinates.

    The margin on each side is ``padding_ratio`` times the box width
    (horizontally) or height (vertically); the default is 10%. The result is
    deliberately not clamped to the image, so it may extend past the image
    borders. ``img_width`` and ``img_height`` are accepted but not used.
    Fractional coordinates are truncated toward zero by ``int``.

    Returns:
        ``(crop_x_min, crop_y_min, crop_x_max, crop_y_max)`` as ints.
    """
    margin_x = (x_max - x_min) * padding_ratio
    margin_y = (y_max - y_min) * padding_ratio

    return (
        int(x_min - margin_x),
        int(y_min - margin_y),
        int(x_max + margin_x),
        int(y_max + margin_y),
    )

def update_bboxes(bboxes, crop_x_min, crop_y_min, crop_width, crop_height, img_width, img_height):
    """Re-express bounding boxes relative to a crop window.

    A box is kept only when it has positive size and its centre lies inside
    the crop window. Kept boxes are clipped to the window, so every value
    written out stays within [0, 1] even when the box only partly overlaps
    the crop.

    Args:
        bboxes: Boxes normalized to the original image (dicts with
            ``class_id``, ``x_center``, ``y_center``, ``width``, ``height``).
        crop_x_min: Left edge of the crop in original-image pixels (may be negative).
        crop_y_min: Top edge of the crop in original-image pixels (may be negative).
        crop_width: Crop width in pixels.
        crop_height: Crop height in pixels.
        img_width: Original image width in pixels.
        img_height: Original image height in pixels.

    Returns:
        A list of box dicts normalized to the crop dimensions. The list is
        empty when the crop has no area.
    """
    if crop_width <= 0 or crop_height <= 0:
        # Nothing can be normalized against an empty crop (avoids division by zero)
        return []

    updated_bboxes = []
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = denormalize_bbox(bbox, img_width, img_height)

        # Shift the box into the crop's coordinate frame
        new_x_min = x_min - crop_x_min
        new_y_min = y_min - crop_y_min
        new_x_max = x_max - crop_x_min
        new_y_max = y_max - crop_y_min

        # Discard degenerate boxes
        if new_x_max <= new_x_min or new_y_max <= new_y_min:
            continue

        # Discard boxes whose centre falls outside the crop
        center_x = (new_x_min + new_x_max) / 2 / crop_width
        center_y = (new_y_min + new_y_max) / 2 / crop_height
        if not (0 <= center_x <= 1 and 0 <= center_y <= 1):
            continue

        # Clip the part of the box that sticks out of the crop so the
        # normalized extents cannot leave the [0, 1] range
        new_x_min = max(0.0, new_x_min)
        new_y_min = max(0.0, new_y_min)
        new_x_max = min(float(crop_width), new_x_max)
        new_y_max = min(float(crop_height), new_y_max)

        updated_bboxes.append({
            'class_id': bbox['class_id'],
            'x_center': (new_x_min + new_x_max) / 2 / crop_width,
            'y_center': (new_y_min + new_y_max) / 2 / crop_height,
            'width': (new_x_max - new_x_min) / crop_width,
            'height': (new_y_max - new_y_min) / crop_height
        })
    return updated_bboxes

def save_updated_label(label_path, updated_bboxes, output_label_path):
    """Write bounding boxes to a YOLO label file.

    Args:
        label_path: Path of the source label file. Unused; kept so existing
            callers do not need to change.
        updated_bboxes: Box dicts with ``class_id``, ``x_center``,
            ``y_center``, ``width`` and ``height``.
        output_label_path: Destination file. Missing parent directories are
            created. An empty box list produces an empty file.
    """
    output_dir = os.path.dirname(output_label_path)
    if output_dir:
        # os.makedirs('') raises, so only create a directory when the path has one
        os.makedirs(output_dir, exist_ok=True)
    with open(output_label_path, 'w') as f:
        for bbox in updated_bboxes:
            f.write(f"{bbox['class_id']} {bbox['x_center']:.6f} {bbox['y_center']:.6f} "
                    f"{bbox['width']:.6f} {bbox['height']:.6f}\n")

def _copy_image_unchanged(image_path, output_image_dir, image_file,
                          output_label_dir=None, label_file=None):
    """Copy an image verbatim and, if a label name is given, write an empty label file."""
    shutil.copy2(image_path, os.path.join(output_image_dir, image_file))
    if output_label_dir is not None and label_file is not None:
        with open(os.path.join(output_label_dir, label_file), 'w'):
            pass


def _crop_with_black_padding(image, crop_x_min, crop_y_min, crop_x_max, crop_y_max):
    """Cut the crop window out of ``image``, filling areas outside the image with black."""
    img_height, img_width = image.shape[:2]
    crop_width = crop_x_max - crop_x_min
    crop_height = crop_y_max - crop_y_min

    # Start from a black canvas the size of the crop window
    cropped_image = np.zeros((crop_height, crop_width, 3), dtype=np.uint8)

    # Part of the crop window that actually lies inside the original image
    src_x_min = max(0, crop_x_min)
    src_y_min = max(0, crop_y_min)
    src_x_max = min(img_width, crop_x_max)
    src_y_max = min(img_height, crop_y_max)

    # Where that part lands on the canvas: offset by however far the window
    # extends past the image's left/top border
    dst_x_min = max(0, -crop_x_min)
    dst_y_min = max(0, -crop_y_min)
    dst_x_max = dst_x_min + (src_x_max - src_x_min)
    dst_y_max = dst_y_min + (src_y_max - src_y_min)

    if src_x_max > src_x_min and src_y_max > src_y_min:
        cropped_image[dst_y_min:dst_y_max, dst_x_min:dst_x_max] = \
            image[src_y_min:src_y_max, src_x_min:src_x_max]
    return cropped_image


def process_split(data_dir, split, output_dir):
    """Crop every image of a split around its largest bounding box.

    Images are read from ``<data_dir>/images/<split>`` and labels from
    ``<data_dir>/labels/<split>``. Results are written to
    ``<output_dir>/cropped_images/<split>`` and
    ``<output_dir>/cropped_labels/<split>``.

    Per image:
      * unreadable image: reported and skipped;
      * no label file: image copied unchanged, no label written;
      * label file without boxes, or a crop with no area: image copied
        unchanged and an empty label file written;
      * otherwise: image cropped (padded with black where the crop leaves the
        image) and the labels rewritten relative to the crop.

    Args:
        data_dir: Root directory of the source dataset.
        split: Name of the split sub-directory (e.g. ``'train'``).
        output_dir: Root directory for the cropped dataset.
    """
    image_dir = os.path.join(data_dir, 'images', split)
    label_dir = os.path.join(data_dir, 'labels', split)
    output_image_dir = os.path.join(output_dir, 'cropped_images', split)
    output_label_dir = os.path.join(output_dir, 'cropped_labels', split)

    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)

    # Same extension set the blur stage accepts, so no image type is dropped here;
    # sorted so the processing order is deterministic
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    image_files = sorted(
        f for f in os.listdir(image_dir) if f.lower().endswith(image_extensions)
    )

    for image_file in image_files:
        image_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(image_dir, image_file)

        # Corresponding label file path
        label_file = image_name + '.txt'
        label_path = os.path.join(label_dir, label_file)

        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image {image_path}")
            continue

        if not os.path.exists(label_path):
            # No label file: simply copy the image
            _copy_image_unchanged(image_path, output_image_dir, image_file)
            print(f"No label for {image_file}. Image copied without cropping.")
            continue

        img_height, img_width = image.shape[:2]
        bboxes = read_yolo_label(label_path)
        largest_bbox = find_largest_bbox(bboxes)

        if largest_bbox is None:
            print(f"No valid bounding boxes in {label_file}. Copying image as is.")
            _copy_image_unchanged(
                image_path, output_image_dir, image_file, output_label_dir, label_file
            )
            continue

        # Crop window around the largest box, including the margin
        x_min, y_min, x_max, y_max = denormalize_bbox(largest_bbox, img_width, img_height)
        crop_x_min, crop_y_min, crop_x_max, crop_y_max = calculate_crop_coords(
            x_min, y_min, x_max, y_max, img_width, img_height
        )
        crop_width = crop_x_max - crop_x_min
        crop_height = crop_y_max - crop_y_min

        if crop_width <= 0 or crop_height <= 0:
            print(f"Invalid crop dimensions for {image_path}. Copying image as is.")
            _copy_image_unchanged(
                image_path, output_image_dir, image_file, output_label_dir, label_file
            )
            continue

        cropped_image = _crop_with_black_padding(
            image, crop_x_min, crop_y_min, crop_x_max, crop_y_max
        )

        # Re-express all boxes relative to the crop window
        updated_bboxes = update_bboxes(
            bboxes, crop_x_min, crop_y_min, crop_width, crop_height, img_width, img_height
        )

        cv2.imwrite(os.path.join(output_image_dir, image_file), cropped_image)
        save_updated_label(
            label_path, updated_bboxes, os.path.join(output_label_dir, label_file)
        )

### Main

In [None]:
def main(
    data_dir='/home/ishaan/Documents/AppRely/Repositories/ultralytics/data/2label_background',
    output_dir='/home/ishaan/Documents/AppRely/Repositories/ultralytics/data/2label_background_cropCentred',
    splits=('train', 'val'),
):
    """Crop and centre every requested split of a dataset.

    Args:
        data_dir: Root of the source dataset (expects ``images/<split>`` and
            ``labels/<split>`` below it).
        output_dir: Root directory for the cropped dataset; created if missing.
        splits: Names of the splits to process, e.g. ``('test',)``.
    """
    os.makedirs(output_dir, exist_ok=True)

    for split in splits:
        print(f"Processing {split} split...")
        process_split(data_dir, split, output_dir)
    print("Processing complete.")


if __name__ == '__main__':
    main()

# Blur Images

- If an image has a resolution > 800x800 pixels
- Fetches a kernel of an appropriate size based on the aspect ratio and resolution of the image
- Applies Gaussian Blur with the fetched kernel size and saves the image to a new directory with the same structure

### Library Imports

In [3]:
import os
import shutil
import cv2
from pathlib import Path
import yaml

### Functions

In [4]:
def create_directory_structure(src_root, dst_root):
    """Create the dataset directory layout in the destination and carry over data.yaml.

    Creates ``images/{train,val}`` and ``labels/{train,val}`` below
    ``dst_root``. If ``<src_root>/data.yaml`` exists it is written to
    ``dst_root``; when it is a mapping with a ``path`` key, that key is
    repointed at the resolved ``dst_root``. A warning is printed if the file
    is missing.

    Args:
        src_root: Root directory of the source dataset.
        dst_root: Root directory of the dataset being created.
    """
    # Ensure the destination root exists
    os.makedirs(dst_root, exist_ok=True)

    # Create images and labels directories
    for kind in ("images", "labels"):
        for split in ("train", "val"):
            os.makedirs(os.path.join(dst_root, kind, split), exist_ok=True)

    yaml_src = os.path.join(src_root, "data.yaml")
    yaml_dst = os.path.join(dst_root, "data.yaml")

    if not os.path.exists(yaml_src):
        print(f"Warning: {yaml_src} not found. YAML file not copied.")
        return

    with open(yaml_src, 'r') as file:
        yaml_data = yaml.safe_load(file)

    if not isinstance(yaml_data, dict):
        # An empty file loads as None and a list/scalar document has no 'path'
        # key; there is nothing to update, so copy the file verbatim
        shutil.copy2(yaml_src, yaml_dst)
        return

    if 'path' in yaml_data:
        # Point the dataset root at the new directory
        yaml_data['path'] = str(Path(dst_root).resolve())

    with open(yaml_dst, 'w') as file:
        yaml.dump(yaml_data, file, default_flow_style=False)

def checkBaseDimensions(image, min_side=800):
    """Tell whether an image is large enough to be blurred.

    Args:
        image: Image array whose first two axes are height and width, or None.
            Both 2-D (single channel) and 3-D arrays are accepted.
        min_side: Both height and width must be strictly greater than this
            number of pixels.

    Returns:
        True if both dimensions exceed ``min_side``; False otherwise or when
        ``image`` is None.
    """
    if image is None:
        return False
    # Only the first two axes matter; slicing avoids failing on 2-D arrays
    height, width = image.shape[:2]
    return height > min_side and width > min_side

def fetchDynamicKernel(image):
    """Derive a blur kernel size from an image's resolution and aspect ratio.

    Each kernel side starts at 1% of the matching image side. The width is
    scaled up by ``aspect_ratio / 2`` and the height by ``2 / aspect_ratio``
    whenever that factor exceeds 1. Even results are bumped to the next odd
    number.

    Args:
        image: Image array whose first two axes are height and width
            (2-D and 3-D arrays are both accepted).

    Returns:
        ``(kernel_width, kernel_height)`` as odd positive ints.
    """
    # Only the first two axes matter; slicing avoids failing on 2-D arrays
    height, width = image.shape[:2]
    aspect_ratio = width / height
    kernel_width = int(width * 0.01 * max(1, aspect_ratio/2))
    kernel_height = int(height * 0.01 * max(1, 2/aspect_ratio))
    # Make both odd (this also turns a 0 into 1)
    if kernel_width % 2 == 0:
        kernel_width += 1
    if kernel_height % 2 == 0:
        kernel_height += 1

    # Width and height are computed independently, so the kernel may be non-square
    return (kernel_width, kernel_height)

def apply_blur_to_images(src_root, dst_root):
    """Blur the train and val images of a dataset into a mirrored destination.

    For each of ``images/train`` and ``images/val`` under ``src_root``, every
    .jpg/.jpeg/.png/.bmp file is read, checked with ``checkBaseDimensions``,
    blurred with a Gaussian kernel sized by ``fetchDynamicKernel`` and written
    under the same name below ``dst_root``. Unreadable or low-resolution
    images are reported and skipped (nothing is written for them). A missing
    source directory is reported and skipped.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')

    for split in ("train", "val"):
        src_dir = os.path.join(src_root, "images", split)
        dst_dir = os.path.join(dst_root, "images", split)

        if not os.path.exists(src_dir):
            print(f"Warning: Source directory {src_dir} does not exist. Skipping.")
            continue

        print(f"Processing images in {src_dir}...")
        for img_file in os.listdir(src_dir):
            if not img_file.lower().endswith(image_suffixes):
                continue

            src_path = os.path.join(src_dir, img_file)
            dst_path = os.path.join(dst_dir, img_file)

            img = cv2.imread(src_path)
            if img is None:
                print(f"Warning: Could not read {src_path}. Skipping.")
                continue
            if not checkBaseDimensions(img):
                print(f"Warning: Image {src_path} has a low resolution. Skipping.")
                continue

            # Kernel size depends on this image's resolution and aspect ratio
            blur_kernel_size = fetchDynamicKernel(img)
            print(blur_kernel_size)
            print("\n\n")

            # Sigma 0 lets OpenCV derive it from the kernel size
            cv2.imwrite(dst_path, cv2.GaussianBlur(img, blur_kernel_size, 0))
            print(f"Blurred and saved: {dst_path}")

def copy_label_files(src_root, dst_root):
    """Copy the .txt label files of the train and val splits to the destination.

    Files are taken from ``labels/train`` and ``labels/val`` under
    ``src_root`` and copied under the same name below ``dst_root``; the
    destination directories must already exist. A missing source directory is
    reported and skipped.
    """
    for split in ("train", "val"):
        src_dir = os.path.join(src_root, "labels", split)
        dst_dir = os.path.join(dst_root, "labels", split)

        if not os.path.exists(src_dir):
            print(f"Warning: Source directory {src_dir} does not exist. Skipping.")
            continue

        print(f"Copying labels from {src_dir}...")
        for label_file in os.listdir(src_dir):
            if not label_file.lower().endswith('.txt'):
                continue
            dst_path = os.path.join(dst_dir, label_file)
            shutil.copy2(os.path.join(src_dir, label_file), dst_path)
            print(f"Copied: {dst_path}")

### Main

In [None]:
def main(
    src_data_dir="/home/ishaan/Documents/AppRely/Repositories/ultralytics/data/1label_background_cropCentred",
    dst_data_dir="/home/ishaan/Documents/AppRely/Repositories/ultralytics/data/1label_background_cropCentred_blurred",
):
    """Create a blurred copy of a dataset.

    Args:
        src_data_dir: Root directory of the source dataset.
        dst_data_dir: Root directory of the blurred dataset to create.
    """
    # Create the destination directory structure
    create_directory_structure(src_data_dir, dst_data_dir)

    # Apply blur to all images
    apply_blur_to_images(src_data_dir, dst_data_dir)

    # Copy all label files
    copy_label_files(src_data_dir, dst_data_dir)

    print(f"\nDone! Blurred dataset created at {dst_data_dir}")


if __name__ == "__main__":
    main()