# Occlusion via cv2

In [None]:
import cv2
import numpy as np
import os
import random
import matplotlib.pyplot as plt

def add_occlusion(image, occlusion_type="rectangle", mask_image_path=None):
    """Draw a synthetic occlusion onto ``image`` in place and return it.

    Args:
        image: NumPy image array; it is modified in place. Only its first two
            dimensions (height, width) are read, except that the overlay
            variant of ``"mask"`` writes to three colour channels.
        occlusion_type: ``"rectangle"`` (random black box), ``"blur"``
            (random Gaussian-blurred region), ``"sunglasses"`` (two filled
            ellipses joined by a line) or ``"mask"``. Any other value leaves
            the image untouched.
        mask_image_path: Optional path to a 4-channel overlay image used by
            ``"mask"``; channel index 3 is used as the alpha channel. When the
            path is empty or does not exist, a grey rectangle is drawn over
            the lower face area instead.

    Returns:
        The same array object that was passed in.
    """
    # Slice instead of unpacking three values so 2-D arrays are accepted too.
    h, w = image.shape[:2]

    if occlusion_type == "rectangle":
        # Random occlusion with a black rectangle
        x1, y1 = random.randint(0, w // 2), random.randint(0, h // 2)
        x2, y2 = random.randint(x1 + w // 4, w), random.randint(y1 + h // 4, h)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 0), -1)

    elif occlusion_type == "blur":
        # Random blur over an area
        x1, y1 = random.randint(0, w // 2), random.randint(0, h // 2)
        x2, y2 = random.randint(x1 + w // 4, w), random.randint(y1 + h // 4, h)
        region = image[y1:y2, x1:x2]
        # For images narrower/shorter than 4 px the region can be empty;
        # there is nothing to blur in that case.
        if region.size:
            image[y1:y2, x1:x2] = cv2.GaussianBlur(region, (21, 21), 30)

    elif occlusion_type == "sunglasses":
        # Draw sunglasses dynamically
        y_offset = h // 3
        x_offset = w // 4
        sunglass_width = w // 2
        sunglass_height = h // 6

        lens_axes = (sunglass_width // 4, sunglass_height // 2)
        left_center = (x_offset + sunglass_width // 4, y_offset)
        right_center = (x_offset + 3 * sunglass_width // 4, y_offset)
        cv2.ellipse(image, left_center, lens_axes, 0, 0, 360, (0, 0, 0), -1)
        cv2.ellipse(image, right_center, lens_axes, 0, 0, 360, (0, 0, 0), -1)
        # Bridge between the two lenses
        cv2.line(image, left_center, right_center, (0, 0, 0), 5)

    elif occlusion_type == "mask":
        if mask_image_path and os.path.exists(mask_image_path):
            # Load mask image with alpha channel
            mask = cv2.imread(mask_image_path, cv2.IMREAD_UNCHANGED)
            # A single-channel file loads as a 2-D array, so check ndim
            # before touching shape[2].
            if mask is None or mask.ndim != 3 or mask.shape[2] != 4:
                print(f"Error loading mask image: {mask_image_path}")
                return image

            mask_h, mask_w = mask.shape[:2]

            # Resize mask while maintaining aspect ratio
            scale = (w / mask_w) * 0.9  # Slightly smaller than face width
            new_w = int(mask_w * scale)
            new_h = int(mask_h * scale)
            if new_w < 1 or new_h < 1:
                # Image too small for the overlay to have any pixels.
                return image
            mask = cv2.resize(mask, (new_w, new_h))

            # Define mask position (lower face)
            x_offset = (w - new_w) // 2
            y_offset = int(h * 0.65)  # Position around mouth/nose

            # Crop the overlay to the part that fits inside the image. A tall
            # overlay placed at 65% of the height would otherwise run past the
            # bottom edge and make the blend fail on mismatched shapes.
            vis_h = min(new_h, h - y_offset)
            vis_w = min(new_w, w - x_offset)
            if vis_h < 1 or vis_w < 1:
                return image
            mask = mask[:vis_h, :vis_w]

            # Extract alpha channel for blending
            alpha_s = mask[:, :, 3] / 255.0  # Normalize alpha channel
            alpha_l = 1.0 - alpha_s

            rows = slice(y_offset, y_offset + vis_h)
            cols = slice(x_offset, x_offset + vis_w)
            for c in range(3):  # Blend each channel
                image[rows, cols, c] = (
                    alpha_s * mask[:, :, c] + alpha_l * image[rows, cols, c]
                ).astype(np.uint8)
        else:
            # Draw a face mask dynamically if no mask image is provided
            mask_top = h // 2
            mask_bottom = int(h * 0.85)
            mask_left = int(w * 0.25)
            mask_right = int(w * 0.75)
            cv2.rectangle(image, (mask_left, mask_top), (mask_right, mask_bottom), (50, 50, 50), -1)

    return image
def process_images(input_folder, output_folder, occlusion_type="rectangle", mask_image_path=None):
    """Apply one occlusion type to every ``.jpg``/``.png`` file in a folder.

    Args:
        input_folder: Directory whose files are listed (not recursive).
        output_folder: Directory the occluded copies are written to under the
            same file names; it is created if missing.
        occlusion_type: Forwarded to ``add_occlusion``.
        mask_image_path: Forwarded to ``add_occlusion``.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith((".jpg", ".png")):
            continue

        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash the whole batch.
            print(f"Skipping unreadable image: {img_path}")
            continue

        img = add_occlusion(img, occlusion_type, mask_image_path)
        cv2.imwrite(os.path.join(output_folder, filename), img)
            
# Example Usage
input_folder = r"D:\Akash\Work\AI\2025\Research_Datasets\celeba_hq\images\male"
output_folder = r"D:\Akash\Work\AI\2025\Research_Datasets\celeba_hq\output"
occlusion_type = "rectangle"  # Change to "blur", "sunglasses", or "mask" as needed
mask_image_path = "mask.png"  # Provide a mask image path or None for dynamic mask

process_images(input_folder, output_folder, occlusion_type, mask_image_path)

# Display an example: the first output file (in name order) that OpenCV can
# decode. Indexing os.listdir(...)[0] directly fails on an empty folder and
# may pick a file that is not an image.
sample_img = None
for sample_name in sorted(os.listdir(output_folder)):
    sample_img = cv2.imread(os.path.join(output_folder, sample_name))
    if sample_img is not None:
        break

if sample_img is None:
    print(f"No readable image found in {output_folder}")
else:
    # OpenCV loads BGR; convert to RGB before handing the array to matplotlib
    sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
    plt.imshow(sample_img)
    plt.axis("off")
    plt.show()


## rename labels

In [None]:
import os

folder_path = r"Z:\AI\research_2023-\TechnicalResearch\202502_TC03\OCHuman_遮蔽追加\3_Annotation\labels"  # Change this if needed
prefix = "OCHuman_"  # Change this to your desired prefix

# Prepend `prefix` to every .txt label file in the folder; classes.txt keeps
# its name.
for filename in os.listdir(folder_path):
    if not filename.endswith(".txt") or filename == "classes.txt":
        continue
    if filename.startswith(prefix):
        # Already renamed by an earlier run; re-running the cell must not
        # stack the prefix a second time.
        continue

    old_path = os.path.join(folder_path, filename)
    new_path = os.path.join(folder_path, prefix + filename)
    os.rename(old_path, new_path)
    print(f'Renamed: {filename} → {prefix + filename}')

print("Renaming complete.")


# Occlusion inside YOLO Annotation

In [None]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm

# Paths
image_folder = r"D:\Akash\Work\AI\2025\TC03\OC\images"
label_folder = r"D:\Akash\Work\AI\2025\TC03\OC\labels"
save_folder = r"D:\Akash\Work\AI\2025\TC03\OC\shahei"
os.makedirs(save_folder, exist_ok=True)

# Occlusion Parameters
num_occlusions = (1, 5)  # Min and max occlusions per bounding box
occlusion_size = (20, 100)  # Min and max box size in pixels
# OpenCV draws in BGR order, so these are: Blue, Green, Red, Cyan
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

# Process Images: for every .jpg that has a label file, paint random filled
# squares inside each annotated box and save the result under save_folder.
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
for image_file in tqdm(image_files):
    img_path = os.path.join(image_folder, image_file)
    # splitext swaps only the extension; str.replace would also rewrite a
    # '.jpg' that happens to appear earlier in the name.
    label_path = os.path.join(label_folder, os.path.splitext(image_file)[0] + '.txt')

    # Images without a label file are skipped (and not copied)
    if not os.path.exists(label_path):
        continue

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f"Skipping unreadable image: {img_path}")
        continue
    h, w = img.shape[:2]

    # Read YOLO labels
    with open(label_path, 'r') as f:
        labels = f.readlines()

    for label in labels:
        parts = label.strip().split()
        if len(parts) < 5:
            continue
        # Only the first five fields are used; extra fields on a line no
        # longer break the unpacking.
        _, x_center, y_center, bbox_w, bbox_h = map(float, parts[:5])
        # Normalised centre/size -> pixel corner coordinates
        x1 = int((x_center - bbox_w / 2) * w)
        y1 = int((y_center - bbox_h / 2) * h)
        x2 = int((x_center + bbox_w / 2) * w)
        y2 = int((y_center + bbox_h / 2) * h)

        # Apply random occlusions inside the annotation box
        for _ in range(random.randint(*num_occlusions)):
            box_size = random.randint(*occlusion_size)
            occl_x1 = random.randint(x1, max(x1, x2 - box_size))
            occl_y1 = random.randint(y1, max(y1, y2 - box_size))
            # Clip so a square larger than the annotation box cannot spill
            # outside of it.
            occl_x2 = min(occl_x1 + box_size, x2)
            occl_y2 = min(occl_y1 + box_size, y2)
            color = random.choice(colors)
            cv2.rectangle(img, (occl_x1, occl_y1), (occl_x2, occl_y2), color, -1)

    # Save modified image
    save_path = os.path.join(save_folder, image_file)
    cv2.imwrite(save_path, img)

print("Occlusion augmentation completed!")


100%|██████████| 5081/5081 [01:02<00:00, 80.87it/s] 

Occlusion augmentation completed!





## Occlusion with constraints :
1. min = 25% of bb and max = 75% of bb
2. Only one Occlusion per BB

In [None]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm

# Paths
image_folder = r"D:\Akash\Work\AI\2025\TC03\OC\images"
label_folder = r"D:\Akash\Work\AI\2025\TC03\OC\labels"
save_folder = r"D:\Akash\Work\AI\2025\TC03\OC\shahei_bigger"
os.makedirs(save_folder, exist_ok=True)

# Occlusion Parameters
# OpenCV draws in BGR order, so these are: Blue, Green, Red, Cyan
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

# Process Images: one filled square per annotated box, sized between a
# quarter of the longer box side and the largest square that both fits in
# the box and covers at most 75% of its area.
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
for image_file in tqdm(image_files):
    img_path = os.path.join(image_folder, image_file)
    # splitext swaps only the extension; str.replace would also rewrite a
    # '.jpg' that happens to appear earlier in the name.
    label_path = os.path.join(label_folder, os.path.splitext(image_file)[0] + '.txt')

    # Images without a label file are skipped (and not copied)
    if not os.path.exists(label_path):
        continue

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f"Skipping unreadable image: {img_path}")
        continue
    h, w = img.shape[:2]

    # Read YOLO labels
    with open(label_path, 'r') as f:
        labels = f.readlines()

    for label in labels:
        parts = label.strip().split()
        if len(parts) < 5:
            continue
        # Only the first five fields are used; extra fields on a line no
        # longer break the unpacking.
        _, x_center, y_center, bbox_w, bbox_h = map(float, parts[:5])
        x1 = int((x_center - bbox_w / 2) * w)
        y1 = int((y_center - bbox_h / 2) * h)
        x2 = int((x_center + bbox_w / 2) * w)
        y2 = int((y_center + bbox_h / 2) * h)

        # Ensure at least 1/4th of the bbox area remains visible
        bbox_area = (x2 - x1) * (y2 - y1)
        max_occlusion_area = bbox_area * 0.75
        min_size = max((x2 - x1) // 4, (y2 - y1) // 4)
        max_size = int(min(x2 - x1, y2 - y1, (max_occlusion_area) ** 0.5))

        # Boxes whose size range is empty (e.g. very elongated ones) get no
        # occlusion at all.
        if min_size > 0 and max_size >= min_size:
            box_size = random.randint(min_size, max_size)
            occl_x1 = random.randint(x1, max(x1, x2 - box_size))
            occl_y1 = random.randint(y1, max(y1, y2 - box_size))
            occl_x2 = occl_x1 + box_size
            occl_y2 = occl_y1 + box_size
            color = random.choice(colors)
            cv2.rectangle(img, (occl_x1, occl_y1), (occl_x2, occl_y2), color, -1)

    # Save modified image
    save_path = os.path.join(save_folder, image_file)
    cv2.imwrite(save_path, img)

print("Occlusion augmentation completed!")


100%|██████████| 5081/5081 [02:23<00:00, 35.47it/s]

Occlusion augmentation completed!





## Handles even Thin BB

In [9]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm
from math import ceil

# Paths
image_folder = r"D:\Akash\Work\AI\2025\TC03\OC\images"
label_folder = r"D:\Akash\Work\AI\2025\TC03\OC\labels"
save_folder = r"D:\Akash\Work\AI\2025\TC03\OC\shahei_bigger_thin"
os.makedirs(save_folder, exist_ok=True)

# Occlusion Parameters
# OpenCV draws in BGR order, so these are: Blue, Green, Red, Cyan
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

# Process Images: one filled square per annotated box.
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
for image_file in tqdm(image_files):
    img_path = os.path.join(image_folder, image_file)
    # splitext swaps only the extension; str.replace would also rewrite a
    # '.jpg' that happens to appear earlier in the name.
    label_path = os.path.join(label_folder, os.path.splitext(image_file)[0] + '.txt')

    # Images without a label file are skipped (and not copied)
    if not os.path.exists(label_path):
        continue

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f"Skipping unreadable image: {img_path}")
        continue
    h, w = img.shape[:2]

    # Read YOLO labels
    with open(label_path, 'r') as f:
        labels = f.readlines()

    for label in labels:
        parts = label.strip().split()
        if len(parts) < 5:
            continue
        # Only the first five fields are used; the class id is not needed.
        _, x_center, y_center, bbox_w, bbox_h = map(float, parts[:5])
        x1 = int((x_center - bbox_w / 2) * w)
        y1 = int((y_center - bbox_h / 2) * h)
        x2 = int((x_center + bbox_w / 2) * w)
        y2 = int((y_center + bbox_h / 2) * h)

        # Bounding Box Area
        bbox_area = (x2 - x1) * (y2 - y1)
        max_occlusion_area = bbox_area * 0.75  # Max occlusion is 75% of bbox

        # Compute min/max occlusion sizes
        min_size = max(ceil((x2 - x1) / 4), ceil((y2 - y1) / 4), 5)  # Ensure minimum size is at least 5 pixels
        max_size = int(min(x2 - x1, y2 - y1, (max_occlusion_area) ** 0.5))  # Ensure occlusion fits inside bbox

        # Ensure occlusion is applied only if it meets the criteria
        # NOTE(review): a box whose longer side exceeds four times its shorter
        # side still ends up with min_size > max_size and is left unoccluded.
        if min_size <= max_size:
            box_size = random.randint(min_size, max_size)

            # Randomly place occlusion inside the bbox
            occl_x1 = random.randint(x1, max(x1, x2 - box_size))
            occl_y1 = random.randint(y1, max(y1, y2 - box_size))
            occl_x2 = occl_x1 + box_size
            occl_y2 = occl_y1 + box_size

            # Choose random occlusion color
            color = random.choice(colors)
            cv2.rectangle(img, (occl_x1, occl_y1), (occl_x2, occl_y2), color, -1)

    # Save modified image
    save_path = os.path.join(save_folder, image_file)
    cv2.imwrite(save_path, img)

print("Occlusion augmentation completed!")


100%|██████████| 5081/5081 [02:14<00:00, 37.65it/s]

Occlusion augmentation completed!





# 25-75% area occlusion + overlap protection

In [10]:
import os
import cv2
import random
import numpy as np

# Ensure the occlusion covers 25%-75% of each bounding box
MIN_OCCLUSION = 0.25
MAX_OCCLUSION = 0.75
ASPECT_RATIO_RANGE = (0.5, 2.0)


def apply_occlusion(image, labels):
    """Paint one random filled rectangle inside each labelled bounding box.

    Args:
        image: 3-channel image array; modified in place.
        labels: Iterable of YOLO label lines, ``class x_center y_center width
            height`` with coordinates as fractions of the image size. Lines
            with fewer than five fields (e.g. the empty string produced by an
            empty label file) are ignored; fields beyond the fifth are unused.

    Returns:
        The same ``image`` object.

    The target occlusion area is drawn between ``MIN_OCCLUSION`` and
    ``MAX_OCCLUSION`` of the box area. Area of a box that is already covered
    by rectangles painted for earlier boxes is charged against that box's own
    budget, so overlapping boxes are not pushed past ``MAX_OCCLUSION``.
    """
    height, width = image.shape[:2]

    # Rectangles painted so far as (x1, y1, x2, y2). The budget is tracked per
    # box from these; keying it by class id made every box of the same class
    # share (and exhaust) a single budget.
    painted = []
    for label in labels:
        values = label.split()
        if len(values) < 5:
            continue
        _, x_center, y_center, bbox_width, bbox_height = map(float, values[:5])

        # Clamp to the image: a negative start index would wrap around in
        # NumPy slicing and silently paint nothing.
        x1 = max(0, int((x_center - bbox_width / 2) * width))
        y1 = max(0, int((y_center - bbox_height / 2) * height))
        x2 = min(width, int((x_center + bbox_width / 2) * width))
        y2 = min(height, int((y_center + bbox_height / 2) * height))
        if x2 <= x1 or y2 <= y1:
            continue

        bbox_area = (x2 - x1) * (y2 - y1)
        max_occlusion_area = MAX_OCCLUSION * bbox_area
        min_occlusion_area = MIN_OCCLUSION * bbox_area

        # Area of this box already hidden by earlier rectangles (rectangles
        # that overlap each other are counted twice, which errs on the side
        # of less occlusion).
        already_covered = sum(
            max(0, min(x2, px2) - max(x1, px1)) * max(0, min(y2, py2) - max(y1, py1))
            for px1, py1, px2, py2 in painted
        )

        # Prevent full occlusion
        remaining_area = max_occlusion_area - already_covered
        if remaining_area <= 0:
            continue

        # Generate occlusion size within remaining limits; the lower bound is
        # capped so the draw never exceeds what is left of the budget.
        lower_area = min(min_occlusion_area, remaining_area)
        occ_area = random.uniform(lower_area, remaining_area)
        aspect_ratio = random.uniform(*ASPECT_RATIO_RANGE)  # width / height
        occ_w = int((occ_area * aspect_ratio) ** 0.5)
        occ_h = int((occ_area / aspect_ratio) ** 0.5)

        occ_x1 = random.randint(x1, max(x1, x2 - occ_w))
        occ_y1 = random.randint(y1, max(y1, y2 - occ_h))
        # Ensure occlusion stays within bounding box
        occ_x2 = min(occ_x1 + occ_w, x2)
        occ_y2 = min(occ_y1 + occ_h, y2)

        painted.append((occ_x1, occ_y1, occ_x2, occ_y2))

        # Random occlusion color (green or blue, in OpenCV's BGR order)
        color = (0, 255, 0) if random.random() < 0.5 else (255, 0, 0)
        image[occ_y1:occ_y2, occ_x1:occ_x2] = color

    return image

# Paths
image_folder = r"D:\Akash\Work\AI\2025\TC03\OC\images"
label_folder = r"D:\Akash\Work\AI\2025\TC03\OC\labels"
output_folder = r"D:\Akash\Work\AI\2025\TC03\OC\shahei_area"
os.makedirs(output_folder, exist_ok=True)


# Occlude every image that has a matching label file and write the result to
# output_folder under the same file name.
for image_file in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_file)
    label_path = os.path.join(label_folder, image_file.replace(".jpg", ".txt"))

    if not os.path.exists(label_path):
        continue

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for files it cannot decode; skipping keeps
        # one bad file from aborting the whole run.
        print(f"Error: Unable to read image {image_path}")
        continue

    with open(label_path, "r") as f:
        labels = f.read().strip().split("\n")

    occluded_image = apply_occlusion(image, labels)
    output_path = os.path.join(output_folder, image_file)
    cv2.imwrite(output_path, occluded_image)


## same with max set to 50%

In [11]:
import os
import cv2
import random
import numpy as np

# Ensure the occlusion covers 25%-50% of each bounding box
MIN_OCCLUSION = 0.25
MAX_OCCLUSION = 0.50
ASPECT_RATIO_RANGE = (0.5, 2.0)


def apply_occlusion(image, labels):
    """Paint one random filled rectangle inside each labelled bounding box.

    Args:
        image: 3-channel image array; modified in place.
        labels: Iterable of YOLO label lines, ``class x_center y_center width
            height`` with coordinates as fractions of the image size. Lines
            with fewer than five fields (e.g. the empty string produced by an
            empty label file) are ignored; fields beyond the fifth are unused.

    Returns:
        The same ``image`` object.

    The target occlusion area is drawn between ``MIN_OCCLUSION`` and
    ``MAX_OCCLUSION`` of the box area. Area of a box that is already covered
    by rectangles painted for earlier boxes is charged against that box's own
    budget, so overlapping boxes are not pushed past ``MAX_OCCLUSION``.
    """
    height, width = image.shape[:2]

    # Rectangles painted so far as (x1, y1, x2, y2). The budget is tracked per
    # box from these; keying it by class id made every box of the same class
    # share (and exhaust) a single budget.
    painted = []
    for label in labels:
        values = label.split()
        if len(values) < 5:
            continue
        _, x_center, y_center, bbox_width, bbox_height = map(float, values[:5])

        # Clamp to the image: a negative start index would wrap around in
        # NumPy slicing and silently paint nothing.
        x1 = max(0, int((x_center - bbox_width / 2) * width))
        y1 = max(0, int((y_center - bbox_height / 2) * height))
        x2 = min(width, int((x_center + bbox_width / 2) * width))
        y2 = min(height, int((y_center + bbox_height / 2) * height))
        if x2 <= x1 or y2 <= y1:
            continue

        bbox_area = (x2 - x1) * (y2 - y1)
        max_occlusion_area = MAX_OCCLUSION * bbox_area
        min_occlusion_area = MIN_OCCLUSION * bbox_area

        # Area of this box already hidden by earlier rectangles (rectangles
        # that overlap each other are counted twice, which errs on the side
        # of less occlusion).
        already_covered = sum(
            max(0, min(x2, px2) - max(x1, px1)) * max(0, min(y2, py2) - max(y1, py1))
            for px1, py1, px2, py2 in painted
        )

        # Prevent full occlusion
        remaining_area = max_occlusion_area - already_covered
        if remaining_area <= 0:
            continue

        # Generate occlusion size within remaining limits; the lower bound is
        # capped so the draw never exceeds what is left of the budget.
        lower_area = min(min_occlusion_area, remaining_area)
        occ_area = random.uniform(lower_area, remaining_area)
        aspect_ratio = random.uniform(*ASPECT_RATIO_RANGE)  # width / height
        occ_w = int((occ_area * aspect_ratio) ** 0.5)
        occ_h = int((occ_area / aspect_ratio) ** 0.5)

        occ_x1 = random.randint(x1, max(x1, x2 - occ_w))
        occ_y1 = random.randint(y1, max(y1, y2 - occ_h))
        # Ensure occlusion stays within bounding box
        occ_x2 = min(occ_x1 + occ_w, x2)
        occ_y2 = min(occ_y1 + occ_h, y2)

        painted.append((occ_x1, occ_y1, occ_x2, occ_y2))

        # Random occlusion color (green or blue, in OpenCV's BGR order)
        color = (0, 255, 0) if random.random() < 0.5 else (255, 0, 0)
        image[occ_y1:occ_y2, occ_x1:occ_x2] = color

    return image

# Paths
image_folder = r"D:\Akash\Work\AI\2025\TC03\OC\images"
label_folder = r"D:\Akash\Work\AI\2025\TC03\OC\labels"
output_folder = r"D:\Akash\Work\AI\2025\TC03\OC\shahei_50"
os.makedirs(output_folder, exist_ok=True)


# Occlude every image that has a matching label file and write the result to
# output_folder under the same file name.
for image_file in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_file)
    label_path = os.path.join(label_folder, image_file.replace(".jpg", ".txt"))

    if not os.path.exists(label_path):
        continue

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for files it cannot decode; skipping keeps
        # one bad file from aborting the whole run.
        print(f"Error: Unable to read image {image_path}")
        continue

    with open(label_path, "r") as f:
        labels = f.read().strip().split("\n")

    occluded_image = apply_occlusion(image, labels)
    output_path = os.path.join(output_folder, image_file)
    cv2.imwrite(output_path, occluded_image)


# Final

## Function to :
1. Calculate IoU (track occluded area)
2. Apply Occlusion

#### Note :
1. The aspect ratio range for occlusion boxes is set to (0.5, 2.0), meaning:
    1. The occlusion box width can be half to twice the height.
    2. This allows for rectangular occlusions but avoids extreme shapes (e.g., very long or very thin occlusions).
2. Min and max occlusion are set to 0.25 and 0.50, meaning:
    1. Min Occlusion 25% area of the box
    2. Max Occlusion 50% area of the box

In [1]:
import os
import cv2
import random


# Ensure the occlusion covers 25%-50% of each bounding box
MIN_OCCLUSION = 0.25
MAX_OCCLUSION = 0.50
ASPECT_RATIO_RANGE = (0.5, 2.0)


def iou(box1, box2):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Only the first four items of each box are read. Returns 0 when the union
    area is not positive.
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Clamp at zero so disjoint boxes yield an empty intersection
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0


def apply_occlusion(image, labels):
    """Paint one randomly coloured rectangle inside each labelled box.

    Args:
        image: 3-channel image array; modified in place.
        labels: Iterable of YOLO label lines, ``class x_center y_center width
            height`` with coordinates as fractions of the image size. Lines
            with fewer than five fields (e.g. the empty string produced by an
            empty label file) are ignored; fields beyond the fifth are unused.

    Returns:
        The same ``image`` object.

    A box is skipped when its IoU with any rectangle painted for an earlier
    box exceeds 0.5. Otherwise the target area is drawn between
    ``MIN_OCCLUSION`` and ``MAX_OCCLUSION`` of the box area, with a
    width/height ratio drawn from ``ASPECT_RATIO_RANGE``.
    """
    height, width = image.shape[:2]
    bboxes = []
    for label in labels:
        values = label.split()
        if len(values) < 5:
            continue
        class_id, x_center, y_center, bbox_width, bbox_height = map(float, values[:5])

        # Clamp to the image: a negative start index would wrap around in
        # NumPy slicing and silently paint nothing.
        x1 = max(0, int((x_center - bbox_width / 2) * width))
        y1 = max(0, int((y_center - bbox_height / 2) * height))
        x2 = min(width, int((x_center + bbox_width / 2) * width))
        y2 = min(height, int((y_center + bbox_height / 2) * height))
        if x2 <= x1 or y2 <= y1:
            continue  # nothing of this box lies inside the image

        bboxes.append((x1, y1, x2, y2, class_id))

    occluded_areas = []  # To track already occluded areas
    for x1, y1, x2, y2, class_id in bboxes:
        # NOTE(review): IoU compares the whole box with each earlier
        # rectangle, so a small box lying entirely inside a much larger
        # rectangle has a low IoU and is still processed.
        if any(iou((x1, y1, x2, y2), occ) > 0.5 for occ in occluded_areas):
            continue  # Skip if significant overlap with existing occlusion

        bbox_area = (x2 - x1) * (y2 - y1)
        min_occlusion_area = MIN_OCCLUSION * bbox_area
        max_occlusion_area = MAX_OCCLUSION * bbox_area

        occ_area = random.uniform(min_occlusion_area, max_occlusion_area)
        aspect_ratio = random.uniform(*ASPECT_RATIO_RANGE)  # width / height
        occ_w = int((occ_area * aspect_ratio) ** 0.5)
        occ_h = int((occ_area / aspect_ratio) ** 0.5)

        occ_x1 = random.randint(x1, max(x1, x2 - occ_w))
        occ_y1 = random.randint(y1, max(y1, y2 - occ_h))
        # Clip so the rectangle stays within the bounding box
        occ_x2 = min(occ_x1 + occ_w, x2)
        occ_y2 = min(occ_y1 + occ_h, y2)

        occluded_areas.append((occ_x1, occ_y1, occ_x2, occ_y2))

        # Random occlusion color
        color = tuple(random.randint(0, 255) for _ in range(3))
        image[occ_y1:occ_y2, occ_x1:occ_x2] = color

    return image

## Inputs

Root folder should have 2 folders "images" and "labels" in it

In [None]:
from pathlib import Path

# Dataset layout: <root>/images and <root>/labels are the inputs,
# <root>/Output receives the results and is created when missing.
root_folder = Path(r"D:\Akash\Work\AI\2025\TC04\test")
image_folder = root_folder / "images"
label_folder = root_folder / "labels"
output_folder = root_folder / "Output"
output_folder.mkdir(parents=True, exist_ok=True)

## Input format 2 (for YOLO dataset format)

In [14]:
from pathlib import Path
from PIL import Image
import numpy as np

# Process all images and labels

occlusion_percentage = 10
root_folder = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01") / f"occlusion_{occlusion_percentage}"

for datatype in ["train", "val"]:
    image_folder = Path(root_folder, "images", datatype)
    label_folder = Path(root_folder,  "labels", datatype)
    output_folder = Path(root_folder,  "Output", datatype)
    os.makedirs(output_folder, exist_ok=True)

    for image_file in os.listdir(image_folder):
        image_path = Path(image_folder, image_file)
        label_path = os.path.join(label_folder, image_file.replace(".jpg", ".txt"))

        if not os.path.exists(label_path):
            continue

        # Read the image through PIL, because cv2 does not read images whose
        # path contains special characters.
        try:
            # The context manager closes the file handle once pixels are copied
            with Image.open(image_path) as pil_image:
                image = np.array(pil_image.convert("RGB"))
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Convert RGB to BGR for OpenCV
        except Exception as e:
            print(f"Error: Unable to read image {image_path} - {e}")
            continue  # Skip processing this image

        with open(label_path, "r") as f:
            labels = f.read().strip().split("\n")

        occluded_image = apply_occlusion(image, labels)
        output_path = os.path.join(output_folder, image_file)

        # Writing has the same path problem as reading, so encode in memory
        # with OpenCV and let NumPy write the bytes; the path then never
        # passes through OpenCV's file API and the file name is preserved.
        extension = os.path.splitext(image_file)[1]
        ok, encoded = cv2.imencode(extension, occluded_image)
        if not ok:
            print(f"Error: Unable to encode image {output_path}")
            continue
        encoded.tofile(output_path)


## Apply Occlusion

In [4]:
# Occlude every image that has a matching label file and write the result to
# output_folder under the same file name.
for image_file in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_file)
    label_path = os.path.join(label_folder, image_file.replace(".jpg", ".txt"))

    if not os.path.exists(label_path):
        continue

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for files it cannot decode; skipping keeps
        # one bad file from aborting the whole run.
        print(f"Error: Unable to read image {image_path}")
        continue

    with open(label_path, "r") as f:
        labels = f.read().strip().split("\n")

    occluded_image = apply_occlusion(image, labels)
    output_path = os.path.join(output_folder, image_file)
    cv2.imwrite(output_path, occluded_image)


# Apply % occlusion to dataset 
1. make dataset without occlusion applied to it
2. run occlusion on the new dataset
3. add the remaining images to it

In [7]:
import os
import random
import shutil
from pathlib import Path

# Define paths
dataset_root = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01")  # Change this to your dataset root
train_images = dataset_root / "images" / "train"
val_images = dataset_root / "images" / "val"
train_labels = dataset_root / "labels" / "train"
val_labels = dataset_root / "labels" / "val"

# Percentage values
percentages = [5, 10, 15, 20, 25]


def get_image_paths(folder):
    """Return the ``*.jpg`` paths directly inside ``folder``, sorted."""
    return sorted(folder.glob("*.jpg"))


def get_label_paths(folder):
    """Return the ``*.txt`` paths directly inside ``folder``, sorted."""
    return sorted(folder.glob("*.txt"))


def pair_images_with_labels(image_paths, label_folder):
    """Pair each image with the ``<stem>.txt`` file in ``label_folder``.

    Images without a label file are left out. Matching by file stem keeps
    every image with its own label; zipping the two sorted listings shifts
    all later pairs as soon as one label is missing or an extra ``.txt``
    file is present.
    """
    pairs = []
    for image_path in image_paths:
        label_path = label_folder / f"{image_path.stem}.txt"
        if label_path.exists():
            pairs.append((image_path, label_path))
    return pairs


# Get image and label paths
train_image_paths = get_image_paths(train_images)
val_image_paths = get_image_paths(val_images)
train_label_paths = get_label_paths(train_labels)
val_label_paths = get_label_paths(val_labels)

train_pairs = pair_images_with_labels(train_image_paths, train_labels)
val_pairs = pair_images_with_labels(val_image_paths, val_labels)

# Compute total number of images for 25% subset
total_images = len(train_image_paths) + len(val_image_paths)
num_total_25 = int(total_images * 0.25)
num_train_25 = int(num_total_25 * 0.9)  # 90% of 25%
num_val_25 = num_total_25 - num_train_25  # 10% of 25%

# Randomly select 25% subset
random.seed(42)  # For reproducibility
# Cap the sample size: random.sample raises when asked for more items than
# the population holds.
train_subset_25 = random.sample(train_pairs, min(num_train_25, len(train_pairs)))
val_subset_25 = random.sample(val_pairs, min(num_val_25, len(val_pairs)))

# Function to get progressive subsets
def get_progressive_subsets(full_set, percentages, base_percentage=25):
    """Return nested prefixes of ``full_set``, one per requested percentage.

    Args:
        full_set: Sequence that represents ``base_percentage`` percent of the
            dataset; it is sliced in its existing order.
        percentages: Percentages to build subsets for.
        base_percentage: Percentage that ``full_set`` stands for (default 25).

    Returns:
        Dict mapping each percentage ``p`` to the first
        ``len(full_set) * p // base_percentage`` items of ``full_set``, so a
        smaller percentage is always a prefix of a larger one.

    Raises:
        ValueError: If ``base_percentage`` is not positive.
    """
    if base_percentage <= 0:
        raise ValueError("base_percentage must be positive")

    total = len(full_set)
    # Floor division keeps the count exact for integer percentages instead of
    # relying on the rounding of a float ratio.
    return {p: full_set[: int(total * p // base_percentage)] for p in percentages}

# Build the nested train/val subsets from the 25% selection
train_subsets = get_progressive_subsets(train_subset_25, percentages)
val_subsets = get_progressive_subsets(val_subset_25, percentages)

# Materialise one "occlusion_<p>" dataset per percentage by copying the
# selected image/label pairs; no occlusion is applied at this stage.
for p in percentages:
    print(f"Processing {p}% dataset...")

    # Target folder layout: occlusion_<p>/{images,labels}/{train,val}
    occlusion_root = dataset_root / f"occlusion_{p}"
    occlusion_train_images = occlusion_root / "images" / "train"
    occlusion_val_images = occlusion_root / "images" / "val"
    occlusion_train_labels = occlusion_root / "labels" / "train"
    occlusion_val_labels = occlusion_root / "labels" / "val"

    for target_dir in (
        occlusion_train_images,
        occlusion_val_images,
        occlusion_train_labels,
        occlusion_val_labels,
    ):
        target_dir.mkdir(parents=True, exist_ok=True)

    # (pairs to copy, image destination, label destination): train first, then val
    copy_plan = (
        (train_subsets[p], occlusion_train_images, occlusion_train_labels),
        (val_subsets[p], occlusion_val_images, occlusion_val_labels),
    )
    for pairs, image_dest, label_dest in copy_plan:
        for img_path, lbl_path in pairs:
            shutil.copy(img_path, image_dest / img_path.name)
            shutil.copy(lbl_path, label_dest / lbl_path.name)

print("Occlusion dataset creation complete!")


Processing 5% dataset...
Processing 10% dataset...
Processing 15% dataset...
Processing 20% dataset...
Processing 25% dataset...
Occlusion dataset creation complete!


# Checking subset integrity

In [18]:
from pathlib import Path

# Root folder that holds the occlusion_<p> datasets
root = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion")

# Percentage sets
percentages = [5, 10, 15, 20, 25]


def get_image_names(folder):
    """Return the set of ``*.jpg`` file names directly inside ``folder``."""
    return set(path.name for path in folder.glob("*.jpg"))


# Reference: image names of the 25% dataset
occlusion_25_train = get_image_names(root / "occlusion_25" / "images" / "train")
occlusion_25_val = get_image_names(root / "occlusion_25" / "images" / "val")

# Every smaller subset must only contain images that are also in the 25% one
for p in [5, 10, 15, 20]:
    print(f"Checking {p}% subset...")

    subset_images = root / f"occlusion_{p}" / "images"
    occlusion_train = get_image_names(subset_images / "train")
    occlusion_val = get_image_names(subset_images / "val")

    # Names present in this subset but absent from the 25% reference
    missing_train = occlusion_train.difference(occlusion_25_train)
    missing_val = occlusion_val.difference(occlusion_25_val)

    if missing_train:
        print(f"Missing in {p}% train: {sorted(missing_train)}")
    if missing_val:
        print(f"Missing in {p}% val: {sorted(missing_val)}")

    if not (missing_train or missing_val):
        print(f"{p}% subset is correctly contained within 25%.")

print("Check complete.")


Checking 5% subset...
5% subset is correctly contained within 25%.
Checking 10% subset...
10% subset is correctly contained within 25%.
Checking 15% subset...
15% subset is correctly contained within 25%.
Checking 20% subset...
20% subset is correctly contained within 25%.
Check complete.


# Rename files 
- Some special characters were replaced with random kanji when occluding images.
- This renames the affected files back to their original file names.

In [19]:
import os

# Garbled fragment that appears in the file names, and the text it replaces
old_str = "ﾐ岱嫺"
new_str = "БЛ"

for x in (5, 10, 15, 20, 25):
    # Root directory of the occlusion_<x> dataset
    root_dir = fr"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_{x}"

    # Visit every file below the root and rename the ones containing old_str
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if old_str not in filename:
                continue
            old_path = os.path.join(dirpath, filename)
            new_filename = filename.replace(old_str, new_str)
            new_path = os.path.join(dirpath, new_filename)
            os.rename(old_path, new_path)
            print(f"Renamed:\n{old_path}\n→ {new_path}\n")


# Paste Labels from 25%

In [18]:
import shutil
from pathlib import Path

# Root folder that holds the occlusion_<p> datasets
dataset_root = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion")

# Label folders of the 25% dataset
occlusion_25_labels_train = dataset_root / "occlusion_25" / "labels" / "train"
occlusion_25_labels_val = dataset_root / "occlusion_25" / "labels" / "val"

# Datasets whose labels get overwritten
percentages = [5, 10, 15, 20]


def update_labels(occlusion_x, occlusion_25_labels):
    """Overwrite labels of ``occlusion_<occlusion_x>`` with same-named files.

    For the ``train`` and ``val`` splits, every ``*.txt`` file in the target
    dataset's label folder is replaced by the file of the same name under
    ``occlusion_25_labels/<split>`` when such a file exists; labels without a
    counterpart are left untouched.
    """
    target_root = dataset_root / f"occlusion_{occlusion_x}" / "labels"

    for subset in ("train", "val"):
        source_dir = occlusion_25_labels / subset
        for label_file in (target_root / subset).glob("*.txt"):
            src_label = source_dir / label_file.name
            if not src_label.exists():
                continue
            shutil.copy(src_label, label_file)


# Distribute the 25% labels to every smaller dataset
for p in percentages:
    update_labels(p, dataset_root / "occlusion_25" / "labels")

print("Corrected labels distributed successfully!")


Corrected labels distributed successfully!


# Merge with remaining unoccluded images for the final datasets

In [15]:
import shutil
from pathlib import Path

# Define dataset root
dataset_root = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01")

# Define the original train and val dataset folders
original_images_train = dataset_root / "images" / "train"
original_images_val = dataset_root / "images" / "val"
original_labels_train = dataset_root / "labels" / "train" 
original_labels_val = dataset_root / "labels" / "val" 

# Percentages to update
percentages = [5, 10, 15, 20, 25]

# Helper: copy files into a folder without overwriting what is already there
def _copy_missing(files, dest_dir):
    """Copy each path in *files* into *dest_dir* unless that name already exists there."""
    for src in files:
        dest = dest_dir / src.name
        if not dest.exists():
            shutil.copy(src, dest)


# Function to merge missing images and labels
def merge_remaining(occlusion_x, root=None, image_exts=(".jpg",)):
    """Fill ``post_occlusion/occlusion_<occlusion_x>`` with the original files it lacks.

    For the "train" and "val" splits, original images and ``*.txt`` labels are
    copied into the occlusion dataset's ``images/<split>`` and
    ``labels/<split>`` folders. A file is copied only when no file of the same
    name exists at the destination, so files already present in the occlusion
    dataset are never overwritten. Destination folders are created if missing.

    Args:
        occlusion_x: Suffix of the target dataset folder, e.g. ``5`` for
            ``occlusion_5``.
        root: Dataset folder containing ``images``, ``labels`` and
            ``post_occlusion``. When omitted, the module-level ``dataset_root``
            and ``original_*`` folders are used, exactly as before.
        image_exts: Image file extensions (with leading dot) to merge. The
            default keeps the original ``*.jpg``-only behaviour; pass e.g.
            ``(".jpg", ".jpeg", ".png")`` to merge other image types too.
    """
    if isinstance(image_exts, str):
        # A bare string would otherwise be iterated character by character.
        image_exts = (image_exts,)

    if root is None:
        base = Path(dataset_root)
        sources = (
            ("train", original_images_train, original_labels_train),
            ("val", original_images_val, original_labels_val),
        )
    else:
        base = Path(root)
        sources = tuple(
            (subset, base / "images" / subset, base / "labels" / subset)
            for subset in ("train", "val")
        )

    target = base / "post_occlusion" / f"occlusion_{occlusion_x}"
    image_patterns = [f"*{ext}" for ext in image_exts]

    for subset, original_images, original_labels in sources:
        dest_images = target / "images" / subset
        dest_labels = target / "labels" / subset

        # Ensure destination folders exist
        dest_images.mkdir(parents=True, exist_ok=True)
        dest_labels.mkdir(parents=True, exist_ok=True)

        for pattern in image_patterns:
            _copy_missing(original_images.glob(pattern), dest_images)
        _copy_missing(original_labels.glob("*.txt"), dest_labels)

# Complete every occlusion dataset listed in `percentages` with the originals it lacks.
for pct in percentages:
    merge_remaining(pct)

print("Remaining images and labels merged successfully!")


Remaining images and labels merged successfully!


# Delete Discards

In [20]:
import os
from pathlib import Path

def delete_empty_label_pairs(folder_path, splits=("train", "val")):
    """Delete every zero-byte label file together with its image.

    For each split, ``folder_path/labels/<split>`` is scanned for ``*.txt``
    files of size 0. Each such label is deleted along with the image of the
    same stem in ``folder_path/images/<split>``: ``<stem>.jpg`` if present,
    otherwise the first ``<stem>.*`` match. A split whose labels folder does
    not exist is skipped with a message.

    Args:
        folder_path: Path of one dataset folder containing ``labels`` and
            ``images``.
        splits: Names of the split subfolders to process.

    Returns:
        The number of label files deleted.
    """
    import glob  # local import: only needed for glob.escape below

    deleted = 0
    for split in splits:
        labels_path = folder_path / "labels" / split
        images_path = folder_path / "images" / split

        if not labels_path.exists():
            print(f"Labels path {labels_path} does not exist, skipping.")
            continue

        # Snapshot the listing so unlinking does not interfere with iteration.
        for label_file in list(labels_path.glob("*.txt")):
            if label_file.stat().st_size != 0:
                continue

            image_file = images_path / label_file.with_suffix(".jpg").name
            if not image_file.exists():
                # Sometimes image extensions are .png or others.
                # Escape the stem: characters such as "[" or "*" in a file
                # name would otherwise act as wildcards and could select
                # (and delete) an unrelated image.
                pattern = glob.escape(label_file.stem) + ".*"
                image_file = next(images_path.glob(pattern), None)

            print(f"Deleting: {label_file} and {image_file}")
            label_file.unlink()  # delete label
            if image_file and image_file.exists():
                image_file.unlink()  # delete image
            deleted += 1

    return deleted


# List your occlusion folders
occlusion_folders = ["occlusion_25", "occlusion_20", "occlusion_15", "occlusion_10", "occlusion_5"]

# Base path where your occlusion folders are
base_path = Path(r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion")  # <-- CHANGE this to your actual path

for folder_name in occlusion_folders:
    print(f"Processing {folder_name}...")
    delete_empty_label_pairs(base_path / folder_name)


Processing occlusion_25...
Processing occlusion_20...
Processing occlusion_15...
Processing occlusion_10...
Processing occlusion_5...


## delete extra labels

In [17]:
import os

def delete_orphan_labels(root_dir, splits=("train", "val")):
    """Delete label files that have no image with the same base name.

    For each split, every ``.txt`` file in ``root_dir/labels/<split>`` whose
    name without extension does not match any ``.jpg``/``.jpeg``/``.png`` file
    (extension compared case-insensitively) in ``root_dir/images/<split>`` is
    removed. A split is skipped with a message when either folder is missing,
    so a missing images folder never causes every label to be deleted.

    NOTE(review): any ``.txt`` without an image is removed, which includes
    files such as ``classes.txt``. Confirm that is intended.

    Args:
        root_dir: Dataset folder containing ``labels`` and ``images``.
        splits: Names of the split subfolders to process.

    Returns:
        List of the label paths that were deleted.
    """
    removed = []
    for split in splits:
        labels_dir = os.path.join(root_dir, "labels", split)
        images_dir = os.path.join(root_dir, "images", split)

        if not (os.path.isdir(labels_dir) and os.path.isdir(images_dir)):
            print(f"Skipping {split}: {labels_dir} or {images_dir} does not exist.")
            continue

        # Get all image names without extensions
        image_basenames = {
            os.path.splitext(f)[0]
            for f in os.listdir(images_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        }

        for label_file in os.listdir(labels_dir):
            if not label_file.lower().endswith('.txt'):
                continue
            if os.path.splitext(label_file)[0] in image_basenames:
                continue
            label_path = os.path.join(labels_dir, label_file)
            print(f"Deleting extra label: {label_path}")
            os.remove(label_path)
            removed.append(label_path)

    return removed


root_dir = r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25"

delete_orphan_labels(root_dir)


Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\classes.txt
Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\STILL360БЛxperience_STILL_Fair_Booth_at_CeMAT_2016_1_001911_part_4_2.txt
Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\STILL360БЛxperience_STILL_Fair_Booth_at_CeMAT_2016_1_002005_part_3_2.txt
Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\STILL360БЛxperience_STILL_Fair_Booth_at_CeMAT_2016_1_002428_part_2_2.txt
Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\STILL360БЛxperience_STILL_Fair_Booth_at_CeMAT_2016_1_002428_part_3_2.txt
Deleting extra label: D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\post_occlusion\occlusion_25\labels\train\Toyota_VR_360ﾐ岱媽Fa

# Final check of img and label integrity

In [2]:
import os

# Folder containing the final datasets to verify
base_root = r"D:\Akash\Work\AI\2025\TC04\2025_01_tc1_dataset_01\Final"  # <- change this

# Names of the dataset folders under base_root
occlusion_folders = [
    "2025_04_tc04_dataset_10",
    "2025_04_tc04_dataset_11",
    "2025_04_tc04_dataset_12",
    "2025_04_tc04_dataset_13",
    "2025_04_tc04_dataset_14",
]

# Helper function to get file stems (without extension)
def get_stems(folder, exts):
    """Return the set of extension-less names of the files in *folder*.

    Only entries whose lower-cased name ends with one of the strings in
    *exts* are included; the original casing of the name is kept.
    """
    suffixes = tuple(exts)
    return {
        os.path.splitext(entry)[0]
        for entry in os.listdir(folder)
        if entry.lower().endswith(suffixes)
    }

# Extensions that count as images and as labels
image_exts = {'.jpg', '.jpeg', '.png'}
label_exts = {'.txt'}

# For every dataset folder and split, report stems present on only one side.
for folder in occlusion_folders:
    root_dir = os.path.join(base_root, folder)
    print(f"\nChecking folder: {folder}")

    for split in ("train", "val"):
        image_stems = get_stems(os.path.join(root_dir, "images", split), image_exts)
        label_stems = get_stems(os.path.join(root_dir, "labels", split), label_exts)

        images_missing_labels = image_stems.difference(label_stems)
        labels_missing_images = label_stems.difference(image_stems)

        if not (images_missing_labels or labels_missing_images):
            print(f"[{split}] All images and labels are correctly paired in {folder}! 🎯")
            continue

        reports = (
            (f"\n[{split}] Images missing labels in {folder}:", images_missing_labels),
            (f"\n[{split}] Labels missing images in {folder}:", labels_missing_images),
        )
        for heading, stems in reports:
            if not stems:
                continue
            print(heading)
            for stem in stems:
                print(f"{stem}")



Checking folder: 2025_04_tc04_dataset_10
[train] All images and labels are correctly paired in 2025_04_tc04_dataset_10! 🎯
[val] All images and labels are correctly paired in 2025_04_tc04_dataset_10! 🎯

Checking folder: 2025_04_tc04_dataset_11
[train] All images and labels are correctly paired in 2025_04_tc04_dataset_11! 🎯
[val] All images and labels are correctly paired in 2025_04_tc04_dataset_11! 🎯

Checking folder: 2025_04_tc04_dataset_12
[train] All images and labels are correctly paired in 2025_04_tc04_dataset_12! 🎯
[val] All images and labels are correctly paired in 2025_04_tc04_dataset_12! 🎯

Checking folder: 2025_04_tc04_dataset_13
[train] All images and labels are correctly paired in 2025_04_tc04_dataset_13! 🎯
[val] All images and labels are correctly paired in 2025_04_tc04_dataset_13! 🎯

Checking folder: 2025_04_tc04_dataset_14
[train] All images and labels are correctly paired in 2025_04_tc04_dataset_14! 🎯
[val] All images and labels are correctly paired in 2025_04_tc04_data