In [1]:
import cv2 as cv
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import os
import shutil
from pathlib import Path
from tqdm import tqdm

In [2]:
import sys
sys.path.insert(0, '../../')
from utils.general import  *

In [3]:
def shrink_image(img_path, label_path, kernel_size=2, avg_bbox_size=16272):
    """
    Shrink an image step by step until its average label bounding box is small enough.

    Each step smooths the image with a ``kernel_size`` x ``kernel_size`` mean kernel
    and then keeps every ``kernel_size``-th pixel along both axes. The threshold is
    checked against the box sizes measured on the *already shrunken* image, so at
    least one shrinking step is always applied.

    Inputs:
        img_path (str or path-like):
            The file path to the image that needs to be shrunk.
        label_path (str or path-like):
            The file path to the corresponding label file. Every non-blank line is
            parsed as ``<class> <v1> <v2> ...``; the values are reshaped to (-1, 2)
            pairs and multiplied by the image width/height, i.e. they are treated
            as normalised coordinates.
        kernel_size (int):
            The factor by which the image is reduced in each iteration (default: 2).
        avg_bbox_size (float):
            The target average bounding box size (width * height in pixels). The
            shrinking process stops once the average bounding box size is below
            this threshold (default: 16272).

    Return:
        img (numpy.ndarray):
            The resulting shrunken image.
        labels (pandas.DataFrame):
            One row per label with columns ``class`` and ``segment``, exactly as
            read from ``label_path`` (the coordinates are not modified).

    Raises:
        ValueError: if ``kernel_size`` is smaller than 1.
        OSError: if the image cannot be read.
    """
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")

    # str() keeps pathlib.Path inputs working on OpenCV builds that only accept str.
    img = cv.imread(str(img_path))
    if img is None:
        # cv.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {img_path}")

    # Collect the rows first and build the frame once; concatenating inside the
    # loop is quadratic in the number of labels.
    classes = []
    segments = []
    with open(label_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            classes.append(int(fields[0]))
            segments.append(np.array(fields[1:], dtype=float))
    labels = pd.DataFrame({'class': classes, 'segment': segments})

    # The normalised box extents do not depend on the image size, so they are
    # computed once instead of on every shrinking step.
    box_widths = []
    box_heights = []
    for segment in segments:
        # NOTE(review): the unpacking assumes segment2box returns (x, y, w, h);
        # some upstream versions of this helper return (x1, y1, x2, y2) - confirm.
        _, _, w, h = segment2box(segment.reshape(-1, 2))
        box_widths.append(w)
        box_heights.append(h)
    box_widths = np.asarray(box_widths, dtype=float)
    box_heights = np.asarray(box_heights, dtype=float)

    kernel = np.ones((kernel_size, kernel_size), dtype=float) / kernel_size**2

    while True:
        mean_img = cv.filter2D(img, -1, kernel)
        smaller_img = mean_img[::kernel_size, ::kernel_size]
        ysize, xsize = smaller_img.shape[:2]

        if box_widths.size:
            bbox_mean = np.mean((box_widths * xsize) * (box_heights * ysize))
        else:
            # No labels means no box can exceed the threshold.
            bbox_mean = 0.0

        # A step that leaves the shape unchanged (1x1 image or kernel_size == 1)
        # can never make progress; stop instead of looping forever.
        cannot_shrink_further = smaller_img.shape[:2] == img.shape[:2]

        img = smaller_img
        if bbox_mean < avg_bbox_size or cannot_shrink_further:
            break

    return img, labels

In [21]:
def create_small_train_imgs(train_dir_path, 
                            label_dir_path, 
                            num_imgs=None, 
                            chosen_images = None,
                            avg_bbox_size=16272,
                            run_test=True):
    """
    Create shrunken copies of training images and register them for training.

    Description:
        Each selected image that has a label file is shrunk with ``shrink_image``
        until the average bounding box size of its labels falls below
        ``avg_bbox_size``. The result is written next to the original as
        ``<stem>sm.jpg``, the label file is copied to ``<stem>sm.txt`` and the new
        image is appended to ``train2017.txt`` two directories above
        ``train_dir_path``. Images without a label file are skipped.

    Inputs:
        train_dir_path (str):
            Path to the directory containing the training images.
        label_dir_path (str):
            Path to the directory containing the corresponding label files.
        num_imgs (int, optional):
            Number of distinct ``.jpg`` images to randomly select and shrink.
            Either this or `chosen_images` must be provided.
        chosen_images (list of str, optional):
            List of specific image filenames to shrink. If provided (and not
            empty), only these images are processed and `num_imgs` is ignored.
            The list itself is not modified.
        avg_bbox_size (float, default=16272):
            Target average bounding box size (area in pixels), forwarded to
            ``shrink_image``.
        run_test (Bool, default=True):
            Verify that every created image exists, has a label file and is
            listed in ``train2017.txt``.

    Return:
        chosen_images (list of str):
            Filenames of the source images that were actually processed.
    """
    assert num_imgs is not None or chosen_images, "Must provide either num_imgs, or chosen_images"

    train_dir_path = Path(train_dir_path)
    label_dir_path = Path(label_dir_path)

    if chosen_images:
        # Work on a copy so the caller's list is never mutated.
        selected_names = list(chosen_images)
    else:
        # Sorted so a seeded np.random gives the same selection on every run;
        # os.listdir order is arbitrary.
        candidates = sorted(name for name in os.listdir(train_dir_path) if name.endswith('.jpg'))
        sample_size = max(0, min(num_imgs, len(candidates)))
        # Draw the whole sample in one call: replace=False only prevents
        # duplicates within a single call, not across repeated calls.
        if sample_size:
            drawn = np.random.choice(candidates, size=sample_size, replace=False)
            selected_names = [str(name) for name in drawn]
        else:
            selected_names = []

    processed_names = []
    created_images = []

    for image_name in tqdm(selected_names):
        img_path = train_dir_path / image_name
        label_path = label_dir_path / img_path.with_suffix('.txt').name

        if not label_path.exists():
            continue

        small_img, _ = shrink_image(str(img_path), str(label_path), avg_bbox_size=avg_bbox_size)

        new_file_name = img_path.stem + 'sm'
        small_img_train_path = train_dir_path / (new_file_name + '.jpg')
        small_img_label_path = label_dir_path / (new_file_name + '.txt')

        # cv.imwrite reports failure through its return value, not an exception.
        if not cv.imwrite(str(small_img_train_path), small_img):
            raise OSError(f"Could not write image: {small_img_train_path}")
        # The label file is reused unchanged for the smaller image.
        shutil.copy(label_path, small_img_label_path)

        processed_names.append(img_path.name)
        created_images.append(small_img_train_path)

    train_txt_path = train_dir_path.parent.parent / 'train2017.txt'

    # Remember what is already listed so re-running on the same images does not
    # append duplicate entries.
    listed_entries = set()
    if train_txt_path.exists():
        with open(train_txt_path, 'r') as f:
            listed_entries = set(f.read().splitlines())

    with open(train_txt_path, 'a') as f:
        for created_path in created_images:
            entry = './images/train2017/' + created_path.name
            if entry not in listed_entries:
                f.write('\n' + entry)
                listed_entries.add(entry)

    if run_test:
        # Assert that the images were uploaded and the appropriate labels
        # were added correctly
        with open(train_txt_path, 'r') as f:
            train_txt_vals = f.read().splitlines()
        for created_path in created_images:
            txt_path = './images/train2017/' + created_path.name
            label_path = label_dir_path / created_path.with_suffix('.txt').name

            assert txt_path in train_txt_vals, f"Image not added to text path correctly: {created_path}"
            assert label_path.exists(), f"Label does not exist for following image: {created_path}" 
            assert created_path.exists(), f"Image path does not exist: {created_path}" 

        print("Images created and labels updated correctly")

    return processed_names

In [22]:
# Locations of the COCO training images and their label files.
coco_path = '../../coco/'
train_img_dir = f'{coco_path}images/train2017'
train_label_dir = f'{coco_path}labels/train2017'
num_images = 100

# First pass: shrink a random selection and remember which images were used.
chosen_images = create_small_train_imgs(
    train_img_dir,
    train_label_dir,
    num_imgs=num_images,
)

# Second pass over the same images, this time with an infinite size threshold.
create_small_train_imgs(
    train_img_dir,
    train_label_dir,
    chosen_images=chosen_images,
    avg_bbox_size=np.inf,
)

100%|██████████| 100/100 [00:01<00:00, 89.94it/s]


All images upldated correctly


100%|██████████| 99/99 [00:00<00:00, 290.56it/s]

All images upldated correctly





['000000100037.jpg',
 '000000240128.jpg',
 '000000052286.jpg',
 '000000145259.jpg',
 '000000458202.jpg',
 '000000357743.jpg',
 '000000168454.jpg',
 '000000412301.jpg',
 '000000157634.jpg',
 '000000251135.jpg',
 '000000392650.jpg',
 '000000287507.jpg',
 '000000353090.jpg',
 '000000026132.jpg',
 '000000262371.jpg',
 '000000288268.jpg',
 '000000241611.jpg',
 '000000419755.jpg',
 '000000556617.jpg',
 '000000223243.jpg',
 '000000223687.jpg',
 '000000267182.jpg',
 '000000060448.jpg',
 '000000396039.jpg',
 '000000471535.jpg',
 '000000012678.jpg',
 '000000556183.jpg',
 '000000138806.jpg',
 '000000422782.jpg',
 '000000364280.jpg',
 '000000199011.jpg',
 '000000078113.jpg',
 '000000444487.jpg',
 '000000359853.jpg',
 '000000046885.jpg',
 '000000093553.jpg',
 '000000561629.jpg',
 '000000012472.jpg',
 '000000211115.jpg',
 '000000464274.jpg',
 '000000157916.jpg',
 '000000119358.jpg',
 '000000098444.jpg',
 '000000374166.jpg',
 '000000340771.jpg',
 '000000139397.jpg',
 '000000504172.jpg',
 '00000017034