In [1]:
from yolox.data import TrainTransform

import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import json

import albumentations as A


In [13]:
# Root of the mounted workspace; the dataset paths below are built from it.
ROOT_DIR = "/workspace/mnt/"
# Directory the training image files are read from.
TRAIN_SAVE_PATH = ROOT_DIR + "data/train2017/"
# Directory the annotation JSON (train.json) is read from.
ANNOTATION_SAVE_PATH = ROOT_DIR + "data/annotations/"


## load the training dataset

In [14]:
# Load train.json from ANNOTATION_SAVE_PATH and flatten it into one row per annotation.
with open(f"{ANNOTATION_SAVE_PATH}train.json") as f:
    train_json = json.load(f)


def _xywh_to_xyxy(box):
    """Turn an [x, y, w, h] box into [x_min, y_min, x_max, y_max]."""
    return [box[0], box[1], box[0] + box[2], box[1] + box[3]]


# One row per image. 'id' is renamed to 'image_id' so it can be used as the merge key,
# and repeated image ids are dropped so the merge below cannot multiply annotation rows.
df_images = (
    pd.json_normalize(train_json['images'])
    .rename(columns={'id': 'image_id'})
    .drop_duplicates(subset=['image_id'])
)

# One row per category, keyed by an integer 'category_id'.
df_categories = pd.json_normalize(train_json['categories']).rename(columns={'id': 'category_id'})
df_categories['category_id'] = df_categories['category_id'].astype(int)

# Annotations with integer category ids, joined to their image metadata on the
# 'image_id' column. The original xywh box is kept under 'bboxes_xywh'.
df_annotations = (
    pd.json_normalize(train_json['annotations'])
    .assign(category_id=lambda frame: frame['category_id'].astype(int))
    .merge(df_images, how='left', on='image_id')
    .rename(columns={'bbox': 'bboxes_xywh'})
)
# 'bbox' holds the same box in xyxy (corner) format.
df_annotations['bbox'] = df_annotations['bboxes_xywh'].apply(_xywh_to_xyxy)

# Lookup from category_id to its display name, used when labelling boxes.
category_id_to_name = dict(zip(df_categories['category_id'], df_categories['name']))

df_annotations

Unnamed: 0,id,image_id,category_id,bboxes_xywh,score,area,iscrowd,file_name,height,width,source_name,bbox
0,1,0,3,"[305, 169, 207, 175]",-1,36225,0,00000e98-e5cc-4788-a144-bd1fbd13822f.jpg,512,512,0,"[305, 169, 512, 344]"
1,2,0,2,"[120, 108, 241, 97]",-1,23377,0,00000e98-e5cc-4788-a144-bd1fbd13822f.jpg,512,512,0,"[120, 108, 361, 205]"
2,3,0,3,"[322, 352, 190, 110]",-1,20900,0,00000e98-e5cc-4788-a144-bd1fbd13822f.jpg,512,512,0,"[322, 352, 512, 462]"
3,4,0,3,"[287, 45, 182, 86]",-1,15652,0,00000e98-e5cc-4788-a144-bd1fbd13822f.jpg,512,512,0,"[287, 45, 469, 131]"
4,5,0,3,"[257, 463, 254, 48]",-1,12192,0,00000e98-e5cc-4788-a144-bd1fbd13822f.jpg,512,512,0,"[257, 463, 511, 511]"
...,...,...,...,...,...,...,...,...,...,...,...,...
147839,147840,147829,3,"[1227, 1312, 876, 413]",-1,361788,0,fffbfddc-4b02-47bf-9b77-76fa1b4b73ef.jpg,3000,4096,15610,"[1227, 1312, 2103, 1725]"
147840,147841,147829,1,"[1203, 0, 896, 377]",-1,337792,0,fffbfddc-4b02-47bf-9b77-76fa1b4b73ef.jpg,3000,4096,15610,"[1203, 0, 2099, 377]"
147841,147842,147829,3,"[2855, 321, 766, 393]",-1,301038,0,fffbfddc-4b02-47bf-9b77-76fa1b4b73ef.jpg,3000,4096,15610,"[2855, 321, 3621, 714]"
147842,147843,147829,3,"[0, 2274, 597, 496]",-1,296112,0,fffbfddc-4b02-47bf-9b77-76fa1b4b73ef.jpg,3000,4096,15610,"[0, 2274, 597, 2770]"


In [18]:
# Degenerate boxes: annotations whose xywh box has a width or a height of exactly 0.
box_widths = df_annotations['bboxes_xywh'].str[2]
box_heights = df_annotations['bboxes_xywh'].str[3]
df_annotations[box_widths.eq(0) | box_heights.eq(0)]


Unnamed: 0,id,image_id,category_id,bboxes_xywh,score,area,iscrowd,file_name,height,width,source_name,bbox


## visualize one image

In [None]:
# Colour triples, written in the channel order implied by their names (R, G, B).
_GREEN = (0, 255, 0)
_BLUE = (0, 0, 255)
_MAGENTA = (255, 0, 255)
_RED = (255, 0, 0)

# Box colour for each category id (looked up by visualize()).
BOX_COLORS = {
    0: _GREEN,    # HIGH
    1: _BLUE,     # LOW
    2: _MAGENTA,  # MEDIUM
    3: _RED,      # PARTIAL
}

# Colour of the label text drawn on top of the filled label background.
TEXT_COLOR = (255, 255, 255)  # White


def visualize_bbox(img, bbox, class_name, color=(255, 0, 0), thickness=2, font_scale=0.35):
    """Draw one bounding box and its class label onto ``img`` (in place).

    Args:
        img: Image array OpenCV can draw on; it is modified in place.
        bbox: ``(x_min, y_min, x_max, y_max)`` in pixels; each value is truncated to int.
        class_name: Text written in the label.
        color: Colour triple used for the box outline and the label background.
        thickness: Outline thickness in pixels.
        font_scale: OpenCV font scale of the label text. The default keeps the
            previous fixed size; raise it for large images where 0.35 is unreadable.

    Returns:
        The same ``img`` object, with the box and label drawn on it.
    """
    x_min, y_min, x_max, y_max = (int(v) for v in bbox)
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    font = cv2.FONT_HERSHEY_SIMPLEX
    (text_width, text_height), _ = cv2.getTextSize(class_name, font, font_scale, 1)
    label_height = int(1.3 * text_height)
    # The label normally sits directly above the box. When the box is too close to the
    # top edge there is no room for it there and it would be drawn outside the image,
    # so in that case it is placed just inside the top of the box instead.
    label_bottom = y_min if y_min >= label_height else y_min + label_height
    cv2.rectangle(
        img,
        (x_min, label_bottom - label_height),
        (x_min + text_width, label_bottom),
        color,
        -1,  # negative thickness = filled rectangle
    )
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, label_bottom - int(0.3 * text_height)),
        fontFace=font,
        fontScale=font_scale,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img


def visualize(image, targets, category_id_to_name, ax=None, bgr=True):
    """Show ``image`` with every target's box and class label drawn on it.

    Args:
        image: Image array; it is copied, so the caller's array is not drawn on.
        targets: Iterable of rows ``[x_min, y_min, x_max, y_max, category_id]``.
        category_id_to_name: Mapping from integer category id to the label text.
        ax: Matplotlib axes to draw into. When None, a new figure is created.
        bgr: When True (default) a 3-channel ``image`` is treated as BGR, the channel
            order ``cv2.imread`` produces, and is reversed to RGB before display,
            because ``imshow`` interprets 3-channel arrays as RGB. Pass False if the
            image is already RGB.
    """
    img = image.copy()
    if bgr and img.ndim == 3 and img.shape[2] == 3:
        # Reverse the channel axis; OpenCV drawing needs a contiguous array afterwards.
        img = np.ascontiguousarray(img[:, :, ::-1])
    for target in targets:
        bbox = target[:4]
        # Rows of a float array give float ids; use a plain int for the dict lookups.
        category_id = int(target[4])
        class_name = category_id_to_name[category_id]
        color = BOX_COLORS[category_id]
        img = visualize_bbox(img, bbox, class_name, color=color)
    if ax is None:
        plt.figure()
        plt.axis('off')
        plt.imshow(img)
    else:
        ax.axis('off')
        ax.imshow(img)
        
def get_image_targets(image_id):
    """
        returns the image and targets for a given image_id

        The image is read with cv2.imread from TRAIN_SAVE_PATH, using the file_name of
        the first annotation row of that image.
        targets is a float64 np.array of shape (n_boxes, 5), where each row is
        [x_min, y_min, x_max, y_max, category_id].

        Raises:
            IndexError: if df_annotations has no row for image_id.
            FileNotFoundError: if the image file could not be read.
    """
    df_group = df_annotations[df_annotations['image_id'] == image_id]
    if df_group.empty:
        raise IndexError(f"no annotations found for image_id {image_id!r}")
    file_path = f"{TRAIN_SAVE_PATH}{df_group.iloc[0]['file_name']}"
    image = cv2.imread(file_path)
    # cv2.imread signals a missing or unreadable file by returning None, not by raising.
    if image is None:
        raise FileNotFoundError(f"could not read image file: {file_path}")
    boxes = np.array(df_group['bbox'].tolist(), dtype=np.float64).reshape(-1, 4)
    category_ids = df_group['category_id'].to_numpy(dtype=np.float64).reshape(-1, 1)
    targets = np.hstack([boxes, category_ids])
    return image, targets

def padded_label_to_bbox(padded_labels):
    """Convert label rows from centre format to corner-format targets.

    Args:
        padded_labels: 2-D array whose rows are ``[category_id, cx, cy, w, h]``.
            It is not modified.

    Returns:
        A new array whose rows are ``[x_min, y_min, x_max, y_max, category_id]``.
        Rows that are all zeros stay all zeros.
    """
    labels = np.asarray(padded_labels)
    category_ids = labels[:, 0].astype(int)
    centers = labels[:, 1:3]
    sizes = labels[:, 3:]
    # Build new arrays rather than writing into slices of the input: those slices are
    # views, so in-place assignment would overwrite the caller's labels.
    top_left = centers - sizes / 2
    bottom_right = top_left + sizes
    # add back the category_id as the last column
    return np.hstack([top_left, bottom_right, category_ids.reshape(-1, 1)])

In [None]:
# Smoke-test the plotting helpers on a single image: the second distinct image_id.
image_ids = df_annotations['image_id'].unique()
image_id = image_ids[1]
sample = get_image_targets(image_id)
image, targets = sample
visualize(image, targets, category_id_to_name)

## Test the YOLOX transform


In [None]:
input_size = (416, 416)
tt = TrainTransform()

# Run the same image through both variants of the transform and plot each result.
# The second variant passes no flag, so it uses whatever TrainTransform defaults to.
transform_variants = (
    ("old YOLOX transform", {"use_albumentations": False}),
    ("new YOLOX transform with albumentations", {}),
)

for title, tt_kwargs in transform_variants:
    image_t, padded_labels = tt(image, targets, input_size, **tt_kwargs)
    # image_t is (channels, height, width), change to (height, width, channels) for plotting
    image_t_plot = image_t.transpose(1, 2, 0).astype(np.uint8)
    # the padded labels are not in xyxy bbox format, so convert them back before plotting
    padded_targets = padded_label_to_bbox(padded_labels)
    visualize(image_t_plot, padded_targets, category_id_to_name)
    # title the figure just created so the two outputs can be told apart
    plt.title(title)

## play with albumentations

In [None]:
image, targets = get_image_targets(image_id)

# Random augmentation pipeline: flip, 90-degree rotation, bbox-safe crop and colour
# jitter (each applied with probability 0.5), then, with probability 0.8, exactly one
# of the image-quality / colour transforms listed under OneOf.
transform = A.Compose(
    [
        A.Flip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.BBoxSafeRandomCrop(p=0.5),
        A.ColorJitter(brightness=0,contrast=0,saturation=2,hue=0.2,p=0.5),
        A.OneOf([
            A.Blur(blur_limit=(3,20)),
            A.Posterize(num_bits=(2,5)),
            A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8)),
            A.ImageCompression(quality_lower=10, quality_upper=100),
            A.ToGray(),
            A.ChannelShuffle(),
            A.FancyPCA(alpha=0.1),
        ], p=0.8),
    ],

    # boxes are passed as xyxy pixel coordinates; 'category_ids' travels with the boxes
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['category_ids']),
)

# make a grid of subplots and draw one random augmentation of the image in each
N_COLS = 4
N_ROWS = 10
# squeeze=False keeps axs two-dimensional even when N_ROWS or N_COLS is 1
fig, axs = plt.subplots(N_ROWS, N_COLS, figsize=(5*N_COLS, 5*N_ROWS), squeeze=False)

for ax in axs.flat:
    transformed = transform(image=image, bboxes=targets[:, :4], category_ids=targets[:, 4])
    image_t = transformed['image']
    # Pair each returned box with the label returned alongside it, not with the original
    # labels: the two only line up if the pipeline kept every box. The reshapes keep the
    # arrays two-dimensional when no boxes are returned.
    bboxes_t = np.asarray(transformed['bboxes'], dtype=np.float64).reshape(-1, 4)
    category_ids_t = np.asarray(transformed['category_ids'], dtype=np.float64).reshape(-1, 1)
    targets_t = np.hstack([bboxes_t, category_ids_t])
    visualize(image_t, targets_t, category_id_to_name, ax=ax)

# tighten the layout once every subplot has been drawn
fig.tight_layout()