# Introduction
This notebook is to visualize the already created mosaic augmented images saved in the folder

In [1]:
from google.colab import drive
import os

drive.mount('/content/gdrive', force_remount=True)
# (WARNING!!!) DIRECT THE PATH TO "Data-Competition" folder in the Github
path = '/content/gdrive/My Drive/Data-Centric_Competition/Github_save/Data-Competition'
os.chdir(path)

Mounted at /content/gdrive


In [3]:
%%capture
! pip install opencv-python-headless==4.1.2.30

In [4]:
%matplotlib inline

import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [5]:
%%capture
!pip install -U albumentations

In [6]:
%%capture
!pip install -r ./requirements.txt

# Construct mosaic .cvs file

In [None]:
import glob
import pandas as pd
import numpy as np

def construct_csv_file(folder_path):
  """Read all txt annotation files & return a dataframe containing them
  Input:
    folder_path : folder's path contained txt files
  Output:
    Name of the output file the merged lines will be written to.
  """

  train_csv = list()
  # make sure there's a slash to the folder path 
  folder_path += "" if folder_path[-1] == "/" else "/"
  # get all text files
  txt_files = glob.glob(folder_path + "*.txt")

  # Read each txt file
  for txt_file in txt_files:
    id = [txt_file.strip().split('/')[-1][:-4], 1280, 720]
    # Read the content of file
    with open(txt_file, 'rt') as fd:
      lines = fd.readlines()
      for line in lines:
        box = line.strip().split(' ')
        train_csv.append(id+box)
  
  return np.array(train_csv)

In [None]:
mosaic_csv = construct_csv_file('./dataset_origin/mosaic_randsafebox/mosaic/labels')

In [None]:
# "mosaic" folder
mosaic_anno_csv = pd.DataFrame(mosaic_csv, columns=['image_id', 'width', 'height', 'label', 'x', 'y', 'w', 'h'])
mosaic_anno_csv[['width', 'height', 'label', 'x', 'y', 'w', 'h']] = mosaic_anno_csv[['width', 'height', 'label', 'x', 'y', 'w', 'h']].astype(float)

# mosaic_anno_csv.to_csv('./dataset_origin/mosaic_randsafebox/mosaic/mosaic_csv.csv', index=False)

In [None]:
mosaic_anno_csv

Unnamed: 0,image_id,width,height,label,x,y,w,h
0,img_mosaic_1,1280.0,720.0,1.0,0.108189,0.430670,0.062787,0.069404
1,img_mosaic_1,1280.0,720.0,0.0,0.340571,0.263594,0.024354,0.025511
2,img_mosaic_1,1280.0,720.0,1.0,0.570117,0.281007,0.026481,0.023395
3,img_mosaic_2,1280.0,720.0,1.0,0.554310,0.165524,0.071133,0.089362
4,img_mosaic_2,1280.0,720.0,1.0,0.830134,0.042720,0.085806,0.085440
...,...,...,...,...,...,...,...,...
948,img_mosaic_598,1280.0,720.0,1.0,0.242088,0.286569,0.100993,0.101816
949,img_mosaic_598,1280.0,720.0,1.0,0.618396,0.352025,0.086105,0.082076
950,img_mosaic_598,1280.0,720.0,1.0,0.087293,0.533158,0.060393,0.060302
951,img_mosaic_600,1280.0,720.0,2.0,0.224139,0.946596,0.063872,0.089875


# Dataset visualization

In [None]:
import pandas as pd
import numpy as np
import glob
import cv2
import os
import re

from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2, ToTensor

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

DIR_MOSAIC = './dataset_origin/mosaic_randsafebox/mosaic/images'

In [None]:
mosaic_df = pd.read_csv('./dataset_origin/mosaic_randsafebox/mosaic/mosaic_csv.csv', index_col=False)

mosaic_ids = mosaic_df['image_id'].unique()
mosaic_df = mosaic_df[mosaic_df['image_id'].isin(mosaic_ids)]

In [None]:
len(mosaic_ids)

367

## Dataset construction

In [None]:
# HELPER FUNCTIONS
def load_image(self, index):
  # loads 1 image from dataset, returns img, original hw, resized hw
  """Load 1 image from dataset
  Input:
    index: idx to search for image's id
  Output:
    img, hw_original, hw_resized """
  # Read an image using opencv2
  image_id = self.image_ids[index]
  img = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
    
  assert img is not None, 'Image Not Found ' + image_id
  h0, w0 = img.shape[:2]  # orig hw
  return img, (h0, w0)  # img, hw_original


def yolo_to_pascal(x, y, w, h, width, height):
  xmax = int((x*width) + (w * width)/2.0)
  xmin = int((x*width) - (w * width)/2.0)
  ymax = int((y*height) + (h * height)/2.0)
  ymin = int((y*height) - (h * height)/2.0)
  return xmin, ymin, xmax, ymax

In [None]:
from sklearn.utils import shuffle
import random

class FPTDataset(Dataset):
  def __init__(self, dataframe, image_dir, transforms=None):
    super().__init__()

    self.df = dataframe  # Annotation & Image's ID dataframe
    self.transforms = transforms  # Albumentation's augmentation
    self.image_ids = shuffle(dataframe['image_id'].unique())  # Image's ID
    self.labels = [np.zeros((0, 4), dtype=np.float32)] * len(self.image_ids) # Image's bboxes
    self.class_labels = [np.zeros((0, 1), dtype=np.float32)] * len(self.image_ids)  # Image's label
    self.img_size = 960
    self.image_dir = image_dir
    im_w = 1280
    im_h = 720

    # Loop through each image (Each image might containt multiple bboxes & labels)
    for i, img_id in enumerate(self.image_ids):
      records = self.df[self.df['image_id'] == img_id]
      boxes = records[['x', 'y', 'w', 'h']].values  # Annotations
      class_label = records[['label']].values
      self.labels[i] = np.array(boxes)
      self.class_labels[i] = class_label

  def __getitem__(self, index: int):
    # DATA AUGMENTATION
    if self.transforms is not None:
      # Load image
      image, (h0, w0) = load_image(self, index)

      # Augmentation
      augmented = self.transforms(image=image, bboxes=self.labels[index], class_labels=self.class_labels[index])
      image = augmented['image']
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR to RGB
      bboxes = augmented['bboxes']
      labels = augmented['class_labels']
      img_id = self.image_ids[index]

      return image, bboxes, labels, img_id

  def __len__(self) -> int:
    return self.image_ids.shape[0]

# Visualize mosaic images

In [None]:
def aug_resize():
  return A.Compose([
      A.augmentations.geometric.resize.Resize (960, 960, interpolation=1, always_apply=False, p=1)
  ], p=1.0, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

In [None]:
import random, math


def collate_fn(batch):
    return tuple(zip(*batch))

def aug_visualize(train_val_df, train_val_dir, transform):
  train_dataset = FPTDataset(train_val_df, train_val_dir, transform)  # 792 images
  train_data_loader = DataLoader(
      train_dataset,
      batch_size=36,
      shuffle=True,
      num_workers=2,
      collate_fn=collate_fn
  )


  fig, ax = plt.subplots(6, 6, figsize=(25, 25))
  ax = ax.flatten()
  images, targets, class_labels, img_ids = next(iter(train_data_loader))

  for i in range(36):  # Go through each batch 
    boxes = targets[i]
    sample = images[i]
    height, width = sample.shape[:2]

    for box in boxes:
      # Convert from Yolo to Pascal_voc
      box = yolo_to_pascal(box[0], box[1], box[2], box[3], width, height)
      # Drawing bounding box
      cv2.rectangle(sample,
                (int(box[0]), int(box[1])),
                (int(box[2]), int(box[3])),
                220, 3)
      
    ax[i].set_title(img_ids[i])
    ax[i].imshow(sample)  # Visualize a sample for each batch
  plt.show()

In [None]:
aug_visualize(mosaic_df, DIR_MOSAIC, aug_resize())

Output hidden; open in https://colab.research.google.com to view.

<img src="../images/mosaic1.png" width="2000" height="2000">

# Save all the mosaic augmented images (.jpg) + label (.txt) into specific folder

In [None]:
def mosaic_filter(num_img, train_df, DIR_TRAIN):
  """
    Filter only mosaic image with labels
    Input:
      num_img: (int) # of mosaic images wanted to create
      train_df: (df) .csv metadata file of train dataset wanted to do augmentation
      DIR_TRAIN: (str) path direct to train's folder
    Output:
      image_lst: list of mosaic images
      target_lst: list of appropriate mosaic labels
  """
  a = 0
  image_lst = list()
  target_lst = list()

  train_dataset = FPTDatasetMosaic(train_df, DIR_TRAIN)  # 792 images
  train_data_loader = DataLoader(
      train_dataset,
      batch_size=15,
      shuffle=True,
      num_workers=4,
      collate_fn=collate_fn
  )

  while (a < num_img):
    images, targets = next(iter(train_data_loader))
    for image, target in zip(images, targets):
      if len(target) > 1:
        image_lst.append(image)
        target_lst.append(target)
        a += 1
      else:
        continue

      if a == num_img:
        break

  return image_lst, target_lst


# Create 200 mosaic images based "train" dataset folder
images, targets = mosaic_filter(200)

In [None]:
def save_mosaic(images, targets, save_labels_path, save_images_path):
  """Save the mosaic images & labels into image's folder & label's folder
  Input:
    images: (list) of mosaic images (np) 
    targets: (list) of associated mosaic labels (np)
    save_labels_path: (str) path folder used to save .txt labels
    save_images_path: (str) path folder used to save .jpg images
  """
  a1 = 0
  b1 = 0

  for label in targets:
    height, width = img.shape[:2]
    a1 += 1

    txt_file = open(f'{save_labels_path}/img_mosaic_{a1}.txt', 'w')
    # Through each bbox of an image
    for j in range(len(label)):
      # Normalize the box's annotation after augmentation (AS requirement from competition)
      a,b,c,d = pascal_to_yolo(label[j][1], label[j][2], label[j][3], label[j][4], width, height)
      label_yolo = np.array([label[j][0], a, b, c, d])
      label_yolo = arr_to_str(label_yolo, a1)

      # Save the string for txt file
      txt_file.write(label_yolo)
    txt_file.close()
  print(f'FINISH SAVING MOSAIC LABELS TO FOLDER: {save_labels_path}')

  # Save images into folder "images/train"
  for img in images:
    b1 += 1
    im = Image.fromarray(img, "RGB")
    im.save(f'{save_images_path}/img_mosaic_{b1}.jpg')
  print(f'FINISH SAVING MOSAIC IMAGES TO FOLDER: {save_images_path}')

In [None]:
# Save the created mosaic images to folder
save_mosaic(images, targets, "./dataset_aug/labels/train", "./dataset_aug/images/train")