# **FILTERING ANNOTATIONS**

# **Section 01.** Necessary Setup and Imports

In [1]:
# Mount Google Drive in this Colab runtime; every path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
# Make the project folder on Drive the current working directory.
os.chdir(r'/content/drive/MyDrive/TIC-YOLO')

In [3]:
import os
import requests
import zipfile
import cv2
import matplotlib.pyplot as plt
import glob
import numpy as np
import random
from tqdm.auto import tqdm

# **Section 02.** Some Functions to Plot Bounding Boxes

In [5]:
# Dataset locations and the fixed list of classes.
# ROOT_DIR is the dataset root on Drive. The *_dir values are written relative to a root
# (images/<split> and labels/<split>) — presumably meant to be joined with ROOT_DIR; TODO confirm.
ROOT_DIR = '/content/drive/MyDrive/TIC-YOLO/ColdwaterML'
train_imgs_dir = 'images/train'
train_labels_dir = 'labels/train'
val_imgs_dir = 'images/val'
val_labels_dir = 'labels/val'
test_imgs_dir = 'images/test'
test_labels_dir = 'labels/test'
# Class names. plot_box resolves a name with classes[int(label)], so the position
# in this list is the numeric class id found in the label files.
classes = ['Car', 'Pickup truck', 'Truck','Bus']

In [6]:
# One random 3-component color per class, each component drawn uniformly from [0, 255).
# plot_box picks the row that matches the class. No seed is set in this cell, so the
# colors are not guaranteed to be the same from one run to the next.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

In [7]:
def yolo2bbox(bboxes):
    """Convert a single YOLO box to corner coordinates.

    Args:
        bboxes: Indexable with at least four numbers laid out as
            (x_center, y_center, width, height).

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` in the same units as the input.
    """
    x_center, y_center = bboxes[0], bboxes[1]
    half_w = bboxes[2] / 2
    half_h = bboxes[3] / 2
    return (
        x_center - half_w,
        y_center - half_h,
        x_center + half_w,
        y_center + half_h,
    )

In [8]:
def plot_box(image, bboxes, labels):
    """Draw class-labelled bounding boxes on ``image`` and return it.

    Args:
        image: Three-dimensional image array (height, width, channels) as
            produced by ``cv2.imread``. The drawing calls receive this same
            array, so the caller's image is modified.
        bboxes: Sequence of YOLO boxes ``[x_center, y_center, width, height]``.
            Values are multiplied by the image width/height, i.e. they are
            treated as normalized coordinates.
        labels: Sequence parallel to ``bboxes``; each entry is converted with
            ``int()`` and used as an index into the module-level ``classes``.

    Returns:
        The same ``image`` object that was passed in.
    """
    img_h, img_w, _ = image.shape
    # Line width grows with the image size but is never thinner than 2 px.
    line_width = max(round(sum(image.shape) / 2 * 0.003), 2)
    font_thickness = max(line_width - 1, 1)

    for idx, yolo_box in enumerate(bboxes):
        left, top, right, bottom = yolo2bbox(yolo_box)
        # Scale the normalized corners to integer pixel positions.
        top_left = (int(left * img_w), int(top * img_h))
        bottom_right = (int(right * img_w), int(bottom * img_h))

        class_name = classes[int(labels[idx])]
        color = colors[classes.index(class_name)]

        # Outline of the object.
        cv2.rectangle(
            image,
            top_left,
            bottom_right,
            color=color,
            thickness=line_width,
            lineType=cv2.LINE_AA,
        )

        # Size of the class name, used for the filled label background.
        text_w, text_h = cv2.getTextSize(
            class_name,
            0,
            fontScale=line_width / 3,
            thickness=font_thickness,
        )[0]

        # Put the label above the box when there is room, otherwise inside it.
        label_fits_above = top_left[1] - text_h >= 3
        if label_fits_above:
            label_corner = (top_left[0] + text_w, top_left[1] - text_h - 3)
            text_origin = (top_left[0], top_left[1] - 5)
        else:
            label_corner = (top_left[0] + text_w, top_left[1] + text_h + 3)
            text_origin = (top_left[0], top_left[1] + text_h + 2)

        # thickness=-1 fills the rectangle.
        cv2.rectangle(
            image,
            top_left,
            label_corner,
            color=color,
            thickness=-1,
            lineType=cv2.LINE_AA,
        )
        cv2.putText(
            image,
            class_name,
            text_origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=line_width / 3.5,
            color=(255, 255, 255),
            thickness=font_thickness,
            lineType=cv2.LINE_AA,
        )
    return image

# **Section 03.** Filtering and Keeping Annotations

In [36]:
# Plot a random sample of images from each split with their original annotations.
#
# Each label file is looked up from the image's own file name instead of pairing two
# sorted directory listings by position, so a missing or extra file cannot shift
# annotations onto the wrong image.

sets_bdd = ['train','val','test']
num_samples_sets = [5,5,5] # Number of samples to show for each set (train, val, test)

for k in range(len(sets_bdd)):
    image_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/images/" + sets_bdd[k]
    label_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/labels/" + sets_bdd[k]
    all_images = sorted(glob.glob(image_path + '/*'))

    # Never ask for more images than the folder holds (an empty folder shows nothing).
    sample_size = max(0, min(num_samples_sets[k], len(all_images)))
    sampled_images = random.sample(all_images, sample_size)

    print(f'%%%%%%%%%%%%%%%%%%%%%% Conjunto: {sets_bdd[k]} %%%%%%%%%%%%%%%%')
    for image_file in sampled_images:
        image_name = os.path.basename(image_file)
        main_name = os.path.splitext(image_name)[0]
        label_file = label_path + '/' + main_name + '.txt'

        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals an unreadable file by returning None instead of raising.
            print(f'Skipping {image_name}: could not be read as an image')
            continue
        if not os.path.isfile(label_file):
            print(f'Skipping {image_name}: label file not found ({label_file})')
            continue

        bboxes = []
        labels = []
        with open(label_file, 'r') as f:
            for label_line in f:
                label_line = label_line.strip()
                if not label_line:
                    continue  # tolerate blank lines
                label, x_c, y_c, w, h = label_line.split()
                bboxes.append([float(x_c), float(y_c), float(w), float(h)])
                labels.append(label)

        plot_box(image, bboxes, labels)  # draws on `image` itself
        plt.figure(figsize=(12, 9))
        plt.imshow(image[:, :, ::-1])  # reverse the channel order for matplotlib
        plt.axis('off')
        plt.show()
        print(image_name)

Output hidden; open in https://colab.research.google.com to view.

###  **Section 03.01.** Testing thresholds

In [10]:
def save_filtered_annotations(file_path, bboxes, labels, threshold=0.0020):
    """Write the annotations whose box area exceeds ``threshold`` to a .txt file.

    The file at ``file_path`` is opened in write mode (created or overwritten).
    One line ``"<label> <x_c> <y_c> <w> <h>"`` is written per kept box.

    Args:
        file_path: Destination path of the filtered annotation file.
        bboxes: Sequence of ``[x_c, y_c, w, h]`` boxes.
        labels: Sequence parallel to ``bboxes``; each label is written as-is.
        threshold: A box is kept only when ``w * h`` is strictly greater than
            this value. The original inline note lists 0.0034, 0.0025, 0.0020,
            0.0017, 0.0018, 0.0019 and 0.0020 — presumably the values tried
            while tuning.
    """
    with open(file_path, 'w') as out_file:
        out_file.writelines(
            f"{label} {x_c} {y_c} {w} {h}\n"
            for (x_c, y_c, w, h), label in zip(bboxes, labels)
            if w * h > threshold
        )

In [11]:
# Folder where the new filtered .txt files are saved
os.makedirs('/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations/', exist_ok=True)  # Create the directory "Filtered_Annotations" if it doesn't exist

In [23]:
# Save filtered .txt files for the first few test images only (used to tune the threshold).
#
# The split name is written out here instead of read from sets_bdd[2], because a later
# cell rebinds sets_bdd to a one-element list and re-running this cell would then fail.
test_split = 'test'
image_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/images/" + test_split
label_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/labels/" + test_split
output_dir = '/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations/'

all_images = sorted(glob.glob(image_path + '/*'))

# Slicing (rather than indexing up to a fixed count) copes with folders that hold
# fewer images than requested.
for image_file in all_images[:max(0, num_samples_sets[2])]:
    image_name = os.path.basename(image_file)
    main_name = os.path.splitext(image_name)[0]
    label_file = label_path + '/' + main_name + '.txt'
    output_path = output_dir + 'filtered_' + main_name + '.txt'

    if not os.path.isfile(label_file):
        print(f'Skipping {image_name}: label file not found ({label_file})')
        continue

    # Only the label file is needed here; the image itself is never decoded.
    bboxes = []
    labels = []
    with open(label_file, 'r') as f:
        for label_line in f:
            label_line = label_line.strip()
            if not label_line:
                continue
            label, x_c, y_c, w, h = label_line.split()
            bboxes.append([float(x_c), float(y_c), float(w), float(h)])
            labels.append(label)

    save_filtered_annotations(output_path, bboxes, labels)

In [24]:
# Count the entries in the output folder to check that the filtered files were saved
files = os.listdir('/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations')
len(files)

1155

In [37]:
  # Plot a few test images (to set up an appropriate umbral)

sets_bdd = ['test']
num_samples_sets_v2 = [num_samples_sets[2]]

for k in range(len(sets_bdd)):
    image_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/images/" + sets_bdd[k]
    label_path = "/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations"

    all_images = glob.glob(image_path+'/*')
    all_labels = glob.glob(label_path+'/*')
    all_labels = [all_labels_k for all_labels_k in all_labels if all_labels_k.split(os.path.sep)[-1][:10] == "filtered_7"]

    all_images.sort()
    all_labels.sort()

    temp = list(zip(all_images, all_labels))
    #random.shuffle(temp)
    all_images, all_labels = zip(*temp)
    all_images, all_labels = list(all_images), list(all_labels)

    print(f'%%%%%%%%%%%%%%%%%%%%%% Conjunto: {sets_bdd[k]} %%%%%%%%%%%%%%%%')
    for i in range(num_samples_sets_v2[k]):
            image_name = all_images[i].split(os.path.sep)[-1]
            image = cv2.imread(all_images[i])
            with open(all_labels[i], 'r') as f:
                bboxes = []
                labels = []
                label_lines = f.readlines()
                for label_line in label_lines:
                    label, x_c, y_c, w, h = label_line.split(' ')
                    x_c = float(x_c)
                    y_c = float(y_c)
                    w = float(w)
                    h = float(h)
                    bboxes.append([x_c, y_c, w, h])
                    labels.append(int(float(label)))
            result_image = plot_box(image, bboxes, labels)
            plt.figure(figsize=(10, 6))
            plt.imshow(image[:, :, ::-1])
            plt.axis('off')
            plt.show()
            print(image_name)

# Obs:
# 1. If you see that exist bounding boxes that detect parked or distant vehicles yet, set up the umbral in the "save_filtered_annotations" function
#    at the start of this section, and run again the Section 03.01.
# 2. Note that you can set up a new number of test samples editing the 3rd line of the Section 03 code.
# 3. Note that we are testing the umbral election using a few test images only. If you want, you can edit the code in order to test the umbral election using
#    train or validation images.

Output hidden; open in https://colab.research.google.com to view.

###  **Section 03.02.** Filtering and Keeping

In [39]:
# Loop to create all the new filtered .txt files (train, val and test)
#
# Only the label files are read: decoding every image is unnecessary for filtering and
# slow on Drive. Images are processed in sorted order, and an image without a label
# file is counted and skipped instead of stopping the whole run.
# NOTE(review): every split writes into the same output folder, so two images with the
# same base name in different splits would overwrite each other — confirm names are unique.

sets_bdd = ['train','val','test']
output_dir = '/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations/'

for k in range(len(sets_bdd)):
    image_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/images/" + sets_bdd[k]
    label_path = "/content/drive/MyDrive/TIC-YOLO/ColdwaterML/labels/" + sets_bdd[k]
    all_images = sorted(glob.glob(image_path + '/*'))

    num_images = len(all_images)
    num_missing = 0

    for image_file in all_images:
        main_name = os.path.splitext(os.path.basename(image_file))[0]
        label_path_N2 = label_path + '/' + main_name + '.txt'
        output_path = output_dir + 'filtered_' + main_name + '.txt'

        if not os.path.isfile(label_path_N2):
            num_missing += 1
            continue

        bboxes = []
        labels = []
        with open(label_path_N2, 'r') as f:
            for label_line in f:
                label_line = label_line.strip()
                if not label_line:
                    continue
                label, x_c, y_c, w, h = label_line.split()
                bboxes.append([float(x_c), float(y_c), float(w), float(h)])
                labels.append(label)

        save_filtered_annotations(output_path, bboxes, labels)

    print(f'{sets_bdd[k]}: {num_images - num_missing} filtered files written, '
          f'{num_missing} images without a label file')

In [41]:
# Count the entries in the output folder after filtering every split
files = os.listdir('/content/drive/MyDrive/TIC-YOLO/Annotations_vs_Predictions/Filtered_Annotations')
len(files)

1155