In [7]:
%%capture
import glob
import math
import os
import warnings
from ast import literal_eval
from typing import Tuple

import numpy as np
import pandas as pd
import PIL.ImageDraw
from IPython.display import Image
from matplotlib import pyplot as plt
from PIL import Image
from shapely.geometry import Polygon

# load kedro variables 
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    %load_ext kedro.extras.extensions.ipython

In [8]:
# define util functions
def plot_image(img: PIL.Image, dimensions: Tuple[int, int] = (20, 6)):
    plt.figure(figsize=dimensions)
    _ = plt.imshow(img)
    plt.axis("off")
    plt.show()

In [9]:
# define global variables
img_path = "../data/01_raw/images/"
newpath = "../data/03_primary/sliced_images/"
falsepath = "../data/03_primary/images/"
slice_size = 1280
img_list = glob.glob(img_path + "*.jpg")
df_res = pd.DataFrame(
    columns=[
        "image_id",
        "class",
        
        "bounds",
        "width",
        "height",
    ]
)

# read annotations
annotations = catalog.load("annotations")

# converts bounding box definitions to python object
annotations["bounds"] = annotations["bounds"].apply(
    lambda x: literal_eval(x.rstrip("\r\n"))
)

In [10]:
for img_name in img_list:

    # open image
    im = Image.open(img_name)
    imr = np.array(im, dtype=np.uint8)

    # extract labels for given image
    labels = annotations[
        annotations["image_id"] == img_name.split("/")[-1].split(".")[0]
    ]

    # convert bounding boxes to shapely polygons.
    boxes = [
        (
            row["class"],
            Polygon(
                [
                    (row["bounds"][0], row["bounds"][1]),
                    (row["bounds"][2], row["bounds"][1]),
                    (row["bounds"][2], row["bounds"][3]),
                    (row["bounds"][0], row["bounds"][3]),
                ]
            ),
        )
        for _, row in labels.iterrows()
    ]

    # create tiles and find intersection with bounding boxes for each tile
    for i in range((im.size[1] // slice_size)):
        for j in range((im.size[0] // slice_size)):
            x1 = j * slice_size
            y1 = i * slice_size
            x2 = ((j + 1) * slice_size) - 1
            y2 = ((i + 1) * slice_size) - 1

            pol = Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])

            imsaved = False
            slice_labels = []
            slice_anno = []

            for box in boxes:
                if pol.intersects(box[1]):
                    inter = pol.intersection(box[1])
                    if not imsaved:
                        # slice image
                        sliced = imr[
                            i * slice_size : (i + 1) * slice_size,
                            j * slice_size : (j + 1) * slice_size,
                        ]
                        sliced_im = Image.fromarray(sliced)

                        filename = img_name.split("/")[-1]
                        slice_path = newpath + filename.replace(".jpg", f"_{i}_{j}.jpg")
                        slice_labels_path = newpath + filename.replace(
                            ".jpg", f"_{i}_{j}.txt"
                        )
                        os.makedirs(newpath, exist_ok=True)
                        sliced_im.save(slice_path)
                        imsaved = True

                    # calculate the vertices of the bounding box
                    x, y = inter.envelope.exterior.coords.xy
                    min_x, max_x, min_y, max_y = (
                        math.ceil(min(x) - (j * slice_size)),
                        math.ceil(max(x) - (j * slice_size)),
                        math.ceil(min(y) - (i * slice_size)),
                        math.ceil(max(y) - (i * slice_size)),
                    )

                    slice_anno.append(
                        [
                            img_name.split("/")[-1].replace(".jpg", f"_{i}_{j}.jpg"),
                            "oil-storage-tank",
                            (min_x, min_y, max_x, max_y),
                            max_x - min_x,
                            max_y - min_y,
                        ]
                    )

            # save txt with labels for the current tile
            if len(slice_anno) > 0:
                df_anno = pd.DataFrame(
                    slice_anno,
                    columns=[
                        "image_id",
                        "class",
                        "bounds",
                        "width",
                        "height",
                    ],
                )
                df_res = pd.concat(
                    [
                        df_res,
                        df_anno,
                    ],
                    axis=0,
                )

            # if there are no bounding boxes intersect current tile, save this tile to a separate folder
            if not imsaved:
                sliced = imr[
                    i * slice_size : (i + 1) * slice_size,
                    j * slice_size : (j + 1) * slice_size,
                ]
                sliced_im = Image.fromarray(sliced)
                filename = img_name.split("/")[-1]
                slice_path = falsepath + filename.replace(".jpg", f"_{i}_{j}.jpg")
                os.makedirs(falsepath, exist_ok=True)
                sliced_im.save(slice_path)
                imsaved = True


df_res.reset_index(drop=True).to_csv(
    "../data/03_primary/annotation.csv",
    index=False,
)
print(f"Wrote annotation file with the following shape: {df_res.shape} -> Dif. {df_res.shape[0] - annotations.shape[0]}")
print(f"Images with no boxes: {len(os.listdir(falsepath))} / {len(img_list)}")
print(f"Images with boxes: {len(os.listdir(newpath))} / {len(img_list)}")

Wrote annotation file with the following shape: (358, 5) -> Dif. -13234
Images with no boxes: 16 / 98
Images with boxes: 382 / 98


### Notes: 
- Internal image representation: $\quad\begin{pmatrix} (0,0)&(0,1)\\(1,0)&(1,1)\end{pmatrix}$