In [1]:
from cv2 import aruco
import cv2
import numpy as np
import matplotlib.pyplot as plt
import msgpack as mp
import msgpack_numpy as mpn
import os

from tqdm import tqdm
import csv
import toml
from joblib import Parallel, delayed

In [2]:
# Parameter sweep for the motion-blur augmentation: kernel side lengths and
# kernel rotation angles (both are passed straight to apply_motion_blur).
blur_parameters = list(range(4, 11, 2))  # 4, 6, 8, 10
angle_parameters = list(range(0, 181, 30))  # 0, 30, ..., 180

In [3]:
# Number of joblib workers: this value is passed as `n_jobs` to every
# `Parallel(...)` call in this notebook.
num_processes = 20  # You can adjust this based on your requirements

In [5]:
# parameters.toml is looked up one directory above the current working
# directory; its [training_dataset].pth entry is the dataset root.
_parameters_file = os.path.join(os.path.dirname(os.getcwd()), "parameters.toml")
training_pth = toml.load(_parameters_file)["training_dataset"]["pth"]

# Raw captures live under <training_pth>/raw_data; list everything in its
# "images" folder as the candidate set for augmentation.
training_raw_data = os.path.join(training_pth, "raw_data")
_raw_saved_data_pth = training_raw_data
_raw_saved_data_list = os.listdir(os.path.join(_raw_saved_data_pth, "images"))

In [6]:
# Drop every file whose first "_"-separated token is "blur" or "noise", so that
# already-augmented outputs (the blur step below writes "blur_..." files into
# this same folder) are not augmented again.
_raw_saved_data_list = [
    _name
    for _name in _raw_saved_data_list
    if _name.split("_")[0] not in ("blur", "noise")
]

In [7]:
# Blurred images and their labels are written into the same "images" and
# "labels" folders of the raw data that the originals are read from.
_blur_save_img_pth = os.path.join(_raw_saved_data_pth, "images")
_blur_save_label_pth = os.path.join(_raw_saved_data_pth, "labels")

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(_blur_save_img_pth, exist_ok=True)
os.makedirs(_blur_save_label_pth, exist_ok=True)

In [16]:
def apply_motion_blur(image, size, angle):
    """Filter ``image`` with a rotated straight-line averaging kernel.

    Args:
        image: Image array accepted by ``cv2.filter2D``.
        size: Side length of the square kernel.
        angle: Rotation applied to the kernel via ``cv2.getRotationMatrix2D``.

    Returns:
        The filtered image, as returned by ``cv2.filter2D`` with ``ddepth=-1``.
    """
    # Start from a horizontal streak: one row of ones in a size x size kernel.
    kernel = np.zeros((size, size), dtype=np.float32)
    streak_row = (size - 1) // 2
    kernel[streak_row, :] = np.ones(size, dtype=np.float32)

    # Rotate the streak about the geometric centre of the kernel.
    centre = (size / 2 - 0.5, size / 2 - 0.5)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
    kernel = cv2.warpAffine(kernel, rotation, (size, size))

    # Rescale so the kernel weights sum to one before filtering.
    kernel = kernel * (1.0 / np.sum(kernel))
    return cv2.filter2D(image, -1, kernel)

In [17]:
def adding_blur_to_image(img_name):
    """Write motion-blurred variants of one raw image for a random ~15% of calls.

    When the image is selected, one blurred copy is produced for every
    combination of ``blur_parameters`` x ``angle_parameters``. Each copy is
    saved as ``blur_s<size>_a<angle>_<img_name>`` in ``_blur_save_img_pth``,
    and the image's label file is duplicated under the matching name in
    ``_blur_save_label_pth``.

    Args:
        img_name: File name of an image inside ``<_raw_saved_data_pth>/images``.
            The label is expected at ``<_raw_saved_data_pth>/labels/<stem>.txt``,
            where ``<stem>`` is everything before the first ``"."``.

    Returns:
        Always ``0`` (placeholder result collected by ``joblib.Parallel``).

    Raises:
        ValueError: If the selected image cannot be decoded by ``cv2.imread``.
        OSError: If the label file cannot be opened.
    """
    # Blur only a random ~15% of the images; skip the rest before doing any I/O.
    if np.random.random() >= 0.15:
        return 0

    _image_path = os.path.join(_raw_saved_data_pth, "images", img_name)
    image = cv2.imread(_image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"could not read image: {_image_path}")

    # The label content is the same for every variant, so read it once.
    label_name = img_name.split(".")[0]
    label_path = os.path.join(_raw_saved_data_pth, "labels", f"{label_name}.txt")
    with open(label_path, "r", newline="") as label_file:
        _csv_rows = list(csv.reader(label_file, delimiter=" "))

    for _blur_size in blur_parameters:
        for _blur_angle in angle_parameters:
            # Always blur the pristine original. Re-assigning `image` here would
            # stack every previous blur onto the next variant, so the file name
            # would no longer describe the blur actually applied.
            blurred = apply_motion_blur(image, _blur_size, _blur_angle)

            _prefix = f"blur_s{_blur_size}_a{_blur_angle}_"
            cv2.imwrite(os.path.join(_blur_save_img_pth, f"{_prefix}{img_name}"), blurred)

            _out_label_path = os.path.join(
                _blur_save_label_pth, f"{_prefix}{label_name}.txt"
            )
            with open(_out_label_path, "w", newline="") as label_file:
                csv.writer(label_file, delimiter=" ").writerows(_csv_rows)

    return 0


# Run the blur augmentation as one joblib task per raw image name.
_blur_pool = Parallel(n_jobs=num_processes, verbose=1)
results = _blur_pool(
    delayed(adding_blur_to_image)(_img_name) for _img_name in _raw_saved_data_list
)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done 260 tasks      | elapsed:    3.2s
[Parallel(n_jobs=20)]: Done 1260 tasks      | elapsed:   11.4s
[Parallel(n_jobs=20)]: Done 1991 tasks      | elapsed:   17.7s
[Parallel(n_jobs=20)]: Done 2880 tasks      | elapsed:   26.2s
[Parallel(n_jobs=20)]: Done 3980 tasks      | elapsed:   35.7s
[Parallel(n_jobs=20)]: Done 5280 tasks      | elapsed:   47.7s
[Parallel(n_jobs=20)]: Done 6780 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 8480 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 10380 tasks      | elapsed:  1.7min
[Parallel(n_jobs=20)]: Done 12480 tasks      | elapsed:  2.0min
[Parallel(n_jobs=20)]: Done 12533 out of 12572 | elapsed:  2.0min remaining:    0.3s
[Parallel(n_jobs=20)]: Done 12572 out of 12572 | elapsed:  2.0min finished


### Splitting the dataset into train, validation, and test sets

In [8]:
# Split the dataset into train / val / test subsets. The output tree is a
# sibling of the raw-data folder: <parent of raw_data>/processed/{images,labels}.
split_data_name = "processed"

images_pth = os.path.join(os.path.dirname(training_raw_data), split_data_name, "images")
labels_pth = os.path.join(os.path.dirname(training_raw_data), split_data_name, "labels")

# Create every subset folder unconditionally. Guarding on the parent folder
# alone would skip creation when the tree is only partially present, and the
# later writes into a missing subset folder would then fail.
for _subset in ("train", "val", "test"):
    os.makedirs(os.path.join(images_pth, _subset), exist_ok=True)
    os.makedirs(os.path.join(labels_pth, _subset), exist_ok=True)


# Everything currently in the raw images folder, including any "blur_..."
# variants written there by the augmentation step.
image_list = os.listdir(os.path.join(training_raw_data, "images"))


def split_dataset(img_name):
    """Write one image and its label file into a random train/val/test subset.

    The image is read from ``<training_raw_data>/images`` and its label from
    ``<training_raw_data>/labels/<stem>.txt`` (``<stem>`` is everything before
    the first ``"."``). Both are written under the chosen subset folder of
    ``images_pth`` / ``labels_pth``.

    A single uniform draw selects the subset: 70% train, 20% val, 10% test.

    NOTE(review): every file name is assigned independently, so a "blur_..."
    variant can land in a different subset than the image it was derived
    from, and re-running without clearing the output tree can place the same
    file in more than one subset.

    Args:
        img_name: File name of an image inside ``<training_raw_data>/images``.

    Returns:
        Always ``0`` (placeholder result collected by ``joblib.Parallel``).

    Raises:
        ValueError: If the image cannot be decoded by ``cv2.imread``.
        OSError: If the label file cannot be opened.
    """
    _image_path = os.path.join(training_raw_data, "images", img_name)
    image = cv2.imread(_image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"could not read image: {_image_path}")

    label_name = img_name.split(".")[0]
    label_path = os.path.join(training_raw_data, "labels", f"{label_name}.txt")
    with open(label_path, "r", newline="") as label_file:
        label = list(csv.reader(label_file, delimiter=" "))

    # Draw exactly once. Comparing fresh random numbers in each branch makes
    # the branches independent and skews the split to about 70 / 8 / 22
    # instead of the 70 / 20 / 10 that the 0.7 and 0.9 thresholds describe.
    draw = np.random.rand()
    if draw < 0.7:
        subset = "train"
    elif draw < 0.9:
        subset = "val"
    else:
        subset = "test"

    # Save the image and its label into the chosen subset.
    image_path = os.path.join(images_pth, subset, img_name)
    cv2.imwrite(image_path, image)

    label_path = os.path.join(labels_pth, subset, f"{label_name}.txt")
    with open(label_path, "w", newline="") as label_file:
        csv.writer(label_file, delimiter=" ").writerows(label)

    return 0


# Assign every listed image to a subset, one joblib task per file name.
_split_pool = Parallel(n_jobs=num_processes, verbose=1)
results = _split_pool(
    delayed(split_dataset)(_img_name) for _img_name in image_list
)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done 200 tasks      | elapsed:    1.5s
[Parallel(n_jobs=20)]: Done 700 tasks      | elapsed:    3.3s
[Parallel(n_jobs=20)]: Done 1400 tasks      | elapsed:    6.4s
[Parallel(n_jobs=20)]: Done 2300 tasks      | elapsed:   10.1s
[Parallel(n_jobs=20)]: Done 3400 tasks      | elapsed:   14.2s
[Parallel(n_jobs=20)]: Done 4700 tasks      | elapsed:   19.0s
[Parallel(n_jobs=20)]: Done 6200 tasks      | elapsed:   24.1s
[Parallel(n_jobs=20)]: Done 7900 tasks      | elapsed:   30.3s
[Parallel(n_jobs=20)]: Done 9800 tasks      | elapsed:   37.7s
[Parallel(n_jobs=20)]: Done 11900 tasks      | elapsed:   46.5s
[Parallel(n_jobs=20)]: Done 14200 tasks      | elapsed:   55.2s
[Parallel(n_jobs=20)]: Done 16700 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 19400 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done 22300 tasks 