In [1]:
from cv2 import aruco
import cv2
import numpy as np
import matplotlib.pyplot as plt
import msgpack as mp
import msgpack_numpy as mpn
import os

from tqdm import tqdm
import csv
import toml
from joblib import Parallel, delayed

In [2]:
# Motion-blur kernel sizes and rotation angles (degrees) to generate; every
# size/angle combination is applied to each selected image.
blur_parameters = list(range(4, 11, 2))  # 4, 6, 8, 10
angle_parameters = list(range(0, 181, 30))  # 0, 30, ..., 180

In [3]:
num_processes = 20  # Passed as n_jobs to the joblib Parallel calls in this notebook; adjust to suit the machine

In [4]:
# The dataset location is read from parameters.toml, which is expected one
# directory above the current working directory.
_parameters_file = os.path.join(os.path.dirname(os.getcwd()), "parameters.toml")
training_pth = toml.load(_parameters_file)["training_dataset"]["pth"]

# Raw data lives in <training_pth>/raw_data; list the files in its images/ folder.
training_raw_data = os.path.join(training_pth, "raw_data")
_raw_saved_data_pth = training_raw_data
_raw_saved_data_list = os.listdir(os.path.join(_raw_saved_data_pth, "images"))

In [5]:
# Keep only the original captures: drop files whose name starts with the
# "blur_" or "noise_" prefix so augmented images are not augmented again.
_raw_saved_data_list = [
    _name
    for _name in _raw_saved_data_list
    if _name.split("_")[0] not in ("blur", "noise")
]

In [6]:
# Blurred images and their labels are written into the same images/ and
# labels/ folders as the raw data; they are told apart by their "blur_" prefix.
_blur_save_img_pth = os.path.join(_raw_saved_data_pth, "images")
_blur_save_label_pth = os.path.join(_raw_saved_data_pth, "labels")

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(_blur_save_img_pth, exist_ok=True)
os.makedirs(_blur_save_label_pth, exist_ok=True)

In [7]:
def apply_motion_blur(image, size, angle):
    """Blur ``image`` with a rotated line-shaped kernel.

    Parameters
    ----------
    image : array accepted by ``cv2.filter2D``
        Image to blur; it is not modified.
    size : int
        Side length of the square kernel.
    angle : float
        Rotation passed to ``cv2.getRotationMatrix2D`` (degrees per the
        OpenCV documentation).

    Returns
    -------
    The result of ``cv2.filter2D`` with the output depth set to -1
    (same depth as the input).
    """
    # Start with a horizontal line of ones through the middle row.
    kernel = np.zeros((size, size), dtype=np.float32)
    middle_row = (size - 1) // 2
    kernel[middle_row, :] = np.ones(size, dtype=np.float32)

    # Rotate that line about the kernel centre to set the blur direction.
    centre = (size / 2 - 0.5, size / 2 - 0.5)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
    kernel = cv2.warpAffine(kernel, rotation, (size, size))

    # Rescale so the kernel weights sum to 1.
    kernel = kernel * (1.0 / np.sum(kernel))
    return cv2.filter2D(image, -1, kernel)

In [8]:
def adding_blur_to_image(img_name):
    """Write motion-blurred copies of one raw image together with its labels.

    The image is selected with probability 0.2. For a selected image, one
    blurred copy is produced for every combination of ``blur_parameters``
    and ``angle_parameters`` and saved as
    ``blur_s{size}_a{angle}_{img_name}`` in ``_blur_save_img_pth``. The rows
    of the image's label file are written unchanged to a file with the same
    prefix in ``_blur_save_label_pth``.

    Parameters
    ----------
    img_name : str
        File name (not a path) of an image in ``<raw data>/images``. The
        label file is looked up as ``<text before the first '.'>.txt`` in
        ``<raw data>/labels``.

    Returns
    -------
    int
        Always 0.
    """
    # Only about twenty percent of the images get blurred variants. Decide
    # before decoding the image so skipped images cost nothing.
    if np.random.random() >= 0.2:
        return 0

    image = cv2.imread(os.path.join(_raw_saved_data_pth, "images", img_name))

    # The labels do not depend on the blur settings, so read them once.
    label_name = img_name.split(".")[0]
    label_path = os.path.join(_raw_saved_data_pth, "labels", f"{label_name}.txt")
    with open(label_path, "r", newline="") as label_file:
        label_rows = list(csv.reader(label_file, delimiter=" "))

    for blur_size in blur_parameters:
        for blur_angle in angle_parameters:
            prefix = f"blur_s{blur_size}_a{blur_angle}_"

            # Always blur the untouched source image. Reassigning ``image``
            # here would stack every previous blur onto later variants, so
            # the file name would no longer describe the applied blur.
            blurred = apply_motion_blur(image, blur_size, blur_angle)
            cv2.imwrite(
                os.path.join(_blur_save_img_pth, f"{prefix}{img_name}"), blurred
            )

            blurred_label_path = os.path.join(
                _blur_save_label_pth, f"{prefix}{label_name}.txt"
            )
            with open(blurred_label_path, "w", newline="") as label_file:
                csv.writer(label_file, delimiter=" ").writerows(label_rows)

    return 0


# Run the blur augmentation across worker processes, one task per raw image.
results = Parallel(n_jobs=num_processes, verbose=1)(
    delayed(adding_blur_to_image)(_img_name) for _img_name in _raw_saved_data_list
)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.6s
[Parallel(n_jobs=20)]: Done 266 tasks      | elapsed:    3.4s
[Parallel(n_jobs=20)]: Done 690 tasks      | elapsed:    9.7s
[Parallel(n_jobs=20)]: Done 1399 tasks      | elapsed:   18.5s
[Parallel(n_jobs=20)]: Done 1912 out of 1951 | elapsed:   24.4s remaining:    0.4s
[Parallel(n_jobs=20)]: Done 1951 out of 1951 | elapsed:   25.6s finished


### Splitting the dataset into test, train, val

In [10]:
# Split the dataset into train, validation and test subsets, written to
# <parent of raw_data>/processed/{images,labels}/{train,val,test}.
split_data_name = "processed"

images_pth = os.path.join(os.path.dirname(training_raw_data), split_data_name, "images")
labels_pth = os.path.join(os.path.dirname(training_raw_data), split_data_name, "labels")

# Create each split folder individually. Checking only the parent folder
# would skip creation when the parent exists but a split folder is missing.
for _root_pth in (images_pth, labels_pth):
    for _split_name in ("train", "val", "test"):
        os.makedirs(os.path.join(_root_pth, _split_name), exist_ok=True)

# NOTE(review): nothing here clears the split folders, and the assignment is
# random, so files from a previous run remain alongside the new ones.
image_list = os.listdir(os.path.join(training_raw_data, "images"))


def split_dataset(img_name):
    """Copy one image and its label file into the train, val or test split.

    The split is chosen from a single uniform random draw: below 0.7 goes to
    ``train``, from 0.7 up to 0.9 goes to ``val``, the rest goes to ``test``
    (70 % / 20 % / 10 %). The image is re-encoded with ``cv2.imwrite`` under
    ``images_pth/<split>`` and the label rows are rewritten under
    ``labels_pth/<split>``.

    Parameters
    ----------
    img_name : str
        File name (not a path) of an image in ``<raw data>/images``. The
        label file is looked up as ``<text before the first '.'>.txt`` in
        ``<raw data>/labels``.

    Returns
    -------
    int
        Always 0.
    """
    image = cv2.imread(os.path.join(training_raw_data, "images", img_name))

    label_name = img_name.split(".")[0]
    label_path = os.path.join(training_raw_data, "labels", f"{label_name}.txt")
    with open(label_path, "r", newline="") as label_file:
        label_rows = list(csv.reader(label_file, delimiter=" "))

    # Draw once and compare against cumulative thresholds. Calling
    # np.random.rand() again in each condition compares unrelated numbers
    # and skews the val/test proportions away from 20 % / 10 %.
    # NOTE(review): every file is assigned independently, so a "blur_" variant
    # can land in a different split from its source image - confirm this is
    # acceptable.
    draw = np.random.rand()
    if draw < 0.7:
        split_name = "train"
    elif draw < 0.9:
        split_name = "val"
    else:
        split_name = "test"

    cv2.imwrite(os.path.join(images_pth, split_name, img_name), image)

    split_label_path = os.path.join(labels_pth, split_name, f"{label_name}.txt")
    with open(split_label_path, "w", newline="") as label_file:
        csv.writer(label_file, delimiter=" ").writerows(label_rows)

    return 0


# Assign every file in the raw images folder to a split, one task per image.
results = Parallel(n_jobs=num_processes, verbose=1)(
    delayed(split_dataset)(_img_name) for _img_name in image_list
)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.0s
[Parallel(n_jobs=20)]: Done 440 tasks      | elapsed:    1.6s
[Parallel(n_jobs=20)]: Done 1440 tasks      | elapsed:    5.1s
[Parallel(n_jobs=20)]: Done 2840 tasks      | elapsed:    9.5s
[Parallel(n_jobs=20)]: Done 4640 tasks      | elapsed:   15.4s
[Parallel(n_jobs=20)]: Done 6840 tasks      | elapsed:   22.9s
[Parallel(n_jobs=20)]: Done 9440 tasks      | elapsed:   31.6s
[Parallel(n_jobs=20)]: Done 12440 tasks      | elapsed:   42.1s
[Parallel(n_jobs=20)]: Done 13940 out of 13940 | elapsed:   48.5s finished
