In [1]:
import csv
import os
import shutil
import sys

import cv2
import matplotlib.pyplot as plt
import msgpack as mp
import msgpack_numpy as mpn
import numpy as np
import pandas as pd
import polars as pl
import toml
from cv2 import aruco
from joblib import Parallel, delayed
from tqdm import tqdm

### Parameters

In [2]:
# FORMAT: SEGMENTATION, CLASSIFICATION, POSE
# TODO: remove combining of all data
# Selects the label layout written by the export cell further down; that cell
# only has branches for "SEGMENTATION" and "POSE".
DATASET_FORMAT = "POSE"
# NOTE(review): RECORDING_TYPE and FILTER_BASED are not read by any cell
# visible in this notebook — confirm whether they are still needed.
RECORDING_TYPE = "MULTIVIDEO"
FILTER_BASED = True

### Modify `parameters.toml` to point at a different directory
Defining folders and files

In [3]:
# Folder containing the source videos.
data_pth = r"F:\greenscreen_dataset"

# parameters.toml sits one directory above the working directory; parse it
# once and reuse it for both dataset roots.
_params = toml.load(os.path.join(os.path.dirname(os.getcwd()), "parameters.toml"))
_f = _params["raw_dataset"]["pth"]

# The raw dataset must contain the analysis folder; fail early (ValueError, as
# before) with a readable message when it does not.
process_file = "00_analysis"
if process_file not in os.listdir(_f):
    raise ValueError(f"'{process_file}' not found in raw dataset folder {_f!r}")

training_pth = _params["training_dataset"]["pth"]
training_raw_data = os.path.join(training_pth, "raw_data_2")

# makedirs(exist_ok=True) also creates missing parents and repairs a
# half-created layout (e.g. raw_data_2 exists but "labels" does not).
for _subfolder in ("labels", "images"):
    os.makedirs(os.path.join(training_raw_data, _subfolder), exist_ok=True)

In [4]:
process_file

'00_analysis'

In [5]:
# Every entry of data_pth is later opened with cv2.VideoCapture, in the order
# returned here, by both processing loops below.
video_folders_list = os.listdir(os.path.join(data_pth))

In [6]:
video_folders_list

['1.mov', '2.mov', '3.mov', '4.mov', '5.mov']

### Calibration files path

In [7]:
# Calibration file: <raw dataset>/<dirname(process_file)>/calibration_00/webcam_calibration.msgpack
_calib_folder_name = "calibration_00"
_webcam_calib_pth = os.path.join(
    _f,
    os.path.dirname(process_file),
    _calib_folder_name,
    "webcam_calibration.msgpack",
)

# Only the first msgpack record is read; its first two entries are used as the
# camera matrix and the distortion coefficients respectively.
with open(_webcam_calib_pth, "rb") as f:
    webcam_calib = mp.Unpacker(f, object_hook=mpn.decode)
    _temp = next(webcam_calib)

_webcam_cam_mat, _webcam_dist = _temp[0], _temp[1]

_webcam_cam_mat

array([[671.25534529,   0.        , 678.00736213],
       [  0.        , 692.23316717, 443.37269229],
       [  0.        ,   0.        ,   1.        ]])

### ArUco dictionary and parameters

In [8]:
# Side length of one marker; also used as the board's marker length below.
marker_size = 0.05

# Marker corners in the marker's own plane (z = 0), centred on the origin and
# listed as (-x, +y), (+x, +y), (+x, -y), (-x, -y).
_half_side = marker_size / 2
marker_points = np.array(
    [
        [-_half_side, _half_side, 0],
        [_half_side, _half_side, 0],
        [_half_side, -_half_side, 0],
        [-_half_side, -_half_side, 0],
    ],
    dtype=np.float32,
)

# Detector configuration shared by the processing loops.
ARUCO_PARAMETERS = aruco.DetectorParameters()
ARUCO_DICT = aruco.getPredefinedDictionary(aruco.DICT_ARUCO_MIP_36H12)
detector = aruco.ArucoDetector(ARUCO_DICT, ARUCO_PARAMETERS)

# A 1x1 grid board, passed to refineDetectedMarkers in the loops below.
markerLength, markerSeperation = marker_size, 0.01
board = aruco.GridBoard(
    size=[1, 1],
    markerLength=markerLength,
    markerSeparation=markerSeperation,
    dictionary=ARUCO_DICT,
)


def my_estimatePoseSingleMarkers(corners, marker_points, mtx, distortion):
    """Estimate one pose per detected marker with ``cv2.solvePnP``.

    Args:
        corners: Iterable of per-marker image-corner arrays; each element is
            passed to ``cv2.solvePnP`` as the image points.
        marker_points: 3-D object points of the marker corners, in the same
            order as the image corners.
        mtx: Camera matrix.
        distortion: Distortion coefficients.

    Returns:
        Tuple ``(rvecs, tvecs, trash)`` of three lists with one entry per
        marker: the transposed rotation vector, the transposed translation
        vector and the value ``cv2.solvePnP`` returned as its first result.
    """
    rvecs, tvecs, trash = [], [], []
    for marker_corners in corners:
        # useExtrinsicGuess has to be given by keyword: the fifth positional
        # parameter of cv2.solvePnP is the optional `rvec` output array, so a
        # positional False would be taken as that array instead of the flag.
        retval, rvec, tvec = cv2.solvePnP(
            marker_points,
            marker_corners,
            mtx,
            distortion,
            useExtrinsicGuess=False,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )
        # Callers index the results as row vectors, hence the transpose.
        rvecs.append(rvec.T)
        tvecs.append(tvec.T)
        trash.append(retval)
    return rvecs, tvecs, trash

In [9]:
# Marker ids of interest and the per-frame detection log filled by this pass.
default_ids = [12, 88, 89]
data = {"frame_id": [], "marker_ids": [], "corners": [], "tvec": [], "rvec": []}

# Running frame index across all videos (0-based).
counter = 0

# The detector does not depend on the video, so build it once.
detector = aruco.ArucoDetector(ARUCO_DICT, ARUCO_PARAMETERS)

for _name in tqdm(video_folders_list):
    _video_path = os.path.join(data_pth, os.path.dirname(process_file), _name)
    print(_video_path)
    _video_file = cv2.VideoCapture(_video_path)
    try:
        while _video_file.isOpened():
            ret, frame = _video_file.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            corners, ids, rejectedImgPoints = detector.detectMarkers(gray)
            corners, ids, rejectedImgPoints, _ = detector.refineDetectedMarkers(
                image=gray,
                board=board,
                detectedCorners=corners,
                detectedIds=ids,
                rejectedCorners=rejectedImgPoints,
                cameraMatrix=_webcam_cam_mat,
                distCoeffs=_webcam_dist,
            )

            # Every frame gets exactly one entry in each list so that list
            # position and frame index stay aligned.
            data["frame_id"].append(counter)
            data["marker_ids"].append(ids)
            data["corners"].append(corners)
            if ids is None:
                data["tvec"].append(None)
                data["rvec"].append(None)
            else:
                rotation_vectors, translation_vectors, _ = my_estimatePoseSingleMarkers(
                    corners, marker_points, _webcam_cam_mat, _webcam_dist
                )
                data["tvec"].append(translation_vectors)
                data["rvec"].append(rotation_vectors)
            counter += 1
    finally:
        # Always free the capture handle, even if a frame fails to process.
        _video_file.release()

  0%|          | 0/5 [00:00<?, ?it/s]

F:\greenscreen_dataset\1.mov


 20%|██        | 1/5 [00:14<00:56, 14.15s/it]

F:\greenscreen_dataset\2.mov


 40%|████      | 2/5 [00:27<00:40, 13.65s/it]

F:\greenscreen_dataset\3.mov


 60%|██████    | 3/5 [00:41<00:27, 13.91s/it]

F:\greenscreen_dataset\4.mov


 80%|████████  | 4/5 [00:54<00:13, 13.65s/it]

F:\greenscreen_dataset\5.mov


100%|██████████| 5/5 [01:09<00:00, 13.86s/it]


In [10]:
# Per-marker pose history keyed by the marker id as a string. Every axis list
# receives exactly one value per logged frame (NaN when the marker was not
# detected), so list position equals frame index.
_POSE_AXES = ("x", "y", "z", "rx", "ry", "rz")
_MISSING_POSE = (np.nan,) * len(_POSE_AXES)

coordinate = {
    str(_marker_id): {_axis: [] for _axis in _POSE_AXES} for _marker_id in default_ids
}

for _frame_ids, _frame_tvecs, _frame_rvecs in zip(
    data["marker_ids"], data["tvec"], data["rvec"]
):
    # Pose of each tracked marker found in this frame.
    _frame_pose = {}
    if _frame_ids is not None:
        for _j, _marker_id in enumerate(np.asarray(_frame_ids).ravel().tolist()):
            # Only the first detection of an id is kept: appending a second
            # row for the same frame would shift every later row and break the
            # row-index == frame-index relation the drop list relies on.
            if _marker_id in default_ids and _marker_id not in _frame_pose:
                _frame_pose[_marker_id] = (
                    *_frame_tvecs[_j][0][:3],
                    *_frame_rvecs[_j][0][:3],
                )

    for _marker_id in default_ids:
        _values = _frame_pose.get(_marker_id, _MISSING_POSE)
        for _axis, _value in zip(_POSE_AXES, _values):
            coordinate[str(_marker_id)][_axis].append(_value)

In [11]:
def _table_and_jump_rows(marker_coords):
    """Build a marker's pose table and find rows where z rises by more than 1.

    Returns the DataFrame (with the boolean "sort" column added) and the index
    of the rows whose z value exceeds the previous row's z by more than 1.
    """
    table = pd.DataFrame(marker_coords)
    table["sort"] = table["z"].diff() > 1
    return table, table.loc[table["sort"]].index


ar_df_12, drop_idx_12 = _table_and_jump_rows(coordinate["12"])
ar_df_88, drop_idx_88 = _table_and_jump_rows(coordinate["88"])
ar_df_89, drop_idx_89 = _table_and_jump_rows(coordinate["89"])

# Pool the flagged rows of all three markers and de-duplicate them.
drop = {"drop": [*drop_idx_12, *drop_idx_88, *drop_idx_89]}
drop_ids = pl.DataFrame(drop)
drops = drop_ids["drop"].unique().to_numpy()

In [14]:
default_ids = [12, 88, 89]

# Class index (as written to the label file) for every tracked marker id.
_class_by_id = {_marker_id: str(_idx) for _idx, _marker_id in enumerate(default_ids)}

# Image numbering continues after the files already present in this folder.
# NOTE(review): this hard-coded folder is not training_raw_data, where the
# images are written — confirm that this is intended.
counter = len(os.listdir(r"E:\toTrain\raw_data\images")) + 1
second_counter = 0

# `drops` holds 0-based frame indices from the first detection pass, so frames
# are matched against their own 0-based index rather than against `counter`,
# which is offset by the existing image count and incremented before use.
_drop_frames = set(np.asarray(drops).ravel().tolist())
_frame_index = -1

# The detector does not depend on the video, so build it once.
detector = aruco.ArucoDetector(ARUCO_DICT, ARUCO_PARAMETERS)

for _name in tqdm(video_folders_list):
    _video_path = os.path.join(data_pth, os.path.dirname(process_file), _name)
    _video_file = cv2.VideoCapture(_video_path)
    try:
        while _video_file.isOpened():
            ret, _frame = _video_file.read()
            if not ret:
                break

            _frame_index += 1
            # Dropped frames still consume an image number, as before.
            counter += 1
            if _frame_index in _drop_frames:
                continue

            height, width = _frame.shape[:2]
            gray = cv2.cvtColor(_frame, cv2.COLOR_BGR2GRAY)

            markerCorners, ids, rejectedImgPoints = detector.detectMarkers(gray)
            markerCorners, ids, rejectedImgPoints, _ = detector.refineDetectedMarkers(
                image=gray,
                board=board,
                detectedCorners=markerCorners,
                detectedIds=ids,
                rejectedCorners=rejectedImgPoints,
                cameraMatrix=_webcam_cam_mat,
                distCoeffs=_webcam_dist,
            )

            second_counter += 1

            img_name = f"image_{counter}.png"
            label_name = img_name.split(".")[0]
            label_path = os.path.join(training_raw_data, "labels", f"{label_name}.txt")

            # `with` closes the label file even if writing a row fails.
            with open(label_path, "w", newline="") as label_file:
                label_writer = csv.writer(label_file, delimiter=" ")

                # ids is None when no marker was found; the label file is then
                # written empty instead of crashing on len(None).
                for i in range(0 if ids is None else len(ids)):
                    _class_name = _class_by_id.get(int(ids[i][0]))
                    if _class_name is None:
                        # Marker id that is not one of default_ids.
                        continue

                    _markerCorners = markerCorners[i][0]

                    # Normalised corner coordinates; the first corner is
                    # repeated at the end.
                    _points = []
                    for _c in (0, 1, 2, 3, 0):
                        _points.append(_markerCorners[_c][0] / width)
                        _points.append(_markerCorners[_c][1] / height)

                    if DATASET_FORMAT == "SEGMENTATION":
                        label_writer.writerow([_class_name, *_points])
                    elif DATASET_FORMAT == "POSE":
                        bbox_x, bbox_y, bbox_width, bbox_height = cv2.boundingRect(
                            _markerCorners
                        )
                        # Box centre and size, normalised by the frame size.
                        bbox_center_x = bbox_x / width + bbox_width / (2 * width)
                        bbox_center_y = bbox_y / height + bbox_height / (2 * height)
                        label_writer.writerow(
                            [
                                _class_name,
                                bbox_center_x,
                                bbox_center_y,
                                bbox_width / width,
                                bbox_height / height,
                                *_points,
                            ]
                        )

            image_path = os.path.join(training_raw_data, "images", img_name)
            cv2.imwrite(image_path, _frame)
    finally:
        # Always free the capture handle, even if a frame fails to process.
        _video_file.release()

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [03:38<00:00, 43.71s/it]


In [None]:
training_raw_data

'E:\\\\toTrain\\raw_data'

In [None]:
counter, second_counter

(12573, 12572)

### Adding Gaussian blur and additive Gaussian noise

In [20]:
num_processes = 20  # You can adjust this based on your requirements

In [None]:
_raw_saved_data_pth = os.path.join(training_raw_data, "images")
# The augmentation below writes "blur_<name>" and "noise_<name>" into this same
# folder; leave those out so re-running the cell does not augment its own
# output ("blur_blur_…", "noise_blur_…").
_raw_saved_data_list = [
    _img_name
    for _img_name in os.listdir(_raw_saved_data_pth)
    if not _img_name.startswith(("blur_", "noise_"))
]


def add_noise(image, scale=0.4, rng=None):
    """Return a copy of ``image`` with additive Gaussian noise.

    The image is scaled to [0, 1], zero-mean unit-variance normal noise
    multiplied by ``scale`` is added, and the result is clipped to [0, 1] and
    converted back to 8-bit.

    Args:
        image: Array of 8-bit pixel values (any shape).
        scale: Noise standard deviation as a fraction of the full value range.
        rng: Optional ``numpy.random.Generator`` for reproducible noise; the
            global NumPy random state is used when omitted.

    Returns:
        A new contiguous ``uint8`` array with the same shape as ``image``.
    """
    normalised = np.asarray(image, dtype=np.float64) / 255.0
    if rng is None:
        noise = np.random.normal(loc=0, scale=1, size=normalised.shape)
    else:
        noise = rng.normal(loc=0, scale=1, size=normalised.shape)
    noisy = np.clip(normalised + noise * scale, 0, 1)
    # Round to nearest before the cast: a bare astype(uint8) truncates, which
    # darkens the image by up to one level per pixel even without noise.
    return np.rint(noisy * 255).astype(np.uint8)


def gaussian_blur(image):
    """Blur ``image`` with ``cv2.GaussianBlur`` using a (3, 35) kernel size.

    The third argument (0) is passed through to OpenCV unchanged.
    """
    kernel_size = (3, 35)
    blurred = cv2.GaussianBlur(image, kernel_size, 0)
    return blurred


def adding_noise_to_image(img_name, blur_prob=0.4, noise_prob=0.3):
    """Randomly write blurred and/or noisy copies of one raw image.

    With probability ``blur_prob`` a ``blur_<img_name>`` image is written and
    with probability ``noise_prob`` a ``noise_<img_name>`` image, both into
    ``_raw_saved_data_pth``. Each copy gets a label file with the same prefix
    whose content is the source image's label file. Both copies are derived
    from the original image, so the noisy copy is never blurred as well.

    Args:
        img_name: File name of the image inside ``_raw_saved_data_pth``.
        blur_prob: Probability of writing the blurred copy.
        noise_prob: Probability of writing the noisy copy.

    Returns:
        Always 0.

    Raises:
        ValueError: If an augmentation was selected but the image cannot be
            read by ``cv2.imread``.
    """
    # One independent draw per augmentation. Drawing twice inside a single
    # condition (`random() > a and random() < b`) does not select an interval.
    make_blur = np.random.random() < blur_prob
    make_noise = np.random.random() < noise_prob
    if not (make_blur or make_noise):
        # Nothing selected: skip decoding the image altogether.
        return 0

    _image_path = os.path.join(_raw_saved_data_pth, img_name)
    image = cv2.imread(_image_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError(f"could not read image: {_image_path}")

    label_dir = os.path.join(_raw_saved_data_pth, "..", "labels")
    label_name = img_name.split(".")[0]
    source_label = os.path.join(label_dir, f"{label_name}.txt")

    augmented = []
    if make_blur:
        augmented.append(("blur", gaussian_blur(image)))
    if make_noise:
        augmented.append(("noise", add_noise(image)))

    for prefix, variant in augmented:
        cv2.imwrite(os.path.join(_raw_saved_data_pth, f"{prefix}_{img_name}"), variant)
        # The geometry is unchanged, so the label file is copied verbatim.
        shutil.copyfile(
            source_label, os.path.join(label_dir, f"{prefix}_{label_name}.txt")
        )
    return 0


# One joblib task per listed image name, spread over num_processes workers.
_augment_tasks = (
    delayed(adding_noise_to_image)(_img_name) for _img_name in _raw_saved_data_list
)
results = Parallel(n_jobs=num_processes, verbose=1)(_augment_tasks)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    3.0s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:    7.0s
[Parallel(n_jobs=20)]: Done 760 tasks      | elapsed:   12.0s
[Parallel(n_jobs=20)]: Done 1210 tasks      | elapsed:   19.0s
[Parallel(n_jobs=20)]: Done 1820 tasks      | elapsed:   27.4s
[Parallel(n_jobs=20)]: Done 3120 tasks      | elapsed:   46.2s
[Parallel(n_jobs=20)]: Done 4620 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 6320 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 8220 tasks      | elapsed:  1.8min
[Parallel(n_jobs=20)]: Done 10320 tasks      | elapsed:  2.3min
[Parallel(n_jobs=20)]: Done 12533 out of 12572 | elapsed:  2.7min remaining:    0.4s
[Parallel(n_jobs=20)]: Done 12572 out of 12572 | elapsed:  2.7min finished


## Splitting into train, validation and test

In [15]:
# Folder whose "images" and "labels" sub-folders are read by the split below.
raw_data_pth = r"E:\toTrain\raw_data"

In [18]:
# Parent of the current working directory.
# NOTE(review): data_pth is reassigned to a hard-coded folder at the top of the
# split cell below, so this value is only displayed here.
data_pth = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_pth

'd:\\CMC\\pyprojects\\DeepVision'

In [21]:
# splitting dataset into train and validation and test
# data_pth = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# data_pth = os.path.join(data_pth, 'dataset',"multi_class", "dataset_processed")
data_pth = r"F:\dataset_processed_v3"

images_pth = os.path.join(data_pth, "images")
labels_pth = os.path.join(data_pth, "labels")

if not os.path.exists(images_pth):
    os.makedirs(os.path.join(images_pth, "train"))
    os.makedirs(os.path.join(images_pth, "val"))
    os.makedirs(os.path.join(images_pth, "test"))

if not os.path.exists(labels_pth):
    os.makedirs(os.path.join(labels_pth, "train"))
    os.makedirs(os.path.join(labels_pth, "val"))
    os.makedirs(os.path.join(labels_pth, "test"))


image_list = os.listdir(os.path.join(raw_data_pth, "images"))


def split_dataset(img_name):
    """Copy one raw image and its label file into train, val or test.

    A single uniform draw picks the split: below 0.7 -> "train", below 0.9 ->
    "val", otherwise "test" (70 % / 20 % / 10 %). The files are copied
    byte-for-byte from ``raw_data_pth`` into ``images_pth/<split>`` and
    ``labels_pth/<split>``.

    NOTE(review): every file is drawn independently, so the "blur_"/"noise_"
    copies of a frame can land in a different split than the frame itself.

    Args:
        img_name: File name of the image inside ``raw_data_pth/images``.

    Returns:
        Always 0.

    Raises:
        FileNotFoundError: If the image or its label file does not exist.
    """
    label_name = img_name.split(".")[0]
    source_image = os.path.join(raw_data_pth, "images", img_name)
    source_label = os.path.join(raw_data_pth, "labels", f"{label_name}.txt")

    # Use ONE draw for all thresholds; calling rand() again in each condition
    # compares unrelated numbers and skews the split sizes.
    draw = np.random.rand()
    if draw < 0.7:
        split = "train"
    elif draw < 0.9:
        split = "val"
    else:
        split = "test"

    # Label first: a missing label then fails before any image is written.
    shutil.copyfile(source_label, os.path.join(labels_pth, split, f"{label_name}.txt"))
    # Plain file copy instead of decoding and re-encoding the image.
    shutil.copyfile(source_image, os.path.join(images_pth, split, img_name))

    return 0


# One joblib task per raw image name, spread over num_processes workers.
_split_tasks = (delayed(split_dataset)(_img_name) for _img_name in image_list)
results = Parallel(n_jobs=num_processes, verbose=1)(_split_tasks)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done 200 tasks      | elapsed:    1.5s
[Parallel(n_jobs=20)]: Done 700 tasks      | elapsed:    3.5s
[Parallel(n_jobs=20)]: Done 1400 tasks      | elapsed:    6.2s
[Parallel(n_jobs=20)]: Done 2300 tasks      | elapsed:    9.7s
[Parallel(n_jobs=20)]: Done 3400 tasks      | elapsed:   13.6s
[Parallel(n_jobs=20)]: Done 4700 tasks      | elapsed:   18.3s
[Parallel(n_jobs=20)]: Done 6200 tasks      | elapsed:   23.3s
[Parallel(n_jobs=20)]: Done 7900 tasks      | elapsed:   29.6s
[Parallel(n_jobs=20)]: Done 9800 tasks      | elapsed:   39.6s
[Parallel(n_jobs=20)]: Done 11570 tasks      | elapsed:   56.2s
[Parallel(n_jobs=20)]: Done 13300 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 15800 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done 18500 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 21400 tasks 