In [None]:
%load_ext autoreload
%autoreload 2

import os

# Jupyter kernels typically do not define `__file__`, so the *string literal*
# "__file__" is used here on purpose: it contains no path separator, so its
# dirname is "" and abspath("") resolves to the process's current working directory.
CURRENT_DPATH = os.path.abspath(os.path.dirname("__file__"))
# Parent of the working directory. Presumably the repository root, i.e. the
# notebook is expected to be run from a first-level subfolder — TODO confirm.
PROJECT_ROOT = os.path.abspath(os.path.join(CURRENT_DPATH, os.pardir))
# "data" folder directly under PROJECT_ROOT.
DATA_DPATH = os.path.join(PROJECT_ROOT, "data")


from lane_detection_medium.utils.fs import read_image, read_yolo_labels
from lane_detection_medium.utils.viz import render_bbox
from lane_detection_medium.types.box_types import YoloBbox

from ipywidgets import interact, IntSlider
import matplotlib.pyplot as plt 
import glob
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from datetime import datetime
import shutil


## Data Loading

In [None]:
# Source dataset: images are searched recursively under this folder, labels are
# expected in its "labels" subfolder.
prep_dpath = os.path.join(PROJECT_ROOT, "lane_detection_medium", "yolov5-7.0", "data", "datasets", "20230619_full")
print(prep_dpath)

RANDOM_SEED = 42

# Collect (image, label) pairs; images without a matching label file are skipped.
# glob yields paths in arbitrary, filesystem-dependent order, so the listing is
# sorted first — otherwise the seeded split below would not be reproducible
# across machines (the seed fixes the permutation, not the input order).
images = []
labels = []
for image_fpath in sorted(glob.iglob(os.path.join(prep_dpath, "**/*.PNG"), recursive=True)):
    image_basename = os.path.splitext(os.path.basename(image_fpath))[0]
    label_fpath = os.path.join(prep_dpath, "labels", image_basename + ".txt")

    if os.path.exists(label_fpath):
        images.append(image_fpath)
        labels.append(label_fpath)

print(len(images))

# Fail early with a readable message instead of an opaque error from the splitter.
if not images:
    raise ValueError(f"No labelled *.PNG images found under {prep_dpath}")

# One row per sample: absolute path of the image and of its label file.
df = pd.DataFrame({"image": images, "label": labels})

# 70 % train, then the remaining 30 % is halved into test and validation.
train_images, test_and_valid_images = train_test_split(
    df, test_size=0.3, random_state=RANDOM_SEED
)

test_images, valid_images = train_test_split(
    test_and_valid_images, test_size=0.5, random_state=RANDOM_SEED
)

# Every sample must land in exactly one split.
assert len(train_images) + len(test_images) + len(valid_images) == len(df)

# train_images.to_csv(os.path.join(prep_dpath, "train.csv"))
# test_images.to_csv(os.path.join(prep_dpath, "test.csv"))
# valid_images.to_csv(os.path.join(prep_dpath, "valid.csv"))

# Output dataset folder is named after today's date (YYYY_MM_DD).
# NOTE(review): re-running on the same day reuses this folder and existing files
# are not removed; if the source data changed in between, files from the earlier
# run may remain in a different split. Consider clearing the folder manually.
ds_dirname = os.path.join(PROJECT_ROOT, "lane_detection_medium", "yolov5-7.0", "data", "datasets", datetime.today().strftime("%Y_%m_%d"))
os.makedirs(ds_dirname, exist_ok=True)

# Materialise each split as <ds_dirname>/<split>/{images,labels}.
splits = {"train": train_images, "test": test_images, "val": valid_images}
for folder, split_df in splits.items():
    images_dirname = os.path.join(ds_dirname, folder, "images")
    os.makedirs(images_dirname, exist_ok=True)

    labels_dirname = os.path.join(ds_dirname, folder, "labels")
    os.makedirs(labels_dirname, exist_ok=True)

    for image_src, label_src in zip(split_df["image"], split_df["label"]):
        shutil.copy(image_src, images_dirname)
        shutil.copy(label_src, labels_dirname)


## Dataset Debugging

In [None]:
@interact
def show(index=IntSlider(value=0, min=0, max=max(len(images) - 1, 0))):
    """Display frame number `index` of the source dataset with its YOLO boxes drawn.

    The frame is looked up by file name (`frame_<index, 6 digits>.PNG` in
    `<prep_dpath>/images`, labels in `<prep_dpath>/labels`), not by position in
    the notebook-level `images` list.

    Args:
        index: Frame number selected with the slider. The slider's upper bound is
            the number of collected images minus one (clamped to 0 so that an
            empty list does not request max < min).

    NOTE(review): the slider range is a *count* while the lookup is by frame
    number; if frame numbers are not contiguous from 0, higher-numbered frames
    are unreachable — confirm this is intended.
    """
    frame_name = f'frame_{index:06d}'
    img_path = os.path.join(prep_dpath, 'images', frame_name + '.PNG')
    label_path = os.path.join(prep_dpath, 'labels', frame_name + '.txt')

    # Tell the user why nothing is drawn instead of silently showing an empty output.
    if not os.path.exists(img_path):
        print(f"No image for frame {index}: {img_path}")
        return

    src_img = read_image(img_path)

    if os.path.exists(label_path):
        # Named `frame_labels` to avoid shadowing the notebook-level `labels` list.
        frame_labels = read_yolo_labels(label_path)
        for label in frame_labels:
            # label[1:] drops the first field (presumably the class id — verify);
            # the return value of render_bbox is ignored, so it is assumed to
            # draw onto src_img in place.
            render_bbox(src_img, YoloBbox.from_yolo(label[1:], *src_img.shape[:2]))
    else:
        print(f"No label file for frame {index}: {label_path}")

    _fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 6))
    ax.set_title(frame_name)
    ax.imshow(src_img)
    plt.show()
