In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import glob
import tqdm
from PIL import Image

import os

In [2]:
# Root of the raw CULane frames. Paths are assembled with os.path so the
# same cell works with either path separator instead of only with "\\".
culane_root = os.path.join("data", "culane")

images_paths = glob.glob(os.path.join(culane_root, "*", "*.jpg"))
labels_paths = glob.glob(os.path.join(culane_root, "*", "*.txt"))

# Set copy of the label paths: membership tests below are O(1) instead of
# scanning the whole list once per image.
labels_lookup = set(labels_paths)

# ild maps "<frame name>_<folder name without .MP4>" -> (image path, label path).
# Images that have no matching "<frame name>.lines.txt" file are left out.
ild = {}

for impath in images_paths:
    fldr_path, file_name = os.path.split(impath)
    name = file_name.replace(".jpg", "")
    fldr = os.path.basename(fldr_path)
    lbpath = os.path.join(culane_root, fldr, f"{name}.lines.txt")

    dnam = name + "_" + fldr.replace(".MP4", "")

    if lbpath in labels_lookup:
        ild[dnam] = (impath, lbpath)

In [3]:
# Fraction of the paired samples held out for validation; the rest is train.
VAL_FRAC = 0.1
TRN_FRAC = 1 - VAL_FRAC

# Size in pixels of the box emitted around each sampled lane point
# (passed to get_label_from_txt as bb_h / bb_w).
BBOX_H = 100
BBOX_W = 150

# Passed to get_label_from_txt as `skip`; the point loop there steps by 2 * skip.
SKIP = 2

num_trn = int(len(ild) * TRN_FRAC)
# Validation takes whatever is left after the train slice. Truncating
# len(ild) * VAL_FRAC separately can drop one sample, so the count would not
# match the slice names[num_trn:] that is actually used for validation.
num_val = len(ild) - num_trn

In [4]:
# Seed for the train/val shuffle. With a fixed seed, re-running the notebook
# assigns every sample to the same split, so a second export cannot move
# images between the train and val folders.
SPLIT_SEED = 42

# Sort first: dict order follows glob order, which is not guaranteed to be the
# same on every machine, and the shuffle result depends on the input order.
names = np.array(sorted(ild.keys()))
np.random.default_rng(SPLIT_SEED).shuffle(names)

names_trn = names[:num_trn]
names_val = names[num_trn:]

print(f"Number of train images: {len(names_trn)}")
print(f"Number of val images: {len(names_val)}")

Number of train images: 12258
Number of val images: 1363


In [5]:
# Output roots of the two splits. The export step writes into the
# "images" and "labels" sub-folders below each of them.
dataset_train_path, dataset_val_path = (
    "datasets/cul/train",
    "datasets/cul/val",
)

In [6]:
# Dataset description file: image folders of both splits plus the four
# class names (ids 0-3). The surrounding blank lines of the literal are
# removed by .strip() before it is written.
yaml_text = """
path: cul/
train: 'train/images'
val: 'val/images'
 
# class names
names: 
  0: LL
  1: LC
  2: RC
  3: RR
""".strip()

# Written to the current working directory.
with open("cul_tld.yaml", "w") as yaml_file:
    yaml_file.write(yaml_text)

In [7]:
def get_label_from_txt(lbpath, im_w, im_h, bb_w=20, bb_h=20, skip=2):
    """Convert one lane-annotation text file into box-label text.

    Each line of the file is read as whitespace-separated numbers and treated
    as alternating ``x y`` pixel coordinates. For a line with ``n`` numbers,
    points are sampled at value indices ``2, 2 + 2*skip, ...`` below
    ``n - 3``, so the first point and the trailing values are never used.

    Every sampled point becomes one output row
    ``"<line index> <x> <y> <w> <h>"`` where all four numbers are fractions
    of the image size. A box that would cross an image border is shifted
    inwards so that it stays inside ``[0, 1]``.

    Args:
        lbpath: Path of the annotation text file.
        im_w: Image width in pixels.
        im_h: Image height in pixels.
        bb_w: Box width in pixels.
        bb_h: Box height in pixels.
        skip: Sampling factor; the loop advances ``2 * skip`` values per step.

    Returns:
        All rows concatenated into one string, each terminated by a newline
        (an empty string when no point was sampled).
    """
    with open(lbpath, "r") as src:
        lanes = [
            [float(tok) for tok in row.strip().split()]
            for row in src.read().split("\n")
        ]

    stride = 2 * skip
    rows = []
    # The position of a line inside the file is used as its class id.
    for lane_idx, coords in enumerate(lanes):
        for k in range(2, len(coords) - 3, stride):
            # Normalised centre and box size, clipped to the image and
            # rounded to four decimals.
            cx = round(np.clip(coords[k] / im_w, 0, 1), 4)
            cy = round(np.clip(coords[k + 1] / im_h, 0, 1), 4)
            bw = round(np.clip(bb_w / im_w, 0, 1), 4)
            bh = round(np.clip(bb_h / im_h, 0, 1), 4)

            # How far the box sticks out past each border (0 when it fits).
            spill_x_low = np.clip(0 - (cx - bw / 2), 0, 1)
            spill_x_high = np.clip((cx + bw / 2) - 1, 0, 1)
            spill_y_low = np.clip(0 - (cy - bh / 2), 0, 1)
            spill_y_high = np.clip((cy + bh / 2) - 1, 0, 1)

            # Move the centre by the overshoot so the box lies inside the image.
            cx = cx + spill_x_low - spill_x_high
            cy = cy + spill_y_low - spill_y_high

            rows.append(f"{lane_idx} {cx} {cy} {bw} {bh}\n")

    return "".join(rows)

In [8]:
from concurrent.futures import ThreadPoolExecutor, as_completed


# Define the task to be executed in parallel
def process_image_and_label(name):
    """Export one sample: write its image as PNG and its label text file.

    The sample goes below ``dataset_train_path`` when ``name`` is in
    ``names_trn`` and below ``dataset_val_path`` otherwise. The image is
    written to ``<split>/images/<name>.png`` and the label produced by
    ``get_label_from_txt`` to ``<split>/labels/<name>.txt``.

    Args:
        name: Key of the sample in ``ild``.

    Returns:
        None. The function only writes files.
    """
    dataset_path_prefix = dataset_train_path if name in names_trn else dataset_val_path
    images_dir = f"{dataset_path_prefix}/images"
    labels_dir = f"{dataset_path_prefix}/labels"

    # Nothing else creates the output folders, so make sure they exist.
    # exist_ok=True keeps this safe when several workers reach it together.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    dataset_image_path = f"{images_dir}/{name}.png"
    dataset_label_path = f"{labels_dir}/{name}.txt"

    impath, lbpath = ild[name]

    # Load the image, remember its size and re-save it. The context manager
    # closes the file even if saving fails.
    with Image.open(impath) as image:
        im_w, im_h = image.size
        image.save(dataset_image_path)

    # Build the label text from the annotation file and the image size.
    label = get_label_from_txt(lbpath, im_w, im_h, bb_w=BBOX_W, bb_h=BBOX_H, skip=SKIP)

    # Save the label to a file
    with open(dataset_label_path, "w") as f:
        f.write(label)


# Export every paired sample through a pool of up to 100 worker threads.
with ThreadPoolExecutor(max_workers=100) as executor:
    # One task per key of ild.
    futures = []
    for sample_name in list(ild.keys()):
        futures.append(executor.submit(process_image_and_label, sample_name))

    # The progress bar advances as tasks finish, in completion order.
    finished = tqdm.tqdm(as_completed(futures), total=len(futures), position=0)
    for done in finished:
        # The tasks return nothing useful; result() is called only so that
        # an exception raised inside a worker is re-raised here.
        done.result()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13621/13621 [06:04<00:00, 37.39it/s]
