In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import glob
import tqdm
from PIL import Image

In [2]:
def get_set_filenames(split_path:str):
    with open(split_path, "r") as f:
        return f.read().split("\n")

def load_label_json(filename):
    with open(filename, "r") as f:
        return json.load(f)

### Explore Dataset

In [10]:
labels_folder_path = "data\\mapillary\\annotations\\"
images_folder_path = "data\\mapillary\\images\\"
splits_folder_path = "data\\mapillary\\splits\\"

In [11]:
all_labels_paths = glob.glob(f"{labels_folder_path}*.json")
all_images_paths = glob.glob(f"{images_folder_path}*.jpg")

print("Number of found labels:", len(all_labels_paths))
print("Number of found images:", len(all_images_paths))

print()
if len(all_labels_paths) != len(all_images_paths):
    print("Synchronization needed!")
print()
    
train_names = get_set_filenames(f"{splits_folder_path}train.txt")
val_names = get_set_filenames(f"{splits_folder_path}val.txt")
test_names = get_set_filenames(f"{splits_folder_path}test.txt")

print("Number of train files:", len(train_names))
print("Number of val files:", len(val_names))
print("Number of test files:", len(test_names))

total_number_of_files = len(train_names) + len(val_names) + len(test_names)

print()
if total_number_of_files != len(all_images_paths):
    print("Synchronization needed!")
print() 

labels_names = set(map(lambda x: x.split("\\")[-1].replace(".json", ""), all_labels_paths))
images_names = set(map(lambda x: x.split("\\")[-1].replace(".jpg", ""), all_images_paths))
common_names = images_names & labels_names
print("Number of valid image/label pairs:", len(common_names))

available_train_names = set(train_names) & common_names
available_val_names = set(val_names) & common_names
available_test_names = set(test_names) & common_names
print("Number of valid train image/label pairs:", len(available_train_names))
print("Number of valid val image/label pairs:", len(available_val_names))
print("Number of valid test image/label pairs:", len(available_test_names))

Number of found labels: 41909
Number of found images: 52453

Synchronization needed!

Number of train files: 36589
Number of val files: 5320
Number of test files: 10544


Number of valid image/label pairs: 41909
Number of valid train image/label pairs: 36589
Number of valid val image/label pairs: 5320
Number of valid test image/label pairs: 0


## Setup Dataset

In [5]:
IMAGE_SIZE = (512, 512)

### Create YAML file

In [13]:
all_labels = set()

for label_path in tqdm.tqdm(all_labels_paths, position=0):
    label_dict = load_label_json(label_path)
    all_labels = all_labels.union(all_labels.union(set([i['label'] for i in label_dict['objects']])))

label_map = {lbl:i for i, lbl in enumerate(all_labels)}

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41909/41909 [03:20<00:00, 208.97it/s]


In [19]:
# Get only one variant per sign
set(["--".join(i.split("--")[:2]) for i in label_map.keys()])

{'complementary--accident-area',
 'complementary--both-directions',
 'complementary--buses',
 'complementary--chevron-left',
 'complementary--chevron-right',
 'complementary--chevron-right-unsure',
 'complementary--distance',
 'complementary--except-bicycles',
 'complementary--extent-of-prohibition-area-both-direction',
 'complementary--go-left',
 'complementary--go-right',
 'complementary--keep-left',
 'complementary--keep-right',
 'complementary--maximum-speed-limit-15',
 'complementary--maximum-speed-limit-20',
 'complementary--maximum-speed-limit-25',
 'complementary--maximum-speed-limit-30',
 'complementary--maximum-speed-limit-35',
 'complementary--maximum-speed-limit-40',
 'complementary--maximum-speed-limit-45',
 'complementary--maximum-speed-limit-50',
 'complementary--maximum-speed-limit-55',
 'complementary--maximum-speed-limit-70',
 'complementary--maximum-speed-limit-75',
 'complementary--obstacle-delineator',
 'complementary--one-direction-left',
 'complementary--one-dire

In [58]:
classes_str = ""

for lbl in label_map:
    classes_str += f"  {label_map[lbl]}: {lbl}\n"

training_yaml_file = \
f"""
path: mapillary/
train: 'images/train'
val: 'images/val'
 
# class names
names: 
{classes_str}
""".strip()

with open("mapillary_traffix_sign.yaml", "w") as f:
    f.write(training_yaml_file)

print(training_yaml_file)

path: mapillary/
train: 'images/train'
val: 'images/val'
 
# class names
names: 
  1: regulatory--no-heavy-goods-vehicles-or-buses--g1
  3: regulatory--no-right-turn--g1
  4: regulatory--keep-right--g1
  9: information--pedestrians-crossing--g2
  10: regulatory--one-way-straight--g1
  11: regulatory--no-stopping--g8
  12: regulatory--maximum-speed-limit-40--g1
  13: regulatory--maximum-speed-limit-40--g6
  14: regulatory--no-motor-vehicles--g1
  15: regulatory--maximum-speed-limit-20--g1
  16: complementary--chevron-right--g5
  17: regulatory--maximum-speed-limit-100--g3
  19: regulatory--no-entry--g1
  20: complementary--distance--g3
  23: information--tram-bus-stop--g2
  24: regulatory--no-parking-or-no-stopping--g2
  27: regulatory--shared-path-pedestrians-and-bicycles--g1
  28: regulatory--no-bicycles--g1
  29: complementary--chevron-left--g4
  30: regulatory--dual-lanes-go-straight-on-right--g1
  32: information--disabled-persons--g1
  33: regulatory--reversible-lanes--g2
  35: in

### Populate `labels` folder

In [89]:
yolo_labels_folder_path = "datasets\\mapillary\\labels\\"

for label_path in tqdm.tqdm(all_labels_paths, position=0):
    filename = label_path.split("\\")[-1].replace(".json", "")

    set_suffix = None
    
    if filename in available_train_names:
        set_suffix = "train\\"

    if filename in available_val_names:
        set_suffix = "val\\"

    if set_suffix is None:
        print("Set of", label_path, "cannot be determined. Skipping...")
        continue
    
    label_dict = load_label_json(label_path)
    w, h = label_dict['width'], label_dict['height']
    label_txt = ""
    
    for obj in label_dict['objects']:
        lbl_indx = label_map[obj['label']]
        x_center = np.mean([obj['bbox']['xmin'], obj['bbox']['xmax']]) / w
        y_center = np.mean([obj['bbox']['ymin'], obj['bbox']['ymax']]) / h
        x_length = (obj['bbox']['xmax'] - obj['bbox']['xmin']) / w
        y_length = (obj['bbox']['ymax'] - obj['bbox']['ymin']) / h
        label_txt += f"{lbl_indx} {x_center} {y_center} {x_length} {y_length}\n"

    label_txt = label_txt.strip()
    filename = yolo_labels_folder_path + set_suffix + label_path.split("\\")[-1].replace(".json", "") + ".txt"

    with open(filename, "w") as f:
        f.write(label_txt)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41909/41909 [03:16<00:00, 212.80it/s]


### Populate `images` folder

In [None]:
import concurrent.futures
import os


def process_and_save_image(image_path):
    filename = image_path.split("\\")[-1].replace(".jpg", "")
    set_suffix = None

    if filename in available_train_names:
        set_suffix = "train\\"
    elif filename in available_val_names:
        set_suffix = "val\\"
    
    if set_suffix is None:
        return

    # Adjusted to use os.path.join for better path handling
    filename = os.path.join(yolo_images_folder_path, set_suffix, filename + ".png").strip()

    image = Image.open(image_path).resize(IMAGE_SIZE)
    image.save(filename)
    image.close()

# Adjust the max_workers as needed
with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
    # List to store futures
    future_to_image = {executor.submit(process_and_save_image, image_path): image_path for image_path in all_images_paths}
    
    # Process futures as they complete
    for future in tqdm.tqdm(concurrent.futures.as_completed(future_to_image), total=len(future_to_image), position=0):
        image_path = future_to_image[future]
        try:
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (image_path, exc))

In [None]:
# yolo_images_folder_path = "datasets\\mapillary\\images\\"

# for image_path in tqdm.tqdm(all_images_paths, position=0):
#     filename = image_path.split("\\")[-1].replace(".jpg", "")

#     set_suffix = None
    
#     if filename in available_train_names:
#         set_suffix = "train\\"

#     if filename in available_val_names:
#         set_suffix = "val\\"

#     if set_suffix is None:
#         continue

#     filename = yolo_images_folder_path + set_suffix + image_path.split("\\")[-1].replace(".json", "") + ".jpg"

#     image = Image.open(image_path).resize(IMAGE_SIZE)
#     image.save(filename)
#     image.close()

### Model Training

In [3]:
from ultralytics import YOLO
 
# Load the model.
model = YOLO('yolov8l.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt to 'yolov8l.pt'...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83.7M/83.7M [00:07<00:00, 11.2MB/s]


In [None]:
# Training.
results = model.train(
   data='mapillary_traffix_sign.yaml',
   imgsz=IMAGE_SIZE[0],
   epochs=150,
   batch=8,
   name='yolov8l_mapillary_trafic_signs',
)

New https://pypi.org/project/ultralytics/8.1.24 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.186  Python-3.11.8 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8l.pt, data=mapillary_traffix_sign.yaml, epochs=150, patience=50, batch=8, imgsz=512, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=yolov8l_mapillary_trafic_signs, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, lin