# 1. Traffic sign detection using YOLOv8

## 1.1 Kaggle data (optional)

In [None]:
import cv2
import os
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
import random
import shutil
from collections import defaultdict

In [None]:
def create_folder(directory):
    """Create ``directory`` (and any missing parent folders) if needed.

    Calling this on a folder that already exists is a no-op.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which left a
    # window in which another process could create the folder first.
    os.makedirs(directory, exist_ok=True)
def apply_augmentations(image):
    """Return five augmented variants of ``image``.

    All five augmentations are always applied, each one independently to the
    original ``image`` (they are not chained); only their parameters are
    random. The order of the returned list is fixed: Gaussian noise,
    brightness, contrast, saturation, Gaussian blur.

    Args:
        image: Image as a NumPy array. Presumably an 8-bit image such as the
            one returned by ``cv2.imread`` -- TODO confirm before passing
            other dtypes, since ``Image.fromarray`` is used on it directly.

    Returns:
        list: The five augmented images, in the order listed above.
    """
    augmentations = []

    # Add Gaussian noise
    def add_noise(img):
        mean = 0
        sigma = 0.1
        # Draw the noise with the image's own shape so that 2-D (grayscale)
        # arrays work too, instead of unpacking exactly three dimensions.
        gauss = np.random.normal(mean, sigma, img.shape).astype('float32')
        # sigma is expressed on a 0..1 scale, hence the * 255.
        noisy = img + gauss * 255
        return np.clip(noisy, 0, 255).astype('uint8')

    # Change brightness
    def change_brightness(img):
        factor = random.uniform(0.5, 1.5)  # random brightness factor
        enhancer = ImageEnhance.Brightness(Image.fromarray(img))
        return np.array(enhancer.enhance(factor))

    # Change contrast
    def change_contrast(img):
        factor = random.uniform(0.5, 1.5)  # random contrast factor
        enhancer = ImageEnhance.Contrast(Image.fromarray(img))
        return np.array(enhancer.enhance(factor))

    # Change saturation
    # NOTE(review): PIL interprets a 3-channel array as RGB, while arrays from
    # cv2.imread are BGR -- confirm whether a colour conversion is wanted
    # before the contrast/saturation enhancers.
    def change_saturation(img):
        factor = random.uniform(0.5, 1.5)  # random saturation factor
        enhancer = ImageEnhance.Color(Image.fromarray(img))
        return np.array(enhancer.enhance(factor))

    # Apply Gaussian blur
    def apply_gaussian_blur(img):
        ksize = random.choice([3, 5, 7])  # random (odd) kernel size
        return cv2.GaussianBlur(img, (ksize, ksize), 0)

    # Apply every augmentation to the same source image
    augmentations.append(add_noise(image))
    augmentations.append(change_brightness(image))
    augmentations.append(change_contrast(image))
    augmentations.append(change_saturation(image))
    augmentations.append(apply_gaussian_blur(image))

    return augmentations

def augment_image_and_save(image_path, label_path, output_dir):
    """Write augmented copies of one image, each with a copy of its label file.

    The image is resized to 512x512 and passed through
    ``apply_augmentations``. Result number ``k`` (starting at 1) is saved as
    ``<output_dir>/images/<base>_<k>.jpg`` and the label text is saved
    unchanged as ``<output_dir>/labels/<base>_<k>.txt``, where ``<base>`` is
    the image file name without its extension. The un-augmented image itself
    is not written.

    Args:
        image_path: Path of the source image.
        label_path: Path of the label text file belonging to the image.
        output_dir: Folder under which ``images/`` and ``labels/`` are created.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
        OSError: If an augmented image cannot be written, or the label file
            cannot be read or written.
    """
    label_output_dir = f'{output_dir}/labels'
    image_output_dir = f'{output_dir}/images'
    # create_folder builds missing parents, so output_dir itself is covered.
    create_folder(label_output_dir)
    create_folder(image_output_dir)

    # Read image and label
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {image_path}")
    with open(label_path, 'r') as f:
        label = f.read()

    # Resize image to 512x512
    # NOTE(review): the label text is reused as-is after resizing; this is
    # only valid if the coordinates are normalised -- TODO confirm.
    image_resized = cv2.resize(image, (512, 512))

    # Apply augmentations
    augmented_images = apply_augmentations(image_resized)

    # Get base name of the file (without extension)
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # Save augmented images and corresponding labels
    for i, augmented_image in enumerate(augmented_images, start=1):
        # Join the file name onto the sub-folder only once. The previous code
        # prefixed output_dir a second time, which pointed at a non-existent
        # folder whenever output_dir was a relative path.
        new_image_path = os.path.join(image_output_dir, f"{base_name}_{i}.jpg")
        new_label_path = os.path.join(label_output_dir, f"{base_name}_{i}.txt")

        # Save augmented image; cv2.imwrite returns False on failure
        if not cv2.imwrite(new_image_path, augmented_image):
            raise OSError(f"Could not write image: {new_image_path}")

        # Save corresponding label
        with open(new_label_path, 'w') as f:
            f.write(label)

In [None]:
def get_labels(label_file):
    """Return the class id found at the start of every line of a label file.

    The first whitespace-separated token of each non-blank line is parsed as
    an integer class id.

    Args:
        label_file: Path of the label text file.

    Returns:
        list[int]: One class id per non-blank line, in file order. Empty for
        an empty file.

    Raises:
        ValueError: If the first token of a line is not an integer.
    """
    with open(label_file, 'r') as file:
        # Blank or whitespace-only lines (e.g. a trailing empty line) split to
        # an empty list; skip them instead of failing on fields[0].
        split_lines = (line.split() for line in file)
        return [int(fields[0]) for fields in split_lines if fields]

def split_dataset(image_dir, label_dir, output_dir, train_ratio, val_ratio):
    """Split a labelled image set into train/val and write augmented copies.

    Images (``.jpg`` / ``.png``) in ``image_dir`` that have a same-named
    ``.txt`` file in ``label_dir`` are shuffled and assigned greedily: an
    image goes to ``val`` only when, for every class it contains, ``train``
    already holds at least half of that class's total occurrences; otherwise
    it goes to ``train``. Items are then moved from ``val`` to ``train`` until
    ``train`` holds ``int(train_ratio * N)`` items. Nothing is moved in the
    other direction, so ``train`` may end up larger than that.

    Every item is finally passed to ``augment_image_and_save`` with
    ``<output_dir>/train`` or ``<output_dir>/val``, and the share of each
    class that landed in each split is printed.

    Args:
        image_dir: Folder containing the images.
        label_dir: Folder containing the label ``.txt`` files.
        output_dir: Folder under which ``train/`` and ``val/`` are written.
        train_ratio: Minimum fraction of the items to place in ``train``.
        val_ratio: Accepted for interface compatibility but not used; ``val``
            receives whatever is not assigned to ``train``.
    """
    train_dir = os.path.join(output_dir, 'train')
    val_dir = os.path.join(output_dir, 'val')

    # Collect all files and their labels
    files_labels = []
    for filename in os.listdir(image_dir):
        # Derive the label name from the stem. str.replace would also rewrite
        # ".jpg"/".png" occurring in the middle of a file name.
        stem, ext = os.path.splitext(filename)
        if ext.lower() not in ('.jpg', '.png'):
            continue
        image_path = os.path.join(image_dir, filename)
        label_path = os.path.join(label_dir, stem + '.txt')
        if os.path.exists(label_path):  # Ensure corresponding label file exists
            files_labels.append((image_path, label_path, get_labels(label_path)))

    # Shuffle files to ensure randomness
    random.shuffle(files_labels)

    # Track class distribution in each split
    class_counts = {
        'train': defaultdict(int),
        'val': defaultdict(int)
    }

    # Total occurrences of every class over the whole dataset
    total_class_counts = defaultdict(int)
    for _, _, labels in files_labels:
        for label in labels:
            total_class_counts[label] += 1

    # Assign files to splits
    splits = {'train': [], 'val': []}
    for image_path, label_path, labels in files_labels:
        # Send an item to val only once train holds at least 50% of every
        # class it contains (an item without labels therefore goes to val).
        train_has_half = all(
            class_counts['train'][label] >= 0.5 * total_class_counts[label]
            for label in labels
        )
        chosen_split = 'val' if train_has_half else 'train'

        splits[chosen_split].append((image_path, label_path, labels))
        for label in labels:
            class_counts[chosen_split][label] += 1

    # Top up train from val until it reaches train_ratio. The extra check on
    # splits['val'] stops the loop cleanly when train_ratio > 1 instead of
    # popping from an empty list.
    train_size = int(train_ratio * len(files_labels))
    while len(splits['train']) < train_size and splits['val']:
        item = splits['val'].pop()
        splits['train'].append(item)
        for label in item[2]:
            class_counts['train'][label] += 1
            class_counts['val'][label] -= 1

    # Write the augmented copies of every item into its split's directory
    split_dirs = {'train': train_dir, 'val': val_dir}
    for split, files in splits.items():
        for image_path, label_path, _ in files:
            augment_image_and_save(image_path, label_path, split_dirs[split])

    # Calculate and print class distribution percentages
    print("Class Distribution Percentages:")
    for split in ['train', 'val']:
        print(f"\n{split.capitalize()} Set:")
        for label, count in class_counts[split].items():
            percentage = (count / total_class_counts[label]) * 100 if total_class_counts[label] > 0 else 0
            print(f"Class {label}: {percentage:.2f}%")


In [None]:
# Input and output locations for the Kaggle data split. These are absolute
# paths on the author's machine; adjust them before running.
image_dir="D:/DatTruong/All/2025/AI_Engineer/20.Project/4.Autonomous_Vehicle/data/trafficsign/images"
# NOTE(review): the folder name is spelled "lables" -- confirm it matches the
# actual directory on disk.
label_dir ="D:/DatTruong/All/2025/AI_Engineer/20.Project/4.Autonomous_Vehicle/data/trafficsign/lables"
# Folder that receives the split produced by split_dataset.
output_dir = "D:/DatTruong/All/2025/AI_Engineer/20.Project/4.Autonomous_Vehicle/data/datasplit_sign"

In [5]:
# Fraction of the items that should end up in the train split.
train_ratio = 0.9
val_ratio = 0.1  # Passed through, but split_dataset never reads it: val gets whatever train does not take

# Call the split_dataset function
split_dataset(image_dir, label_dir, output_dir, train_ratio, val_ratio)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'D:/DatTruong/All/2025/AI_Engineer/20.Project/4.Autonomous_Vehicle/data/trafficsign/images'

## 1.2 Roboflow data (main)

In [30]:
# Project root on the author's machine (absolute path); the %cd cell below is
# built from it. Adjust before running elsewhere.
ROOT = "D:/DatTruong/All/2025/AI_Engineer/20.Project/4.Autonomous_Vehicle"

In [20]:
from ultralytics import YOLO

In [34]:
# Switch the notebook's working directory to src/traffic_sign under ROOT.
%cd {ROOT}/src/traffic_sign

D:\DatTruong\All\2025\AI_Engineer\20.Project\4.Autonomous_Vehicle\src\traffic_sign


In [35]:
import os

from roboflow import Roboflow

# Read the Roboflow API key from the environment instead of hard-coding it:
# a key stored in the notebook is exposed to everyone who can read the file.
# NOTE(review): the key that used to be embedded here should be treated as
# leaked and revoked.
api_key = os.environ.get("ROBOFLOW_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

rf = Roboflow(api_key=api_key)
project = rf.workspace("vietnam-traffic-sign-detection").project("vietnam-traffic-sign-detection-2i2j8")
version = project.version(6)
# Download version 6 of the project in the "yolov8" export format.
dataset = version.download("yolov8")


loading Roboflow workspace...
loading Roboflow project...
Dependency ultralytics==8.0.196 is required but found version=8.2.51, to fix: `pip install ultralytics==8.0.196`


Downloading Dataset Version Zip in Vietnam-Traffic-Sign-Detection-6 to yolov8::   1%|▏         | 3967/268344 [00:06<07:14, 608.58it/s]

In [12]:
# Display the dataset's local path; the training command below reads
# data.yaml from this location.
dataset.location

'd:\\DatTruong\\All\\2025\\AI_Engineer\\20.Project\\4.Autonomous_Vehicle\\src\\Vietnam-Traffic-Sign-Detection-6'

In [11]:
# Train a detection model starting from yolov8s.pt on the dataset's data.yaml:
# 25 epochs, image size 640, with plots enabled.
!yolo task=detect mode=train model=yolov8s.pt data={dataset.location}/data.yaml epochs=25 imgsz=640 plots=True

View settings with 'yolo settings' or at 'C:\Users\ADMIN\AppData\Roaming\Ultralytics\settings.yaml'
Update settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'.
Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt to 'yolov8s.pt'...

  0%|          | 0.00/21.5M [00:00<?, ?B/s]
  1%|          | 128k/21.5M [00:00<00:32, 689kB/s]
  1%|          | 256k/21.5M [00:00<00:34, 646kB/s]
  2%|▏         | 384k/21.5M [00:00<00:34, 641kB/s]
  2%|▏         | 512k/21.5M [00:00<00:35, 630kB/s]
  3%|▎         | 640k/21.5M [00:01<00:34, 628kB/s]
  3%|▎         | 768k/21.5M [00:01<00:34, 629kB/s]
  4%|▍         | 896k/21.5M [00:01<00:34, 625kB/s]
  5%|▍         | 1.00M/21.5M [00:01<00:33, 652kB/s]
  5%|▌         | 1.12M/21.5M [00:01<00:33, 639kB/s]
  6%|▌         | 1.25M/21.5M [00:02<00:33, 632kB/s]
  6%|▋         | 1.38M/21.5M [00:02<00:33, 640kB/s]
  7%|▋         | 1.50M/21.5M [00:02<00:34, 616kB/s]
  8%|▊         | 1.62M/21.5M [00:02<00:31,