In [1]:
%%capture
%pip install ultralytics
%pip install -U albumentations
%sudo apt-get install tree

In [None]:
# Root folder of the CARLA dataset and the folder where every generated file is written.
base_data_path = "/kaggle/input/carla-object-detection-dataset/carla-object-detection-dataset"
base_output = "/kaggle/working/"

# Alternative settings kept for reference; the "/content" output folder suggests a
# Colab run - TODO confirm where `suraj520_carla_object_detection_dataset_path` is defined.
# base_data_path = suraj520_carla_object_detection_dataset_path + "/carla-object-detection-dataset"
# base_output = "/content"

## Please, if you like this notebook, don't forget to upvote.

### Getting Started
Import the required libraries and define our variables.

In [3]:
from ultralytics import YOLO
import yaml
from pathlib import Path
import shutil
import matplotlib.pyplot as plt
import pandas as pd
import torch
from collections import Counter
import xml.etree.ElementTree as ET
from torchvision.utils import draw_bounding_boxes
from torchvision.io import read_image
import cv2 as cv
from PIL import Image
from torchvision.transforms.functional import to_pil_image, to_tensor
from tqdm.notebook import tqdm
import random
import torch

Let's select our device.

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


### Exploring Our Data

In [7]:
image_data = Path(base_data_path, "images/train")

# iterdir() yields entries in an arbitrary, OS-dependent order; sorting makes the
# preview below show the same files on every run and every platform.
image_data_files = sorted(image_data.iterdir())

# Peek at the last five entries.
image_data_files[-5:]

[WindowsPath('C:/Users/Casper/Desktop/Bitirme_2_Rapor_Uygulama_Yolo_traffic_sign_detection/datasets/CARLA_Object_Detection_Dataset/carla-object-detection-dataset/images/train/Town04_004800.png'),
 WindowsPath('C:/Users/Casper/Desktop/Bitirme_2_Rapor_Uygulama_Yolo_traffic_sign_detection/datasets/CARLA_Object_Detection_Dataset/carla-object-detection-dataset/images/train/Town04_004860.png'),
 WindowsPath('C:/Users/Casper/Desktop/Bitirme_2_Rapor_Uygulama_Yolo_traffic_sign_detection/datasets/CARLA_Object_Detection_Dataset/carla-object-detection-dataset/images/train/Town04_004920.png'),
 WindowsPath('C:/Users/Casper/Desktop/Bitirme_2_Rapor_Uygulama_Yolo_traffic_sign_detection/datasets/CARLA_Object_Detection_Dataset/carla-object-detection-dataset/images/train/Town04_004980.png'),
 WindowsPath('C:/Users/Casper/Desktop/Bitirme_2_Rapor_Uygulama_Yolo_traffic_sign_detection/datasets/CARLA_Object_Detection_Dataset/carla-object-detection-dataset/images/train/Town04_005040.png')]

In [8]:
# Tally how many files there are per extension (e.g. ".png").
suffixes = [path.suffix for path in image_data_files]
file_extensions_count = Counter(suffixes)
file_extensions_count

Counter({'.png': 779})

### Data Transformation

In [None]:
%tree /kaggle/input/carla-object-detection-dataset/carla-object-detection-dataset --filelimit=10

### XML Annotations

In [None]:
# Folders holding the training images and their XML annotation files.
train_imgs = Path(base_data_path, "images/train")
train_annotations = Path(base_data_path, "labels/train")

Select one `xml` annotation file and inspect it to see how the bounding-box (bbox) data is stored.

Note that only the first `25` lines are shown, so the file may contain more detections than appear here.

In [None]:
xml_file = Path(train_annotations, "Town01_002040.xml")

# Print the first 25 lines in pure Python instead of shelling out to `head`,
# which does not exist on every platform.
# NOTE(review): assumes the annotation files are UTF-8 encoded - confirm.
with open(xml_file, encoding="utf-8") as xml_fh:
    # zip() stops after 25 lines, or earlier if the file is shorter.
    for line_no, line in zip(range(25), xml_fh):
        print(line, end="")

Loop over every `object` element so that we collect all the detections.

In [None]:
# Parse the sample annotation file and grab its top-level XML element.
tree = ET.parse(xml_file)
root = tree.getroot()
# Show the tag name of the root element.
root.tag

In [None]:
# Corner coordinates are read in the order [xmin, ymin, xmax, ymax].
_corner_tags = ("xmin", "ymin", "xmax", "ymax")
_object_nodes = root.findall("object")

# Class name of every annotated object.
obj_labels = [node.find("name").text for node in _object_nodes]

# Integer pixel box of every annotated object, in the same order as the labels.
obj_bboxes = [
    [int(node.find("bndbox").find(tag).text) for tag in _corner_tags]
    for node in _object_nodes
]

obj_bboxes, obj_labels

In [None]:
# Convert the bounding-box list to a floating-point tensor
obj_bboxes_tensor = torch.tensor(obj_bboxes, dtype=float)
# Show the first box as a quick sanity check
obj_bboxes_tensor[0]

In [None]:
# Read one class name per line. Blank lines are dropped: a plain split('\n') on a
# file that ends with a newline produces a trailing "" entry, which would then be
# counted as an extra class in `len(classes)` and in the YOLO config.
with open(Path(base_data_path, "labels.txt"), "r") as f:
    classes = [line.strip() for line in f.read().splitlines() if line.strip()]

print("Classes ", classes)
print("Classes Length", len(classes))

In [None]:
image_path = Path(train_imgs, "Town01_002040.png")
# Force three channels so the tensor conversion below always receives an RGB image
image = Image.open(image_path).convert("RGB")
print("Image Type ", type(image))

# draw_bounding_boxes works on tensors, so convert the PIL image first
image_tensor = to_tensor(image)
image_bbox = draw_bounding_boxes(
    image_tensor,
    obj_bboxes_tensor,   # boxes parsed from the XML annotation
    labels=obj_labels,   # one class name per box
    width=4,
    fill=False,
    # Optional: pass font="arial.ttf" together with font_size=15 to control the label text size
)

# Back to a PIL image for display
annotated = to_pil_image(image_bbox)
plt.imshow(annotated)
plt.axis("off")
plt.show()

### Import Detection with YOLO model

In [None]:
def xml_to_yolo(bbox, width, height):
    """Convert a corner-format box into a normalized centre-format box.

    Args:
        bbox: Sequence ``[xmin, ymin, xmax, ymax]``.
        width: Image width used to normalize the horizontal values.
        height: Image height used to normalize the vertical values.

    Returns:
        ``[x_center, y_center, bbox_width, bbox_height]``, each divided by the
        corresponding image dimension.
    """
    left, top, right, bottom = bbox

    # Box centre, as a fraction of the image size.
    centre_x = ((right + left) / 2) / width
    centre_y = ((bottom + top) / 2) / height

    # Box extent, as a fraction of the image size.
    norm_w = (right - left) / width
    norm_h = (bottom - top) / height

    return [centre_x, centre_y, norm_w, norm_h]

The YOLO training pipeline expects each label file to be a `.txt` file,

with every line in the order `[class_id, x_center, y_center, bbox_width, bbox_height]` (box values normalized by the image width and height).

Each `.txt` file holds all the annotations for one image.

In [None]:
class_mapping = {cls: idx for idx, cls in enumerate(classes)}

def parse_annotation(f):
    tree = ET.parse(xml_file)
    root = tree.getroot() # Get the main folder root of XML file
    # print("XML File root '{}'".format(root.tag))
    width = int(root.find("size").find("width").text)
    height = int(root.find("size").find("height").text)

    objects = []
    for obj in root.findall("object"):
        # Extract Label
        label = obj.find("name").text
        class_id = class_mapping[label]
        # Extract bounding boxes
        bndbox = obj.find("bndbox")
        xmin, xmax = int(bndbox.find("xmin").text), int(bndbox.find("xmax").text)
        ymin, ymax = int(bndbox.find("ymin").text), int(bndbox.find("ymax").text)
        yolo_bbox = xml_to_yolo([xmin, ymin, xmax, ymax], width, height) # Order used for YOLO.

        objects.append([class_id] + yolo_bbox)

    return objects

parse_annotation(Path(train_imgs, "Town01_002040.png")) # Represents Image txt file

In [None]:
def write_label(objects, filename):
    with open(filename, 'w') as f:
        for obj in objects:
            f.write(" ".join(str(x) for x in obj)) # Represents one line
            f.write("\n") # Start new line

objects = parse_annotation(Path(train_imgs, "Town01_002040.png"))
write_label(objects, "Town01_002040.txt")
open(Path(base_output, "Town01_002040.txt"), 'r').read()

Note that YOLO can read both `.png` and `.jpeg` images, but as a format **PNG** is inefficient for photographs compared to JPEG.

So we convert all our images to RGB JPEG files before passing them to YOLO.

In [None]:
def convert_image(image, out_image):
    """Re-encode an image file as an RGB JPEG.

    Args:
        image: Path of the source image.
        out_image: Destination path of the JPEG file (overwritten).
    """
    # The context manager closes the source file even if conversion or saving fails.
    with Image.open(image) as src:
        src.convert("RGB").save(out_image, "JPEG")

# Write and read back through the same explicit path, so the demo does not depend
# on the current working directory happening to be `base_output`.
sample_jpg_path = Path(base_output, "Town01_002040.jpg")
convert_image(Path(train_imgs, "Town01_002040.png"), sample_jpg_path)
Image.open(sample_jpg_path) # It is the same but in jpg

YOLO also expects the dataset directory to be laid out like this:
```
base
    -images
        -train
        -val
    -labels
        -train
        -val
```

In [None]:
yolo_base = Path(base_output, "yolo_data")

# Create images/{train,val} and labels/{train,val}; already existing folders are left alone.
for kind in ("images", "labels"):
    for split_name in ("train", "val"):
        (yolo_base / kind / split_name).mkdir(parents=True, exist_ok=True)

Now let's iterate over our data and apply these transformations.

In [None]:
train_frac = 0.8
split_seed = 42

# A dedicated, seeded generator plus a sorted file list makes the split reproducible.
# With an unseeded split, re-running this cell could send an image to "val" while the
# copy written by an earlier run is still sitting in "train" (train/val leakage).
split_rng = random.Random(split_seed)
images = sorted(train_imgs.glob("*"))

for img in tqdm(images):
    # Randomly assign the image to the train or validation split
    split = "train" if split_rng.random() < train_frac else "val"

    annotation = train_annotations / f"{img.stem}.xml"
    try:
        parsed = parse_annotation(annotation)
    except Exception as e:
        # Best effort: report why the annotation was rejected and move on.
        print(f"Failed to parse {img.stem}: {e!r}. Skipping.")
        continue

    # Label file and converted image share the same stem inside the chosen split.
    dest = yolo_base / "labels" / split / f"{img.stem}.txt"
    write_label(parsed, dest)

    dest = yolo_base / "images" / split / f"{img.stem}.jpg"
    convert_image(img, dest)

In [None]:
# Display the class names that go into the YOLO dataset config below.
classes

In [None]:
# Display the root folder of the YOLO-formatted dataset as a plain string.
str(Path(base_output, "yolo_data"))

In [None]:
# Dataset description consumed by the YOLO trainer: dataset root, image folders
# for each split, class names and number of classes.
yolo_metadata = {
    "path": str(Path(base_output, "yolo_data")),
    "train": str(Path(base_output, "yolo_data/images/train")),
    "val": str(Path(base_output, "yolo_data/images/val")),
    "names": classes,
    "nc": len(classes)
}

yolo_config = Path(base_output, "data.yaml")
# The context manager guarantees the file is flushed and closed before training
# reads it, instead of relying on garbage collection of an anonymous handle.
with open(yolo_config, 'w') as config_fh:
    yaml.safe_dump(yolo_metadata, config_fh)

In [None]:
# Load the `yolov8n.pt` checkpoint as a detection model and move it to the selected device.
model = YOLO("yolov8n.pt", task="detect")
model.to(device);  # trailing ';' suppresses the cell output

In [None]:
# Deliberate stop: this assertion always fails, so "Run All" halts here and the
# training cell below is only started by running it manually.
assert False, "Make sure you wanna start training"

In [None]:
# Train on the dataset described by data.yaml.
# `data` is passed as a plain string: the Ultralytics docs describe it as a path
# string, and the trainer performs string operations on it.
results = model.train(
    data=str(Path(base_output, "data.yaml")),
    epochs=40,
    imgsz=640,
    save=True,
    save_txt=True,
)

In [None]:
# The train/val assignment is random, so the sample image may sit in either split;
# look it up instead of assuming it ended up under images/train.
sample_name = "Town01_002100.jpg"
sample_matches = sorted(Path(base_output, "yolo_data").glob(f"images/*/{sample_name}"))
if not sample_matches:
    raise FileNotFoundError(f"{sample_name} not found under {Path(base_output, 'yolo_data', 'images')}")

results = model.predict(
    sample_matches[0],
    save=True,
    save_txt=True
)

In [None]:
# Ultralytics stores `orig_img` as an OpenCV-style BGR array; convert it to RGB
# first, otherwise the red and blue channels are swapped in the rendered picture.
rgb_img = cv.cvtColor(results[0].orig_img, cv.COLOR_BGR2RGB)

image = draw_bounding_boxes(
    image=to_tensor(rgb_img),
    boxes=results[0].boxes.xyxy, # Predicted boxes as [xmin, ymin, xmax, ymax]
    labels=[results[0].names[int(cls)] for cls in results[0].boxes.cls], # Predicted class names
    width=6,
)

to_pil_image(image)

In [None]:
# Count how many times each class name was predicted in the first result.
predicted_names = (model.names[int(class_idx)] for class_idx in results[0].boxes.cls)
object_counts = Counter(predicted_names)
object_counts

In [None]:
# most_common() returns an empty list when nothing was detected; fall back to
# (None, 0) instead of failing with an IndexError on `[0]`.
top_entries = object_counts.most_common(n=1)
most_common_class, count_of_class = top_entries[0] if top_entries else (None, 0)
most_common_class, count_of_class

In [None]:
Image.open("/content/runs/detect/train24/PR_curve.png").resize((700, 400))

In [None]:
Image.open("/content/runs/detect/train24/P_curve.png").resize((700, 400))

In [None]:
Image.open("/content/runs/detect/train24/R_curve.png").resize((700, 400))