## Cek jumlah beserta struktur datasets

In [1]:
import os

# Raw string: the backslashes of this Windows path must stay literal instead of
# being read as (invalid) escape sequences such as "\P", "\d" and "\R".
root_path = r"D:\Pothole Vision - AI Road Damage Detection\dataset\RDD2022_all_countries"
def count_files(path):
    """Return how many regular files sit directly inside ``path``.

    Sub-directories are not counted, and a path that does not exist yields 0.
    """
    if not os.path.exists(path):
        return 0
    total = 0
    for entry in os.listdir(path):
        if os.path.isfile(os.path.join(path, entry)):
            total += 1
    return total

# Collect one report line per country folder, then print the report.
summary = []

for country in os.listdir(root_path):
    country_dir = os.path.join(root_path, country)
    if not os.path.isdir(country_dir):
        continue  # only directories are treated as countries
    n_train = count_files(os.path.join(country_dir, "train", "images"))
    n_test = count_files(os.path.join(country_dir, "test", "images"))
    n_xml = count_files(os.path.join(country_dir, "train", "annotations", "xmls"))
    summary.append(f"{country} -> Train Images: {n_train}, Test Images: {n_test}, Annotations: {n_xml}")

for report_line in summary:
    print(report_line)

China_Drone -> Train Images: 2401, Test Images: 0, Annotations: 2401
China_MotorBike -> Train Images: 1977, Test Images: 500, Annotations: 1977
Czech -> Train Images: 2829, Test Images: 709, Annotations: 2829
India -> Train Images: 7706, Test Images: 1959, Annotations: 7706
Japan -> Train Images: 10506, Test Images: 2627, Annotations: 10506
Norway -> Train Images: 8161, Test Images: 2040, Annotations: 8161
United_States -> Train Images: 4805, Test Images: 1200, Annotations: 4805


## Issue #1 Folder test China_drone [SOLVED]
China_drone tidak punya folder test, abaikan?
Selanjutnya saya akan membagi (split) dataset dari Train & Test menjadi Train, Test, & Val.


## Issue #2 Struktur dataset [SOLVED]
Jika dataset DIGABUNG semua negara menjadi satu dataset besar:
Keuntungan:

Model akan lebih general karena belajar dari berbagai jenis jalan, cuaca, kamera.

Bisa membantu jika nanti digunakan di Indonesia yang belum punya data.

Jumlah data menjadi sangat besar (10.000++), sangat bagus untuk deep learning.

Kekurangan:

Bisa menyebabkan bias ke negara dengan data terbanyak (misalnya India, Japan).

Anotasi antar negara mungkin memiliki inkonsistensi kecil (labeling style, noise).

Rekomendasi untuk kasus ini:
Karena kamu akan pakai untuk Indonesia, tapi belum punya data lokal, maka:

Gabungkan semua negara → latih model global, supaya kuat terhadap variasi.

Simpan metadata negara asalnya → bisa dipakai untuk evaluasi per negara.

Nanti, jika ada data Indonesia, kamu bisa fine-tune model global ke data lokal.

In [2]:
import os
import xml.etree.ElementTree as ET
from collections import Counter

root_path = "dataset/RDD2022_all_countries"

country_folders = [
    "China_Drone", "China_MotorBike", "Czech",
    "India", "Japan", "Norway", "United_States"
]

# Every <object><name> found in the train annotations, one entry per object.
all_labels = []

for country in country_folders:
    annotation_folder = os.path.join(root_path, country, "train", "annotations", "xmls")
    if not os.path.exists(annotation_folder):
        continue
    for filename in os.listdir(annotation_folder):
        if not filename.endswith('.xml'):
            continue
        file_path = os.path.join(annotation_folder, filename)
        try:
            xml_root = ET.parse(file_path).getroot()
            for obj in xml_root.findall('object'):
                all_labels.append(obj.find('name').text.strip())
        except Exception as e:
            # Report the file and keep going with the remaining annotations.
            print(f"❌ Error parsing {file_path}: {e}")

label_counts = Counter(all_labels)

print("Distribusi label di semua dataset mentah:")
for label, count in label_counts.items():
    print(f"{label}: {count}")

Distribusi label di semua dataset mentah:
D10: 11830
D00: 26016
D20: 10617
Repair: 1046
D40: 6544
Block crack: 3
D44: 5057
D01: 179
D11: 45
D50: 3581
D43: 793
D0w0: 1


In [3]:
import os
import xml.etree.ElementTree as ET

# Dataset root, given as a path relative to the current working directory.
root_path = "dataset/RDD2022_all_countries"

def clean_annotations(root_path, target_label='D40'):
    """Keep only ``target_label`` objects in every country's train annotations.

    For each ``<root_path>/<country>/train/annotations/xmls/*.xml`` file, every
    ``<object>`` whose ``<name>`` differs from ``target_label`` is removed. When
    no object is left, the XML and its ``.jpg`` in ``train/images`` are deleted;
    otherwise the pruned XML is written back in place.

    WARNING: files under ``root_path`` are modified and deleted in place. Run
    this on a copy if the raw dataset must be preserved.

    Args:
        root_path: Folder that contains one sub-folder per country.
        target_label: Label to keep (compared after stripping whitespace).

    Returns:
        None. A summary of removed and kept files is printed.
    """
    removed_files = 0
    kept_files = 0
    for country in os.listdir(root_path):
        country_path = os.path.join(root_path, country)
        if not os.path.isdir(country_path):
            continue

        # Only the train annotations are processed (extend here for val/test if needed)
        annotations_dir = os.path.join(country_path, 'train', 'annotations', 'xmls')
        images_dir = os.path.join(country_path, 'train', 'images')

        if not os.path.exists(annotations_dir):
            print(f"Folder anotasi tidak ditemukan: {annotations_dir}, dilewati.")
            continue

        for xml_file in os.listdir(annotations_dir):
            if not xml_file.endswith('.xml'):
                continue

            xml_path = os.path.join(annotations_dir, xml_file)
            # splitext swaps only the final extension; str.replace would also
            # rewrite a ".xml" that appears in the middle of the file name.
            image_file = os.path.splitext(xml_file)[0] + '.jpg'
            image_path = os.path.join(images_dir, image_file)

            try:
                tree = ET.parse(xml_path)
            except ET.ParseError as e:
                # One malformed file must not abort the cleanup halfway through.
                print(f"Gagal membaca {xml_path}: {e}, dilewati.")
                continue
            root = tree.getroot()

            # Drop every object whose label is not the target label. An object
            # without a <name> is treated as "not the target".
            removed_objs = 0
            for obj in root.findall('object'):
                label = (obj.findtext('name') or '').strip()
                if label != target_label:
                    root.remove(obj)
                    removed_objs += 1

            if root.find('object') is None:
                # Nothing left to learn from: delete the XML and its image
                os.remove(xml_path)
                if os.path.exists(image_path):
                    os.remove(image_path)
                removed_files += 1
            else:
                # Rewrite the XML only when something was actually removed
                if removed_objs:
                    tree.write(xml_path)
                kept_files += 1

    print("Proses selesai.")
    print(f"File XML dan gambar yang dihapus: {removed_files}")
    print(f"File XML yang dipertahankan: {kept_files}")

# Destructive step: prunes the train XMLs under root_path down to the default
# 'D40' label and deletes XML/image pairs that end up with no object.
clean_annotations(root_path)

Proses selesai.
File XML dan gambar yang dihapus: 34711
File XML yang dipertahankan: 3674


In [4]:
import os
import random
import shutil
import pandas as pd

# Dataset location and output location
root_path = "D:\\Pothole Vision - AI Road Damage Detection\\dataset\\RDD2022_all_countries"
output_path = "D:\\Pothole Vision - AI Road Damage Detection\\dataset-mix"
train_val_split = 0.8  # 80% for train, 20% for val

# Create the output directories (the test split gets no annotations folder)
for _split_name, _folder in (
    ('train', 'images'),
    ('train', 'annotations'),
    ('val', 'images'),
    ('val', 'annotations'),
    ('test', 'images'),
):
    os.makedirs(os.path.join(output_path, _split_name, _folder), exist_ok=True)

# Rows that will be written to the metadata CSV
metadata = []

def copy_file(src_file: str, dest_file: str) -> None:
    """Copy ``src_file`` to ``dest_file`` using ``shutil.copy``."""
    shutil.copy(src_file, dest_file)

# Process each country folder.
# A private, seeded generator keeps the train/val assignment reproducible
# between runs without touching the global `random` state.
split_rng = random.Random(42)

# sorted(): os.listdir order is arbitrary, and the seeded draws only land on
# the same files when the countries are visited in a fixed order.
for country in sorted(os.listdir(root_path)):
    country_path = os.path.join(root_path, country)
    if not os.path.isdir(country_path):
        continue

    images_path = os.path.join(country_path, 'train', 'images')
    annotations_path = os.path.join(country_path, 'train', 'annotations', 'xmls')

    if not os.path.exists(images_path) or not os.path.exists(annotations_path):
        continue

    image_files = sorted(f for f in os.listdir(images_path) if f.endswith('.jpg'))
    # A set gives O(1) membership tests; the list lookup made pairing quadratic.
    annotation_files = {f for f in os.listdir(annotations_path) if f.endswith('.xml')}

    # Use only images that have a matching XML (same base name)
    matched_files = []
    for image in image_files:
        annotation = os.path.splitext(image)[0] + '.xml'
        if annotation in annotation_files:
            matched_files.append((image, annotation))

    for image, annotation in matched_files:
        src_image = os.path.join(images_path, image)
        src_annotation = os.path.join(annotations_path, annotation)

        split = 'train' if split_rng.random() < train_val_split else 'val'
        dest_image = os.path.join(output_path, split, 'images', image)
        dest_annotation = os.path.join(output_path, split, 'annotations', annotation)

        copy_file(src_image, dest_image)
        copy_file(src_annotation, dest_annotation)

        metadata.append({'filename': image, 'country': country, 'split': split})

    # Test set: copied to test/images without annotations
    test_images_path = os.path.join(country_path, 'test', 'images')
    if os.path.exists(test_images_path):
        for test_img in os.listdir(test_images_path):
            if test_img.endswith('.jpg'):
                src_test_img = os.path.join(test_images_path, test_img)
                dest_test_img = os.path.join(output_path, 'test', 'images', test_img)
                copy_file(src_test_img, dest_test_img)

# Save the metadata. Explicit columns keep sort_values from raising KeyError
# when no image/annotation pair was found at all.
metadata_df = pd.DataFrame(metadata, columns=['filename', 'country', 'split'])
metadata_df = metadata_df.sort_values(by=['split', 'country', 'filename'])
metadata_df.to_csv(os.path.join(output_path, 'metadata.csv'), index=False)

print("✅ Dataset berhasil digabung dan displit.")

✅ Dataset berhasil digabung dan displit.


In [5]:
import os
import pandas as pd
from tabulate import tabulate

root_path = "D:\\Pothole Vision - AI Road Damage Detection\\dataset-mix"

def count_files(path, ext):
    """Count the entries of ``path`` whose name ends with ``ext``.

    Returns 0 when ``path`` does not exist.
    """
    if not os.path.exists(path):
        return 0
    return sum(1 for entry in os.listdir(path) if entry.endswith(ext))

# File counts per split
data = {
    'Split': ['Train', 'Val', 'Test'],
    'Images (.jpg)': [
        count_files(os.path.join(root_path, split_dir, 'images'), '.jpg')
        for split_dir in ('train', 'val', 'test')
    ],
    'Annotations (.xml)': [
        count_files(os.path.join(root_path, split_dir, 'annotations'), '.xml')
        for split_dir in ('train', 'val')
    ] + ['-'],  # the test split has no annotations
}

df_split = pd.DataFrame(data)

# Read metadata.csv and print the three tables (nothing is printed without it)
metadata_path = os.path.join(root_path, 'metadata.csv')
if not os.path.exists(metadata_path):
    print("❌ File metadata.csv tidak ditemukan.")
else:
    metadata = pd.read_csv(metadata_path)

    split_counts = metadata['split'].value_counts().reset_index()
    split_counts.columns = ['Split', 'Count']

    country_counts = metadata['country'].value_counts().reset_index()
    country_counts.columns = ['Country', 'Count']

    # Title / table pairs, printed in this order
    for title, frame in (
        ("=== Jumlah File per Split ===", df_split),
        ("\n=== Distribusi Metadata per Split ===", split_counts),
        ("\n=== Distribusi Metadata per Negara ===", country_counts),
    ):
        print(title)
        print(tabulate(frame, headers='keys', tablefmt='pretty', showindex=False))

=== Jumlah File per Split ===
+-------+---------------+--------------------+
| Split | Images (.jpg) | Annotations (.xml) |
+-------+---------------+--------------------+
| Train |     2951      |        2951        |
|  Val  |      723      |        723         |
| Test  |     9035      |         -          |
+-------+---------------+--------------------+

=== Distribusi Metadata per Split ===
+-------+-------+
| Split | Count |
+-------+-------+
| train | 2951  |
|  val  |  723  |
+-------+-------+

=== Distribusi Metadata per Negara ===
+-----------------+-------+
|     Country     | Count |
+-----------------+-------+
|      India      | 1530  |
|      Japan      | 1390  |
|     Norway      |  256  |
| China_MotorBike |  164  |
|      Czech      |  154  |
|  United_States  |  116  |
|   China_Drone   |  64   |
+-----------------+-------+


Checklist Sebelum Training
-----------------------------------------
| Langkah                      | Status |
| ---------------------------- | ------ |
| Label hanya `D40`            | ✅     |
| Dataset bersih & rapi        | ✅     |
| Split 80:20                  | ✅     |
| Metadata terdokumentasi      | ✅     |
| Format dataset per algoritma | 🔜     |
| Training script siap pakai   | 🔜     |
| Evaluasi & logging per model | 🔜     |


In [6]:
import os

SOURCE_DATASET = "D:/Pothole Vision - AI Road Damage Detection/dataset-mix"
TARGET_ROOT = "D:/Pothole Vision - AI Road Damage Detection/prepared-datasets"

os.makedirs(TARGET_ROOT, exist_ok=True)

# Algorithm list; SSD is left out because its format differs
algorithms = ["yolov8", "retinanet", "cornernet", "coco"]  # SSD is handled by a dedicated script

for algo in algorithms:
    for split in ["train", "val", "test"]:
        # only train and val carry annotations
        subfolders = ["images"] if split == "test" else ["images", "annotations"]
        for subfolder in subfolders:
            os.makedirs(os.path.join(TARGET_ROOT, algo, split, subfolder), exist_ok=True)

print("✅ Struktur direktori berhasil dibuat (kecuali SSD).")

✅ Struktur direktori berhasil dibuat (kecuali SSD).


In [7]:
import shutil
from tqdm import tqdm

splits = ["train", "val", "test"]

for algo in algorithms:
    print(f"\n📁 Menyalin dataset untuk {algo}...")
    for split in splits:
        # (sub-folder, extension) pairs to copy; the test split has images only
        copy_jobs = [("images", ".jpg")]
        if split != "test":
            copy_jobs.append(("annotations", ".xml"))

        for kind, ext in copy_jobs:
            src_dir = os.path.join(SOURCE_DATASET, split, kind)
            dest_dir = os.path.join(TARGET_ROOT, algo, split, kind)
            for f in tqdm(os.listdir(src_dir), desc=f"{algo} - {split} {kind}"):
                if f.endswith(ext):
                    shutil.copy(os.path.join(src_dir, f), os.path.join(dest_dir, f))

print("\n✅ Semua data berhasil diduplikasi ke algoritma selain SSD.")


📁 Menyalin dataset untuk yolov8...


yolov8 - train images: 100%|██████████| 2951/2951 [00:15<00:00, 187.76it/s]
yolov8 - train annotations: 100%|██████████| 2951/2951 [00:12<00:00, 240.84it/s]
yolov8 - val images: 100%|██████████| 723/723 [00:04<00:00, 161.00it/s]
yolov8 - val annotations: 100%|██████████| 723/723 [00:03<00:00, 207.95it/s]
yolov8 - test images: 100%|██████████| 9035/9035 [00:58<00:00, 153.52it/s]



📁 Menyalin dataset untuk retinanet...


retinanet - train images: 100%|██████████| 2951/2951 [00:08<00:00, 347.41it/s] 
retinanet - train annotations: 100%|██████████| 2951/2951 [00:05<00:00, 572.85it/s] 
retinanet - val images: 100%|██████████| 723/723 [00:00<00:00, 1028.99it/s]
retinanet - val annotations: 100%|██████████| 723/723 [00:00<00:00, 1837.15it/s]
retinanet - test images: 100%|██████████| 9035/9035 [00:12<00:00, 743.75it/s] 



📁 Menyalin dataset untuk cornernet...


cornernet - train images: 100%|██████████| 2951/2951 [00:02<00:00, 1081.20it/s]
cornernet - train annotations: 100%|██████████| 2951/2951 [00:01<00:00, 2472.98it/s]
cornernet - val images: 100%|██████████| 723/723 [00:00<00:00, 964.09it/s] 
cornernet - val annotations: 100%|██████████| 723/723 [00:00<00:00, 2672.98it/s]
cornernet - test images: 100%|██████████| 9035/9035 [00:11<00:00, 791.50it/s] 



📁 Menyalin dataset untuk coco...


coco - train images: 100%|██████████| 2951/2951 [00:06<00:00, 446.98it/s] 
coco - train annotations: 100%|██████████| 2951/2951 [00:02<00:00, 1201.40it/s]
coco - val images: 100%|██████████| 723/723 [00:02<00:00, 317.09it/s] 
coco - val annotations: 100%|██████████| 723/723 [00:01<00:00, 522.19it/s]
coco - test images: 100%|██████████| 9035/9035 [00:24<00:00, 368.64it/s] 


✅ Semua data berhasil diduplikasi ke algoritma selain SSD.





## Konversi annotation ke format:
- YOLOv8      → .txt (YOLO format)
- SSD         → TFRecord / COCO JSON
- RetinaNet   → COCO JSON
- Deformable DETR → COCO JSON
- CornerNet   → COCO JSON (keypoint-style bounding box if needed)

In [8]:
import pandas as pd
from tabulate import tabulate

# One row per algorithm: entry counts of the images/annotations folders per split.
summary = []
for algo in algorithms:
    row = {"Algorithm": algo}
    for split in splits:
        split_root = os.path.join(TARGET_ROOT, algo, split)
        row[f"{split}_images"] = len(os.listdir(os.path.join(split_root, "images")))
        if split == "test":
            row[f"{split}_annotations"] = "-"  # no annotations folder for test
        else:
            row[f"{split}_annotations"] = len(os.listdir(os.path.join(split_root, "annotations")))
    summary.append(row)

df = pd.DataFrame(summary)
print(tabulate(df, headers='keys', tablefmt='pretty', showindex=False))

+-----------+--------------+-------------------+------------+-----------------+-------------+------------------+
| Algorithm | train_images | train_annotations | val_images | val_annotations | test_images | test_annotations |
+-----------+--------------+-------------------+------------+-----------------+-------------+------------------+
|  yolov8   |     2951     |       2951        |    723     |       723       |    9035     |        -         |
| retinanet |     2951     |       2951        |    723     |       723       |    9035     |        -         |
| cornernet |     2951     |       2951        |    723     |       723       |    9035     |        -         |
|   coco    |     2951     |       2951        |    723     |       723       |    9035     |        -         |
+-----------+--------------+-------------------+------------+-----------------+-------------+------------------+


In [9]:
# Konversi anotasi XML (Pascal VOC) ke format masing-masing algoritma
# Output disimpan di folder prepared-datasets/{algo}/

import os
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Source dataset (relative path, so it is resolved against the current working directory)
SOURCE_IMAGES_DIR = "dataset-mix"
# Annotation folder of each split; only 'train' and 'val' are listed
SOURCE_ANN_DIR = {
    'train': os.path.join(SOURCE_IMAGES_DIR, 'train', 'annotations'),
    'val': os.path.join(SOURCE_IMAGES_DIR, 'val', 'annotations'),
}

# Target directory per algorithm
ALGORITHMS = ['yolov8', 'ssd', 'retinanet', 'coco', 'cornernet']
PREPARED_ROOT = "prepared-datasets"

# Make sure the target directories exist
def prepare_dirs():
    """Create the ``images``, ``annotations`` and ``labels`` folders.

    They are created for the train and val splits of every entry in
    ``ALGORITHMS``, below ``PREPARED_ROOT``; existing folders are left alone.
    """
    for algo in ALGORITHMS:
        for split in ('train', 'val'):
            split_root = os.path.join(PREPARED_ROOT, algo, split)
            for subfolder in ('images', 'annotations', 'labels'):
                os.makedirs(os.path.join(split_root, subfolder), exist_ok=True)

# Convert to YOLOv8 format.
# Only class D40 is kept, written with class index 0.
def _voc_box_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    """Turn a VOC pixel box into a YOLO label line for class 0.

    The box is clamped to the image first so every normalised value stays
    within [0, 1]. Returns None when the clamped box has no area.
    """
    xmin, xmax = max(0, min(xmin, img_w)), max(0, min(xmax, img_w))
    ymin, ymax = max(0, min(ymin, img_h)), max(0, min(ymax, img_h))
    if xmax <= xmin or ymax <= ymin:
        return None
    x_center = ((xmin + xmax) / 2) / img_w
    y_center = ((ymin + ymax) / 2) / img_h
    bw = (xmax - xmin) / img_w
    bh = (ymax - ymin) / img_h
    return f"0 {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}"


def _link_or_copy_image(src_path, dst_path):
    """Symlink ``src_path`` at ``dst_path``; copy the file if the symlink fails."""
    import shutil  # local import: this cell does not import shutil itself

    try:
        os.symlink(os.path.abspath(src_path), dst_path)
    except OSError:
        # Symlink creation can be refused (for example on Windows without the
        # required privilege); a plain copy gives the same result on disk.
        shutil.copy(src_path, dst_path)


def convert_to_yolo():
    """Write YOLO label files for the train and val splits.

    For every XML in ``SOURCE_ANN_DIR[split]`` the matching image (same base
    name, ``.jpg``) is linked or copied into ``yolov8/<split>/images`` unless it
    is already there, and a ``.txt`` with one line per D40 box is written to
    ``yolov8/<split>/labels``. The target folders must already exist (see
    ``prepare_dirs``). Annotations with a missing image or an unusable
    ``<size>`` are reported and skipped.
    """
    for split in ['train', 'val']:
        image_dir = os.path.join(SOURCE_IMAGES_DIR, split, 'images')
        ann_dir = SOURCE_ANN_DIR[split]
        target_img_dir = os.path.join(PREPARED_ROOT, 'yolov8', split, 'images')
        target_label_dir = os.path.join(PREPARED_ROOT, 'yolov8', split, 'labels')

        for file in tqdm(os.listdir(ann_dir), desc=f"[YOLO] Converting {split}"):
            if not file.endswith(".xml"):
                continue
            stem = os.path.splitext(file)[0]
            root = ET.parse(os.path.join(ann_dir, file)).getroot()

            # Width/height are the divisors below, so they must be positive.
            try:
                w = int(float(root.findtext("size/width")))
                h = int(float(root.findtext("size/height")))
            except (TypeError, ValueError):
                w = h = 0
            if w <= 0 or h <= 0:
                print(f"[YOLO] Ukuran gambar tidak valid di {file}, dilewati.")
                continue

            # Name the image after the XML file, not after the <filename> tag:
            # the label file is named after the XML too, and image and label
            # must share one base name to be paired.
            image_filename = stem + ".jpg"
            image_path = os.path.join(image_dir, image_filename)
            out_image_path = os.path.join(target_img_dir, image_filename)

            # lexists also sees a dangling symlink left behind by an earlier run
            if not os.path.lexists(out_image_path):
                if not os.path.exists(image_path):
                    print(f"[YOLO] Gambar tidak ditemukan, dilewati: {image_path}")
                    continue
                _link_or_copy_image(image_path, out_image_path)

            yolo_lines = []
            for obj in root.findall("object"):
                name = (obj.findtext("name") or "").strip()
                if name != "D40":
                    continue  # Skip non-D40

                bndbox = obj.find("bndbox")
                xmin = int(float(bndbox.find("xmin").text))
                ymin = int(float(bndbox.find("ymin").text))
                xmax = int(float(bndbox.find("xmax").text))
                ymax = int(float(bndbox.find("ymax").text))

                line = _voc_box_to_yolo(xmin, ymin, xmax, ymax, w, h)
                if line is not None:
                    yolo_lines.append(line)

            # Save the YOLO label file
            txt_path = os.path.join(target_label_dir, stem + ".txt")
            with open(txt_path, "w") as f:
                f.write("\n".join(yolo_lines))

if __name__ == '__main__':
    prepare_dirs()
    convert_to_yolo()
    print("✅ Konversi YOLOv8 selesai. Untuk algoritma lain, dilanjutkan dengan modul terpisah.")

[YOLO] Converting train: 100%|██████████| 2951/2951 [00:03<00:00, 905.50it/s] 
[YOLO] Converting val: 100%|██████████| 723/723 [00:00<00:00, 1537.73it/s]

✅ Konversi YOLOv8 selesai. Untuk algoritma lain, dilanjutkan dengan modul terpisah.





In [10]:
# convert_to_ssd.py
import os
import shutil
import xml.etree.ElementTree as ET

def convert_to_ssd(dataset_root, output_root):
    """Convert a VOC-style dataset into the folder layout used for SSD.

    The output contains:
    - Annotations (XML files of train and val)
    - JPEGImages (images of train, val and test)
    - ImageSets/Main/{train.txt,val.txt,test.txt} (one image id per line)

    An annotation whose image is missing is reported and skipped, and a split
    folder that does not exist produces an empty list file. Entries are
    processed in sorted order so the list files are reproducible.

    Args:
        dataset_root (str): mixed dataset folder with train/val/test sub-folders
        output_root (str): output folder for the SSD conversion
    """
    ann_out = os.path.join(output_root, 'Annotations')
    img_out = os.path.join(output_root, 'JPEGImages')
    sets_main = os.path.join(output_root, 'ImageSets', 'Main')
    for folder in (output_root, ann_out, img_out, sets_main):
        os.makedirs(folder, exist_ok=True)

    splits = ['train', 'val', 'test']
    for split in splits:
        ann_dir = os.path.join(dataset_root, split, 'annotations')
        img_dir = os.path.join(dataset_root, split, 'images')
        image_ids = []

        if split == 'test':
            # The test split has no annotations: copy images only
            img_names = sorted(os.listdir(img_dir)) if os.path.isdir(img_dir) else []
            for img_file in img_names:
                if not img_file.endswith('.jpg'):
                    continue
                shutil.copy(os.path.join(img_dir, img_file), os.path.join(img_out, img_file))
                image_ids.append(os.path.splitext(img_file)[0])
        else:
            xml_names = sorted(os.listdir(ann_dir)) if os.path.isdir(ann_dir) else []
            for xml_file in xml_names:
                if not xml_file.endswith('.xml'):
                    continue
                base_name = os.path.splitext(xml_file)[0]
                img_file = base_name + '.jpg'
                src_img = os.path.join(img_dir, img_file)
                # Check before copying anything, so no XML is left without image
                if not os.path.isfile(src_img):
                    print(f"Gambar untuk {xml_file} tidak ditemukan, dilewati.")
                    continue
                shutil.copy(os.path.join(ann_dir, xml_file), os.path.join(ann_out, xml_file))
                shutil.copy(src_img, os.path.join(img_out, img_file))
                image_ids.append(base_name)

        # `with` closes the list file even if something above raised earlier
        with open(os.path.join(sets_main, f"{split}.txt"), 'w') as list_file:
            list_file.writelines(image_id + '\n' for image_id in image_ids)

if __name__ == "__main__":
    dataset_root = "D:/Pothole Vision - AI Road Damage Detection/dataset-mix"
    output_root = "D:/Pothole Vision - AI Road Damage Detection/prepared-datasets/ssd"
    convert_to_ssd(dataset_root, output_root)
    print("Konversi ke SSD selesai.")

Konversi ke SSD selesai.


In [11]:
# convert_to_retinanet.py
import os
import json
import xml.etree.ElementTree as ET

def convert_to_coco(dataset_root, output_root):
    """Convert a VOC-style dataset to COCO JSON files for RetinaNet.

    One ``<split>.json`` is written to ``output_root`` for train, val and test.
    Only ``D40`` objects are exported (category id 1); boxes with no area are
    dropped. Images are processed in sorted order so ids are reproducible, and
    a split whose image folder is missing produces an empty JSON.

    Images of the test split, and images without an XML, are listed with
    ``height``/``width`` set to ``None`` because no size information is read
    for them. An image whose XML holds no usable D40 box is left out.

    Args:
        dataset_root (str): mixed dataset folder (train/val/test)
        output_root (str): output folder for the JSON files (images are not copied)
    """
    os.makedirs(output_root, exist_ok=True)

    categories = [
        {"id": 1, "name": "D40"}
    ]

    def parse_xml(xml_path, image_id, annotation_id_start, file_name):
        """Return (image_info, annotations, next_annotation_id) for one XML."""
        root = ET.parse(xml_path).getroot()
        image_info = {
            "id": image_id,
            # The name on disk, not the XML <filename> tag, which may differ
            "file_name": file_name,
            # int(float(...)) also accepts values written like "600.0"
            "height": int(float(root.findtext('size/height'))),
            "width": int(float(root.findtext('size/width'))),
        }
        annotations = []
        annotation_id = annotation_id_start
        for obj in root.findall('object'):
            label = (obj.findtext('name') or '').strip()
            if label != 'D40':  # Ignore every label other than D40
                continue
            bndbox = obj.find('bndbox')
            xmin = int(float(bndbox.find('xmin').text))
            ymin = int(float(bndbox.find('ymin').text))
            xmax = int(float(bndbox.find('xmax').text))
            ymax = int(float(bndbox.find('ymax').text))
            width = xmax - xmin
            height = ymax - ymin
            if width <= 0 or height <= 0:
                continue  # a box without area is not a usable annotation

            annotations.append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": [xmin, ymin, width, height],
                "area": width * height,
                "iscrowd": 0,
            })
            annotation_id += 1
        return image_info, annotations, annotation_id

    splits = ['train', 'val', 'test']
    for split in splits:
        images_dir = os.path.join(dataset_root, split, 'images')
        ann_dir = os.path.join(dataset_root, split, 'annotations')
        json_out_path = os.path.join(output_root, f'{split}.json')

        images = []
        annotations = []
        annotation_id = 1

        if os.path.isdir(images_dir):
            img_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
        else:
            img_files = []

        # Every .jpg consumes one image id, whether or not it is exported
        for image_id, img_file in enumerate(img_files, start=1):
            base_name = os.path.splitext(img_file)[0]
            xml_file = os.path.join(ann_dir, base_name + '.xml')

            if split == 'test' or not os.path.exists(xml_file):
                # No annotation available: list the image only
                images.append({
                    "id": image_id,
                    "file_name": img_file,
                    "height": None,
                    "width": None,
                })
                continue

            image_info, anns, annotation_id = parse_xml(
                xml_file, image_id, annotation_id, img_file
            )
            if anns:
                images.append(image_info)
                annotations.extend(anns)

        coco_format = {
            "images": images,
            "annotations": annotations,
            "categories": categories
        }

        with open(json_out_path, 'w') as f:
            json.dump(coco_format, f, indent=4)

    print("Konversi ke RetinaNet (COCO JSON) selesai.")

if __name__ == "__main__":
    dataset_root = "D:/Pothole Vision - AI Road Damage Detection/dataset-mix"
    output_root = "D:/Pothole Vision - AI Road Damage Detection/prepared-datasets/retinanet"
    convert_to_coco(dataset_root, output_root)

Konversi ke RetinaNet (COCO JSON) selesai.


# DETR == RetinaNet
Konversi ke format Deformable DETR (COCO JSON style, sama dengan RetinaNet)
Karena Deformable DETR juga menggunakan COCO format, kamu bisa gunakan file JSON yang sama dari script RetinaNet di atas. Jadi cukup jalankan script convert_to_retinanet.py untuk kedua algoritma tersebut.

In [12]:
# convert_to_cornernet.py
import os
import json
import xml.etree.ElementTree as ET

def convert_to_cornernet(dataset_root, output_root):
    """
    Convert a VOC-style dataset into a simple JSON format for CornerNet.

    One ``<split>.json`` is written to ``output_root`` for train, val and test:
    {
    "images": [
        {
            "file_name": "image1.jpg",
            "bboxes": [[xmin, ymin, xmax, ymax], ...]
        },
        ...
    ]
    }

    Only ``D40`` boxes are exported. Test images, and images without an XML,
    get an empty ``bboxes`` list.
    """

    os.makedirs(output_root, exist_ok=True)

    def d40_boxes(xml_path):
        """Return the [xmin, ymin, xmax, ymax] boxes of all D40 objects."""
        boxes = []
        for obj in ET.parse(xml_path).getroot().findall('object'):
            if obj.find('name').text.strip() != 'D40':  # only class D40 is taken
                continue
            bndbox = obj.find('bndbox')
            boxes.append([
                int(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ])
        return boxes

    for split in ('train', 'val', 'test'):
        images_dir = os.path.join(dataset_root, split, 'images')
        ann_dir = os.path.join(dataset_root, split, 'annotations')
        json_out_path = os.path.join(output_root, f'{split}.json')

        entries = []
        for img_file in os.listdir(images_dir):
            if not img_file.endswith('.jpg'):
                continue

            xml_file = os.path.join(ann_dir, os.path.splitext(img_file)[0] + '.xml')
            has_annotation = split != 'test' and os.path.exists(xml_file)
            entries.append({
                "file_name": img_file,
                "bboxes": d40_boxes(xml_file) if has_annotation else [],
            })

        with open(json_out_path, 'w') as f:
            json.dump({"images": entries}, f, indent=4)

    print("Konversi ke CornerNet selesai.")

if __name__ == "__main__":
    dataset_root = "D:/Pothole Vision - AI Road Damage Detection/dataset-mix"
    output_root = "D:/Pothole Vision - AI Road Damage Detection/prepared-datasets/cornernet"
    convert_to_cornernet(dataset_root, output_root)

Konversi ke CornerNet selesai.


In [13]:
import torch
# Print the PyTorch version string and torch.version.cuda, one per line.
print(torch.__version__, torch.version.cuda, sep="\n")

2.5.1+cu121
12.1


In [14]:
import torch
# Print the PyTorch/CUDA versions, then whether a CUDA device is available and its name.
print(torch.__version__)
print(torch.version.cuda)
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
print("GPU name:", torch.cuda.get_device_name(0) if cuda_ready else "No GPU detected")

2.5.1+cu121
12.1
CUDA available: True
GPU name: NVIDIA GeForce RTX 3050 Laptop GPU


In [1]:
from ultralytics import YOLO

# Load the yolov8n.pt checkpoint; other sizes (yolov8s.pt, yolov8m.pt, ...)
# can be substituted here.
model = YOLO("yolov8n.pt")

# Training settings gathered in one mapping and unpacked into train().
train_options = dict(
    data="yolo.yaml",
    epochs=20,
    imgsz=640,
    batch=16,
    name="yolov8n-D40",
    device=0,
)

# Kept as the final bare expression so the notebook displays the returned object.
model.train(**train_options)

New https://pypi.org/project/ultralytics/8.3.136 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.135  Python-3.10.9 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=yolo.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8n-D402, nbs=64, nms=False, opset

[34m[1mtrain: [0mScanning D:\Pothole Vision - AI Road Damage Detection\prepared-datasets\yolov8\train\labels.cache... 2951 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2951/2951 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 186.993.3 MB/s, size: 70.3 KB)


[34m[1mval: [0mScanning D:\Pothole Vision - AI Road Damage Detection\prepared-datasets\yolov8\val\labels.cache... 723 images, 0 backgrounds, 0 corrupt: 100%|██████████| 723/723 [00:00<?, ?it/s]


Plotting labels to d:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1md:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20       2.1G      2.259       3.41      1.714         22        640: 100%|██████████| 185/185 [00:36<00:00,  5.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.69it/s]


                   all        723       1305      0.282      0.202      0.161     0.0606

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      2.02G      2.229      2.635      1.747         16        640: 100%|██████████| 185/185 [00:35<00:00,  5.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.95it/s]


                   all        723       1305       0.33       0.25      0.194     0.0724

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.04G      2.265      2.464      1.756         17        640: 100%|██████████| 185/185 [00:35<00:00,  5.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.90it/s]

                   all        723       1305      0.273       0.19      0.142     0.0525






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.01G      2.245      2.384      1.756         13        640: 100%|██████████| 185/185 [00:35<00:00,  5.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.86it/s]

                   all        723       1305      0.352      0.259      0.219      0.088






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      2.03G      2.183       2.25      1.698         17        640: 100%|██████████| 185/185 [00:35<00:00,  5.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.91it/s]

                   all        723       1305      0.398      0.308       0.29      0.111






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      2.03G      2.152      2.206      1.692         22        640: 100%|██████████| 185/185 [00:36<00:00,  5.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:05<00:00,  4.48it/s]


                   all        723       1305      0.411      0.316      0.292      0.113

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      2.04G      2.128      2.171      1.662         10        640: 100%|██████████| 185/185 [00:36<00:00,  5.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:05<00:00,  4.37it/s]


                   all        723       1305      0.427      0.348      0.326      0.132

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      2.03G      2.087      2.107      1.641         19        640: 100%|██████████| 185/185 [00:36<00:00,  5.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:05<00:00,  4.51it/s]


                   all        723       1305      0.435      0.349      0.356      0.142

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      2.03G      2.063       2.06      1.631         18        640: 100%|██████████| 185/185 [00:35<00:00,  5.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.83it/s]

                   all        723       1305      0.452      0.377      0.369      0.145






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      2.01G      2.049      1.989      1.606         24        640: 100%|██████████| 185/185 [00:35<00:00,  5.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  5.00it/s]

                   all        723       1305      0.467      0.384      0.389      0.154





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.02G      2.041      1.976      1.654         12        640: 100%|██████████| 185/185 [00:35<00:00,  5.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.79it/s]

                   all        723       1305        0.5      0.352      0.377      0.149






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.01G      2.021      1.918      1.643          8        640: 100%|██████████| 185/185 [00:35<00:00,  5.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.83it/s]

                   all        723       1305      0.502      0.359      0.382      0.154






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.01G      1.984      1.886      1.628         11        640: 100%|██████████| 185/185 [00:35<00:00,  5.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.90it/s]

                   all        723       1305      0.505      0.389      0.395      0.161






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      2.01G      1.969      1.864      1.611          8        640: 100%|██████████| 185/185 [00:34<00:00,  5.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.91it/s]

                   all        723       1305      0.493      0.432      0.432      0.182






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      2.03G      1.946       1.81      1.591         10        640: 100%|██████████| 185/185 [00:35<00:00,  5.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.88it/s]

                   all        723       1305      0.513      0.402      0.422      0.175






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      2.03G      1.945      1.799      1.593          9        640: 100%|██████████| 185/185 [00:34<00:00,  5.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.88it/s]

                   all        723       1305      0.532      0.433      0.454      0.186






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      2.01G      1.936      1.738      1.575         18        640: 100%|██████████| 185/185 [00:34<00:00,  5.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.84it/s]

                   all        723       1305      0.539      0.457       0.47      0.196






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      2.01G      1.906      1.703      1.564          9        640: 100%|██████████| 185/185 [00:35<00:00,  5.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.68it/s]

                   all        723       1305      0.558      0.458      0.482      0.199






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      2.01G      1.889      1.653      1.539          7        640: 100%|██████████| 185/185 [00:35<00:00,  5.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.83it/s]

                   all        723       1305      0.564      0.461       0.49      0.209






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      2.02G      1.865      1.631      1.535         15        640: 100%|██████████| 185/185 [00:35<00:00,  5.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.89it/s]

                   all        723       1305      0.596      0.449      0.494      0.211






20 epochs completed in 0.230 hours.
Optimizer stripped from d:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402\weights\last.pt, 6.2MB
Optimizer stripped from d:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402\weights\best.pt, 6.2MB

Validating d:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402\weights\best.pt...
Ultralytics 8.3.135  Python-3.10.9 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:04<00:00,  4.86it/s]


                   all        723       1305      0.597      0.451      0.494      0.211
Speed: 0.2ms preprocess, 2.8ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1md:\Pothole Vision - AI Road Damage Detection\runs\detect\yolov8n-D402[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000017890339750>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

In [None]:
metrics = model.val()  # automatic evaluation on the validation set declared in the dataset YAML
print(metrics)  # show the summary

In [None]:
# Run model evaluation (no confusion=True argument is passed).
metrics = model.val()

# Fetch the confusion matrix when the metrics object exposes one.
if hasattr(metrics, "confusion_matrix"):
    # `cm` is the ConfusionMatrix object, not a bare array; the counts live in
    # its `.matrix` attribute, so print that rather than the object's repr.
    cm = metrics.confusion_matrix
    print("Confusion matrix:\n", cm.matrix)
else:
    print("Confusion matrix tidak tersedia di metrics.")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

cm_matrix = cm.matrix  # Ambil array 2D dari ConfusionMatrix
classes = model.names  # Misalnya: ['D40']

plt.figure(figsize=(6, 5))
sns.heatmap(cm_matrix, annot=True, fmt='.0f', xticklabels=classes, yticklabels=classes, cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# File extensions (lower-case, including the dot) that count as images.
image_extensions = ['.jpg', '.jpeg', '.png']

# Walk target_dir recursively and render each image file in its own figure.
for root, dirs, files in os.walk(target_dir):
    for file in files:
        suffix = os.path.splitext(file)[1].lower()
        if suffix not in image_extensions:
            continue  # not an image file

        path = os.path.join(root, file)
        print(f"🖼 Menampilkan: {file}")
        img = mpimg.imread(path)

        plt.figure(figsize=(6, 4))
        plt.imshow(img)
        plt.title(file)
        plt.axis('off')
        plt.show()

In [None]:
# Pull the headline detection metrics out of the validation result in `metrics`.
mp = metrics.box.mp    # mean precision (float)
mr = metrics.box.mr    # mean recall (float)
map50 = metrics.box.map50  # mAP@0.5 (float)
map5095 = metrics.box.map    # mAP@0.5:0.95 (float)
f1_scores = metrics.box.f1  # per-class F1 scores
# Test emptiness with len(): the scores may be a NumPy array, and the truth
# value of a multi-element array is ambiguous (raises ValueError).
mean_f1 = sum(f1_scores) / len(f1_scores) if len(f1_scores) else 0

print(f"Precision (mean): {mp:.4f}")
print(f"Recall (mean): {mr:.4f}")
print(f"mAP@0.5: {map50:.4f}")
print(f"mAP@0.5:0.95: {map5095:.4f}")
print(f"F1-score (mean): {mean_f1:.4f}")

In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Load the model used for inference.
# NOTE(review): 'yolov8n.pt' is the same base checkpoint the training cell
# starts from; point this at the trained weights (the best.pt written by the
# training run) to use the fine-tuned detector.
model = YOLO('yolov8n.pt')


def predict_from_file(image_path):
    """Run the detector on one image file and display the annotated result.

    Args:
        image_path: Path to an image file readable by OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot load an image from ``image_path``.
    """
    img_bgr = cv2.imread(image_path)
    # cv2.imread reports a missing or unreadable file by returning None.
    if img_bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Pass the frame exactly as OpenCV loaded it: Ultralytics expects NumPy
    # inputs in BGR order and letterboxes to `imgsz` itself, so neither a
    # manual RGB conversion nor an aspect-distorting 640x640 resize is wanted.
    results = model(img_bgr, imgsz=640)

    # plot() returns a BGR array; matplotlib expects RGB.
    result_img = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 8))
    plt.imshow(result_img)
    plt.axis('off')
    plt.show()


# Example: run the function on one input image
predict_from_file("dataset-mix/train/images/United_States_000068.jpg")