In [None]:
!pip uninstall -y numpy pandas scipy scikit-learn tensorflow ydata-profiling ultralytics

!pip install "numpy<2.0" "pandas<2.2" "scikit-learn<1.4" "scipy" "seaborn" "matplotlib" "ultralytics"

print("\n Version Installed, Restart Session")

In [1]:
import ultralytics
from ultralytics import YOLO
import os
import shutil
import yaml
import json
import glob
import numpy as np
import torch
import random
from PIL import Image
import PIL.Image
from tqdm.notebook import tqdm
from IPython.display import Image, display
from collections import Counter
from sklearn.model_selection import train_test_split

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [2]:
# Report the versions of the two libraries pinned / relied on by this notebook.
# A single print call with a newline separator emits one line per library.
print(
    f"✅ Numpy Version: {np.__version__}",
    f"✅ Torch Version: {torch.__version__}",
    sep="\n",
)

✅ Numpy Version: 1.26.4
✅ Torch Version: 2.8.0+cu126


In [3]:
# Wipe the contents of the working folder (the folder itself is kept) so the
# run starts from a clean slate. WARNING: this deletes everything inside it.
folder = '/kaggle/working'

print(f"Cleaning {folder}...")

for filename in os.listdir(folder):
    file_path = os.path.join(folder, filename)
    try:
        # Symlinks are unlinked (never followed), regular files are removed,
        # and real directories are deleted recursively.
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        # Best effort: report the failure and continue with the other entries.
        print(f'Failed Cleaning {file_path}. Reason: {e}')

# Only claim the folder is empty if it really is: deletions may have failed,
# and entries that are neither file, link nor directory are not handled above.
remaining = os.listdir(folder)
if remaining:
    print(f"Finished, but {len(remaining)} item(s) could not be removed: {remaining}")
else:
    print("Finished! Folder is empty.")

Cleaning /kaggle/working...
Finished! Folder is empty.


In [None]:
# Print the Ultralytics environment summary, then confirm that a CUDA device
# is actually visible before announcing that the GPU is ready.
ultralytics.checks()
if torch.cuda.is_available():
    print("\n Setup finished. GPU Ready.")
else:
    # The training cell requests device=0, which needs a CUDA GPU.
    print("\n Setup finished. WARNING: no CUDA GPU detected (training uses device=0).")

In [4]:
# ---------------------------------------------------------------------------
# Convert the COCO-format annotation file into a YOLO dataset:
#   1. read the COCO JSON and remap category ids to contiguous 0-based ids,
#   2. split the images 80/20 into train/val, stratified by each image's
#      rarest category,
#   3. re-save every image as RGB JPEG and write one YOLO label file per image,
#   4. write the data.yaml consumed by the training cell.
# ---------------------------------------------------------------------------
INPUT_ROOT = "/kaggle/input/palembang-telecomunication-provider-infrastructure"
JSON_PATH = os.path.join(INPUT_ROOT, "coco/annotations/instances_annotations_cvat.json")
OUTPUT_DIR = "/kaggle/working/yolo_dataset"
# Leading dots matter: without them a name merely ending in "jpg" would match.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _coco_bbox_to_yolo(bbox, img_w, img_h):
    """Convert a COCO ``[x, y, width, height]`` pixel box to YOLO format.

    The box corners are clipped to the image rectangle *before* normalising,
    so the returned box always lies inside the image. (Clamping the centre
    and the size independently can leave a box that still extends past the
    image border.)

    Args:
        bbox: Sequence ``[x_min, y_min, width, height]`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(x_center, y_center, width, height)`` normalised to ``[0, 1]``, or
        ``None`` when the clipped box has no area.
    """
    x_min = max(0.0, float(bbox[0]))
    y_min = max(0.0, float(bbox[1]))
    x_max = min(float(img_w), float(bbox[0]) + float(bbox[2]))
    y_max = min(float(img_h), float(bbox[1]) + float(bbox[3]))
    if x_max <= x_min or y_max <= y_min:
        return None
    return (
        (x_min + x_max) / 2 / img_w,
        (y_min + y_max) / 2 / img_h,
        (x_max - x_min) / img_w,
        (y_max - y_min) / img_h,
    )


# Start from an empty output tree so files from an earlier run cannot leak in.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
for split in ['train', 'val']:
    os.makedirs(os.path.join(OUTPUT_DIR, "images", split), exist_ok=True)
    os.makedirs(os.path.join(OUTPUT_DIR, "labels", split), exist_ok=True)

with open(JSON_PATH, 'r') as f:
    coco = json.load(f)
categories = {cat['id']: cat['name'] for cat in coco['categories']}

# Map the original category ids (in sorted order) onto 0..N-1 class indices.
sorted_cat_ids = sorted(categories.keys())
id_map = {old_id: new_id for new_id, old_id in enumerate(sorted_cat_ids)}
names_list = [categories[old_id] for old_id in sorted_cat_ids]

# Index every image under INPUT_ROOT by its base name. If two files share a
# base name, the one found last wins.
source_map = {}
for path in glob.glob(f"{INPUT_ROOT}/**/*.*", recursive=True):
    if path.lower().endswith(IMAGE_EXTENSIONS):
        source_map[os.path.basename(path)] = path

print("Analyzing Class Distribution for Stratified Split...")

all_annotations = coco['annotations']
global_cat_counts = Counter(ann['category_id'] for ann in all_annotations)

# Group annotations by image once, so the export loop below does not have to
# rescan the full annotation list for every image.
img_to_anns = {}
for ann in all_annotations:
    img_to_anns.setdefault(ann['image_id'], []).append(ann)
img_to_cats = {
    img_id: [ann['category_id'] for ann in anns]
    for img_id, anns in img_to_anns.items()
}

images = coco['images']
X_ids = []
y_strat = []

for img in images:
    if img['id'] not in img_to_cats:
        # Images without annotations form their own stratum.
        strat_label = -1
    else:
        # Stratify on the globally rarest category present in the image.
        cats_in_img = img_to_cats[img['id']]
        strat_label = min(cats_in_img, key=lambda c: global_cat_counts[c])

    X_ids.append(img)
    y_strat.append(strat_label)

try:
    train_imgs, val_imgs = train_test_split(
        X_ids,
        test_size=0.2,
        stratify=y_strat,
        random_state=42
    )
    print("Stratified split successful.")
except ValueError as e:
    # train_test_split raised ValueError for the stratified request; fall
    # back to a plain random split with the same seed.
    print(f"Warning: Stratification failed. Fallback to Random Split. Error: {e}")
    train_imgs, val_imgs = train_test_split(X_ids, test_size=0.2, random_state=42)

splits = {'train': train_imgs, 'val': val_imgs}

print("Rebuilding Dataset...")
missing_images = []   # listed in the JSON but not found under INPUT_ROOT
dropped_boxes = 0     # boxes with no area after clipping to the image
for split, img_list in splits.items():
    for img_info in tqdm(img_list, desc=f"Processing {split}"):
        basename = os.path.basename(img_info['file_name'])
        if basename not in source_map:
            missing_images.append(img_info['file_name'])
            continue
        stem = os.path.splitext(basename)[0]

        # Re-encode as RGB JPEG and take the size from the decoded image
        # rather than from the JSON metadata.
        with PIL.Image.open(source_map[basename]) as im:
            rgb_im = im.convert('RGB')
            rgb_im.save(os.path.join(OUTPUT_DIR, "images", split, stem + ".jpg"), quality=95)
            w_real, h_real = rgb_im.size

        label_lines = []
        for ann in img_to_anns.get(img_info['id'], []):
            yolo_box = _coco_bbox_to_yolo(ann['bbox'], w_real, h_real)
            if yolo_box is None:
                dropped_boxes += 1
                continue
            xc, yc, w, h = yolo_box
            label_lines.append(
                f"{id_map[ann['category_id']]} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n"
            )

        # A label file is written even when it is empty (image without boxes).
        txt_path = os.path.join(OUTPUT_DIR, "labels", split, stem + ".txt")
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(label_lines)

if missing_images:
    print(f"Warning: {len(missing_images)} annotated image(s) not found under INPUT_ROOT and skipped.")
if dropped_boxes:
    print(f"Warning: {dropped_boxes} bounding box(es) had no area inside the image and were dropped.")

yaml_content = {
    'path': OUTPUT_DIR,
    'train': 'images/train',
    'val': 'images/val',
    'names': {i: n for i, n in enumerate(names_list)}
}
with open(f"{OUTPUT_DIR}/data.yaml", 'w') as f:
    yaml.dump(yaml_content, f)

print("FInished. Distributed dataset ready")

Analyzing Class Distribution for Stratified Split...
Stratified split successful.
Rebuilding Dataset...


Processing train:   0%|          | 0/124 [00:00<?, ?it/s]

Processing val:   0%|          | 0/32 [00:00<?, ?it/s]

FInished. Distributed dataset ready


In [None]:
#TRAINING
print("\n Train Starting...")
model = YOLO("yolov8n.pt")

# Run configuration: dataset, schedule, hardware and output location.
run_args = dict(
    data=f"{OUTPUT_DIR}/data.yaml",
    epochs=100,
    imgsz=640,
    batch=16,
    device=0,
    project="provider_project",
    name="aug_run",
    exist_ok=True,
    patience=100,  # same value as `epochs`
)

# --- AUGMENTATION SETTINGS ---
# The rationale notes below are the notebook author's, translated to English.
augmentation_args = dict(
    hsv_h=0.015,      # HUE: change only very slightly; marker colours must not change (red stays red).
    hsv_s=0.7,        # SATURATION: rusty / faded poles vary in colour (0.7 = 70% variation).
    hsv_v=0.4,        # VALUE: simulates lighting conditions (harsh midday sun vs. overcast).
    degrees=5.0,      # ROTATION: poles sometimes lean a little, but never lie flat (max 5 degrees).
    translate=0.1,    # TRANSLATE: shift objects vertically / horizontally.
    scale=0.5,        # SCALE: important, so the model sees far (small) and near (large) poles.
    shear=0.0,        # SHEAR: not really needed for poles.
    perspective=0.0,  # PERSPECTIVE: zeroed to keep things simple.
    flipud=0.0,       # FLIP UP-DOWN: off; provider poles are never upside down.
    fliplr=0.5,       # FLIP LEFT-RIGHT: 50% chance; a pole looks the same from the left or the right.
    # MOSAIC: combines 4 images into 1.
    # NOTE(review): the author's note called this "mandatory 100%" for a small
    # dataset, but the configured value is 0.5 - confirm which is intended.
    mosaic=0.5,
    mixup=0.1,        # MIXUP: helps with overlapping, stacked poles.
    copy_paste=0.1,   # COPY-PASTE: good for segmentation, reasonable for detection.
    # ERASING: described by the author as the key setting; meant to simulate
    # occlusion (poles covered by flyers / trees).
    # NOTE(review): check the Ultralytics docs on whether `erasing` and
    # `copy_paste` take effect for detection training - TODO confirm.
    erasing=0.4,
)

# Kept as the last expression so the notebook shows the value returned by train().
model.train(**run_args, **augmentation_args)

In [None]:
# Root of the generated YOLO dataset; the evaluation and inference cells below
# read data.yaml and the validation images from here. Same path as OUTPUT_DIR
# in the dataset-building cell.
working_dir = "/kaggle/working/yolo_dataset"

In [None]:
#Validation
# Load the best checkpoint written by the training run and score it on the
# validation split declared in data.yaml.
best_model_path = "/kaggle/working/provider_project/aug_run/weights/best.pt"
model_best = YOLO(best_model_path)

print("\nBest Model Evaluation:")
val_args = {
    "data": f"{working_dir}/data.yaml",
    "augment": True,
}
metrics = model_best.val(**val_args)

# Headline box metrics.
print(f"\n mAP50: {metrics.box.map50}")
print(f" mAP50-95: {metrics.box.map}")

In [None]:
# Run the best model on up to three random validation images and display the
# annotated predictions saved by Ultralytics.
val_images = glob.glob(f"{working_dir}/images/val/*")
# The sample must be drawn from `val_images`; `test_images` is never defined
# in this notebook and raised NameError here.
sample_images = random.sample(val_images, 3) if len(val_images) >= 3 else val_images

if not sample_images:
    # Nothing to predict on; an empty source list would only fail in predict().
    print(f"No validation images found in {working_dir}/images/val")
else:
    print("Inference Sample...")

    results = model_best.predict(
        source=sample_images,
        conf=0.4,
        save=True,
        project="provider-marker",
        name="inference_test",
        exist_ok=True
    )

    # `Image` and `display` come from IPython.display (imported in the top
    # import cell). With exist_ok=True the output folder is reused, so images
    # from earlier runs are shown as well.
    print("\nDetection Result:")
    for img_path in glob.glob("/kaggle/working/provider-marker/inference_test/*.jpg"):
        display(Image(filename=img_path, width=600))

In [None]:
# Reload the trained weights from disk and run inference on up to three random
# validation images, then display the annotated predictions.
weights_path = "/kaggle/working/provider_project/aug_run/weights/best.pt"

print(f"Loading model from: {weights_path}")
model = YOLO(weights_path)

val_images = glob.glob(f"{working_dir}/images/val/*.jpg")

# All checks and sampling use `val_images`; `test_images` is never defined in
# this notebook and raised NameError here.
if not val_images:
    print("Folder empty! Check you're 'working_dir' folder.")
else:
    sample_images = random.sample(val_images, 3) if len(val_images) >= 3 else val_images

    print(f"Melakukan prediksi pada {len(sample_images)} sampel...")

    results = model.predict(
        source=sample_images,
        conf=0.4,
        save=True,
        project="provider-marker",
        name="inference_test",
        exist_ok=True,
    )

    # With exist_ok=True the output folder is reused, so images saved by
    # earlier runs are shown as well.
    print("\nDetection Results:")
    for img_path in glob.glob("/kaggle/working/provider-marker/inference_test/*.jpg"):
        display(Image(filename=img_path, width=600))

In [None]:
# Package the generated dataset folder as yolo_datasetv1.zip in the current
# working directory (paths inside the archive start with "yolo_dataset/").
# shutil.make_archive rebuilds the archive on every run, whereas `zip -r`
# updates an existing archive and leaves entries for files that were deleted
# since the previous run; it also removes the dependency on a `zip` binary.
archive_path = shutil.make_archive("yolo_datasetv1", "zip", root_dir=".", base_dir="yolo_dataset")
print(f"Created archive: {archive_path}")

In [None]:
# Render a clickable link to the zipped dataset in the cell output.
from IPython.display import FileLink

# The path is relative to the current working directory. FileLink must stay
# the last expression of the cell so the notebook displays it.
FileLink(r'yolo_datasetv1.zip')