In [None]:
# @title Setup

competition = "DENTEX"  # @param
# @markdown ---

from google.colab import userdata
import json

# Get the Kaggle credentials from Colab's userdata
username = userdata.get("KAGGLE_USER")
key = userdata.get("KAGGLE_KEY")

# Echo the credentials into the kaggle.json file
!mkdir -p ~/.kaggle
!echo '{{"username":"{username}","key":"{key}"}}' > ~/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json

competition_id = "super-ai-engineer-5-DENTEX"
!kaggle competitions download -c {competition_id}
!unzip /content/{competition_id}.zip

Archive:  /content/super-ai-engineer-5-DENTEX.zip
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_0.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_100.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_103.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_106.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_107.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_108.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_109.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_11.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_114.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_115.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_116.png  
  inflating: DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_124.png  
  inflating: DENT

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import glob
import shutil
from tqdm import tqdm
import xml.etree.ElementTree as ET

## Data Prep

In [None]:
# Class name -> YOLO class index; the list order fixes the ids written to the label files
CLASS_MAP = {
    name: index
    for index, name in enumerate(
        ["Caries", "Deep Caries", "Periapical Lesion", "Impacted"]
    )
}

# Location of the unzipped competition data and of the YOLO-layout copy built from it
ORIGINAL_DATASET = "/content/DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023"
NEW_DATASET = "DENTEX_YOLO"

# Source folders inside the original dataset
TRAIN_IMG_DIR, TEST_IMG_DIR = (
    os.path.join(ORIGINAL_DATASET, split) for split in ("train", "test")
)

# Destination folders following the images/<split> + labels/<split> layout
YOLO_TRAIN_IMG, YOLO_TEST_IMG, YOLO_TRAIN_LABELS = (
    os.path.join(NEW_DATASET, sub_dir)
    for sub_dir in ("images/train", "images/test", "labels/train")
)

# Make sure every destination folder exists (safe to re-run)
for folder in (YOLO_TRAIN_IMG, YOLO_TEST_IMG, YOLO_TRAIN_LABELS):
    os.makedirs(folder, exist_ok=True)

# Function to convert Pascal VOC XML to YOLO format
def convert_voc_to_yolo(xml_file, output_txt_path, img_width, img_height, class_map=None):
    """Convert one Pascal VOC XML annotation file into a YOLO label file.

    Each ``<object>`` whose ``<name>`` is a key of ``class_map`` becomes one line
    ``"<class_id> <x_center> <y_center> <width> <height>"`` with the four box
    values normalised by the image size and written with six decimals.

    Args:
        xml_file: Path to the VOC XML annotation.
        output_txt_path: Path of the label file to write. It is always
            (re)created, and is empty when no object is kept.
        img_width: Image width in pixels used for normalisation.
        img_height: Image height in pixels used for normalisation.
        class_map: Optional mapping ``class name -> class id``. Defaults to the
            module-level ``CLASS_MAP``.

    Raises:
        TypeError / ValueError: if a kept object has a missing or non-numeric
            box coordinate.
    """
    if class_map is None:
        class_map = CLASS_MAP

    root = ET.parse(xml_file).getroot()

    label_lines = []
    for obj in root.findall("object"):
        # findtext returns None when <name> is absent; treat that as an unknown class
        class_name = (obj.findtext("name") or "").strip()
        if class_name not in class_map:
            continue  # Skip unknown classes

        bndbox = obj.find("bndbox")
        if bndbox is None:
            continue  # Object without a box cannot be expressed as a YOLO label

        # float() instead of int(): coordinates such as "123.0" are accepted too
        xmin = float(bndbox.findtext("xmin"))
        ymin = float(bndbox.findtext("ymin"))
        xmax = float(bndbox.findtext("xmax"))
        ymax = float(bndbox.findtext("ymax"))

        # Clamp to the image and order the corners so that normalised values
        # always fall inside [0, 1]
        x_lo, x_hi = sorted(min(max(v, 0.0), float(img_width)) for v in (xmin, xmax))
        y_lo, y_hi = sorted(min(max(v, 0.0), float(img_height)) for v in (ymin, ymax))
        if x_hi <= x_lo or y_hi <= y_lo:
            continue  # Zero-area box after clamping carries no information

        # Convert to YOLO format (normalized)
        x_center = ((x_lo + x_hi) / 2) / img_width
        y_center = ((y_lo + y_hi) / 2) / img_height
        width = (x_hi - x_lo) / img_width
        height = (y_hi - y_lo) / img_height

        label_lines.append(
            f"{class_map[class_name]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

    with open(output_txt_path, "w") as f:
        f.writelines(label_lines)

# Process training images and labels
xml_files = glob.glob(os.path.join(TRAIN_IMG_DIR, "*.xml"))

# Annotations whose image is missing; reported after the loop instead of being dropped silently
skipped_xml = []

for xml_file in tqdm(xml_files):
    # splitext only touches the extension; str.replace would also rewrite any
    # ".xml" occurring elsewhere in the path
    stem = os.path.splitext(xml_file)[0]
    img_file = stem + ".png"
    txt_file = os.path.join(YOLO_TRAIN_LABELS, os.path.basename(stem) + ".txt")

    if not os.path.exists(img_file):  # Ensure image exists
        skipped_xml.append(xml_file)
        continue

    # Read image size from XML
    root = ET.parse(xml_file).getroot()
    img_width = int(root.findtext("size/width"))
    img_height = int(root.findtext("size/height"))

    # Convert XML to YOLO format
    convert_voc_to_yolo(xml_file, txt_file, img_width, img_height)

    # Copy image to new YOLO dataset
    shutil.copy(img_file, os.path.join(YOLO_TRAIN_IMG, os.path.basename(img_file)))

print("✅ XML Annotations converted and copied to 'DENTEX_YOLO/'")
if skipped_xml:
    print(f"⚠️ {len(skipped_xml)} annotation file(s) skipped because no matching .png was found")

# Test images have no annotations here; they are copied as-is
test_images = glob.glob(os.path.join(TEST_IMG_DIR, "*.png"))
for img in tqdm(test_images):
    shutil.copy(img, os.path.join(YOLO_TEST_IMG, os.path.basename(img)))

print("✅ Test images copied to 'DENTEX_YOLO/images/test/'")

100%|██████████| 406/406 [00:07<00:00, 52.22it/s]


✅ XML Annotations converted and copied to 'DENTEX_YOLO/'


100%|██████████| 271/271 [00:06<00:00, 41.99it/s]

✅ Test images copied to 'DENTEX_YOLO/images/test/'





In [None]:
# Class names ordered by their YOLO index, so the YAML cannot drift from the
# ids written into the label files
class_names = [name for name, _ in sorted(CLASS_MAP.items(), key=lambda item: item[1])]

# NOTE: `val` points at the training images, so the metrics reported during
# training are measured on data the model has seen, not on a held-out split.
yaml_content = f"""path: {os.path.abspath(NEW_DATASET)}
train: images/train
val: images/train
test: images/test

nc: {len(class_names)}
names: {class_names}
"""

with open(os.path.join(NEW_DATASET, "dataset.yaml"), "w") as f:
    f.write(yaml_content)

print("✅ dataset.yaml file created successfully in 'DENTEX_YOLO/'")

✅ dataset.yaml file created successfully in 'DENTEX_YOLO/'


## Model Training

In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.94-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [None]:
from ultralytics import YOLO
import torch

# Train on the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Start from the pretrained checkpoint and fine-tune on the converted dataset.
# NOTE(review): the saved output of this cell reports model=rtdetr-l.pt and
# epochs=10, i.e. it was produced by a different configuration - re-run to refresh.
model = YOLO("yolo11m.pt")
train_settings = dict(
    data="DENTEX_YOLO/dataset.yaml",
    epochs=20,
    imgsz=640,
    batch=16,
    device=device,
)
holdout_val = model.train(**train_settings)

Ultralytics 8.3.94 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=rtdetr-l.pt, data=DENTEX_YOLO/dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=Tru

[34m[1mtrain: [0mScanning /content/DENTEX_YOLO/labels/train.cache... 406 images, 0 backgrounds, 0 corrupt: 100%|██████████| 406/406 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/DENTEX_YOLO/labels/train.cache... 406 images, 0 backgrounds, 0 corrupt: 100%|██████████| 406/406 [00:00<?, ?it/s]






Plotting labels to runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 143 weight(decay=0.0), 206 weight(decay=0.0005), 226 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train4[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       1/10      12.6G      1.437      15.72     0.9817         25        640: 100%|██████████| 26/26 [00:47<00:00,  1.81s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.95s/it]


                   all        406       2075    0.00879      0.214     0.0108    0.00352

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       2/10      12.5G     0.7852      1.207     0.3687         35        640: 100%|██████████| 26/26 [00:39<00:00,  1.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  2.00s/it]


                   all        406       2075      0.509       0.22     0.0212     0.0099

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       3/10      12.6G     0.6025      1.226     0.2571         24        640: 100%|██████████| 26/26 [00:39<00:00,  1.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.96s/it]

                   all        406       2075     0.0244      0.246     0.0251    0.00955






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       4/10      12.6G     0.5412      1.501     0.2239         19        640: 100%|██████████| 26/26 [00:40<00:00,  1.56s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.94s/it]

                   all        406       2075      0.712      0.221      0.116     0.0525






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       5/10      12.5G     0.5073       1.16     0.2169         35        640: 100%|██████████| 26/26 [00:39<00:00,  1.50s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:24<00:00,  1.91s/it]


                   all        406       2075      0.732      0.181      0.134     0.0552

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       6/10      12.6G     0.4884      1.086     0.1991         22        640: 100%|██████████| 26/26 [00:40<00:00,  1.56s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.93s/it]

                   all        406       2075        nan      0.419      0.135     0.0618






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       7/10      12.3G     0.4836      1.063     0.1981         25        640: 100%|██████████| 26/26 [00:37<00:00,  1.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:24<00:00,  1.91s/it]

                   all        406       2075      0.253      0.345      0.221        0.1






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       8/10      13.2G     0.4562      1.072     0.1888         27        640: 100%|██████████| 26/26 [00:41<00:00,  1.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.98s/it]

                   all        406       2075      0.289       0.39      0.294       0.15






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       9/10      13.3G      0.426      1.088      0.171         41        640: 100%|██████████| 26/26 [00:40<00:00,  1.56s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.94s/it]

                   all        406       2075      0.315      0.403      0.316      0.165






      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      10/10      12.6G     0.4198      1.052     0.1684         28        640: 100%|██████████| 26/26 [00:39<00:00,  1.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:25<00:00,  1.93s/it]


                   all        406       2075       0.61      0.383       0.32      0.171

10 epochs completed in 0.204 hours.
Optimizer stripped from runs/detect/train4/weights/last.pt, 66.1MB
Optimizer stripped from runs/detect/train4/weights/best.pt, 66.1MB

Validating runs/detect/train4/weights/best.pt...
Ultralytics 8.3.94 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
rt-detr-l summary: 302 layers, 31,991,960 parameters, 0 gradients, 103.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:28<00:00,  2.21s/it]


                   all        406       2075      0.607      0.384       0.32      0.171
                Caries        367       1278      0.248      0.566      0.273      0.158
           Deep Caries        199        351       0.33      0.111       0.15     0.0891
     Periapical Lesion         64         88          1          0          0          0
              Impacted        153        358      0.849       0.86       0.86      0.437
Speed: 0.4ms preprocess, 18.4ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1mruns/detect/train4[0m


In [None]:
# Quick sanity check on one test image; the path is built from TEST_IMG_DIR
# rather than repeating the absolute dataset location.
result = model(os.path.join(TEST_IMG_DIR, "train_0.png"))
result[0].boxes


image 1/1 /content/DENTEX CHALLENGE 2023/DENTEX CHALLENGE 2023/test/train_0.png: 640x640 14 Cariess, 1 Deep Caries, 98.3ms
Speed: 19.7ms preprocess, 98.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)


ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.], device='cuda:0')
conf: tensor([0.4102, 0.4317, 0.3988, 0.3640, 0.3414, 0.4147, 0.3970, 0.3062, 0.2936, 0.3183, 0.3299, 0.2967, 0.2680, 0.2584, 0.2605], device='cuda:0')
data: tensor([[7.4411e+02, 7.1080e+02, 1.0078e+03, 9.8083e+02, 4.1020e-01, 0.0000e+00],
        [8.5880e+02, 7.4912e+02, 1.1785e+03, 1.0578e+03, 4.3169e-01, 0.0000e+00],
        [1.7880e+03, 4.7660e+02, 2.0562e+03, 7.5245e+02, 3.9885e-01, 0.0000e+00],
        [1.8922e+03, 7.1999e+02, 2.1438e+03, 9.4479e+02, 3.6403e-01, 0.0000e+00],
        [7.6285e+02, 4.3114e+02, 1.0518e+03, 7.1015e+02, 3.4142e-01, 0.0000e+00],
        [8.2042e+02, 7.4501e+02, 1.1575e+03, 1.0591e+03, 4.1468e-01, 0.0000e+00],
        [7.5751e+02, 7.1339e+02, 1.0199e+03, 9.8189e+02, 3.9700e-01, 0.0000e+00],
        [1.6376e+03, 4.8055e+02, 1.9446e+03, 7.7531e+02, 3.0624e-01, 0.0000e+00],
        [6.3733e+02, 6.7605e+02, 8.82

## Inference

In [None]:
TEST_IMAGE_DIR = "DENTEX_YOLO/images/test"
# Regular files only, sorted so the submission rows come out in a stable order
test_images = sorted(
    name
    for name in os.listdir(TEST_IMAGE_DIR)
    if os.path.isfile(os.path.join(TEST_IMAGE_DIR, name))
)

# Store predictions: one [id, boxes, labels, scores] row per image, with the
# three lists serialised as their Python string form ("[]" when nothing is detected)
submission_data = []
for img_name in tqdm(test_images):
    img_path = os.path.join(TEST_IMAGE_DIR, img_name)

    results = model(img_path, verbose=False)

    boxes_list = []
    labels_list = []
    scores_list = []

    for r in results:
        detections = r.boxes
        # A single tolist() per attribute instead of one .item() call per box
        # Bounding box coordinates, truncated to integer pixels
        boxes_list.extend([int(v) for v in xyxy] for xyxy in detections.xyxy.tolist())
        # Class label
        labels_list.extend(int(c) for c in detections.cls.tolist())
        # Confidence score, rounded to two decimals
        scores_list.extend(round(s, 2) for s in detections.conf.tolist())

    # str([]) is already "[]", so no special case is needed for empty detections
    submission_data.append([img_name, str(boxes_list), str(labels_list), str(scores_list)])

print("✅ Predictions extracted successfully.")

100%|██████████| 271/271 [00:52<00:00,  5.19it/s]

✅ Predictions extracted successfully.





In [None]:
# One row per test image; the column order is the one the rows were built in
SUBMISSION_COLUMNS = ["id", "boxes", "labels", "scores"]
submission = pd.DataFrame(data=submission_data, columns=SUBMISSION_COLUMNS)
submission.head()

Unnamed: 0,id,boxes,labels,scores
0,train_0.png,"[[1881, 719, 2144, 949], [856, 749, 1186, 1048...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0.47, 0.43, 0.31, 0.4, 0.36, 0.29, 0.32, 0.34..."
1,train_100.png,"[[1859, 799, 2148, 1085], [707, 803, 981, 1064...","[0, 3, 0, 0, 0, 0, 0, 0]","[0.41, 0.6, 0.31, 0.29, 0.3, 0.25, 0.27, 0.25]"
2,train_103.png,"[[1920, 485, 2124, 653], [820, 420, 1019, 606]...","[3, 3, 3, 0, 0, 0]","[0.8, 0.76, 0.76, 0.39, 0.35, 0.33]"
3,train_106.png,"[[1902, 671, 2210, 970], [841, 676, 1145, 966]...","[0, 0, 0, 0, 0, 0]","[0.44, 0.44, 0.34, 0.32, 0.26, 0.34]"
4,train_107.png,"[[899, 913, 1208, 1207], [1775, 871, 2040, 113...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0.45, 0.43, 0.34, 0.35, 0.38, 0.37, 0.28, 0.3..."


In [None]:
# Write the submission without the DataFrame index column.
# NOTE(review): the file name says RT-DETR-L, while the training cell loads
# yolo11m.pt - confirm which model this file is meant to describe.
submission_path = 'rt-detr-L_20epochs.csv'
submission.to_csv(submission_path, index=False)