In [28]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
    # for filename in filenames:
    #     print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [29]:
def is_valid_yolo_label_file(txt_path, num_classes=1):
    with open(txt_path, "r") as f:
        lines = f.readlines()

    for line in lines:
        parts = line.strip().split()
        if len(parts) != 5:
            print(f"Invalid format in: {txt_path} → {line.strip()}")
            return False
        try:
            class_id = int(parts[0])
            if not (0 <= class_id < num_classes):
                print(f"Invalid class ID in: {txt_path}")
                return False

            bbox = [float(x) for x in parts[1:]]
            if any([x <= 0.0 or x > 1.0 for x in bbox]):
                print(f"Invalid normalized bbox in: {txt_path} → {bbox}")
                return False
        except ValueError:
            print(f"Non-numeric value in: {txt_path} → {line.strip()}")
            return False

    return True

In [30]:
import os
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import shutil

# ========== CONFIGURATION ==========
DATA_DIR = "/kaggle/input/dog-data/Imagenet"
ANNOTATIONS_DIR = os.path.join(DATA_DIR, "Annotations")
IMAGES_DIR = os.path.join(DATA_DIR, "Images")
YOLO_DATASET_DIR = "/kaggle/working/yolo_dataset"

# ========== CREATE YOLO DIRECTORY STRUCTURE ==========
for split in ["train", "val"]:
    os.makedirs(os.path.join(YOLO_DATASET_DIR, "images", split), exist_ok=True)
    os.makedirs(os.path.join(YOLO_DATASET_DIR, "labels", split), exist_ok=True)

# ========== UTILITY FUNCTION ==========
def convert_bbox(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[2]) / 2.0
    y = (box[1] + box[3]) / 2.0
    w = box[2] - box[0]
    h = box[3] - box[1]
    return (x * dw, y * dh, w * dw, h * dh)

# ========== STEP 1: GATHER ALL IMAGE FILES ==========
image_files = []
for dirpath, _, filenames in os.walk(IMAGES_DIR):
    for file in filenames:
        if file.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_files.append(os.path.join(dirpath, file))

print(f"Total images found: {len(image_files)}")

# ========== STEP 2: SPLIT INTO TRAIN AND VAL ==========
train_images, val_images = train_test_split(image_files, test_size=0.2, random_state=42)

# ========== STEP 3: GET XML PATH FROM IMAGE ==========
def get_annotation_path(image_path):
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    folder_name = os.path.basename(os.path.dirname(image_path))
    annotation_path = os.path.join(ANNOTATIONS_DIR, folder_name, image_name)
    
    if os.path.exists(annotation_path):
        return annotation_path
    elif os.path.exists(annotation_path + ".xml"):
        return annotation_path + ".xml"
    else:
        return None

# ========== STEP 4: PARSE & CONVERT FUNCTION ==========
def process_images(image_list, split):
    for image_path in image_list:
        annotation_path = get_annotation_path(image_path)
        if annotation_path is None:
            print(f"No annotation found for {image_path}")
            continue

        try:
            tree = ET.parse(annotation_path)
            root = tree.getroot()
        except Exception as e:
            print(f"Failed to parse XML {annotation_path}: {e}")
            continue

        size_tag = root.find("size")
        w = int(size_tag.find("width").text)
        h = int(size_tag.find("height").text)

        base_name = os.path.splitext(os.path.basename(image_path))[0]
        txt_path = os.path.join(YOLO_DATASET_DIR, "labels", split, base_name + ".txt")
        with open(txt_path, "w") as f:
            for obj in root.findall("object"):
                cls_id = 0

                xml_box = obj.find("bndbox")
                box = (
                    int(xml_box.find("xmin").text),
                    int(xml_box.find("ymin").text),
                    int(xml_box.find("xmax").text),
                    int(xml_box.find("ymax").text)
                )
                yolo_box = convert_bbox((w, h), box)
                f.write(f"{cls_id} {' '.join(f'{a:.6f}' for a in yolo_box)}\n")

        # Immediately validate the file
        if not is_valid_yolo_label_file(txt_path, num_classes=1):
            # Remove label and image if invalid
            os.remove(txt_path)
            os.remove(image_path)
            continue

        out_image_path = os.path.join(YOLO_DATASET_DIR, "images", split, os.path.basename(image_path))
        shutil.copy(image_path, out_image_path)

# ========== STEP 5: PROCESS ==========
process_images(train_images, "train")
process_images(val_images, "val")

print("YOLOv8 dataset prepared successfully!")

Total images found: 10374
YOLOv8 dataset prepared successfully!


In [33]:
!pip install ultralytics



In [34]:
# Import YOLOv8 model
from ultralytics import YOLO

# Load pre-trained YOLOv8 model (nano version is fastest)
model = YOLO('yolov8s.pt')  # Use 'yolov8s.pt' or 'yolov8m.pt' for better accuracy

# Train the model
model.train(data='/kaggle/input/yolo-data-file/data.yaml', epochs=5, imgsz=640, batch=16)

Ultralytics 8.3.107 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/kaggle/input/yolo-data-file/data.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sho

[34m[1mtrain: [0mScanning /kaggle/working/yolo_dataset/labels/train... 8299 images, 0 backgrounds, 0 corrupt: 100%|██████████| 8299/8299 [00:07<00:00, 1180.79it/s]






[34m[1mtrain: [0mNew cache created: /kaggle/working/yolo_dataset/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/labels/val... 2075 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2075/2075 [00:01<00:00, 1155.56it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/yolo_dataset/labels/val.cache





Plotting labels to runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train4[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5      6.31G     0.9156     0.9885      1.404         32        640: 100%|██████████| 519/519 [02:34<00:00,  3.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:16<00:00,  3.96it/s]


                   all       2075       2208      0.777      0.762      0.822      0.491

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5      6.31G       1.04     0.9351      1.493         36        640: 100%|██████████| 519/519 [02:31<00:00,  3.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:15<00:00,  4.12it/s]


                   all       2075       2208      0.745      0.791      0.814      0.485

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      6.31G     0.9649     0.8598      1.435         29        640: 100%|██████████| 519/519 [02:31<00:00,  3.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:15<00:00,  4.30it/s]


                   all       2075       2208      0.895      0.877      0.938      0.666

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5      6.31G     0.8635     0.7464      1.359         32        640: 100%|██████████| 519/519 [02:30<00:00,  3.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:15<00:00,  4.33it/s]


                   all       2075       2208      0.909      0.932      0.958      0.731

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      6.31G     0.7726     0.6422      1.293         24        640: 100%|██████████| 519/519 [02:30<00:00,  3.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:15<00:00,  4.33it/s]

                   all       2075       2208      0.946      0.936      0.974      0.799






5 epochs completed in 0.234 hours.
Optimizer stripped from runs/detect/train4/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train4/weights/best.pt, 22.5MB

Validating runs/detect/train4/weights/best.pt...
Ultralytics 8.3.107 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:15<00:00,  4.15it/s]


                   all       2075       2208      0.946      0.936      0.974        0.8


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.1ms preprocess, 3.9ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/detect/train4[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f4d14bf7610>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048, 

In [36]:
from ultralytics import YOLO

model = YOLO("runs/detect/train4/weights/best.pt")  # Load trained model

# Inference on new image or folder
results = model.predict(source="/kaggle/input/dog-data-test/test", save=True)


image 1/154 /kaggle/input/dog-data-test/test/n02104365_10071.jpg: 480x640 1 dog, 14.6ms
image 2/154 /kaggle/input/dog-data-test/test/n02104365_10139.jpg: 480x640 1 dog, 13.1ms
image 3/154 /kaggle/input/dog-data-test/test/n02104365_10247.jpg: 448x640 2 dogs, 39.7ms
image 4/154 /kaggle/input/dog-data-test/test/n02104365_10252.jpg: 480x640 1 dog, 13.7ms
image 5/154 /kaggle/input/dog-data-test/test/n02104365_10282.jpg: 640x480 2 dogs, 38.7ms
image 6/154 /kaggle/input/dog-data-test/test/n02104365_10319.jpg: 576x640 1 dog, 39.7ms
image 7/154 /kaggle/input/dog-data-test/test/n02104365_10393.jpg: 480x640 1 dog, 13.8ms
image 8/154 /kaggle/input/dog-data-test/test/n02104365_10399.jpg: 640x480 1 dog, 12.6ms
image 9/154 /kaggle/input/dog-data-test/test/n02104365_10514.jpg: 448x640 2 dogs, 13.7ms
image 10/154 /kaggle/input/dog-data-test/test/n02104365_10598.jpg: 512x640 1 dog, 39.8ms
image 11/154 /kaggle/input/dog-data-test/test/n02104365_10606.jpg: 640x448 1 dog, 38.7ms
image 12/154 /kaggle/input

In [37]:
model.export(format="torchscript")

Ultralytics 8.3.107 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.00GHz)

[34m[1mPyTorch:[0m starting from 'runs/detect/train4/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (21.5 MB)

[34m[1mTorchScript:[0m starting export with torch 2.5.1+cu121...
[34m[1mTorchScript:[0m export success ✅ 2.5s, saved as 'runs/detect/train4/weights/best.torchscript' (42.9 MB)

Export complete (4.3s)
Results saved to [1m/kaggle/working/runs/detect/train4/weights[0m
Predict:         yolo predict task=detect model=runs/detect/train4/weights/best.torchscript imgsz=640  
Validate:        yolo val task=detect model=runs/detect/train4/weights/best.torchscript imgsz=640 data=/kaggle/input/yolo-data-file/data.yaml  
Visualize:       https://netron.app


'runs/detect/train4/weights/best.torchscript'