In [2]:
from ultralytics import YOLO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import json
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import shutil
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
base_dir = Path('C:/Codes/Summer_Internship/Data')
img_path = base_dir / 'data_object_image_2' / 'training' / 'image_2'
label_path = Path('C:/Codes/Summer_Internship/Data/labels')
with open('C:/Codes/Summer_Internship/Data/classes.json', 'r') as f:
    classes = json.load(f)

classes

{'Car': 0,
 'Pedestrian': 1,
 'Van': 2,
 'Cyclist': 3,
 'Truck': 4,
 'Misc': 5,
 'Tram': 6,
 'Person_sitting': 7}

In [4]:
ims = sorted(list(img_path.glob('*')))
labels = sorted(list(label_path.glob('*')))
pairs = list(zip(ims, labels))
pairs[:2]

[(WindowsPath('C:/Codes/Summer_Internship/Data/data_object_image_2/training/image_2/000000.png'),
  WindowsPath('C:/Codes/Summer_Internship/Data/labels/000000.txt')),
 (WindowsPath('C:/Codes/Summer_Internship/Data/data_object_image_2/training/image_2/000001.png'),
  WindowsPath('C:/Codes/Summer_Internship/Data/labels/000001.txt'))]

In [5]:
train, test = train_test_split(pairs, test_size=0.1, shuffle=True)
len(train), len(test)

(6732, 749)

In [6]:
train_path = Path('train').resolve()
train_path.mkdir(exist_ok=True)
valid_path = Path('valid').resolve()
valid_path.mkdir(exist_ok=True)

In [7]:
for t_img, t_lb in tqdm(train):
    im_path = train_path / t_img.name
    lb_path = train_path / t_lb.name
    shutil.copy(t_img, im_path)
    shutil.copy(t_lb, lb_path)

100%|██████████| 6732/6732 [01:24<00:00, 79.50it/s] 


In [8]:
for t_img, t_lb in tqdm(test):
    im_path = valid_path / t_img.name
    lb_path = valid_path / t_lb.name
    shutil.copy(t_img, im_path)
    shutil.copy(t_lb, lb_path)

100%|██████████| 749/749 [00:11<00:00, 66.96it/s]


In [9]:
yaml_file = 'names:\n'
yaml_file += '\n'.join(f'- {c}' for c in classes)
yaml_file += f'\nnc: {len(classes)}'
yaml_file += f'\ntrain: {str(train_path)}\nval: {str(valid_path)}'
with open('kitti.yaml', 'w') as f:
    f.write(yaml_file)

In [10]:
model = YOLO('yolov8n.yaml')
model = YOLO('yolov8n.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:01<00:00, 3.98MB/s]


In [11]:
train_results = model.train(
    data='C:/Codes/Summer_Internship/kitti.yaml',
    epochs=10,
    patience=3,
    mixup=0.1,
    project='yolov8n-kitti',
    device=0
)

Ultralytics YOLOv8.2.42  Python-3.12.4 torch-2.3.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Codes/Summer_Internship/kitti.yaml, epochs=10, time=None, patience=3, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=yolov8n-kitti, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_

[34m[1mtrain: [0mScanning C:\Codes\Summer_Internship\train... 7400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7400/7400 [00:22<00:00, 325.55it/s]


[34m[1mtrain: [0mNew cache created: C:\Codes\Summer_Internship\train.cache


[34m[1mval: [0mScanning C:\Codes\Summer_Internship\valid... 1417 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1417/1417 [00:06<00:00, 205.81it/s]


[34m[1mval: [0mNew cache created: C:\Codes\Summer_Internship\valid.cache
Plotting labels to yolov8n-kitti\train\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1myolov8n-kitti\train[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      2.17G      1.404      1.901       1.11         44        640: 100%|██████████| 463/463 [02:46<00:00,  2.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:14<00:00,  3.09it/s]


                   all       1417       7617      0.402      0.367      0.375      0.219

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      2.12G      1.271      1.222      1.069         27        640: 100%|██████████| 463/463 [02:31<00:00,  3.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:13<00:00,  3.40it/s]


                   all       1417       7617      0.474      0.477       0.46      0.267

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      2.11G      1.216      1.026      1.053         38        640: 100%|██████████| 463/463 [02:35<00:00,  2.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.97it/s]


                   all       1417       7617      0.527      0.499      0.486      0.289

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      2.11G      1.165     0.9113      1.037         33        640: 100%|██████████| 463/463 [02:13<00:00,  3.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.84it/s]


                   all       1417       7617      0.689      0.533        0.6      0.356

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      2.11G      1.124     0.8341      1.018         33        640: 100%|██████████| 463/463 [02:07<00:00,  3.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:16<00:00,  2.80it/s]

                   all       1417       7617      0.696      0.557      0.634      0.391






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      2.11G      1.077      0.774      0.996         43        640: 100%|██████████| 463/463 [02:15<00:00,  3.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:14<00:00,  3.12it/s]

                   all       1417       7617      0.675      0.631      0.671      0.428






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10       2.1G      1.045     0.7267     0.9857         26        640: 100%|██████████| 463/463 [02:50<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.92it/s]

                   all       1417       7617      0.805      0.587      0.708      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      2.11G      1.009     0.6941     0.9744         55        640: 100%|██████████| 463/463 [02:34<00:00,  3.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.90it/s]

                   all       1417       7617      0.786      0.641      0.729      0.473






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10       2.1G     0.9835      0.661     0.9629         55        640: 100%|██████████| 463/463 [02:08<00:00,  3.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:13<00:00,  3.29it/s]

                   all       1417       7617      0.773      0.673      0.752      0.492






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      2.11G     0.9516     0.6319     0.9509         26        640: 100%|██████████| 463/463 [02:04<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.99it/s]

                   all       1417       7617      0.806       0.66      0.764      0.519






10 epochs completed in 0.466 hours.
Optimizer stripped from yolov8n-kitti\train\weights\last.pt, 6.2MB
Optimizer stripped from yolov8n-kitti\train\weights\best.pt, 6.2MB

Validating yolov8n-kitti\train\weights\best.pt...
Ultralytics YOLOv8.2.42  Python-3.12.4 torch-2.3.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
Model summary (fused): 168 layers, 3007208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 45/45 [00:15<00:00,  2.98it/s]


                   all       1417       7617      0.806       0.66      0.764      0.518
                   Car       1273       5423      0.899      0.818      0.914      0.688
            Pedestrian        315        830      0.847       0.56      0.713      0.405
                   Van        387        540      0.904        0.7      0.823      0.595
               Cyclist        226        320      0.799      0.608      0.699      0.419
                 Truck        179        186      0.918      0.788      0.882      0.682
                  Misc        143        167      0.725      0.551      0.649      0.433
                  Tram         76        112      0.742      0.821      0.834      0.566
        Person_sitting         14         39       0.61      0.436      0.597      0.359
Speed: 0.2ms preprocess, 1.1ms inference, 0.0ms loss, 2.1ms postprocess per image
Results saved to [1myolov8n-kitti\train[0m


In [12]:
valid_results = model.val()

Ultralytics YOLOv8.2.42  Python-3.12.4 torch-2.3.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)


Model summary (fused): 168 layers, 3007208 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning C:\Codes\Summer_Internship\valid.cache... 1417 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1417/1417 [00:00<?, ?it/s]
  return F.conv2d(input, weight, bias, self.stride,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 89/89 [00:20<00:00,  4.35it/s]


                   all       1417       7617      0.806       0.66      0.764      0.519
                   Car       1273       5423      0.899      0.818      0.914      0.689
            Pedestrian        315        830      0.843       0.56      0.715      0.403
                   Van        387        540      0.904        0.7      0.823      0.595
               Cyclist        226        320      0.793      0.606      0.697      0.418
                 Truck        179        186      0.916      0.785      0.882      0.685
                  Misc        143        167      0.737      0.551      0.653      0.437
                  Tram         76        112      0.748      0.821      0.835      0.565
        Person_sitting         14         39       0.61      0.436      0.599      0.361
Speed: 0.2ms preprocess, 2.5ms inference, 0.0ms loss, 2.1ms postprocess per image
Results saved to [1myolov8n-kitti\train2[0m


In [13]:
preds = model.predict([test[idx][0] for idx in np.random.randint(0, len(test),(20,))],save=True)




0: 640x640 4 Cars, 1 Pedestrian, 5 Cyclists, 15.2ms
1: 640x640 1 Car, 15.2ms
2: 640x640 3 Cars, 15.2ms
3: 640x640 10 Cars, 15.2ms
4: 640x640 2 Cars, 1 Truck, 15.2ms
5: 640x640 6 Cars, 1 Van, 15.2ms
6: 640x640 4 Cars, 15.2ms
7: 640x640 2 Cars, 15.2ms
8: 640x640 4 Cars, 15.2ms
9: 640x640 1 Car, 1 Pedestrian, 1 Van, 15.2ms
10: 640x640 3 Cars, 12 Pedestrians, 1 Van, 15.2ms
11: 640x640 16 Cars, 1 Van, 1 Truck, 1 Misc, 15.2ms
12: 640x640 5 Cars, 1 Van, 15.2ms
13: 640x640 3 Cars, 2 Vans, 15.2ms
14: 640x640 1 Car, 15.2ms
15: 640x640 1 Pedestrian, 15.2ms
16: 640x640 2 Cars, 1 Truck, 15.2ms
17: 640x640 2 Cars, 15.2ms
18: 640x640 1 Car, 15.2ms
19: 640x640 1 Car, 15.2ms
Speed: 3.8ms preprocess, 15.2ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1myolov8n-kitti\train3[0m
