In [1]:
import os
from ultralytics import YOLO
from pylabel import importer
import logging
logging.getLogger().setLevel(logging.INFO)

In [6]:
dataset_dir = "data/VHA_YOLO"
dataset_name = "174instances"
path_to_yolo_labels = os.path.join(dataset_dir, dataset_name, "labels")
path_to_images = "../images"
classes_file = os.path.join(dataset_dir, dataset_name, "classes.txt")
classes = []
with open(classes_file, 'r') as f:
    classes = f.read().splitlines()

dataset = importer.ImportYoloV5(path=path_to_yolo_labels, path_to_images=path_to_images, cat_names=classes,
    img_ext="jpg", name="174instances")
print(dataset.df.head(5))
print(f"Number of images: {dataset.analyze.num_images}")
print(f"Number of classes: {dataset.analyze.num_classes}")
print(f"Classes:{dataset.analyze.classes}")
print(f"Class counts:\n{dataset.analyze.class_counts}")

Importing YOLO files...:   0%|          | 0/172 [00:00<?, ?it/s]

Importing YOLO files...: 100%|██████████| 172/172 [00:05<00:00, 33.38it/s]

   img_folder      img_filename img_path  img_id  img_width  img_height  \
id                                                                        
0   ../images  03ba0d45-429.jpg                0       1240        1744   
1   ../images  03ba0d45-429.jpg                0       1240        1744   
2   ../images  03ba0d45-429.jpg                0       1240        1744   
3   ../images  03ba0d45-429.jpg                0       1240        1744   
4   ../images  03ba0d45-429.jpg                0       1240        1744   

    img_depth ann_segmented  ann_bbox_xmin  ann_bbox_ymin  ...  ann_iscrowd  \
id                                                         ...                
0           3                    78.346810     114.386343  ...                
1           3                    75.212938     274.213836  ...                
2           3                    78.346810     423.072776  ...                
3           3                    84.614555     634.609164  ...                





### Create splits with balanced distribution of classes

In [9]:
dataset.splitter.StratifiedGroupShuffleSplit(train_pct=.7, val_pct=.15, test_pct=.15, batch_size=1)


Splitting dataset: 341it [00:01, 173.83it/s]                        


In [11]:
dataset.export.ExportToYoloV5(output_path='data/VHA_YOLO/training/labels',yaml_file='dataset.yaml', copy_images=True, use_splits=True)

Exporting YOLO files...: 100%|██████████| 172/172 [00:00<00:00, 428.49it/s]


['data\\VHA_YOLO\\training\\dataset.yaml',
 'data\\VHA_YOLO\\training\\labels\\train\\44c1501b-460.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\6dbbd488-459.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\470c8ec1-437.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\0832ad31-363.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\232e977f-468.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\953bdf7c-440.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\62d04c7b-475.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\21237200-478.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\294099b1-25.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\ac8c5aba-11.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\45377a15-434.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\7a882af3-28.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\0d4d491a-206.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\aee5f126-340.txt',
 'data\\VHA_YOLO\\training\\labels\\train\\e1544fb2-477.txt',
 'data\\VHA_YOLO\\training\\la

In [2]:

model = YOLO('yolov8n.yaml')
model = YOLO('yolov8n.pt')
results = model.train(data='data/VHA_YOLO/training/dataset.yaml', epochs=500)
results = model.val()
success = model.export(format='onnx')
success



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    229/500      2.63G     0.9278     0.6622     0.9379        590        640: 100%|██████████| 7/7 [00:00<00:00, 18.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.61it/s]

                   all         28        597      0.691      0.777      0.737      0.464






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    230/500      2.52G     0.9186     0.6543     0.9319        519        640: 100%|██████████| 7/7 [00:00<00:00, 17.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.74it/s]

                   all         28        597      0.719      0.744      0.735      0.461






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    231/500      2.73G     0.9372     0.6615     0.9387        626        640: 100%|██████████| 7/7 [00:00<00:00, 15.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.77it/s]

                   all         28        597      0.706      0.744      0.735      0.458






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    232/500      2.48G     0.9424     0.6716     0.9327        594        640: 100%|██████████| 7/7 [00:00<00:00, 16.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.35it/s]

                   all         28        597      0.697      0.748      0.735      0.458






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    233/500      2.86G     0.9274     0.6785     0.9322        730        640: 100%|██████████| 7/7 [00:00<00:00, 15.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.02it/s]

                   all         28        597      0.729      0.711      0.729      0.453






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    234/500      2.77G     0.9388     0.6725     0.9361        622        640: 100%|██████████| 7/7 [00:00<00:00, 15.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.64it/s]

                   all         28        597      0.716      0.705      0.699      0.424






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    235/500      2.48G     0.9308     0.6691     0.9452        467        640: 100%|██████████| 7/7 [00:00<00:00, 16.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.42it/s]

                   all         28        597      0.699      0.734      0.701      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    236/500      2.67G      0.907     0.6562     0.9357        542        640: 100%|██████████| 7/7 [00:00<00:00, 17.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.29it/s]

                   all         28        597        0.7      0.732      0.707      0.429






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    237/500      2.42G     0.9061     0.6545     0.9381        532        640: 100%|██████████| 7/7 [00:00<00:00, 16.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.49it/s]

                   all         28        597      0.711      0.723      0.706      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    238/500      2.65G     0.9355     0.6675      0.944        496        640: 100%|██████████| 7/7 [00:00<00:00, 16.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.50it/s]

                   all         28        597      0.678      0.769      0.699      0.428






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    239/500      2.82G     0.9237      0.662     0.9341        655        640: 100%|██████████| 7/7 [00:00<00:00, 15.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.70it/s]

                   all         28        597      0.658      0.779      0.699      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    240/500      2.55G     0.8945     0.6439     0.9299        408        640: 100%|██████████| 7/7 [00:00<00:00, 15.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.58it/s]

                   all         28        597      0.626      0.789      0.708      0.438






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    241/500      2.81G     0.9036     0.6417     0.9281        689        640: 100%|██████████| 7/7 [00:00<00:00, 15.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.78it/s]

                   all         28        597      0.678      0.706      0.714      0.435






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    242/500      2.56G     0.9291     0.6595     0.9351        574        640: 100%|██████████| 7/7 [00:00<00:00, 17.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 12.19it/s]

                   all         28        597      0.697      0.724      0.721      0.434






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    243/500      2.52G     0.9076     0.6526     0.9354        548        640: 100%|██████████| 7/7 [00:00<00:00, 16.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.09it/s]

                   all         28        597       0.69      0.738      0.707      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    244/500      2.56G     0.8966     0.6424     0.9299        624        640: 100%|██████████| 7/7 [00:00<00:00, 17.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.11it/s]

                   all         28        597      0.674      0.773      0.703      0.435






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    245/500      2.53G     0.9028     0.6535     0.9365        632        640: 100%|██████████| 7/7 [00:00<00:00, 18.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.71it/s]

                   all         28        597      0.677       0.76      0.696      0.431






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    246/500      2.52G     0.9073     0.6477     0.9308        576        640: 100%|██████████| 7/7 [00:00<00:00, 17.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.19it/s]

                   all         28        597      0.636      0.771      0.699      0.434






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    247/500      2.54G     0.9052     0.6524     0.9359        644        640: 100%|██████████| 7/7 [00:00<00:00, 15.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.36it/s]

                   all         28        597      0.672      0.737      0.709      0.451






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    248/500      2.86G     0.9043     0.6405     0.9265        613        640: 100%|██████████| 7/7 [00:00<00:00, 16.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.79it/s]

                   all         28        597      0.657      0.744      0.708      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    249/500      2.53G     0.9007     0.6459     0.9349        541        640: 100%|██████████| 7/7 [00:00<00:00, 17.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.00it/s]


                   all         28        597      0.653      0.737      0.704      0.445

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    250/500      2.79G     0.9279     0.6618     0.9384        623        640: 100%|██████████| 7/7 [00:00<00:00, 16.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.78it/s]

                   all         28        597      0.651      0.739      0.694      0.435






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    251/500      2.52G     0.9007      0.637     0.9354        577        640: 100%|██████████| 7/7 [00:00<00:00, 17.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.53it/s]

                   all         28        597      0.676      0.717      0.686      0.427






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    252/500      2.56G     0.9133     0.6564     0.9369        559        640: 100%|██████████| 7/7 [00:00<00:00, 16.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.68it/s]

                   all         28        597      0.682      0.725      0.685      0.425






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    253/500      2.51G     0.9022     0.6444     0.9315        507        640: 100%|██████████| 7/7 [00:00<00:00, 17.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.13it/s]

                   all         28        597       0.68       0.72      0.686      0.423






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    254/500      2.68G     0.8787     0.6245      0.921        550        640: 100%|██████████| 7/7 [00:00<00:00, 16.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.41it/s]

                   all         28        597      0.726      0.692      0.698      0.429






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    255/500      2.99G     0.8908     0.6504      0.935        580        640: 100%|██████████| 7/7 [00:00<00:00, 15.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 14.29it/s]

                   all         28        597      0.687      0.737      0.697       0.44






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    256/500      2.55G      0.887     0.6284     0.9268        554        640: 100%|██████████| 7/7 [00:00<00:00, 16.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.50it/s]

                   all         28        597      0.707      0.723      0.711      0.447






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    257/500      2.71G     0.8897     0.6366     0.9258        685        640: 100%|██████████| 7/7 [00:00<00:00, 16.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.33it/s]

                   all         28        597      0.706      0.731      0.711      0.444






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    258/500      2.55G     0.9066      0.641     0.9338        579        640: 100%|██████████| 7/7 [00:00<00:00, 16.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.08it/s]

                   all         28        597      0.707       0.73      0.712      0.439






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    259/500      2.82G     0.8929     0.6368     0.9285        594        640: 100%|██████████| 7/7 [00:00<00:00, 16.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.91it/s]

                   all         28        597      0.682      0.762      0.712      0.443






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    260/500      2.47G      0.884     0.6361     0.9294        510        640: 100%|██████████| 7/7 [00:00<00:00, 16.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 11.58it/s]

                   all         28        597      0.633      0.808      0.705      0.441






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    261/500      2.82G     0.8859     0.6405     0.9285        515        640: 100%|██████████| 7/7 [00:00<00:00, 17.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 13.26it/s]

                   all         28        597      0.655      0.769      0.701      0.439
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 100 epochs. Best results observed at epoch 161, best model saved as best.pt.
To update EarlyStopping(patience=100) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






261 epochs completed in 0.065 hours.
Optimizer stripped from c:\Users\Fergons\Desktop\KNN\KIE-1\runs\detect\train7\weights\last.pt, 6.3MB
Optimizer stripped from c:\Users\Fergons\Desktop\KNN\KIE-1\runs\detect\train7\weights\best.pt, 6.3MB

Validating c:\Users\Fergons\Desktop\KNN\KIE-1\runs\detect\train7\weights\best.pt...
Ultralytics YOLOv8.2.2  Python-3.8.10 torch-2.3.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
Model summary (fused): 168 layers, 3007988 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  7.36it/s]


                   all         28        597      0.709      0.753      0.771      0.473
            birth_date         28         34      0.555      0.647      0.534      0.282
            death_book         28         28      0.858      0.929      0.904      0.669
            death_date         28         36      0.475      0.306      0.387      0.203
          funeral_date         28         24      0.883      0.958      0.969      0.604
              grave_id         28         11      0.866      0.909      0.976        0.6
        grave_location         28         27       0.63      0.815      0.827      0.518
    information_source         28         30      0.558      0.631      0.699      0.401
                   key         28        323      0.803      0.842      0.877      0.528
                  name         28         33      0.776      0.879      0.816      0.591
           nationality         28         21      0.818      0.641      0.809      0.399
                  ran

[34m[1mval: [0mScanning C:\Users\Fergons\Desktop\KNN\KIE-1\data\VHA_YOLO\training\labels\val.cache... 28 images, 0 backgrounds, 0 corrupt: 100%|██████████| 28/28 [00:00<?, ?it/s]




  return F.conv2d(input, weight, bias, self.stride,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:03<00:00,  1.90s/it]


                   all         28        597      0.711      0.748       0.77      0.473
            birth_date         28         34      0.563      0.647      0.535      0.281
            death_book         28         28       0.86      0.929      0.904      0.669
            death_date         28         36      0.498      0.306      0.385      0.198
          funeral_date         28         24       0.88      0.958      0.969      0.617
              grave_id         28         11      0.869      0.909      0.976      0.618
        grave_location         28         27      0.635      0.815      0.827      0.514
    information_source         28         30      0.542      0.592      0.696       0.39
                   key         28        323      0.805      0.842      0.877      0.527
                  name         28         33      0.778      0.879      0.816      0.587
           nationality         28         21      0.817      0.637      0.808      0.399
                  ran

'c:\\Users\\Fergons\\Desktop\\KNN\\KIE-1\\runs\\detect\\train7\\weights\\best.onnx'

In [3]:
# model = YOLO('runs/detect/train9/weights/best.pt')
# test set results
model.val(data='data/VHA_YOLO/training/dataset_test.yaml')

Ultralytics YOLOv8.2.2  Python-3.8.10 torch-2.3.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)


[34m[1mval: [0mScanning C:\Users\Fergons\Desktop\KNN\KIE-1\data\VHA_YOLO\training\labels\val.cache... 28 images, 0 backgrounds, 0 corrupt: 100%|██████████| 28/28 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:04<00:00,  2.31s/it]


                   all         28        597      0.711      0.748       0.77      0.473
            birth_date         28         34      0.563      0.647      0.535      0.281
            death_book         28         28       0.86      0.929      0.904      0.669
            death_date         28         36      0.498      0.306      0.385      0.198
          funeral_date         28         24       0.88      0.958      0.969      0.617
              grave_id         28         11      0.869      0.909      0.976      0.618
        grave_location         28         27      0.635      0.815      0.827      0.514
    information_source         28         30      0.542      0.592      0.696       0.39
                   key         28        323      0.805      0.842      0.877      0.527
                  name         28         33      0.778      0.879      0.816      0.587
           nationality         28         21      0.817      0.637      0.808      0.399
                  ran

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000013EC5279D60>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,  