In [1]:
!pip install ultralytics
!pip install albumentations==1.0.3
!pip install ipywidgets

Collecting ultralytics
  Obtaining dependency information for ultralytics from https://files.pythonhosted.org/packages/f4/79/b192c64e13eefc7f8543a10080767ab057850fa9bdbc38e7c261a3432c93/ultralytics-8.0.227-py3-none-any.whl.metadata
  Downloading ultralytics-8.0.227-py3-none-any.whl.metadata (32 kB)
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.0.227-py3-none-any.whl (660 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m660.5/660.5 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.227
Collecting albumentations==1.0.3
  Downloading albumentations-1.0.3-py3-none-any.whl (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: albumentations
  Attempting uninstall: albumen

In [4]:
from tqdm.auto import tqdm

import numpy as np

import os
import cv2


def tile(bounds, x_start, y_start, size):
    """Project a pixel-space box into one square tile and build its YOLO label.

    Args:
        bounds: ``(class, x_min, y_min, x_max, y_max)`` in full-image coordinates.
        x_start: X offset of the tile's left edge inside the full image.
        y_start: Y offset of the tile's top edge inside the full image.
        size: Side length of the square tile.

    Returns:
        ``(class, x_center, y_center, width, height)`` where the four geometry
        values are divided by ``size``, or ``None`` when the box lies outside
        the tile, has zero width or height, or keeps less than 30% of its
        width or of its height after clipping to the tile.
    """
    label, left, top, right, bottom = bounds

    # Re-express the box relative to the tile's top-left corner.
    left, right = left - x_start, right - x_start
    top, bottom = top - y_start, bottom - y_start

    misses_tile = (left > size) or (right < 0.0) or (top > size) or (bottom < 0.0)
    if misses_tile:
        return None

    # Degenerate boxes would divide by zero in the visibility ratios below.
    if right == left or bottom == top:
        return None

    # Horizontal clipping and visibility check.
    clip_left = max(left, 0)
    clip_right = min(right, size)
    visible_w = clip_right - clip_left
    if visible_w / (right - left) < 0.3:
        return None

    # Vertical clipping and visibility check.
    clip_top = max(top, 0)
    clip_bottom = min(bottom, size)
    visible_h = clip_bottom - clip_top
    if visible_h / (bottom - top) < 0.3:
        return None

    return (
        label,
        (clip_left + clip_right) / 2.0 / size,
        (clip_top + clip_bottom) / 2.0 / size,
        visible_w / size,
        visible_h / size,
    )


# https://stackoverflow.com/a/64097592/16660603
def yolobbox2bbox(coords: list, size: int) -> list:
    """Convert a normalised YOLO box into integer pixel corners on a square image.

    Args:
        coords: ``[class, x_center, y_center, width, height]`` with the four
            geometry values expressed as fractions of the image side.
        size: Side length of the square image in pixels.

    Returns:
        ``[class, x1, y1, x2, y2]`` where ``class`` is cast to ``int``, the
        top-left corner is clamped to be at least 0 and the bottom-right
        corner is clamped to be at most ``size - 1``.
    """
    _c, x, y, w, h = coords

    x1 = int((x - w / 2) * size)
    x2 = int((x + w / 2) * size)
    y1 = int((y - h / 2) * size)
    y2 = int((y + h / 2) * size)

    last_pixel = size - 1
    x1 = max(x1, 0)
    x2 = min(x2, last_pixel)
    # The original tested `y2 < 0` here, so a negative y1 was never clamped.
    y1 = max(y1, 0)
    y2 = min(y2, last_pixel)

    return [int(_c), x1, y1, x2, y2]


def load_annotation(path: str) -> list:
    """Read a whitespace-separated annotation file into rows of floats.

    Args:
        path: Path of the text file to read.

    Returns:
        One list of floats per non-blank line, in file order. Blank or
        whitespace-only lines are skipped so that callers which unpack a
        fixed number of fields per row never receive an empty row.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    with open(path, 'r') as f:
        # str.split() with no arguments already discards the trailing newline.
        return [list(map(float, line.split())) for line in f if line.strip()]


def load_img_path(load_data_path: str, img_path: str, labels_path: str) -> tuple:
    """Resolve the image/label file pairs named in a split list.

    Args:
        load_data_path: Text file with one file stem (no extension) per line.
        img_path: Directory searched for ``<stem>.jpg`` files.
        labels_path: Directory searched for ``<stem>.txt`` files.

    Returns:
        ``(img_paths, labels_paths)``: two lists of equal length, in the order
        of the split list, holding ``f'{img_path}/<stem>.jpg'`` and
        ``f'{labels_path}/<stem>.txt'``. A stem is kept only when BOTH files
        exist, so the two lists always stay aligned.
    """
    # Sets give constant-time membership tests instead of scanning a list per stem.
    available_images = set(os.listdir(img_path))
    available_labels = set(os.listdir(labels_path))

    img_paths, labels_paths = [], []
    with open(load_data_path, 'r') as f:
        for line in f:
            stem = line.strip()
            if not stem:
                continue

            img_name = f'{stem}.jpg'
            lbl_name = f'{stem}.txt'
            # Check both files before appending either. The original appended
            # the image first and then tried to remove it using the .txt name,
            # which raised ValueError whenever a label was missing.
            if img_name in available_images and lbl_name in available_labels:
                img_paths.append(f'{img_path}/{img_name}')
                labels_paths.append(f'{labels_path}/{lbl_name}')

    return (img_paths, labels_paths)


def _tile_starts(size: int, tile_size: int, tile_overlap: int) -> list:
    """Return the start offsets of the tiles that cover ``[0, size)`` on one axis."""
    last_start = size - tile_size
    # Advance by the stride; the final tile is pinned flush against the far edge.
    return list(range(0, last_start, tile_size - tile_overlap)) + [last_start]


def process_data(mode: str, load_data_list: tuple, save_path: str,
                 default_size: int = 800, tile_size: int = 512,
                 tile_overlap: int = 64) -> None:
    """Resize every image of a split, cut it into square tiles and write the tiles with labels.

    For each image/label pair returned by ``load_img_path(*load_data_list)``
    the image is resized to ``default_size x default_size``, its YOLO
    annotations are converted to pixel boxes, and one JPEG plus one label file
    is written per tile under ``{save_path}/{mode}/images`` and
    ``{save_path}/{mode}/labels``. Output files are named
    ``<stem>_<x_start>_<y_start>`` with a ``.jpg`` or ``.txt`` extension.

    Args:
        mode: Name of the split; used as the output sub-directory.
        load_data_list: ``(list_file, image_dir, label_dir)`` forwarded to
            ``load_img_path``.
        save_path: Root directory for the generated dataset.
        default_size: Side length every image is resized to before tiling.
        tile_size: Side length of each tile.
        tile_overlap: Overlap between consecutive tiles; the last tile on each
            axis is aligned with the image edge and may overlap more.

    Raises:
        ValueError: If the tile geometry is inconsistent.
        OSError: If an image listed for the split cannot be decoded.
    """
    if not 0 <= tile_overlap < tile_size <= default_size:
        raise ValueError(
            'Expected 0 <= tile_overlap < tile_size <= default_size, got '
            f'tile_overlap={tile_overlap}, tile_size={tile_size}, default_size={default_size}'
        )

    img, label = load_img_path(*load_data_list)
    assert len(img) == len(label), 'Image and label length mismatch'

    images_dir = save_path + f'/{mode}/images'
    labels_dir = save_path + f'/{mode}/labels'
    for _path in (images_dir, labels_dir):
        os.makedirs(_path, exist_ok=True)

    # Every image is resized to the same square, so the grid is loop-invariant.
    starts = _tile_starts(default_size, tile_size, tile_overlap)

    for image_path, label_path in tqdm(zip(img, label), total=len(img)):
        # splitext keeps stems that contain dots intact (split('.')[0] truncated them).
        img_stem = os.path.splitext(os.path.basename(image_path))[0]
        annot_stem = os.path.splitext(os.path.basename(label_path))[0]

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'Could not read image: {image_path}')
        image = cv2.resize(image, (default_size, default_size))

        coords_list = [
            yolobbox2bbox(annotation, size=default_size)
            for annotation in load_annotation(label_path)
        ]

        for x_start in starts:
            for y_start in starts:
                x_end = x_start + tile_size
                y_end = y_start + tile_size

                # Copy so the written array is contiguous. The original sliced
                # the destination with the image size rather than the tile size.
                cut_tile = image[y_start:y_end, x_start:x_end].copy()
                cv2.imwrite(f'{images_dir}/{img_stem}_{x_start}_{y_start}.jpg', cut_tile)

                found_tags = [
                    tag
                    for tag in (tile(bounds, x_start, y_start, tile_size) for bounds in coords_list)
                    if tag is not None
                ]

                # An empty label file is still written: it marks the tile as background.
                with open(f'{labels_dir}/{annot_stem}_{x_start}_{y_start}.txt', 'w') as f:
                    for tags in found_tags:
                        f.write(' '.join(str(value) for value in tags) + '\n')


# Source dataset and the scratch directory that receives the tiled splits.
data_path = '../input/diordata'
save_path = '/kaggle/temp'

# Image and label directories shared by every split.
img_path = f'{data_path}/images'
labels_path = f'{data_path}/labels'

# One list file of image stems per split.
train_img_name_path = f'{data_path}/ImageSets/train.txt'
val_img_name_path = f'{data_path}/ImageSets/val.txt'
test_img_name_path = f'{data_path}/ImageSets/test.txt'

# Tile the splits in order: train, val, test.
for _mode, _name_list in (
    ('train', train_img_name_path),
    ('val', val_img_name_path),
    ('test', test_img_name_path),
):
    process_data(_mode, (_name_list, img_path, labels_path), save_path)

  0%|          | 0/19004 [00:00<?, ?it/s]

  0%|          | 0/2112 [00:00<?, ?it/s]

  0%|          | 0/2347 [00:00<?, ?it/s]

In [9]:
from ultralytics import YOLO

import yaml

model_data_path = '/kaggle/working/Model'
# Join with os.path.join: the original '\config.yaml' literal is an invalid
# escape sequence and, on Linux, produced a file literally named
# 'Model\config.yaml' in /kaggle/working instead of Model/config.yaml.
config_path = os.path.join(model_data_path, 'config.yaml')

# Build YOLOv8-nano from its architecture YAML.
model = YOLO('yolov8n.yaml')

# Class names; the list position of each name is its class id.
labels = [
    'Storage Tank',
    'Baseball field',
    'Tennis court',
    'Basketball Court',
    'Wind mill',
    'Vehicle',
    'Harbor',
    'Ship',
    'Airplane',
    'Bridge',
    'Overpass',
    'Expressway toll station',
    'Train station',
    'Chimney',
    'Ground Track Field',
    'Dam',
    'Expressway service area',
    'Stadium',
    'Airport',
    'Golf course'
]

# Dataset description consumed by model.train(); split paths are relative to 'path'.
config = {
    'path': '/kaggle/temp',
    'train': 'train/images',
    'val': 'val/images',
    'test': 'test/images',
    # Integer class ids, as in the documented dataset YAML layout.
    'names': dict(enumerate(labels))
}

# The target directory is not guaranteed to exist on a fresh session.
os.makedirs(model_data_path, exist_ok=True)
with open(config_path, 'w') as f:
    yaml.dump(config, f, default_flow_style=False)

model.train(data=config_path, imgsz=512, epochs=20, batch=64, name='Yolov8_3')


                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128

[34m[1mtrain: [0mScanning /kaggle/temp/train/labels.cache... 76016 images, 4889 backgrounds, 0 corrupt: 100%|██████████| 76016/76016 [00:00<?, ?it/s]






[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/temp/val/labels.cache... 8448 images, 535 backgrounds, 0 corrupt: 100%|██████████| 8448/8448 [00:00<?, ?it/s]


Plotting labels to runs/detect/Yolov8_35/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 512 train, 512 val
Using 4 dataloader workers
Logging results to [1mruns/detect/Yolov8_35[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      13.4G      3.164      4.446      3.572        259        512: 100%|██████████| 1188/1188 [12:07<00:00,  1.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:19<00:00,  1.21s/it]


                   all       8448      36967      0.405      0.155      0.129     0.0623

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      15.5G      1.925      2.537      2.004        269        512: 100%|██████████| 1188/1188 [12:04<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:07<00:00,  1.02s/it]


                   all       8448      36967      0.395       0.32      0.313      0.174

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      12.7G      1.624      1.919      1.623        452        512: 100%|██████████| 1188/1188 [10:43<00:00,  1.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:46<00:00,  1.42it/s]


                   all       8448      36967       0.56      0.427      0.434      0.253

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20        12G      1.494       1.57      1.475        219        512: 100%|██████████| 1188/1188 [10:40<00:00,  1.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:56<00:00,  1.16it/s]


                   all       8448      36967      0.615       0.53      0.556      0.342

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      11.2G      1.401      1.383      1.401        304        512: 100%|██████████| 1188/1188 [11:00<00:00,  1.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:01<00:00,  1.07it/s]


                   all       8448      36967      0.662      0.572      0.613      0.388

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      13.3G      1.347      1.271      1.356        318        512: 100%|██████████| 1188/1188 [11:03<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:05<00:00,  1.01it/s]


                   all       8448      36967      0.704      0.611      0.657      0.423

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      10.4G      1.306      1.191      1.321        390        512: 100%|██████████| 1188/1188 [10:43<00:00,  1.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:45<00:00,  1.44it/s]


                   all       8448      36967      0.747      0.639      0.696      0.462

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      8.65G      1.282      1.149      1.302        368        512: 100%|██████████| 1188/1188 [10:38<00:00,  1.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:45<00:00,  1.45it/s]


                   all       8448      36967      0.755      0.668      0.722      0.481

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      12.5G      1.256      1.101      1.283        324        512: 100%|██████████| 1188/1188 [10:04<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:44<00:00,  1.47it/s]


                   all       8448      36967      0.779      0.673      0.737        0.5

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      13.7G      1.238      1.064      1.267        216        512: 100%|██████████| 1188/1188 [10:16<00:00,  1.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:46<00:00,  1.41it/s]


                   all       8448      36967      0.785      0.684       0.75      0.509
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      11.8G      1.249     0.9454      1.272        141        512: 100%|██████████| 1188/1188 [09:33<00:00,  2.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:07<00:00,  1.03s/it]


                   all       8448      36967      0.811      0.701      0.775      0.535

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      13.7G       1.23     0.9074      1.254         94        512: 100%|██████████| 1188/1188 [09:47<00:00,  2.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:57<00:00,  1.15it/s]


                   all       8448      36967      0.816      0.711      0.784      0.545

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      13.8G      1.212     0.8782       1.24        175        512: 100%|██████████| 1188/1188 [09:26<00:00,  2.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:44<00:00,  1.49it/s]


                   all       8448      36967      0.821      0.717      0.791      0.552

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      14.1G      1.195      0.852      1.229        133        512: 100%|██████████| 1188/1188 [09:17<00:00,  2.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:45<00:00,  1.46it/s]


                   all       8448      36967      0.826      0.722      0.796      0.558

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      12.6G      1.179     0.8288      1.216        277        512: 100%|██████████| 1188/1188 [09:18<00:00,  2.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:55<00:00,  1.18it/s]


                   all       8448      36967       0.83      0.726        0.8      0.563

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      15.4G      1.163     0.8075      1.204        122        512: 100%|██████████| 1188/1188 [09:21<00:00,  2.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:51<00:00,  1.29it/s]


                   all       8448      36967      0.829      0.731      0.804      0.566

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      14.8G      1.149     0.7858      1.193        181        512: 100%|██████████| 1188/1188 [09:14<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:48<00:00,  1.35it/s]


                   all       8448      36967      0.829      0.736      0.807      0.569

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      15.2G      1.131     0.7632      1.181        115        512: 100%|██████████| 1188/1188 [09:16<00:00,  2.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:52<00:00,  1.25it/s]


                   all       8448      36967      0.833      0.738      0.809      0.572

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      13.7G      1.118      0.743      1.173        159        512: 100%|██████████| 1188/1188 [09:19<00:00,  2.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:49<00:00,  1.34it/s]


                   all       8448      36967       0.83      0.744      0.811      0.574

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      13.2G        1.1     0.7198      1.162        122        512: 100%|██████████| 1188/1188 [09:30<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [00:53<00:00,  1.24it/s]


                   all       8448      36967      0.831      0.746      0.813      0.576

20 epochs completed in 3.716 hours.
Optimizer stripped from runs/detect/Yolov8_35/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/Yolov8_35/weights/best.pt, 6.2MB

Validating runs/detect/Yolov8_35/weights/best.pt...
Ultralytics YOLOv8.0.227 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)
YOLOv8n summary (fused): 168 layers, 3009548 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 66/66 [01:08<00:00,  1.03s/it]


                   all       8448      36967       0.83      0.746      0.813      0.576
          Storage Tank       8448       5691       0.88      0.719      0.806      0.491
        Baseball field       8448        935      0.871      0.913      0.948      0.804
          Tennis court       8448       2237      0.926      0.913      0.954      0.823
      Basketball Court       8448        713      0.874      0.887      0.923      0.792
             Wind mill       8448        872      0.891      0.768       0.84      0.445
               Vehicle       8448       6955      0.819      0.487      0.619       0.34
                Harbor       8448       1026      0.718      0.636      0.685      0.447
                  Ship       8448      11155      0.897       0.93      0.952      0.566
              Airplane       8448       1917      0.918      0.905      0.935      0.661
                Bridge       8448        857      0.796      0.447      0.568      0.333
              Overpas

VBox(children=(Label(value='16.933 MB of 16.933 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
lr/pg0,▃▆███▇▇▆▆▅▅▅▄▄▃▃▂▂▁▁
lr/pg1,▃▆███▇▇▆▆▅▅▅▄▄▃▃▂▂▁▁
lr/pg2,▃▆███▇▇▆▆▅▅▅▄▄▃▃▂▂▁▁
metrics/mAP50(B),▁▃▄▅▆▆▇▇▇▇██████████
metrics/mAP50-95(B),▁▃▄▅▅▆▆▇▇▇▇█████████
metrics/precision(B),▁▁▄▅▅▆▇▇▇▇██████████
metrics/recall(B),▁▃▄▅▆▆▇▇▇▇▇█████████
model/GFLOPs,▁
model/parameters,▁
model/speed_PyTorch(ms),▁

0,1
lr/pg0,0.00109
lr/pg1,0.00109
lr/pg2,0.00109
metrics/mAP50(B),0.81298
metrics/mAP50-95(B),0.57578
metrics/precision(B),0.82992
metrics/recall(B),0.74636
model/GFLOPs,8.215
model/parameters,3014748.0
model/speed_PyTorch(ms),1.259


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7d8930705f00>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043