In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import shutil
import cv2
import os
from tqdm import tqdm

In [3]:
%pip install ultralytics
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.1.14 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 5432.1/8062.4 GB disk)


In [4]:
from ultralytics import YOLO

In [5]:
import os
import os.path as pt
import random
import shutil

import pandas as pd
import yaml
from tqdm import tqdm


def exists(path):
    """Make sure the directory ``path`` exists, creating it if necessary.

    Missing parent directories are created as well. Calling this for a
    directory that is already present is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "check, then create" steps, which
    # could fail if the directory appeared between the check and the create.
    os.makedirs(path, exist_ok=True)


def move_image(src_folder, dest_folder, image_name):
    """Copy the file ``image_name`` from ``src_folder`` into ``dest_folder``.

    Despite the function's name, the source file is left in place: the file
    is duplicated with ``shutil.copy`` rather than moved.

    Args:
        src_folder: Directory that currently holds the file.
        dest_folder: Directory that receives the copy.
        image_name: File name, identical in both directories.
    """
    # Build the source and destination paths from the shared file name.
    source, destination = (
        os.path.join(folder, image_name) for folder in (src_folder, dest_folder)
    )
    shutil.copy(source, destination)


def progress(list_, img_target, label_target, labels, data_path):
    """Convert CSV annotations to YOLO label files and copy the matching images.

    For every CSV file name in ``list_`` the CSV is read from ``data_path``.
    Each row is unpacked as ``[file_name, w, h, label, x1, y1, x2, y2]`` and
    turned into one ``<class_id> <x_center> <y_center> <width> <height>`` line,
    where the box values are divided by ``w`` / ``h``. The lines are written to
    ``<base name>.txt`` in ``label_target`` and ``<base name>.jpg`` is copied
    from ``data_path`` into ``img_target``.

    A CSV without data rows produces neither an image copy nor a label file.

    Args:
        list_: Iterable of CSV file names located in ``data_path``.
        img_target: Directory that receives the image copies.
        label_target: Directory that receives the ``.txt`` label files.
        labels: List of class names, mutated in place. Labels not seen before
            are appended; a label's position in the list is its class id.
        data_path: Directory containing the CSV files and the ``.jpg`` images.
    """
    for csv_filename in tqdm(list_):
        df = pd.read_csv(os.path.join(data_path, csv_filename))
        # splitext only strips the final extension, so base names that
        # themselves contain dots are not truncated.
        base_name = os.path.splitext(csv_filename)[0]

        yolo_lines = []
        for row in df.values:
            # row --> [file_name, w, h, label, x1, y1, x2, y2]
            _, w, h, label, x1, y1, x2, y2 = row

            if label not in labels:
                labels.append(label)
            class_id = labels.index(label)

            x_center = (x1 + x2) / (2 * w)
            y_center = (y1 + y2) / (2 * h)
            box_w = (x2 - x1) / w
            box_h = (y2 - y1) / h
            yolo_lines.append(f"{class_id} {x_center} {y_center} {box_w} {box_h}\n")

        if not yolo_lines:
            continue

        # Copy the image once per CSV instead of once per annotation row.
        move_image(data_path, img_target, base_name + '.jpg')

        # Write (not append) so that re-running the conversion overwrites the
        # label file instead of duplicating every annotation line.
        with open(pt.join(label_target, base_name + '.txt'), 'w') as f:
            f.writelines(yolo_lines)


def generate_yaml(train_path, val_path, names, nc, base):
    """Write the dataset description file ``mydata.yaml`` into ``base``.

    The file contains four top-level keys (``train``, ``val``, ``names`` and
    ``nc``) holding the corresponding argument values, dumped in YAML block
    style.

    Args:
        train_path: Value stored under ``train``.
        val_path: Value stored under ``val``.
        names: Value stored under ``names``.
        nc: Value stored under ``nc``.
        base: Directory in which ``mydata.yaml`` is created or overwritten.
    """
    yaml_path = pt.join(base, 'mydata.yaml')
    dataset_config = dict(train=train_path, val=val_path, names=names, nc=nc)

    with open(yaml_path, 'w') as stream:
        yaml.dump(dataset_config, stream, default_flow_style=False)


def main(save_path, source_path, scale, seed=None):
    """Build a train/val YOLO dataset from a folder of CSV annotations.

    Creates ``images/{train,val}`` and ``labels/{train,val}`` under
    ``save_path``, splits the CSV files found in ``source_path`` into a train
    and a validation list, converts both lists with ``progress`` and finally
    writes ``mydata.yaml`` into ``save_path``.

    The split is done per group: CSV files are grouped by the label found in
    the fourth column of their first data row, each group is shuffled, and the
    first ``int(len(group) * scale)`` files of the group go to train while the
    rest go to validation. CSV files without any data row are skipped.

    Args:
        save_path: Root directory of the generated dataset.
        source_path: Directory containing the ``.csv`` files and their images.
        scale: Fraction of each label group assigned to the train split.
        seed: Optional seed for the shuffle. With the default ``None`` the
            module-level ``random`` state is used, as before; with a value the
            split is reproducible across runs.
    """
    # Create the output directory layout.
    base = save_path
    img_path = pt.join(base, 'images')
    label_path = pt.join(base, 'labels')
    img_path_train = pt.join(img_path, 'train')
    img_path_val = pt.join(img_path, 'val')
    label_path_train = pt.join(label_path, 'train')
    label_path_val = pt.join(label_path, 'val')
    for directory in (img_path_train, img_path_val, label_path_train, label_path_val):
        exists(directory)

    data_path = source_path
    # os.listdir returns entries in arbitrary order; sorting makes the input
    # to the shuffle deterministic so that a seed fully fixes the split.
    csv_filenames = sorted(
        filename for filename in os.listdir(data_path) if filename.endswith('.csv')
    )

    # Group the CSV files by the label of their first row. Only that row is
    # needed, so only that row is parsed.
    files_by_label = {}
    for csv_name in csv_filenames:
        first_row = pd.read_csv(os.path.join(data_path, csv_name), nrows=1)
        if first_row.empty:
            continue
        # Columns are [file_name, w, h, label, x1, y1, x2, y2].
        label = first_row.iloc[0, 3]
        files_by_label.setdefault(label, []).append(csv_name)

    # Split every label group into train and val according to `scale`.
    rng = random if seed is None else random.Random(seed)
    train_files = []
    valid_files = []
    for group in files_by_label.values():
        rng.shuffle(group)
        num_train = int(len(group) * scale)  # number of train files in this group

        train_files.extend(group[:num_train])
        valid_files.extend(group[num_train:])

    # Report the train/val ratio; guard against an empty validation split
    # (e.g. scale=1.0), which previously raised ZeroDivisionError.
    if valid_files:
        print(len(train_files) / len(valid_files))
    else:
        print("validation split is empty; train/val ratio is undefined")

    # `progress` fills this list with the class names in order of first
    # appearance; the list index is the class id written to the label files.
    class_names = []
    progress(train_files, img_path_train, label_path_train, class_names, data_path)
    progress(valid_files, img_path_val, label_path_val, class_names, data_path)

    names = dict(enumerate(class_names))
    nc = len(class_names)
    generate_yaml(img_path_train, img_path_val, names, nc, base)


if __name__ == "__main__":
    # Convert the Kaggle input dataset into a YOLO dataset under
    # /kaggle/working/data. `scale` is the per-label-group train fraction used
    # by main(), so 0.9 sends roughly 90% of each group to train.
    main(save_path='/kaggle/working/data',
         source_path='/kaggle/input/militaryaircraftdetectiondataset/dataset',
         scale=0.9
         )

8.866615853658537


100%|██████████| 11633/11633 [04:38<00:00, 41.76it/s]
100%|██████████| 1312/1312 [00:30<00:00, 43.10it/s]


In [6]:
# Start from the pretrained yolov8m.pt checkpoint.
model = YOLO('yolov8m.pt')

# Earlier experiments, kept for reference (scores as noted by the author;
# presumably mAP50 like the final run - TODO confirm).

# Baseline with default parameters:
# results = model.train(data='/kaggle/working/data/mydata.yaml', epochs=15, imgsz=640, lrf=0.1)

# Previous best, score 0.79:
# results = model.train(data='/kaggle/working/data/mydata.yaml', epochs=15, imgsz=800)

# Current run, 0.821 mAP50. The earlier request of imgsz=860 is not a multiple
# of 32 and the training log reports "Image sizes 864 train, 864 val", so the
# effective size is requested explicitly.
results = model.train(data='/kaggle/working/data/mydata.yaml', epochs=15, imgsz=864)


Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:00<00:00, 278MB/s]


Ultralytics YOLOv8.1.14 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/data/mydata.yaml, epochs=15, time=None, patience=50, batch=16, imgsz=860, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tru



Overriding model.yaml nc=80 with nc=46

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytic

100%|██████████| 6.23M/6.23M [00:00<00:00, 106MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/working/data/labels/train... 11627 images, 0 backgrounds, 6 corrupt: 100%|██████████| 11633/11633 [00:09<00:00, 1206.52it/s]






[34m[1mtrain: [0mNew cache created: /kaggle/working/data/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/working/data/labels/val... 1312 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1312/1312 [00:01<00:00, 925.99it/s] 






[34m[1mval: [0mNew cache created: /kaggle/working/data/labels/val.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.0002, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 864 train, 864 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 15 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/15      12.2G     0.7778      3.594      1.203         24        864: 100%|██████████| 727/727 [11:49<00:00,  1.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.08it/s]


                   all       1312       2145      0.184      0.259      0.161      0.138

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/15      12.4G     0.7676      2.527      1.175         51        864: 100%|██████████| 727/727 [11:43<00:00,  1.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:39<00:00,  1.05it/s]


                   all       1312       2145      0.299      0.342      0.259      0.219

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/15      12.4G     0.7752      2.271      1.173         49        864: 100%|██████████| 727/727 [11:41<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:36<00:00,  1.11it/s]


                   all       1312       2145      0.307      0.372      0.313      0.256

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/15      12.4G     0.7538      2.073      1.161         31        864: 100%|██████████| 727/727 [11:37<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.11it/s]


                   all       1312       2145      0.496      0.386       0.43      0.367

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/15      12.3G     0.7246      1.907      1.138         44        864: 100%|██████████| 727/727 [11:42<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.10it/s]


                   all       1312       2145      0.539      0.456      0.502      0.436
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/15      12.4G     0.6193      1.663      1.078         20        864: 100%|██████████| 727/727 [11:39<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:38<00:00,  1.06it/s]


                   all       1312       2145      0.607      0.465      0.536      0.468

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/15      12.3G     0.5774      1.474      1.048         14        864: 100%|██████████| 727/727 [11:36<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.09it/s]


                   all       1312       2145      0.594      0.541      0.597      0.527

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/15      12.3G     0.5203       1.19      1.005         14        864: 100%|██████████| 727/727 [11:36<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:36<00:00,  1.13it/s]

                   all       1312       2145      0.698      0.599      0.677      0.604






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/15      12.4G      0.493      1.044     0.9878         15        864: 100%|██████████| 727/727 [11:36<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.10it/s]

                   all       1312       2145      0.711      0.633      0.713      0.634






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/15      12.3G      0.468     0.9399     0.9704         14        864: 100%|██████████| 727/727 [11:35<00:00,  1.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:35<00:00,  1.15it/s]


                   all       1312       2145      0.797       0.63      0.744       0.67

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/15      12.4G     0.4501     0.8472     0.9563         15        864: 100%|██████████| 727/727 [11:35<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:36<00:00,  1.13it/s]

                   all       1312       2145      0.783      0.664      0.762       0.69






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/15      12.3G      0.422      0.744     0.9374         25        864: 100%|██████████| 727/727 [11:34<00:00,  1.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:36<00:00,  1.12it/s]


                   all       1312       2145       0.79      0.693      0.789      0.722

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/15      12.4G     0.4045     0.6554     0.9261         17        864: 100%|██████████| 727/727 [11:38<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:36<00:00,  1.12it/s]

                   all       1312       2145      0.845       0.67      0.796      0.727






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/15      12.3G     0.3829     0.5799      0.913         20        864: 100%|██████████| 727/727 [11:37<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:35<00:00,  1.15it/s]

                   all       1312       2145      0.831      0.713      0.821      0.751






15 epochs completed in 3.087 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 52.1MB
Optimizer stripped from runs/detect/train/weights/best.pt, 52.1MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.1.14 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)
Model summary (fused): 218 layers, 25866394 parameters, 0 gradients, 78.8 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [00:37<00:00,  1.10it/s]


                   all       1312       2145      0.828      0.713      0.821      0.751
                EF2000       1312         51      0.776      0.549      0.752      0.709
                   F16       1312        120      0.749      0.542      0.729      0.628
                   F18       1312        106      0.792      0.783       0.86      0.739
                    P3       1312         23      0.893      0.652      0.786      0.775
                 A400M       1312         38      0.864      0.711      0.811      0.762
                   B52       1312         64      0.893      0.786      0.894      0.796
               Tornado       1312         45      0.746        0.6       0.65      0.613
                  Tu95       1312         22      0.897       0.79      0.849      0.777
                  Su34       1312         30      0.903      0.567      0.743      0.661
                   F15       1312        108      0.649       0.75      0.819      0.758
                   F3

In [7]:
# Show the final validation metrics returned by training.
print("results")
print(results.results_dict)
res_dict = results.results_dict
print(res_dict.values())

# Look every metric up by its key instead of unpacking the dict values by
# position, so an added or reordered entry cannot silently shift the scores.
score_precision = res_dict['metrics/precision(B)']
score_recall = res_dict['metrics/recall(B)']
score_mAP50 = res_dict['metrics/mAP50(B)']
score_mAP50_95 = res_dict['metrics/mAP50-95(B)']
score_fitness = res_dict['fitness']

# Rounded mAP50, used to tag the exported archive.
round_score_mAP50 = round(score_mAP50, 3)


results
{'metrics/precision(B)': 0.8279442972975345, 'metrics/recall(B)': 0.7134903486440998, 'metrics/mAP50(B)': 0.8210655451973937, 'metrics/mAP50-95(B)': 0.7505190080902957, 'fitness': 0.7575736618010055}
dict_values([0.8279442972975345, 0.7134903486440998, 0.8210655451973937, 0.7505190080902957, 0.7575736618010055])


In [8]:
# Export working dir
import shutil
import datetime
import pytz

directory_path = "/kaggle/working/runs"

# Specify Thailand timezone
thailand_tz = pytz.timezone('Asia/Bangkok')

# Get current time in Thailand
now_thailand = datetime.datetime.now(thailand_tz)
date_string = now_thailand.strftime("%Y-%m-%d")
print(date_string)

output_filename = f"""skw_notebook_{date_string}_{string_score_mAP50}"""
print("zipping file ", output_filename)
shutil.make_archive(output_filename, 'zip', directory_path)
print("zipping file completed")
%ls


2024-02-18
zipping file  skw_notebook_2024-02-18_0821
zipping file completed
[0m[01;34mdata[0m/  [01;34mruns[0m/  skw_notebook_2024-02-18_0821.zip  yolov8m.pt  yolov8n.pt


In [9]:
import os
import shutil


def remove_files(path):
    """Delete every entry directly inside ``path``, keeping ``path`` itself.

    Real sub-directories are removed recursively. Everything else (regular
    files and symbolic links, including dangling links and links that point
    to directories) is unlinked without touching the link target.

    Args:
        path: Directory whose contents are deleted.
    """
    for item in os.listdir(path):
        item_path = os.path.join(path, item)

        # shutil.rmtree refuses to operate on a symlink, and a dangling link is
        # neither a file nor a directory, so links are always unlinked instead.
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        else:
            os.remove(item_path)

    print(f"All files in {path} directory have been cleared.")

    
    
# Hard-coded directory whose contents remove_files() would delete.
path_to_delete = '/kaggle/working/'

# The clean-up call is currently disabled; uncomment it to clear the directory.
# remove_files(path_to_delete)

print("end")

end
