In [47]:
# 配置环境和下载 YOLOv5
import torch
import torchvision
import wandb
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import json
import yaml
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os


# 查看 torch 版本和 GPU 可用性
# print(f"Setup complete. Using torch {torch.__version__}, torchvision {torchvision.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [48]:
# Set up the TensorBoard visualization dashboard (optional)
# load_ext tensorboard
# Read the log files in this directory for visualization
# tensorboard --logdir /kaggle/runs

In [49]:
# 配置 W&B 可视化平台
# 用来机器学习跟踪和可视化的工具，可以记录和可视化模型训练中的指标和超参数，模型结构等信息
# pip install -q --upgrade wandb  
# wandb.login()  # 登录认证，返回访问令牌，将其保存在本地，以便后续使用无需重复登录

In [50]:
# 3. Convert the bounding boxes from the COCO dataset format to the YOLO format.
# Load the COCO-format annotation JSON file.
json_file_path = '/root/autodl-tmp/data/cowboyoutfits/train.json'

# A context manager closes the file handle as soon as parsing is done
# (a bare json.load(open(...)) leaves the handle open).
with open(json_file_path, 'r') as json_file:
    data = json.load(json_file)

# Directory that will hold the generated YOLO-format label files.
yolo_anno_path = '/root/autodl-tmp/kaggle/training/yolo_anno/'

# exist_ok=True keeps the cell safe to re-run and removes the
# check-then-create gap of os.path.exists() followed by os.makedirs().
os.makedirs(yolo_anno_path, exist_ok=True)

In [51]:
# The annotation category ids are not contiguous, which causes errors later on,
# so map every COCO category id to a contiguous 0-based index: the position of
# the category inside data['categories'].
cate_id_map = {cate['id']: idx for idx, cate in enumerate(data['categories'])}

In [52]:
# Display the COCO category id -> contiguous class index mapping.
cate_id_map

{87: 0, 1034: 1, 131: 2, 318: 3, 588: 4}

In [53]:
# For comparison: the raw category entries from the annotation file.
data['categories']

[{'id': 87, 'name': 'belt', 'freebase_id': '/m/0176mf'},
 {'id': 1034, 'name': 'sunglasses', 'freebase_id': '/m/017ftj'},
 {'id': 131, 'name': 'boot', 'freebase_id': '/m/01b638'},
 {'id': 318, 'name': 'cowboy_hat', 'freebase_id': '/m/025rp__'},
 {'id': 588, 'name': 'jacket', 'freebase_id': '/m/032b3c'}]

In [54]:
# Convert a bounding box from COCO format to YOLO format.
def cc2yolo_bbox(img_width, img_height, bbox):
    """Turn a COCO ``[x, y, width, height]`` box into a normalized YOLO box.

    Args:
        img_width: Image width; horizontal values are multiplied by its reciprocal.
        img_height: Image height; vertical values are multiplied by its reciprocal.
        bbox: Indexable of four numbers. ``bbox[0]``/``bbox[1]`` are the box
            origin, ``bbox[2]``/``bbox[3]`` its width and height.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each scaled by
        ``1 / img_width`` (horizontal) or ``1 / img_height`` (vertical).
    """
    # Reciprocals are computed first and multiplied in afterwards.
    scale_x = 1. / img_width
    scale_y = 1. / img_height

    box_w = bbox[2]
    box_h = bbox[3]

    # Box centre = origin + half of the extent.
    center_x = bbox[0] + box_w / 2.0
    center_y = bbox[1] + box_h / 2.0

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)

In [55]:
# Convert every COCO annotation (bounding box) to YOLO format and build a CSV
# file (columns: id, file_name) that lists the training images.

# Group the annotations by image id once, so each image looks up its own boxes
# directly instead of rescanning the whole annotation list. Per-image
# annotation order is unchanged.
annos_by_image = {}
for anno in data['annotations']:
    annos_by_image.setdefault(anno['image_id'], []).append(anno)

# The with-blocks close both files even if a conversion raises midway.
with open('/root/autodl-tmp/kaggle/training/train.csv', 'w') as f:
    # Header row of the CSV file: id, file_name.
    f.write('id,file_name\n')

    # Walk over every image described in the COCO data.
    for image in tqdm(data['images']):
        filename = image['file_name']
        img_width = image['width']
        img_height = image['height']
        img_id = image['id']
        # Label file name: the text before the first '.' of the image name + '.txt'.
        yolo_txt_name = filename.split('.')[0] + '.txt'

        # Record the image id and file name in the CSV file.
        f.write('{},{}\n'.format(img_id, filename))

        # A label file is created for every image, even when it has no
        # annotations (the file is then empty).
        with open(os.path.join(yolo_anno_path, yolo_txt_name), 'w') as yolo_txt_file:
            for anno in annos_by_image.get(img_id, []):
                # anno['bbox'] is [x, y, width, height].
                yolo_bbox = cc2yolo_bbox(img_width, img_height, anno['bbox'])
                # One line per box: "<class index> <x> <y> <w> <h>".
                yolo_txt_file.write('{} {} {} {} {}\n'.format(cate_id_map[anno['category_id']], *yolo_bbox))

100%|██████████| 3062/3062 [00:02<00:00, 1354.44it/s]


In [56]:
# Load and preview the training-set CSV that was just generated.
train = pd.read_csv('/root/autodl-tmp/kaggle/training/train.csv')
train.head()

Unnamed: 0,id,file_name
0,9860841628484337660,88d8bf3754317ffc.jpg
1,15984033263460081658,ddd2b190ea90dffa.jpg
2,76077631043502082,010e4833cdb38002.jpg
3,18065680256228130812,fab6307a1a43fffc.jpg
4,9491379842992996352,83b827ae01e68000.jpg


In [57]:
# 4. Split the data into a training set and a validation set.
# Random split with a fixed seed (random_state=233) so it is repeatable;
# test_size=0.10 puts 10% of the rows into the validation set.
train_df, valid_df = train_test_split(train, test_size=0.10, random_state=233)
print(f'Size of total training images: {len(train)}, training images: {len(train_df)}. validation images: {len(valid_df)}')

Size of total training images: 3062, training images: 2755. validation images: 307


In [58]:
# Tag every sample with the subset it belongs to ('train' or 'valid') in a new
# 'split' column, then stack both subsets into a single frame `df`.
# assign() returns a new frame rather than writing through .loc into the
# objects returned by train_test_split, which can raise pandas'
# SettingWithCopyWarning; it also makes the cell idempotent on re-run.
train_df = train_df.assign(split='train')
valid_df = valid_df.assign(split='valid')
df = pd.concat([train_df, valid_df]).reset_index(drop=True)
df.sample(10)

Unnamed: 0,id,file_name,split
1908,6313228391827179284,579d17c600c50714.jpg,train
1636,10846323739029586428,9685e1f96fbec1fc.jpg,train
361,13386314881303920819,b9c5be75f7a540b3.jpg,train
2186,6443506009312068821,596bee93892404d5.jpg,train
1393,11513085654045198804,9fc6b267460289d4.jpg,train
1314,2104996849403692053,1d36737b56060415.jpg,train
753,1494888502052588152,14bee92c1fa90a78.jpg,train
1412,1932289094778455674,1ad0deb1a9a1867a.jpg,train
767,12096828553779053656,a7e0917a58cd8458.jpg,train
634,2155400085207516008,1de984f52ba40368.jpg,train


In [59]:
# Prepare the folder layout used for YOLO custom training:
#   cowboy/{images,labels}/{train,valid}
# Absolute paths are used so the folders are created exactly where the copy
# step and data.yaml point (/root/autodl-tmp/kaggle/training/cowboy/...),
# independent of the notebook's current working directory.
for kind in ('images', 'labels'):
    for subset in ('train', 'valid'):
        os.makedirs(f'/root/autodl-tmp/kaggle/training/cowboy/{kind}/{subset}', exist_ok=True)

In [60]:
# Copy each image and its YOLO label file into the folder of its subset
# (training or validation).
images_src = '/root/autodl-tmp/data/cowboyoutfits/images'
labels_src = '/root/autodl-tmp/kaggle/training/yolo_anno'
dataset_root = '/root/autodl-tmp/kaggle/training/cowboy'

# Iterate over the two needed columns directly; bracket access avoids the
# ambiguous attribute lookup `row.split` and the per-row `df.loc[i]` Series.
for file_name, split in tqdm(zip(df['file_name'], df['split']), total=len(df)):
    stem = file_name.split('.')[0]  # text before the first '.' of the file name
    # Anything not marked 'train' goes to the validation folders.
    subset = 'train' if split == 'train' else 'valid'
    copyfile(f'{images_src}/{stem}.jpg', f'{dataset_root}/images/{subset}/{stem}.jpg')
    copyfile(f'{labels_src}/{stem}.txt', f'{dataset_root}/labels/{subset}/{stem}.txt')

100%|██████████| 3062/3062 [00:02<00:00, 1376.26it/s]


In [61]:
# 6. Create the dataset configuration file: paths of the training and
# validation images, the number of classes and the class names.

# Class names, listed in the same order as data['categories'], i.e. the order
# of the class indices stored in cate_id_map.
class_names = ['belt', 'sunglasses', 'boot', 'cowboy_hat', 'jacket']

data_yaml = dict(
    train = '/root/autodl-tmp/kaggle/training/cowboy/images/train/',
    val = '/root/autodl-tmp/kaggle/training/cowboy/images/valid/',
    nc = len(class_names),  # number of classes, derived so it cannot drift from names
    names = class_names,    # list of class names
)

# we will make the file under the yolov5/data/ directory
with open('/root/autodl-tmp/kaggle//training/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)

In [62]:
# Hyperparameters
# Hyperparameters for COCO training from scratch
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
# All values below are defaults (left untuned).
# NOTE(review): the training command in this notebook passes no --hyp option and
# its log reports hyp=data/hyps/hyp.scratch-low.yaml, so this listing appears to
# be for reference only -- confirm before relying on these numbers.


lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 1.0  # image mosaic (probability)
mixup: 0.0  # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)

In [63]:
# Training parameters and the run name used by the training cell.

BATCH_SIZE = 32     # batch size: number of samples per training step (passed to train.py --batch)
EPOCHS = 5          # number of training epochs (passed to train.py --epochs)
MODEL = 'yolov5m.pt'  # model name, i.e. the YOLOv5 pretrained weights file (passed to --weights)
name = f'{MODEL}_BS_{BATCH_SIZE}_EP_{EPOCHS}'   # run identifier built from model name, batch size and epoch count


In [66]:
# 8. Training
# The training process can be visualized while it runs.
# Change into the yolov5 folder first, since train.py is invoked by relative path.
%cd /root/autodl-tmp/kaggle/training/yolov5

# Training script: {BATCH_SIZE}, {EPOCHS}, {MODEL} and {name} are interpolated
# from the notebook variables of the same names.
!python train.py --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights {MODEL} \
                 --save-period 1 \
                 --project /root/autodl-tmp/kaggle/working/kaggle-cowboy \
                 --name {name} \
                 --cache images

/root/autodl-tmp/kaggle/training/yolov5
[34m[1mwandb[0m: Currently logged in as: [33mboke[0m ([33mcompetition_ml[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mtrain: [0mweights=yolov5m.pt, cfg=, data=data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=5, batch_size=32, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=images, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=/root/autodl-tmp/kaggle/working/kaggle-cowboy, name=yolov5m.pt_BS_32_EP_5, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-203-g0897415 Python-3.10.8 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 2080 Ti,

In [None]:
# 使用 W&B 在线展示训练结果，并在本地进行可视化
# 将训练结果压缩成ZIP，便于可视化
# zip -r /root/autodl-tmp/kaggle/working/output.zip /root/autodl-tmp/kaggle/working/kaggle-cowboy