# YOLOv8目标检测使用范例——风机识别

In [1]:
import sys
from osgeo import gdal
from ultralytics import YOLO
import cv2
import numpy as np
import time
import random
import yaml
import json
import logging
import os
from shapely import Polygon, MultiPolygon
from tqdm import tqdm
import requests
sys.path.append('../../ultralytics/dataset_preparation')
sys.path.append('../../ultralytics/inference/utils')
import yolo_dataset_utils
import split_image_v1
import split_image_v2
import yolov8_seg_handle

# Use the root logger and set its threshold to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# File-name suffixes accepted as images; passed to str.endswith when scanning a folder.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.gif', '.webp')


## 1.数据处理
### 下载标注数据集
#### 数据集格式如下：
风机数据
____风机1
____风机2
________标注内容
____________图片1标注.shp
____________图片2标注.shp
________图片1.tif
________图片2.tif
____风机3
...

其中，风机1、2等代表标注的点，里面的图片代表每个点的标注期

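运行以下代码下载并解压数据集。注意：代码中只定义了下载函数，需要先用实际的 Google Drive 文件 ID 调用 `download_file_from_google_drive(文件ID, "wind_turbine.zip")` 得到 `wind_turbine.zip`，解压部分才能正常执行。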

In [None]:
def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file and save it to ``destination``.

    Args:
        id: Google Drive file id. The parameter name shadows the builtin but
            is kept so existing keyword callers keep working.
        destination: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the final response carries an HTTP error
            status (previously the error page was silently saved to disk).
    """
    URL = "https://docs.google.com/uc?export=download&confirm=1"

    # The context manager closes the session so pooled connections are released.
    with requests.Session() as session:
        response = session.get(URL, params={"id": id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # A download_warning cookie was set: repeat the request with its
            # value as the confirm token.
            response.close()
            params = {"id": id, "confirm": token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of writing an error page as the dataset archive.
        response.raise_for_status()
        save_response_content(response, destination)


def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object exposing a ``cookies`` mapping (e.g. a
            ``requests.Response``).

    Returns:
        The cookie value, or ``None`` when no cookie name starts with
        ``download_warning``.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith("download_warning")
    )
    return next(matching_values, None)


def save_response_content(response, destination):
    """Write the streamed body of ``response`` to the file ``destination``.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            bytes chunks (e.g. a streamed ``requests.Response``).
        destination: Path of the file to create or overwrite.
    """
    chunk_size = 32768

    with open(destination, "wb") as out_file:
        # Empty chunks are skipped, exactly as the keep-alive filter did.
        out_file.writelines(
            piece for piece in response.iter_content(chunk_size) if piece
        )

import os
import shutil
import zipfile

zip_path = "wind_turbine.zip"
extract_path = "./wind_turbine/风机数据"

# Bit 11 of the general-purpose flags marks an entry name stored as UTF-8.
# zipfile has already decoded such names correctly; only unflagged names
# (decoded by zipfile as cp437) need the GBK repair below.
ZIP_UTF8_FLAG = 0x800

extract_root = os.path.realpath(extract_path)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    for file_info in zip_ref.infolist():
        file_name = file_info.filename

        if not file_info.flag_bits & ZIP_UTF8_FLAG:
            # Recover the raw bytes of the name, then try the encodings the
            # archive was most likely written with.
            raw_name = file_name.encode('cp437')
            try:
                file_name = raw_name.decode('gbk')
            except UnicodeDecodeError:
                file_name = raw_name.decode('utf-8', 'ignore')  # Fallback to UTF-8

        extracted_path = os.path.join(extract_path, file_name)

        # Refuse entries such as "../../x" that would land outside extract_path.
        resolved_path = os.path.realpath(extracted_path)
        if os.path.commonpath([extract_root, resolved_path]) != extract_root:
            raise ValueError(f"Unsafe path in archive: {file_info.filename!r}")

        # Create directory structure
        if file_info.is_dir():
            os.makedirs(extracted_path, exist_ok=True)
        else:
            os.makedirs(os.path.dirname(extracted_path), exist_ok=True)

            # Stream the member to disk instead of reading it fully into memory.
            with zip_ref.open(file_info) as src, open(extracted_path, 'wb') as f:
                shutil.copyfileobj(src, f)



运行这个脚本，将数据集转化为yolo格式的训练集

In [None]:
data_path = 'wind_turbine'
# First convert the annotations to GeoJSON
yolo_dataset_utils.batch_convert_shapely(data_path)
# Then generate YOLO-format labels per class
yolo_dataset_utils.wind_turbine_geojson_classify(data_path)

现在，wind_turbine/wind_turbine里面就是我们要的风机数据集。包括tif格式的图片和对应的txt格式文件。目前txt文件为矩形框，后面要转成锚框

运行以下代码，划分训练集、测试集与验证集

In [None]:
img_path = 'wind_turbine/wind_turbine/images'
txt_path = 'wind_turbine/wind_turbine/label_txt'
# Run the following script to split the data into training, validation and test sets
# (10% validation and 10% test, per the arguments below).
yolo_dataset_utils.split_train_val_test(img_path, txt_path, val_percentage=0.1, test_percentage=0.1)

运行以下代码，将大图按照一定比例分割成小图

In [None]:
# Run the tiling script for each of the train, val and test directories. Afterwards, manually delete every folder except images and txt_bbox, then rename txt_bbox to labels.
split_sizes = [[700, 700], [900, 900], [1100, 1100]] # the numbers are tile sizes in pixels
img_path = "wind_turbine/wind_turbine/train/training_img"
label_path = "wind_turbine/wind_turbine/train/training_label"
# split_image_v1 is the flat tiling algorithm: the tiles do not overlap
# split_image_v1(img_path, split_sizes, label_path, with_edge=False) # with_edge: whether to keep objects that lie on a tile edge
# split_image_v2 tiles with overlap: the tiles overlap one another
split_image_v2.split_images_segment_v2(img_path, split_sizes, label_path, with_edge=False) # with_edge: whether to keep objects that lie on a tile edge

# Run the following to convert the segmentation-format labels into object-detection bounding boxes
segment_label_path = "wind_turbine/wind_turbine/train/labels"
yolo_dataset_utils.txt2yolo_bbox(segment_label_path)

运行以下代码，打印出图片看看标注是否正确。确定正确后删除out文件夹

In [None]:
# Render the training images with their labels so the annotations can be checked by eye.
img_path = "wind_turbine/wind_turbine/train/images"
label_path = "wind_turbine/wind_turbine/train/labels"
yolo_dataset_utils.show_batch_image_bbox(img_path, label_path)

至此，数据集准备工作完毕。

## 2.模型训练
模型训练通过命令行执行。需要配置一个.yaml文件，说明数据集的位置。yaml文件范例见ultralytics/training/config

运行如下代码下载预训练模型到本地


In [None]:
# 运行如下代码下载预训练模型到本地
def download_file(url, filename):
    """Download ``url`` and store the response body at ``filename``.

    The body is streamed to a temporary ``.part`` file and renamed on
    success, so an interrupted transfer never leaves a truncated file at
    ``filename``. Missing parent directories are created.

    Args:
        url: Address of the file to fetch.
        filename: Local path to write the downloaded bytes to.

    Returns:
        None. A message is printed on success and on a non-200 status.
    """
    # Stream so a large checkpoint is never held in memory all at once; the
    # timeout keeps a stalled connection from hanging the notebook forever.
    with requests.get(url, stream=True, timeout=60) as response:
        # Check if the request was successful
        if response.status_code != 200:
            print("Failed to download file")
            return

        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        partial_path = filename + ".part"
        with open(partial_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                if chunk:
                    file.write(chunk)

    # Only expose the file under its final name once it is complete.
    os.replace(partial_path, filename)
    print(f"File downloaded successfully: {filename}")

# Example usage: fetch the YOLOv8x pretrained weights into the local pre_models folder.
url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt"  # Replace with the actual URL
filename = "../../ultralytics/training/pre_models/yolov8x.pt"      # Replace with your desired file name
download_file(url, filename)

config配置完成后，在命令行运行如下代码开启训练过程：
*yolo task=detect mode=train project=wind_turbine name=wind_turbine model=ultralytics/training/pre_models/yolov8x.pt data=ultralytics/training/config/wind_turbine.yaml  batch=16  epochs=401  device=0  patience=50  save_period=50 degrees=45 flipud=0.5 fliplr=0.5*

其中，degrees、flipud 和 fliplr 为数据增强的参数。更多训练与数据增强参数详见官网配置文档：[https://docs.ultralytics.com/usage/cfg/](https://docs.ultralytics.com/usage/cfg/)

## 3.模型预测
模型预测代码示例如下：

In [4]:
# Use the root logger and set its threshold to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# File-name suffixes accepted as images; passed to str.endswith when scanning a folder.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.gif', '.webp')

# 读取预测用config文件
def get_config(config_path="D:/Code/gitcode/train_pipeline/ultralytics/inference/config/wind_turbine_config.yaml"):
    """Load the inference configuration from a YAML file.

    Args:
        config_path: Path of the YAML file. Defaults to the location that
            was previously hard-coded, so ``get_config()`` behaves as before.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.
    """
    # The context manager closes the handle, which the original never did.
    with open(config_path, 'r', encoding='utf-8') as config_file:
        return yaml.safe_load(config_file)

# 图片预处理，将大图按照一定比例切割成小图
def pre_handle(config_dict):
    """Pre-process the input image: cut the large image into smaller tiles.

    Args:
        config_dict: Configuration providing ``image_path``, ``split_arr``
            and ``coordinates``.

    Returns:
        A new dict with ``pixel_arr`` (result of
        ``yolov8_seg_handle.lon_lat_to_pixel``) and ``split_images_dict``
        (result of ``yolov8_seg_handle.split_image_large``).
    """
    source_image = config_dict['image_path']
    tile_sizes = config_dict['split_arr']
    region = config_dict['coordinates']

    # presumably maps the lon/lat region onto pixel coordinates of the image
    # -- verify against yolov8_seg_handle
    pixel_region = yolov8_seg_handle.lon_lat_to_pixel(source_image, region)

    # Tile the image
    logging.info('开始切割图片')
    tiles = yolov8_seg_handle.split_image_large(source_image, tile_sizes, pixel_region)
    logging.info("切割图片完成!!!")

    # Intermediate results handed on to the later pipeline stages
    return {'pixel_arr': pixel_region, 'split_images_dict': tiles}

# 预测后处理，按照比面积或者比置信度将重叠的锚框去重
def after_handle_bbox(config_dict, mid_dict, method='area'):
    """Post-process predictions: drop boxes that heavily overlap another box.

    Every pair of intersecting boxes is examined. When the intersection
    covers at least ``overlap_percent`` of either box, one of the two is
    discarded: the smaller one when ``method == 'area'``, otherwise the one
    with the lower weight. A box that has already been discarded no longer
    suppresses other boxes.

    Args:
        config_dict: Configuration providing ``overlap_percent``.
        mid_dict: Intermediate results; ``mid_dict['predict_result']`` must
            provide ``all_box_arr``, ``weight_arr`` and ``label_arr``.
        method: ``'area'`` keeps the larger box; any other value keeps the
            box with the higher weight.

    Returns:
        ``mid_dict`` with ``res_result`` set to a dict holding ``res_box``,
        ``res_weight`` and ``res_label`` for the surviving boxes.
    """
    import geopandas as gpd
    logging.info('开始后处理')
    predict_result = mid_dict['predict_result']
    all_box_arr = predict_result['all_box_arr']
    weight_arr = predict_result['weight_arr']
    label_arr = predict_result['label_arr']

    overlap_percent = config_dict['overlap_percent']

    # Nothing was detected: skip the spatial join entirely.
    if len(all_box_arr) == 0:
        mid_dict['res_result'] = {'res_box': [], 'res_weight': [], 'res_label': []}
        logging.info('后处理完成')
        return mid_dict

    # Create GeoDataFrame
    gdf = gpd.GeoDataFrame({
        'weight': weight_arr,
        'label': label_arr,
        'box': all_box_arr,
        'geometry': [Polygon(p) for p in all_box_arr]
    })

    # Spatial self-join to find overlapping polygons
    joined_gdf = gpd.sjoin(gdf, gdf, how='inner', predicate='intersects')
    # Indices of boxes that have been suppressed
    processed_indices = set()

    # Iterate the (left index, right index) pairs directly instead of
    # materialising a full row object per pair.
    index_pairs = zip(joined_gdf.index, joined_gdf['index_right'])
    for row1, row2 in tqdm(index_pairs, total=joined_gdf.shape[0]):
        if row1 == row2 or row1 in processed_indices or row2 in processed_indices:
            continue

        poly1 = gdf.at[row1, 'geometry']
        poly2 = gdf.at[row2, 'geometry']
        area1 = poly1.area
        area2 = poly2.area
        over_area = poly1.intersection(poly2).area

        # A degenerate zero-area box yields ratio 0 instead of raising
        # ZeroDivisionError.
        ratio1 = over_area / area1 if area1 else 0.0
        ratio2 = over_area / area2 if area2 else 0.0
        if ratio1 < overlap_percent and ratio2 < overlap_percent:
            continue

        if method == 'area':
            # Keep the larger box
            keep_first = area1 >= area2
        else:
            # Keep the higher-weight box; scalar lookups avoid rebuilding
            # the whole weight list on every iteration.
            keep_first = gdf.at[row1, 'weight'] >= gdf.at[row2, 'weight']
        processed_indices.add(row2 if keep_first else row1)

    # Remove suppressed polygons
    gdf = gdf.drop(index=list(processed_indices))
    # Reconstruct the result
    mid_dict['res_result'] = {
        'res_box': gdf['box'].tolist(),
        'res_weight': gdf['weight'].tolist(),
        'res_label': gdf['label'].tolist()
    }
    logging.info('后处理完成')

    return mid_dict

# 生成geojson文件
def _geojson_default(value):
    """Convert numpy scalars to builtin Python values for ``json.dumps``."""
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable')


def create_geojson(config_dict, mid_dict):
    """Write the final boxes as a GeoJSON file next to the source image.

    The file is written to ``<image dir>/out/<image stem>_predict.geojson``.
    Pixel coordinates of each box are mapped to georeferenced coordinates
    with the image's full GDAL affine geotransform (rotation terms included).
    Nothing is written when ``config_dict['out_flag']`` is falsy.

    Args:
        config_dict: Configuration providing ``start_time``, ``image_path``,
            ``out_flag`` and ``class_dict`` (label -> class name).
        mid_dict: Intermediate results; ``mid_dict['res_result']`` must
            provide ``res_box``, ``res_weight`` and ``res_label``.

    Returns:
        ``mid_dict``; when output is enabled, ``mid_dict['res']`` holds the
        serialized GeoJSON text.

    Raises:
        RuntimeError: If GDAL cannot open the image.

    Note:
        The text is produced with ``json.dumps``, so ``None`` values become
        JSON ``null`` (the previous string replacement emitted ``"None"``).
    """
    start_time = config_dict['start_time']
    image_path = config_dict['image_path']
    out_flag = config_dict['out_flag']
    class_names = config_dict['class_dict']

    res_result = mid_dict['res_result']
    res_box = res_result['res_box']
    res_weight = res_result['res_weight']
    res_label = res_result['res_label']
    if not out_flag:
        return mid_dict

    # The output path is derived from the image path; splitext keeps file
    # names containing more than one dot intact.
    image_dir, image_name = os.path.split(image_path)
    stem = os.path.splitext(image_name)[0]
    out_dir = os.path.join(image_dir, 'out')
    show_path = os.path.join(out_dir, stem + '_predict.geojson')

    gdal.AllRegister()
    dataset = gdal.Open(image_path)
    if dataset is None:
        raise RuntimeError(f'GDAL could not open image: {image_path}')
    gt = dataset.GetGeoTransform()

    # NOTE(review): the CRS is declared as CRS84 regardless of the raster's
    # own projection -- confirm the inputs are lon/lat rasters.
    res_dict = {
        "type": "FeatureCollection",
        "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}},
        "features": []
    }
    # The last 8 characters of the stem are stored as the feature date.
    date = stem[-8:]
    for polygon, label, weight in zip(res_box, res_label, res_weight):
        name = class_names[label]
        # Full affine transform: for north-up images gt[2] and gt[4] are 0
        # and this reduces to the scale-and-offset form used previously.
        coordinate = [
            [gt[0] + xy[0] * gt[1] + xy[1] * gt[2],
             gt[3] + xy[0] * gt[4] + xy[1] * gt[5]]
            for xy in polygon
        ]
        # Close the ring by repeating the first vertex.
        coordinate.append(coordinate[0])
        feature = {"type": "Feature",
                   "properties": {"Id": 0, "name": name, "date": date, "area": 0.0, "label": label, "result": 1,
                                  "XMMC": "", "HYMC": "", "weight": weight, "bz": 0},
                   "geometry": {"type": "Polygon", "coordinates": [coordinate]}}
        res_dict['features'].append(feature)

    end_time = time.time()
    consume_time = end_time - start_time
    # NOTE(review): the original stored consume_time in res_dict only after
    # serialization, so it never reached the file; it is still only logged.
    res = json.dumps(res_dict, ensure_ascii=False, default=_geojson_default)
    mid_dict['res'] = res

    # Write the GeoJSON file, creating the output folder when needed.
    os.makedirs(out_dir, exist_ok=True)
    with open(show_path, 'w', encoding='utf8') as out_file:
        out_file.write(res)

    logging.info('图片路径：' + image_path + ' 总耗时（单位s）：' + str(consume_time))

    return mid_dict

# 绘制锚框
def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    """Draw one bounding box, with an optional text label, onto ``img`` in place.

    Args:
        x: Box corners as ``(x1, y1, x2, y2)``; values are cast to int.
        img: Image array that is drawn on.
        color: Colour of the box; a random colour is used when falsy.
        label: Optional text drawn on a filled background at the first corner.
        line_thickness: Line thickness; when falsy it is derived from the
            image size.
    """
    thickness = line_thickness
    if not thickness:
        # line/font thickness derived from the image dimensions
        thickness = round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Filled background rectangle sized to the text, anchored at the first corner
    label_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    cv2.rectangle(img, top_left, label_corner, color, -1, cv2.LINE_AA)
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)

# 生成预测后的示意图
def show_images(config_dict, mid_dict):
    """Save a copy of the source image with the final boxes drawn on it.

    The picture is written to
    ``<image dir>/out/<image stem>_show/<image stem>_bbox_<model name><ext>``.

    Args:
        config_dict: Configuration providing ``image_path``, ``model_path``,
            ``show_flag`` and ``class_dict`` (label -> class name).
        mid_dict: Intermediate results; ``mid_dict['res_result']`` must
            provide ``res_box``, ``res_weight`` and ``res_label``.

    Returns:
        ``0`` when ``show_flag`` is falsy or the image cannot be read,
        otherwise ``None``.
    """
    image_path = config_dict['image_path']
    model_path = config_dict['model_path']
    show_flag = config_dict['show_flag']
    class_dict = config_dict['class_dict']
    if not show_flag:
        return 0
    res_result = mid_dict['res_result']
    res_box = res_result['res_box']
    res_weight = res_result['res_weight']
    res_label = res_result['res_label']

    # splitext keeps file names containing more than one dot intact.
    image_dir, image_name = os.path.split(image_path)
    stem, ext = os.path.splitext(image_name)
    model_name = os.path.basename(model_path).split('.')[0]
    show_path = os.path.join(image_dir, 'out', stem + '_show')
    # Create the output folder when it does not exist yet
    os.makedirs(show_path, exist_ok=True)

    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(image_path)
    if img is None:
        logger.warning('Could not read image, skipping preview: %s', image_path)
        return 0

    # Draw on a copy of the original image
    img_bbox = img.copy()

    for arr, label_id, confidence in zip(res_box, res_label, res_weight):
        label = class_dict[label_id]
        # assumes vertices 0 and 2 are opposite corners of the box -- TODO confirm
        xyxy = [arr[0][0], arr[0][1], arr[2][0], arr[2][1]]

        plot_one_box(xyxy, img_bbox, label=f'{label} {round(confidence, 2)}', color=(0, 0, 255), line_thickness=2)

    cv2.imwrite(os.path.join(show_path, f'{stem}_bbox_{model_name}{ext}'), img_bbox)

    logger.info('生成图片完成')

# 只预测一张图片
def single_predict(image_path, config_dict):
    """Run the whole prediction pipeline on a single image.

    Args:
        image_path: Path of the image to predict; also stored in
            ``config_dict['image_path']``.
        config_dict: Inference configuration shared by all stages.
    """
    config_dict['image_path'] = image_path

    # Pre-processing produces the intermediate-result dict
    state = pre_handle(config_dict)

    # Model prediction, post-processing and GeoJSON output each take the
    # config and the current intermediate results and return the updated ones
    stages = (
        yolov8_seg_handle.model_predict_bbox,
        after_handle_bbox,
        create_geojson,
    )
    for stage in stages:
        state = stage(config_dict, state)

    # Finally render the annotated picture
    show_images(config_dict, state)

# 预测多张图片
def batch_predict(image_file_path, config_dict):
    """Run ``single_predict`` on every image file in a folder.

    Files are processed in sorted name order. The extension check is
    case-insensitive, so names such as ``IMG.TIF`` are included.

    Args:
        image_file_path: Folder containing the images.
        config_dict: Inference configuration passed to ``single_predict``.
    """
    for image_name in sorted(os.listdir(image_file_path)):
        if not image_name.lower().endswith(IMAGE_EXTENSIONS):
            continue
        print('预测，', image_name)
        # Joined with '/' because the downstream helpers split paths on '/'.
        image_path = image_file_path + '/' + image_name
        single_predict(image_path, config_dict)
def main():
    """Entry point: load the config and predict one hard-coded test image."""
    started_at = time.time()
    # 1. Load the configuration
    config_dict = get_config()
    image_path = "D:/Code/gitcode/train_pipeline/examples/yolov8_wind_turbine/wind_turbine/wind_turbine/test/test_img/wind_turbine_fengji9_20220906.tif"
    # Empty region by default; example value: '[[119.64718, 34.44381], [119.68986, 34.41249]]'
    region_json = '[]'

    overrides = (
        ('out_flag', False),
        ('start_time', started_at),
        ('image_path', image_path),
        ('coordinates', json.loads(region_json)),
    )
    for key, value in overrides:
        config_dict[key] = value

    single_predict(image_path, config_dict)

    # Folder for batch mode; the call is left disabled
    image_file_path = "D:/Code/Datasets/wind_turbine/dataset20240103/wind_turbine_blade/test_img"
    # batch_predict(image_file_path, config_dict)



if __name__ == '__main__':
    main()



INFO:root:开始切割图片
INFO:root:切割图片完成!!!
INFO:root:开始模型预测
  0%|          | 0/14 [00:00<?, ?it/s]
0: 640x640 1 wind turbine, 1: 640x640 (no detections), 2: 640x640 1 wind turbine, 3: 640x640 (no detections), 4: 640x640 1 wind turbine, 5: 640x640 (no detections), 6: 640x640 1 wind turbine, 7: 640x640 1 wind turbine, 8: 640x640 (no detections), 9: 640x640 1 wind turbine, 10: 640x640 (no detections), 11: 640x640 (no detections), 12: 640x640 (no detections), 13: 640x640 (no detections), 14: 640x640 1 wind turbine, 15: 640x640 1 wind turbine, 333.7ms
Speed: 2.3ms preprocess, 20.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)
  7%|▋         | 1/14 [00:01<00:15,  1.19s/it]
0: 640x640 1 wind turbine, 1: 640x640 1 wind turbine, 2: 640x640 1 wind turbine, 3: 640x640 1 wind turbine, 4: 640x640 2 wind turbines, 5: 640x640 1 wind turbine, 6: 640x640 1 wind turbine, 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 (no detections), 11: 64