# 基于YOLO的车牌字符识别

本文档包含使用YOLO模型进行车牌字符识别的完整训练流程代码。
## 安装和导入依赖

In [None]:
import pandas as pd
import os
import requests
import zipfile
import shutil
from glob import glob
import json
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch
from tqdm.auto import tqdm

# 安装YOLO依赖（如果需要）
# pip install ultralytics

# 引入YOLO相关库
from ultralytics import YOLO

# Limit PyTorch resource usage.
# Cap the number of intra-op CPU threads PyTorch may use.
torch.set_num_threads(4)

# Report CUDA availability unconditionally so CPU-only runs also get feedback
# (inside the CUDA-only branch this line could only ever print True).
cuda_available = torch.cuda.is_available()
print(f"CUDA是否可用: {cuda_available}")

# Limit GPU memory usage (only when a GPU is present).
if cuda_available:
    torch.cuda.empty_cache()  # release cached blocks
    torch.cuda.set_per_process_memory_fraction(0.8)  # allow at most 80% of GPU memory
    # Query the device that is actually current instead of hard-coding index 0,
    # so the name and memory figures describe the device reported below.
    device_index = torch.cuda.current_device()
    print(f"CUDA设备数量: {torch.cuda.device_count()}")
    print(f"当前CUDA设备: {device_index}")
    print(f"CUDA设备名称: {torch.cuda.get_device_name(device_index)}")
    print(f"已分配GPU内存: {torch.cuda.memory_allocated(device_index) / 1024**2:.2f} MB")
    print(f"缓存的GPU内存: {torch.cuda.memory_reserved(device_index) / 1024**2:.2f} MB")

## 下载数据集

In [None]:
# Download every file listed in the manifest CSV, then unpack the image archives.
links = pd.read_csv('./mchar_data_list_0515.csv')    # adjust to the path on your machine
dataset_path = "./dataset"
print(f"数据集目录：{dataset_path}")
os.makedirs(dataset_path, exist_ok=True)

for file_name, link in zip(links['file'], links['link']):
    print(file_name, '\t', link)
    file_name = os.path.join(dataset_path, file_name)
    if os.path.exists(file_name):
        continue  # already downloaded on a previous run
    # Stream into a temporary ".part" file and rename on success, so an
    # interrupted download is never mistaken for a complete file on re-run.
    partial_name = file_name + '.part'
    with requests.get(link, stream=True, timeout=60) as response:
        # Fail loudly on HTTP errors instead of saving an error page as data.
        response.raise_for_status()
        with open(partial_name, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)
    os.replace(partial_name, file_name)

# Extract each image archive unless its target directory already exists.
zip_list = ['mchar_train', 'mchar_test_a', 'mchar_val']
for little_zip in zip_list:
    zip_name = os.path.join(dataset_path, little_zip)
    if not os.path.exists(zip_name):
        # The context manager closes the archive handle after extraction.
        with zipfile.ZipFile(os.path.join(dataset_path, f"{little_zip}.zip"), 'r') as zip_file:
            zip_file.extractall(path=dataset_path)

## 构建索引

In [None]:
# Locations of the image folders, label files and the sample submission file.
data_dir = {
    'train_data': f'{dataset_path}/mchar_train/',
    'val_data': f'{dataset_path}/mchar_val/',
    'test_data': f'{dataset_path}/mchar_test_a/',
    'train_label': f'{dataset_path}/mchar_train.json',
    'val_label': f'{dataset_path}/mchar_val.json',
    'submit_file': f'{dataset_path}/mchar_sample_submit_A.csv'
}

# glob() returns paths in an unspecified order; sort them so the sample image
# and the row order of the generated CSV files are reproducible across runs.
train_list = sorted(glob(data_dir['train_data']+'*.png'))
test_list = sorted(glob(data_dir['test_data']+'*.png'))
val_list = sorted(glob(data_dir['val_data']+'*.png'))
print('train image counts: %d'%len(train_list))
print('val image counts: %d'%len(val_list))
print('test image counts: %d'%len(test_list))

## 图像查看和数据分析工具

In [None]:
def view_image(img_path, show_label=True):
    """Show an image and, when available, overlay its annotated digit boxes.

    Args:
        img_path: Path of the image file to display.
        show_label: When True, look the image up in the train or val label
            JSON and draw every annotated box and digit in red.

    Label lookup is best-effort: a failure to read or parse the annotations is
    printed and the bare image is still shown.
    """
    # Open inside a context manager so the file handle is released promptly.
    with Image.open(img_path) as img:
        plt.figure(figsize=(6, 4))
        plt.imshow(img)
    plt.axis('off')

    label_key = None
    if show_label:
        # Decide the split from the image's own directory name first. Matching
        # on the whole path picked the train labels for a val image whenever
        # some parent directory happened to contain "train". The full path is
        # kept as a fallback for callers that relied on the old matching.
        parent_dir = os.path.basename(os.path.dirname(os.path.abspath(img_path)))
        for probe in (parent_dir, img_path):
            if 'train' in probe:
                label_key = 'train_label'
                break
            if 'val' in probe:
                label_key = 'val_label'
                break

    if label_key is not None:
        try:
            with open(data_dir[label_key], 'r', encoding='utf-8') as f:
                labels = json.load(f)

            img_name = os.path.basename(img_path)
            if img_name in labels:
                label_info = labels[img_name]
                plt.title(f"标签: {label_info['label']}")

                # Draw one rectangle and one text tag per annotated digit.
                ax = plt.gca()
                boxes = zip(
                    label_info['label'], label_info['left'], label_info['top'],
                    label_info['width'], label_info['height'],
                )
                for digit, left, top, width, height in boxes:
                    ax.add_patch(plt.Rectangle(
                        (left, top), width, height,
                        fill=False, edgecolor='red', linewidth=2
                    ))
                    plt.text(left, top-5, str(digit), color='red', fontsize=12)
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Missing/corrupt label file or unexpected schema: report and go on.
            print(f"获取标签信息失败: {e}")

    plt.show()

# Preview one sample image (skipped when no training images were found)
if train_list:
    view_image(train_list[0])

# Inspect the annotation stored for one training image (labels and box geometry)
def look_train_json():
    """Print the annotation record stored under ``000000.png`` in the train label file."""
    with open(data_dir['train_label'], 'r', encoding='utf-8') as label_fp:
        annotations = json.load(label_fp)
    print(annotations['000000.png'])

# Print the annotation stored for image 000000.png.
look_train_json()

## 数据集统计分析

In [None]:
def img_size_summary():
    """Scatter-plot the width and height of up to 100 training images."""
    sizes = []

    # Only a subset of the images is inspected to keep this quick.
    for path in glob(data_dir['train_data']+'*.png')[:100]:
        # Close each file right after reading its size instead of leaking handles.
        with Image.open(path) as img:
            sizes.append(img.size)

    # reshape keeps the array two-dimensional even when no image was found,
    # so the column indexing below cannot fail on an empty dataset.
    sizes = np.array(sizes).reshape(-1, 2)

    plt.figure(figsize=(10, 8))
    plt.scatter(sizes[:, 0], sizes[:, 1])
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.title('image width-height summary')
    plt.show()

# Plot the width/height scatter for a sample of the training images.
img_size_summary()

## 统计bbox的大小

In [None]:
def bbox_summary():
    """Scatter-plot the width and height of every annotated training box."""
    # Read through a context manager so the label file is closed afterwards.
    with open(data_dir['train_label'], 'r', encoding='utf-8') as f:
        marks = json.load(f)

    # One [left, top, width, height] row per annotated digit.
    bboxes = [
        [left, top, width, height]
        for mark in marks.values()
        for _, left, top, width, height in zip(
            mark['label'], mark['left'], mark['top'], mark['width'], mark['height']
        )
    ]

    # reshape keeps four columns even when there are no annotations at all.
    bboxes = np.array(bboxes).reshape(-1, 4)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.scatter(bboxes[:, 2], bboxes[:, 3])
    ax.set_title('bbox width-height summary')
    ax.set_xlabel('width')
    ax.set_ylabel('height')
    plt.show()

# Plot the width/height scatter of the annotated training boxes.
bbox_summary()

## 准备YOLO格式数据

YOLO要求特定的数据格式，我们需要将已有的数据转换为YOLO格式。

In [None]:
def _yolo_label_lines(label_info, img_w, img_h):
    """Turn one image's pixel-space boxes into YOLO label lines.

    Each line is ``<class> <x_center> <y_center> <width> <height>`` with the
    geometry normalised by the image size. Boxes are clipped to the image so
    no normalised value leaves [0, 1]; boxes with no area left are dropped.
    """
    lines = []
    boxes = zip(
        label_info['label'], label_info['left'], label_info['top'],
        label_info['width'], label_info['height'],
    )
    for cls, left, top, width, height in boxes:
        x1 = min(max(left, 0), img_w)
        y1 = min(max(top, 0), img_h)
        x2 = min(max(left + width, 0), img_w)
        y2 = min(max(top + height, 0), img_h)
        if x2 <= x1 or y2 <= y1:
            continue  # box lies entirely outside the image
        x_center = (x1 + x2) / 2 / img_w
        y_center = (y1 + y2) / 2 / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h
        lines.append(f"{int(cls)} {x_center} {y_center} {w} {h}\n")
    return lines


def _export_yolo_split(image_paths, label_file, split, yolo_data_dir, desc):
    """Copy the labelled images of one split and write their YOLO label files."""
    with open(label_file, 'r', encoding='utf-8') as f:
        labels = json.load(f)

    for img_path in tqdm(image_paths, desc=desc):
        img_name = os.path.basename(img_path)
        label_info = labels.get(img_name)
        if label_info is None:
            continue  # images without annotations are left out

        shutil.copy(img_path, f'{yolo_data_dir}/images/{split}/{img_name}')

        # Only the size is needed; close the file immediately afterwards.
        with Image.open(img_path) as img:
            img_w, img_h = img.size

        stem = os.path.splitext(img_name)[0]
        with open(f'{yolo_data_dir}/labels/{split}/{stem}.txt', 'w') as f:
            f.writelines(_yolo_label_lines(label_info, img_w, img_h))


def prepare_yolo_data():
    """Convert the train/val sets into the YOLO directory layout.

    Reads the module-level ``train_list``, ``val_list`` and ``data_dir``,
    fills ``./yolo_dataset/{images,labels}/{train,val}`` and writes
    ``./yolo_dataset/data.yaml`` describing the ten digit classes.

    Returns:
        The path of the generated ``data.yaml`` file.
    """
    yolo_data_dir = './yolo_dataset'
    for kind in ('images', 'labels'):
        for split in ('train', 'val'):
            os.makedirs(f'{yolo_data_dir}/{kind}/{split}', exist_ok=True)

    _export_yolo_split(train_list, data_dir['train_label'], 'train', yolo_data_dir, "处理训练集")
    _export_yolo_split(val_list, data_dir['val_label'], 'val', yolo_data_dir, "处理验证集")

    # Dataset description file consumed by the YOLO trainer.
    data_yaml = f"""path: {os.path.abspath(yolo_data_dir)}
train: images/train
val: images/val

nc: 10
names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']"""

    # utf-8 so a dataset path with non-ASCII characters is written intact.
    with open(f'{yolo_data_dir}/data.yaml', 'w', encoding='utf-8') as f:
        f.write(data_yaml)

    print(f"YOLO数据集准备完成: {yolo_data_dir}/data.yaml")
    return f"{yolo_data_dir}/data.yaml"

# Prepare the YOLO-format dataset
data_yaml = prepare_yolo_data()

## 训练YOLO模型

In [None]:
def train_yolo(data_yaml, epochs=20):
    """Train a YOLOv8-nano detector from scratch.

    Args:
        data_yaml: Path to the dataset description file.
        epochs: Number of training epochs.

    Returns:
        The ``YOLO`` model object after training.
    """
    # Build from the architecture config so no pretrained weights are loaded.
    model = YOLO('yolov8n.yaml')

    # Conservative resource settings.
    model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=640,
        batch=16,  # small batch to limit memory use
        device=0 if torch.cuda.is_available() else 'cpu',  # first GPU, else CPU
        workers=2,  # few dataloader workers
        pretrained=False  # make sure no pretrained weights are used
    )

    # NOTE(review): ckpt_path appears to be filled in only when a model is
    # loaded from a weights file, so for this YAML-built model it printed None.
    # Prefer the checkpoint paths recorded by the trainer — confirm against the
    # installed ultralytics version.
    trainer = getattr(model, 'trainer', None)
    weights_path = None
    for attr in ('best', 'last'):
        candidate = getattr(trainer, attr, None)
        if candidate is not None and os.path.exists(candidate):
            weights_path = candidate
            break
    if weights_path is None:
        weights_path = model.ckpt_path

    print(f"YOLO模型训练完成: {weights_path}")
    return model

# Train the model
epochs = 10  # adjust as needed
trained_model = train_yolo(data_yaml, epochs=epochs)

## 评估YOLO模型

In [None]:
# Evaluate the trained model on the validation set
val_results = trained_model.val()

## 使用YOLO模型进行预测

In [None]:
def predict_with_yolo(model, test_images, max_digits=4):
    """Predict the digit string of each image with a YOLO detector.

    Args:
        model: Trained YOLO model exposing ``predict``.
        test_images: Iterable of image paths.
        max_digits: Maximum number of digits kept per image (leftmost first).

    Returns:
        A list of ``[img_path, code]`` rows, where ``code`` is the detected
        digits read from left to right. It is shorter than ``max_digits``
        (possibly empty) when fewer digits were detected.
    """
    results = []

    for img_path in tqdm(test_images, desc="YOLO预测"):
        preds = model.predict(img_path, verbose=False)

        detected_digits = []
        for r in preds:
            # Read the boxes left to right, i.e. ordered by their x1 coordinate.
            ordered = sorted(r.boxes, key=lambda box: box.xyxy[0].tolist()[0])
            detected_digits = [str(int(box.cls.item())) for box in ordered]

        # The former padding loop appended '' until len(code) reached 4, which
        # never changes the length and therefore never terminated for images
        # with fewer than four detections. Padding with an empty string is a
        # no-op, so a short code is simply kept as is.
        code = ''.join(detected_digits[:max_digits])

        results.append([img_path, code])

    return results

# Run prediction over the test set
yolo_predictions = predict_with_yolo(trained_model, test_list)

## 将预测结果保存为提交格式

In [None]:
def write2csv(results, csv_path):
    """Write ``[path, code]`` rows to a two-column CSV file.

    The path of every row is reduced to its base file name; the columns are
    named ``file_name`` and ``file_code`` and no index column is written.
    """
    table = pd.DataFrame(results, columns=['file_name', 'file_code'])
    # Keep only the file name, dropping any directory part.
    table['file_name'] = table['file_name'].map(os.path.basename)
    table.to_csv(csv_path, sep=',', index=False)
    print('Results saved to %s' % csv_path)

# Save the predictions
write2csv(yolo_predictions, "yolo_predictions.csv")

## 可视化预测结果

In [None]:
def visualize_predictions(model, sample_images):
    """Show the model's annotated predictions for up to nine images in a 3x3 grid.

    Args:
        model: Trained YOLO model exposing ``predict``.
        sample_images: Sequence of image paths; only the first nine are used.
    """
    plt.figure(figsize=(15, 12))
    for i, img_path in enumerate(sample_images[:9]):
        results = model.predict(img_path)

        # Results.plot() returns the annotated image in BGR channel order;
        # reverse the channel axis because matplotlib expects RGB.
        img = results[0].plot()[..., ::-1]

        plt.subplot(3, 3, i+1)
        plt.imshow(img)
        plt.axis('off')
        plt.title(f"样本 {i+1}")

    plt.tight_layout()
    plt.show()

# Pick up to nine test images at random and visualise the predictions for them
import random
sample_test_images = random.sample(test_list, min(9, len(test_list)))
visualize_predictions(trained_model, sample_test_images)

## 导出模型

In [None]:
# Export the trained model in ONNX format and print what export() returned.
exported_model = trained_model.export(format="onnx")
print(f"模型已导出为: {exported_model}")

## 创建YOLO与ResNet集成模型

In [None]:
from matplotlib import transforms
from baseline import DigitsResnet50


class EnsembleModel:
    """Combine a ResNet digit classifier with a YOLO digit detector.

    Both models produce up to four (digit, confidence) pairs per image; for
    each position the prediction with the higher confidence wins. Class index
    10 stands for "blank" and renders as an empty string.
    """

    def __init__(self, resnet_path, yolo_path):
        """Load both models.

        Args:
            resnet_path: Checkpoint file whose ``'model'`` entry holds the
                ResNet state dict.
            yolo_path: Weights file accepted by ``YOLO(...)``.
        """
        # The preprocessing ops used below (Compose, Resize, CenterCrop,
        # ToTensor, Normalize) are torchvision transforms; the file-level name
        # `transforms` is bound to matplotlib.transforms, which has none of them.
        from torchvision import transforms as tv_transforms

        # Resolve the device once and use it for the model and the checkpoint.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Load the ResNet model. map_location lets a checkpoint saved on a GPU
        # be loaded on a CPU-only machine.
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        self.resnet = DigitsResnet50().to(self.device)
        checkpoint = torch.load(resnet_path, map_location=self.device)
        self.resnet.load_state_dict(checkpoint['model'])
        self.resnet.eval()

        # Load the YOLO model.
        self.yolo = YOLO(yolo_path)

        # Preprocessing applied to images before the ResNet.
        self.transforms = tv_transforms.Compose([
            tv_transforms.Resize(128),
            tv_transforms.CenterCrop((128, 224)),
            tv_transforms.ToTensor(),
            tv_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def predict_resnet(self, img):
        """Classify an image with the ResNet.

        The network output is iterated as one logits tensor per character
        position; the arg-max class and its softmax probability are returned
        for each position.

        Returns:
            ``(digits, probs)`` — two lists of equal length.
        """
        img_tensor = self.transforms(img).unsqueeze(0).to(self.device)
        with torch.no_grad():
            pred = self.resnet(img_tensor)

        probs = []
        digits = []
        for p in pred:
            prob, pred_idx = torch.max(torch.softmax(p, dim=1), dim=1)
            probs.append(prob.item())
            digits.append(pred_idx.item())

        return digits, probs

    def predict_yolo(self, img):
        """Detect digits with YOLO and read them from left to right.

        Returns:
            ``(digits, probs)`` — exactly four entries each. Missing positions
            are padded with class 10 (blank) at confidence 0.0; detections
            beyond the fourth are discarded.
        """
        results = self.yolo(img, verbose=False)

        # Collect (x1, class, confidence) for every detected box.
        detections = []
        for r in results:
            for box in r.boxes:
                x1 = box.xyxy[0].tolist()[0]
                detections.append((x1, int(box.cls.item()), box.conf.item()))

        # Order by the left edge; the sort is stable for equal x1 values.
        detections.sort(key=lambda det: det[0])

        digits = [cls for _, cls, _ in detections]
        probs = [conf for _, _, conf in detections]

        # Normalise to four positions: pad with blank (10), then truncate.
        missing = max(0, 4 - len(digits))
        digits = (digits + [10] * missing)[:4]
        probs = (probs + [0.0] * missing)[:4]

        return digits, probs

    def predict(self, img_path):
        """Predict the digit string of one image file with both models.

        Returns:
            The concatenated digits chosen position by position; blank
            positions contribute nothing to the string.
        """
        # Convert to RGB so grayscale/RGBA/palette images match the
        # three-channel normalisation; the context manager closes the file.
        with Image.open(img_path) as opened:
            img = opened.convert('RGB')

        resnet_digits, resnet_probs = self.predict_resnet(img)
        yolo_digits, yolo_probs = self.predict_yolo(img)

        # Per position, keep whichever model is more confident.
        final_digits = []
        for i in range(4):
            has_yolo = i < len(yolo_digits)
            has_resnet = i < len(resnet_digits)
            if has_yolo and has_resnet:
                if yolo_probs[i] > resnet_probs[i]:
                    final_digits.append(yolo_digits[i])
                else:
                    final_digits.append(resnet_digits[i])
            elif has_resnet:
                final_digits.append(resnet_digits[i])
            elif has_yolo:
                final_digits.append(yolo_digits[i])
            else:
                final_digits.append(10)  # blank

        # Map class indices to characters; index 10 maps to the empty string.
        char_list = [str(i) for i in range(10)] + ['']
        return ''.join(char_list[d] for d in final_digits)

def ensemble_predict(resnet_path, yolo_path, output_csv):
    """Predict every test image with the ensemble and write a submission CSV.

    Builds an ``EnsembleModel`` from the two weight files, predicts each path
    in the module-level ``test_list``, sorts the rows by image path and hands
    them to ``write2csv``.

    Returns:
        The sorted list of ``[img_path, code]`` rows.
    """
    ensemble = EnsembleModel(resnet_path, yolo_path)

    # One [path, code] row per test image.
    predictions = [
        [image_path, ensemble.predict(image_path)]
        for image_path in tqdm(test_list, desc="集成预测")
    ]

    # Order the rows by path before writing them out.
    predictions.sort(key=lambda row: row[0])
    write2csv(predictions, output_csv)
    print(f"集成预测结果已保存到 {output_csv}")
    return predictions

def run_ensemble_workflow(resnet_model_path, epochs=10):
    """Run the whole pipeline: prepare data, train YOLO, predict with the ensemble.

    Args:
        resnet_model_path: Path of the ResNet checkpoint, e.g.
            './checkpoints/epoch-resnet50-30-acc-95.67.pth'.
        epochs: Number of YOLO training epochs.

    Returns:
        The rows returned by ``ensemble_predict``; they are also written to
        ``ensemble_result.csv``.

    Raises:
        RuntimeError: If no trained YOLO weights file can be located.
    """
    print("步骤1: 准备YOLO训练数据")
    data_yaml = prepare_yolo_data()

    print("步骤2: 训练YOLO模型")
    trained_model = train_yolo(data_yaml, epochs)

    print(f"步骤3: 使用ResNet模型: {resnet_model_path}")

    # NOTE(review): ckpt_path appears to be filled in only when a model is
    # loaded from a weights file, so it is None for the YAML-built model that
    # train_yolo returns. Look at the trainer's best/last checkpoints first and
    # keep ckpt_path as a last resort — confirm against the installed
    # ultralytics version.
    trainer = getattr(trained_model, 'trainer', None)
    candidates = [
        getattr(trainer, 'best', None),
        getattr(trainer, 'last', None),
        getattr(trained_model, 'ckpt_path', None),
    ]
    yolo_weights = next(
        (str(path) for path in candidates if path and os.path.exists(path)),
        None,
    )
    if yolo_weights is None:
        raise RuntimeError("未找到训练得到的YOLO权重文件（best/last）")

    print("步骤4: 使用集成模型进行预测")
    results = ensemble_predict(resnet_model_path, yolo_weights, "ensemble_result.csv")

    print("完成! 结果已保存到 ensemble_result.csv")
    return results


## 运行完整工作流

In [None]:
# NOTE: run_ensemble_workflow prepares the YOLO data and trains YOLO again
# (30 epochs here) before running the ensemble prediction.
resnet_model_path = './checkpoints/epoch-resnet50-30-acc-95.67.pth'  # replace with the actual ResNet checkpoint path
results = run_ensemble_workflow(resnet_model_path, epochs=30)