### Convert the FICS-PCB dataset downloaded from https://trust-hub.org/#/data/fics-pcb to YOLO format

### 将 https://trust-hub.org/#/data/fics-pcb 下载的数据集文件转化为YOLO格式

In [1]:
import os
import csv
import json
import cv2
import shutil

In [9]:
def dataset_trans(ANNOTATION_PATH, IMAGE_PATH, IMAGE_NAME, OUTPUT_DIR):
    """Convert one annotated image into a YOLO image/label pair.

    The source image is re-encoded as PNG into ``OUTPUT_DIR/images`` and a
    label file with the same stem is written to ``OUTPUT_DIR/labels``. Each
    label line is ``class_id x_center y_center width height`` with the box
    normalised by the image size. ``OUTPUT_DIR/classes.txt`` is (re)written
    on every call.

    Args:
        ANNOTATION_PATH: CSV file with a ``component_type`` column and a
            ``component_location`` column holding a JSON object with the
            keys ``x``, ``y``, ``width`` and ``height``. ``x``/``y`` are
            treated as the box corner with the smallest coordinates.
        IMAGE_PATH: Directory that contains the source image.
        IMAGE_NAME: File name of the source image inside ``IMAGE_PATH``.
        OUTPUT_DIR: Root directory of the YOLO dataset being built.

    Raises:
        FileNotFoundError: If OpenCV cannot read the source image.
        OSError: If the PNG copy cannot be written.
    """
    # Fixed class ids shared by every image; rows whose component_type is
    # not listed here are skipped.
    class_to_id = {'ICs': 0, 'resistors': 3, 'capacitors': 1, 'inductors': 2, 'diodes': 4}

    classes_file = os.path.join(OUTPUT_DIR, 'classes.txt')
    labels_dir = os.path.join(OUTPUT_DIR, 'labels')
    images_dir = os.path.join(OUTPUT_DIR, 'images')
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    # splitext copes with extensions of any length, unlike slicing off the
    # last three characters.
    stem = os.path.splitext(IMAGE_NAME)[0]

    # Load the image once: it is both re-encoded as PNG and used for the
    # normalisation size, so labels always match the image that is written.
    source_image = os.path.join(IMAGE_PATH, IMAGE_NAME)
    image = cv2.imread(source_image, cv2.IMREAD_UNCHANGED)
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {source_image}")
    target_image = os.path.join(images_dir, stem + '.png')
    if not cv2.imwrite(target_image, image):
        raise OSError(f"Cannot write image: {target_image}")
    image_height, image_width = image.shape[:2]

    # classes.txt lists the names in class-id order so that line N is the
    # name of class id N used in the label files.
    with open(classes_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{name}\n" for name in sorted(class_to_id, key=class_to_id.get))

    label_file_path = os.path.join(labels_dir, stem + '.txt')
    with open(ANNOTATION_PATH, mode='r', encoding='utf-8', newline='') as f_in, \
         open(label_file_path, mode='w', encoding='utf-8') as f_out:
        for row in csv.DictReader(f_in):
            component_type = row['component_type']
            if component_type not in class_to_id:
                continue
            class_id = class_to_id[component_type]

            # The bounding box is stored as a JSON string in the CSV cell.
            rect_data = json.loads(row['component_location'])
            x = rect_data['x']
            y = rect_data['y']
            width = rect_data['width']
            height = rect_data['height']

            # Normalise the box centre and size to the image dimensions.
            x_center = (x + width / 2) / image_width
            y_center = (y + height / 2) / image_height
            bbox_width = width / image_width
            bbox_height = height / image_height

            f_out.write(f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")

In [11]:
# Convert the single front/back DSLR image of samples s1..s16.
output = os.path.join('.', 'dataset')
for i in range(1, 17):
    dslr_dir = os.path.join('.', 'dataset', f's{i}', f's{i}', 'DSLR')
    img_path = os.path.join(dslr_dir, 'img')
    for side in ('front', 'back'):
        ann_path = os.path.join(dslr_dir, 'annotation', f's{i}_{side}.csv')
        img_name = f's{i}_{side}.tif'
        # Both the annotation and the image of *this* side must exist; the
        # back side used to be guarded by the front image by mistake.
        if os.path.exists(ann_path) and os.path.exists(os.path.join(img_path, img_name)):
            dataset_trans(ann_path, img_path, img_name, output)
            print(f"✅ s{i} {side} transformed!")
        else:
            print(f"❎ s{i} {side} not found!")
        


✅ s1 front transformed!
❎ s1 back not found!
✅ s2 front transformed!
✅ s2 back transformed!
✅ s3 front transformed!
✅ s3 back transformed!
✅ s4 front transformed!
✅ s4 back transformed!
✅ s5 front transformed!
✅ s5 back transformed!
✅ s6 front transformed!
✅ s6 back transformed!
✅ s7 front transformed!
✅ s7 back transformed!
✅ s8 front transformed!
✅ s8 back transformed!
✅ s9 front transformed!
✅ s9 back transformed!
✅ s10 front transformed!
❎ s10 back not found!
❎ s11 front not found!
❎ s11 back not found!
✅ s12 front transformed!
✅ s12 back transformed!
❎ s13 front not found!
❎ s13 back not found!
❎ s14 front not found!
❎ s14 back not found!
❎ s15 front not found!
❎ s15 back not found!
✅ s16 front transformed!
✅ s16 back transformed!


In [12]:
# Convert samples whose front/back views are split into numbered images
# (s{i}_front1..3 / s{i}_back1..3).
output = os.path.join('.', 'dataset')
for i in [11, 14, 15]:
    dslr_dir = os.path.join('.', 'dataset', f's{i}', f's{i}', 'DSLR')
    img_path = os.path.join(dslr_dir, 'img')
    for j in range(1, 4):
        for side in ('front', 'back'):
            ann_path = os.path.join(dslr_dir, 'annotation', f's{i}_{side}{j}.csv')
            img_name = f's{i}_{side}{j}.tif'
            # Both the annotation and the image of *this* side must exist;
            # the back side used to be guarded by the front image by mistake.
            if os.path.exists(ann_path) and os.path.exists(os.path.join(img_path, img_name)):
                dataset_trans(ann_path, img_path, img_name, output)
                print(f"✅ s{i} {side} transformed!")
            else:
                print(f"❎ s{i} {side} not found!")

✅ s11 front transformed!
❎ s11 back not found!
✅ s11 front transformed!
❎ s11 back not found!
✅ s11 front transformed!
❎ s11 back not found!
✅ s14 front transformed!
✅ s14 back transformed!
✅ s14 front transformed!
✅ s14 back transformed!
❎ s14 front not found!
❎ s14 back not found!
✅ s15 front transformed!
✅ s15 back transformed!
✅ s15 front transformed!
✅ s15 back transformed!
❎ s15 front not found!
❎ s15 back not found!
