In [None]:
import SimpleITK as sitk
import numpy as np
import pandas as pd
import os

def save_yolo_annotations(ct_scan, nodules, Origin, Spacing, seriesuid, output_folder):
    """Write one YOLO label file per nodule of a CT scan.

    Each nodule centre is converted to voxel units with
    ``(coord - Origin) / Spacing``, normalised by the in-plane image size and
    written as a single ``0 x_center y_center width height`` line to
    ``<seriesuid>_nodule_<idx>_slice_<z>.txt`` inside ``output_folder``.

    Nodules whose centre falls outside the image are skipped with a warning
    instead of producing a label with coordinates outside ``[0, 1]``.

    Args:
        ct_scan: Image object exposing ``GetSize()``; the first two entries
            are used as width and height, the third (when present) as depth.
        nodules: Iterable of ``(x, y, z, diameter)`` rows, expressed in the
            same coordinate system and units as ``Origin`` and ``Spacing``.
        Origin: Per-axis ``(x, y, z)`` origin of the image.
        Spacing: Per-axis ``(x, y, z)`` voxel spacing.
        seriesuid: Scan identifier used as the output file name prefix.
        output_folder: Directory the label files are written to; it must
            already exist.

    NOTE(review): the image direction matrix is not applied here, so this
    presumably assumes an axis-aligned scan - confirm for the dataset.
    """
    size = ct_scan.GetSize()
    img_width, img_height = size[0], size[1]
    img_depth = size[2] if len(size) > 2 else None

    for idx, (x, y, z, diameter) in enumerate(nodules):
        # Nodule centre in (continuous) voxel coordinates.
        x_center = (x - Origin[0]) / Spacing[0]
        y_center = (y - Origin[1]) / Spacing[1]
        # Round to the nearest slice; plain int() truncates toward zero and
        # can name the neighbouring slice instead of the one holding the centre.
        z_center = int(round((z - Origin[2]) / Spacing[2]))

        in_plane = 0 <= x_center < img_width and 0 <= y_center < img_height
        in_depth = img_depth is None or 0 <= z_center < img_depth
        if not (in_plane and in_depth):
            print(
                f"Warning: Nodule center at index "
                f"{(x_center, y_center, z_center)} is out of bounds "
                f"for seriesuid {seriesuid}"
            )
            continue

        # Box side lengths in voxels along each in-plane axis.
        side_length_x = diameter / Spacing[0]
        side_length_y = diameter / Spacing[1]

        # YOLO expects centre and size relative to the image dimensions.
        yolo_x_center = x_center / img_width
        yolo_y_center = y_center / img_height
        yolo_width = side_length_x / img_width
        yolo_height = side_length_y / img_height

        # Single class (id 0).
        annotation = f"0 {yolo_x_center} {yolo_y_center} {yolo_width} {yolo_height}\n"

        output_filename = os.path.join(
            output_folder, f"{seriesuid}_nodule_{idx}_slice_{z_center}.txt"
        )
        with open(output_filename, "w") as file:
            file.write(annotation)

def read_mhd_and_save_yolo_annotations(folder_path, csv_file, output_folder):
    """Generate YOLO label files for every ``.mhd`` scan in a folder.

    Args:
        folder_path: Directory that is scanned (non-recursively) for files
            ending in ``.mhd``.
        csv_file: CSV with the columns ``seriesuid``, ``coordX``, ``coordY``,
            ``coordZ`` and ``diameter_mm``.
        output_folder: Directory receiving the label files; created if it
            does not exist yet.
    """
    os.makedirs(output_folder, exist_ok=True)

    nodules_info = pd.read_csv(csv_file)
    nodule_columns = ['coordX', 'coordY', 'coordZ', 'diameter_mm']

    for filename in os.listdir(folder_path):
        if not filename.endswith('.mhd'):
            continue

        # The file name without its extension is the series identifier.
        seriesuid = os.path.splitext(filename)[0]
        itkimage = sitk.ReadImage(os.path.join(folder_path, filename))

        # Only geometry metadata is needed here, so the voxel data is not
        # converted to a numpy array (that copy was never used).
        belongs_to_scan = nodules_info['seriesuid'] == seriesuid
        current_nodules = nodules_info.loc[belongs_to_scan, nodule_columns].values
        save_yolo_annotations(
            itkimage,
            current_nodules,
            itkimage.GetOrigin(),
            itkimage.GetSpacing(),
            seriesuid,
            output_folder,
        )

# Input and output locations. Paths are assembled with os.path.join so the
# script is not tied to the Windows path separator.
folder_path = os.path.join('luna_dataset', 'subset0')
csv_file = os.path.join('luna_dataset', 'annotations.csv')
output_folder = 'yolo_annotations'

read_mhd_and_save_yolo_annotations(folder_path, csv_file, output_folder)


: 

In [None]:
import SimpleITK as sitk
import numpy as np
import pandas as pd
import os
from PIL import Image

def save_yolo_annotations_and_images(ct_scan, nodules, seriesuid, output_folder):
    """Export a YOLO label file and the matching slice PNG for each nodule.

    For every nodule the centre is mapped to a voxel index with
    ``ct_scan.TransformPhysicalPointToIndex``. Nodules whose index lies
    outside the image are skipped with a warning. For the others, two files
    named ``<seriesuid>_nodule_<idx>_slice_<z>`` are written to
    ``output_folder``: a ``.txt`` holding one
    ``class x_center y_center width height`` line (normalised by the image
    width/height) and a ``.png`` with slice ``z`` min-max scaled to 8 bit.

    Args:
        ct_scan: Image object providing ``GetSize``, ``GetSpacing`` and
            ``TransformPhysicalPointToIndex`` (a SimpleITK image in this file).
        nodules: Iterable of ``(x, y, z, diameter)`` rows; the centre is
            passed to ``TransformPhysicalPointToIndex`` and the diameter is
            divided by the in-plane spacing.
        seriesuid: Scan identifier used as the output file name prefix.
        output_folder: Existing directory receiving the output files.

    Any exception raised while handling one nodule is reported on stdout and
    processing continues with the next nodule.

    NOTE(review): every nodule gets its own label file containing a single
    box, so two nodules on the same slice yield two copies of that slice,
    each labelled with only one of them - confirm this is intended.
    """
    size = ct_scan.GetSize()  # (width, height, depth)
    img_width, img_height, img_depth = size[0], size[1], size[2]
    Spacing = ct_scan.GetSpacing()

    # The voxel array is loaded on first use and then shared by all nodules
    # of this scan; scans without a usable nodule never pay for the copy.
    volume = None

    for idx, nodule in enumerate(nodules):
        try:
            x, y, z, diameter = nodule

            # Physical point -> voxel index.
            index = ct_scan.TransformPhysicalPointToIndex((float(x), float(y), float(z)))
            x_center_idx, y_center_idx, z_center_idx = index

            # Skip nodules that fall outside the image. Passing this check
            # also guarantees the normalised centre lies in [0, 1).
            if not (0 <= x_center_idx < img_width and 0 <= y_center_idx < img_height and 0 <= z_center_idx < img_depth):
                print(f"Warning: Nodule center at index {index} is out of bounds for seriesuid {seriesuid}")
                continue

            # Nodule extent in pixels along each in-plane axis.
            diameter_x = diameter / Spacing[0]
            diameter_y = diameter / Spacing[1]

            # YOLO values are relative to the image width/height.
            yolo_x_center = x_center_idx / img_width
            yolo_y_center = y_center_idx / img_height
            yolo_width = diameter_x / img_width
            yolo_height = diameter_y / img_height

            class_id = 0  # single class
            annotation = f"{class_id} {yolo_x_center:.6f} {yolo_y_center:.6f} {yolo_width:.6f} {yolo_height:.6f}\n"

            output_label_filename = os.path.join(output_folder, f"{seriesuid}_nodule_{idx}_slice_{z_center_idx}.txt")
            with open(output_label_filename, "w") as file:
                file.write(annotation)

            if volume is None:
                # Array layout is [depth, height, width].
                volume = sitk.GetArrayFromImage(ct_scan)

            # Work in float so the subtraction cannot wrap around in the
            # source integer dtype.
            slice_image = volume[z_center_idx, :, :].astype(np.float64)
            lowest = slice_image.min()
            highest = slice_image.max()
            if highest > lowest:
                slice_image_norm = (slice_image - lowest) / (highest - lowest)
            else:
                # A constant slice has no contrast; emit black rather than
                # dividing by zero.
                slice_image_norm = np.zeros_like(slice_image)
            slice_image_8bit = (slice_image_norm * 255).astype(np.uint8)

            output_image_filename = os.path.join(output_folder, f"{seriesuid}_nodule_{idx}_slice_{z_center_idx}.png")
            image = Image.fromarray(slice_image_8bit)
            image.save(output_image_filename)

        except Exception as e:
            # Best effort per nodule: report and move on to the next one.
            print(f"Error processing nodule {idx} in seriesuid {seriesuid}: {e}")

def process_subsets(subset_indices, csv_file, output_folder, dataset_root='luna_dataset'):
    """Export labels and slice images for the scans of several subsets.

    Args:
        subset_indices: Iterable of subset numbers; subset ``i`` is read
            from ``<dataset_root>/subset<i>``. Missing folders are reported
            and skipped.
        csv_file: CSV with the columns ``seriesuid``, ``coordX``, ``coordY``,
            ``coordZ`` and ``diameter_mm``.
        output_folder: Directory receiving the exported files; created if it
            does not exist yet.
        dataset_root: Directory containing the ``subset<i>`` folders.
            Defaults to ``'luna_dataset'``, the location that was previously
            hard-coded.
    """
    os.makedirs(output_folder, exist_ok=True)

    nodules_info = pd.read_csv(csv_file)
    nodule_columns = ['coordX', 'coordY', 'coordZ', 'diameter_mm']

    for i in subset_indices:
        folder_path = os.path.join(dataset_root, f'subset{i}')
        if not os.path.exists(folder_path):
            print(f"Warning: Folder {folder_path} does not exist.")
            continue

        for filename in os.listdir(folder_path):
            if not filename.endswith('.mhd'):
                continue

            # The file name without its extension is the series identifier.
            seriesuid = os.path.splitext(filename)[0]
            itkimage = sitk.ReadImage(os.path.join(folder_path, filename))

            # Annotation rows belonging to this scan.
            belongs_to_scan = nodules_info['seriesuid'] == seriesuid
            current_nodules = nodules_info.loc[belongs_to_scan, nodule_columns].values
            save_yolo_annotations_and_images(itkimage, current_nodules, seriesuid, output_folder)

# Path of the annotation CSV. Paths are assembled with os.path.join so the
# script is not tied to the Windows path separator.
csv_file = os.path.join('luna_dataset', 'annotations.csv')

# subset0-7 are exported to the train folder.
train_output_folder = os.path.join('luna_dataset', 'train')
train_subsets = list(range(0, 8))  # subset0 through subset7
process_subsets(train_subsets, csv_file, train_output_folder)

# subset8 is exported to the val folder.
val_output_folder = os.path.join('luna_dataset', 'val')
val_subsets = [8]
process_subsets(val_subsets, csv_file, val_output_folder)

# subset9 is exported to the test folder.
test_output_folder = os.path.join('luna_dataset', 'test')
test_subsets = [9]
process_subsets(test_subsets, csv_file, test_output_folder)
