#### 1.設置環境

從畫面右上角確認有使用到GPU

如果沒連到的話請切換到GPU

In [None]:
# Shell command: print the NVIDIA driver/GPU status to confirm a GPU is attached to this runtime.
!nvidia-smi

In [None]:
import locale
# Replacement for locale.getpreferredencoding that always reports UTF-8.
# NOTE(review): presumably a workaround for shell commands failing in the
# notebook when the runtime locale is not UTF-8 — confirm it is still needed.
def getpreferredencoding(do_setlocale = True):
    """Return "UTF-8" regardless of the locale; `do_setlocale` is accepted but ignored."""
    return "UTF-8"
# Monkey-patch the standard-library function for the rest of the session.
locale.getpreferredencoding = getpreferredencoding

In [None]:
# Install the ultralytics package into the runtime, then import it and run
# its built-in environment check.
!pip install ultralytics
import ultralytics
ultralytics.checks()

#### 2. 上傳資料集和 .yaml 檔

In [None]:
#下載資料集

import os
import shutil


In [None]:
#移動檔案
def find_patient_root(root):
    """Locate the directory that directly contains the ``patient*`` folders.

    Walks ``root`` top-down and returns the first directory that has at least
    one immediate sub-directory whose name starts with "patient". When no such
    directory is found (including when ``root`` does not exist), ``root``
    itself is returned unchanged.
    """
    matching_dirs = (
        current_dir
        for current_dir, subdir_names, _file_names in os.walk(root)
        if any(name.startswith("patient") for name in subdir_names)
    )
    # Fall back to the starting directory when nothing matched.
    return next(matching_dirs, root)

# Extract each archive into a fixed folder. Extraction is skipped when the
# target folder already exists or the zip file is not present.
if not os.path.isdir("./training_image") and os.path.exists("training_image.zip"):
    os.makedirs("./training_image", exist_ok=True)
    !unzip -q training_image.zip -d ./training_image

if not os.path.isdir("./training_label") and os.path.exists("training_label.zip"):
    os.makedirs("./training_label", exist_ok=True)
    !unzip -q training_label.zip -d ./training_label

# Resolve the directories that actually hold the patient* folders; the
# archives may add extra nesting levels. find_patient_root returns its
# argument unchanged when nothing is found.
IMG_ROOT = find_patient_root("./training_image")
LBL_ROOT = find_patient_root("./training_label")

print("IMG_ROOT =", IMG_ROOT)
print("LBL_ROOT =", LBL_ROOT)

# 建立並清空輸出資料夾（若存在）
def ensure_clean_dir(path):
    """Make ``path`` an existing, empty directory.

    An existing directory at ``path`` is deleted together with its contents
    and then recreated; missing parent directories are created as needed.
    """
    directory_present = os.path.isdir(path)
    if directory_present:
        # Drop everything left over from a previous run.
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)

# Start from empty image/label folders for both the train and val splits.
for split_name in ("train", "val"):
    for content_kind in ("images", "labels"):
        ensure_clean_dir(f"./datasets/{split_name}/{content_kind}")

def move_patients(start, end, split):
    """Copy image/label pairs for patients ``start``..``end`` into a split.

    For every ``patientNNNN`` folder (zero-padded to four digits, bounds
    inclusive) found under the module-level ``LBL_ROOT``, each ``.txt`` label
    whose matching ``.png`` exists under ``IMG_ROOT`` is copied, together with
    that image, into ``./datasets/<split>/labels/`` and
    ``./datasets/<split>/images/``. Despite the name, files are copied, not
    moved. Patients without a label folder are skipped; labels without a
    matching image are reported on stdout and skipped.
    """
    image_dest = f"./datasets/{split}/images/"
    label_dest = f"./datasets/{split}/labels/"

    for number in range(start, end + 1):
        patient = f"patient{number:04d}"
        image_folder = os.path.join(IMG_ROOT, patient)
        label_folder = os.path.join(LBL_ROOT, patient)
        if not os.path.isdir(label_folder):
            continue

        label_names = [
            entry for entry in os.listdir(label_folder) if entry.endswith(".txt")
        ]
        for label_name in label_names:
            # The image shares the label's file name, with a .png extension.
            stem = os.path.splitext(label_name)[0]
            image_file = os.path.join(image_folder, stem + ".png")
            if os.path.exists(image_file):
                shutil.copy2(image_file, image_dest)
                shutil.copy2(os.path.join(label_folder, label_name), label_dest)
            else:
                print(f"找不到對應圖片: {image_file}")

# patient0001-0030 go to the training split
move_patients(1, 30, "train")

# patient0031-0050 go to the validation split
move_patients(31, 50, "val")

# Note: move_patients copies the files; the extracted originals stay in place.
print("完成移動！")

In [None]:
# Shell command: list the current working directory.
!ls

In [None]:
# Report how many files ended up in each split folder (caption, folder).
dataset_folders = (
    ('訓練集圖片數量 : ', "./datasets/train/images"),
    ('訓練集標記數量 : ', "./datasets/train/labels"),
    ('驗證集圖片數量 : ', "./datasets/val/images"),
    ('驗證集標記數量 : ', "./datasets/val/labels"),
)
for caption, folder in dataset_folders:
    print(caption, len(os.listdir(folder)))

#### 3. 訓練模型(運行時間約15分鐘)

In [None]:
import os
import shutil
from pathlib import Path
from ultralytics import YOLO
import torch
import numpy as np
import yaml
from typing import List

# --- 新增：路徑偵測工具 (FIX 雙層嵌套路徑) ---
def find_nested_root(base_folder):
    """Return the directory under ``base_folder`` that holds the patient folders.

    Two layouts are recognised, checked in this order:

    1. Double nesting, where the archive repeated its own folder name
       (e.g. ``training_image/training_image/patient0001``).
    2. Flat layout (e.g. ``training_image/patient0001``).

    Args:
        base_folder: Path (str or path-like) of the extracted dataset folder.

    Returns:
        The detected root directory as a string.

    Raises:
        FileNotFoundError: If ``base_folder`` does not exist, or if neither
            layout contains a directory whose name starts with ``patient``.
    """
    path = Path(base_folder)
    if not path.exists():
        raise FileNotFoundError(f"基礎數據目錄 {base_folder} 不存在。請確認已執行初始解壓縮。")

    def _has_patient_dirs(directory):
        # Only real directories count; a stray file such as "patient_list.csv"
        # must not be mistaken for patient data.
        return any(entry.is_dir() for entry in directory.glob('patient*'))

    # Double nesting repeats the folder's own *name*. Joining with the full
    # ``base_folder`` would probe the wrong location for any path that has a
    # parent component or is absolute.
    nested = path / path.name
    if nested.is_dir() and _has_patient_dirs(nested):
        return str(nested)

    # Flat layout: patient folders sit directly inside base_folder.
    if _has_patient_dirs(path):
        return str(path)

    raise FileNotFoundError(f"在 {base_folder} 中找不到任何 'patient*' 資料夾。數據結構不正確。")
# ---------------------------------------------


# ===== 1. 準備 5-Fold 數據分割 (保持不變) =====
def create_5fold_splits(base_dir='./datasets', patients=range(1, 51)):
    """Build patient-level train/val splits for 5-fold cross-validation.

    The patient ids are shuffled with a fixed seed (42) and cut into five
    consecutive validation chunks. When the number of patients is not a
    multiple of five, the first ``len(patients) % 5`` folds receive one extra
    validation patient so that every patient is validated exactly once.

    Args:
        base_dir: Unused; kept so existing callers keep working.
        patients: Iterable of hashable patient ids (integers by default).

    Returns:
        A list of five dicts with keys ``'fold'`` (0-4), ``'train'`` and
        ``'val'`` (lists of patient ids). A one-line summary per fold is
        printed to stdout.
    """
    n_folds = 5
    patients = list(patients)
    # NOTE: this reseeds NumPy's global RNG, as the original code did, so the
    # shuffle (and therefore the split) is reproducible between runs.
    np.random.seed(42)
    np.random.shuffle(patients)

    base_size, remainder = divmod(len(patients), n_folds)
    folds = []

    val_start = 0
    for fold_idx in range(n_folds):
        # Spread the remainder over the leading folds instead of dropping it.
        val_end = val_start + base_size + (1 if fold_idx < remainder else 0)

        val_patients = patients[val_start:val_end]
        held_out = set(val_patients)  # O(1) membership while filtering
        train_patients = [p for p in patients if p not in held_out]

        folds.append({
            'fold': fold_idx,
            'train': train_patients,
            'val': val_patients
        })

        print(f"Fold {fold_idx}: Train={len(train_patients)}, Val={len(val_patients)}")
        val_start = val_end

    return folds

# --- 修正：準備單個 Fold 數據 (傳入正確的根路徑) ---
def prepare_fold_data(fold_info, source_root, label_root):
    """Materialise one cross-validation fold on disk and write its data.yaml.

    Creates ``./datasets/fold_<n>/{train,val}/{images,labels}``, copies every
    ``.png`` image that has a same-named ``.txt`` label for the patients
    listed in ``fold_info``, and writes a ``data.yaml`` describing the fold.

    Args:
        fold_info: Dict with keys ``'fold'`` (int), ``'train'`` and ``'val'``
            (iterables of integer patient ids).
        source_root: Directory containing the ``patientNNNN`` image folders.
        label_root: Directory containing the ``patientNNNN`` label folders.

    Returns:
        Path to the generated ``data.yaml`` file.
    """
    fold_idx = fold_info['fold']
    fold_dir = Path(f'./datasets/fold_{fold_idx}')
    source_root = Path(source_root)
    label_root = Path(label_root)

    for split in ('train', 'val'):
        split_dir = fold_dir / split
        # Remove leftovers from a previous run: with a different split, stale
        # files would otherwise leak patients between train and val.
        if split_dir.exists():
            shutil.rmtree(split_dir)
        images_dir = split_dir / 'images'
        labels_dir = split_dir / 'labels'
        images_dir.mkdir(parents=True, exist_ok=True)
        labels_dir.mkdir(parents=True, exist_ok=True)

        for patient_id in fold_info[split]:
            patient_str = f'patient{patient_id:04d}'
            src_img_dir = source_root / patient_str
            src_lbl_dir = label_root / patient_str

            if not src_img_dir.is_dir():
                # Report the gap instead of silently producing a smaller fold.
                print(f"警告: 找不到圖片資料夾 {src_img_dir}，已略過")
                continue

            for img_file in src_img_dir.glob('*.png'):
                # Only images with a matching label file are copied.
                lbl_file = src_lbl_dir / f'{img_file.stem}.txt'
                if not lbl_file.exists():
                    continue
                shutil.copy(img_file, images_dir / img_file.name)
                shutil.copy(lbl_file, labels_dir / lbl_file.name)

    # Dataset description: absolute fold path, split sub-folders, one class.
    yaml_content = {
        'path': str(fold_dir.absolute()),
        'train': 'train/images',
        'val': 'val/images',
        'names': {0: 'aortic_valve'},
        'nc': 1
    }

    yaml_path = fold_dir / 'data.yaml'
    # Explicit encoding so the file does not depend on the runtime locale.
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.dump(yaml_content, f)

    return yaml_path

# ===== 2. 訓練策略：每個 fold 訓練 2 個模型 (保持不變) =====
def train_fold_models(fold_idx, yaml_path) -> List[str]:
    """Train the YOLOv12n and YOLOv12s models for one fold.

    Both models share the same settings and differ only in the pretrained
    weights file, the batch size and the run name. Runs are written under
    ``runs/detect/fold<idx>_<model>``.

    Args:
        fold_idx: Index of the fold; used in log output and run names.
        yaml_path: Path of the fold's dataset YAML file.

    Returns:
        The ``best.pt`` weight paths of the two runs, YOLOv12n first.
    """
    torch.cuda.empty_cache()

    # Settings shared by both models.
    shared_settings = dict(
        data=str(yaml_path),
        epochs=150,
        device=0,
        cache=True,
        workers=8,
        patience=40,
        save=True,
        plots=True,
        exist_ok=True,
        # Augmentation
        degrees=8, translate=0.1, scale=0.5, flipud=0.5, fliplr=0.5,
        mosaic=1.0, mixup=0.1, hsv_v=0.4,
        # Optimizer and loss weights
        optimizer='AdamW', momentum=0.937, weight_decay=0.0005, warmup_epochs=5,
        box=7.5, cls=0.5, dfl=1.5,
        close_mosaic=10, amp=True, single_cls=True, cos_lr=True,
    )

    # (weights file stem, display label, batch size), in training order.
    variants = (
        ('yolo12n', 'YOLOv12n', 12),
        ('yolo12s', 'YOLOv12s', 8),
    )

    banner = '=' * 60
    best_weights = []
    for position, (stem, label, batch_size) in enumerate(variants):
        print(f"\n{banner}")
        print(f"訓練 Fold {fold_idx} - {label}")
        print(f"{banner}")

        if position > 0:
            # Release cached GPU memory left over from the previous model.
            torch.cuda.empty_cache()

        run_name = f'fold{fold_idx}_{stem}'
        YOLO(f'{stem}.pt').train(
            **shared_settings,
            batch=batch_size,
            imgsz=640,
            lr0=0.001,
            lrf=0.01,
            name=run_name,
            project='runs/detect',
        )
        best_weights.append(f'runs/detect/{run_name}/weights/best.pt')

    return best_weights

# --- 修正：主流程 (main) - 移除測試集預測 ---
def main():
    """Run the full 5-fold training pipeline.

    Detects the image and label root directories, builds the five folds, then
    prepares the data and trains both models for each fold. Prints a summary
    of all weight paths when done.

    Returns:
        The list of weight paths produced by all folds, or ``None`` when a
        dataset root directory cannot be found.
    """
    # Resolve the real dataset roots (handles doubly nested folders).
    try:
        image_root = find_nested_root('training_image')
        label_root = find_nested_root('training_label')
    except FileNotFoundError as err:
        print(f"FATAL ERROR: {err}")
        return None
    print(f"偵測到的圖片根目錄: {image_root}")
    print(f"偵測到的標註根目錄: {label_root}")

    # Build the cross-validation splits.
    print("\n創建 5-Fold 交叉驗證分割...")
    fold_plan = create_5fold_splits()

    # Prepare the data and train the models fold by fold.
    trained_weights = []
    for fold in fold_plan:
        index = fold['fold']
        print(f"\n處理 Fold {index}...")

        dataset_yaml = prepare_fold_data(fold, image_root, label_root)
        trained_weights += train_fold_models(index, dataset_yaml)

    # Summary; test-set prediction is left to a separate script.
    divider = "=" * 60
    print("\n" + divider)
    print("所有 Fold 訓練完成！")
    print("所有模型權重路徑:")
    for rank, weight_path in enumerate(trained_weights, 1):
        print(f"  {rank}. {weight_path}")
    print("請使用單獨的預測腳本來執行 Ensemble 預測。")
    print(divider)

    return trained_weights

# --- 程式碼尾部（移除不必要的函數調用） ---
# (保留您原本的 ensemble_predict_weighted 函數和 quick_version 函數，但不用執行)

if __name__ == '__main__':
    # Always run the full pipeline via main(); there is no quick/partial mode here.
    print("強制執行完整版本（5-Fold, 10個模型）...")
    main()


In [None]:
from ultralytics import YOLO

# Load an existing checkpoint and train it further on the dataset described
# by ./aortic_valve_colab.yaml.
model = YOLO('best.pt') # use 'best.pt' or the file name of another trained model
results = model.train(data="./aortic_valve_colab.yaml",
            epochs=10, # number of epochs to run
            batch=16, # batch size
            imgsz=640, # image size, 640x640
            device=0 # train on the GPU
            )