In [36]:
import os
import shutil
import yaml
from glob import glob
from ultralytics import YOLO
from roboflow import Roboflow

# --- 1. Base paths and final settings ---
BASE_DATA_PATH = 'C:/Users/Admin/work space/2nd/'
# The Roboflow key is read from the environment so the secret never lives in the
# notebook (set ROBOFLOW_API_KEY before starting the kernel). It is an empty
# string when the variable is not set.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated in the Roboflow dashboard.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY", "")
# Weights of the previous training run, used as the fine-tuning starting point.
PREVIOUS_MODEL_PATH = os.path.join(BASE_DATA_PATH, 'acne_model_8_classes_retrained', 'weights', 'best.pt')
DATASET1_NAME = 'acne04-1'
DATASET2_NAME = 'Acne-detection-1'
# Actual folder names, as checked and corrected by the user.
DATASET3_NAME = "Acne-detection-9"
DATASET4_NAME = "acne-detection-nv9i6-ddxoy-1"
# Folder that receives the merged dataset, and the name of the training run.
COMBINED_DATASET_NAME = 'acne_dataset_final_ALL'
TRAINING_NAME = 'acne_model_final_ALL'

In [37]:
# --- 2. Verify that the additional dataset folders exist ---
print("🚀 데이터셋 폴더 존재 여부를 확인합니다...")
# One status line per additional dataset: found / not found.
print(
    f"✅ '{DATASET3_NAME}' 데이터셋 폴더를 찾았습니다."
    if os.path.exists(os.path.join(BASE_DATA_PATH, DATASET3_NAME))
    else f"🚨 '{DATASET3_NAME}' 데이터셋 폴더를 찾을 수 없습니다."
)
print(
    f"✅ '{DATASET4_NAME}' 데이터셋 폴더를 찾았습니다."
    if os.path.exists(os.path.join(BASE_DATA_PATH, DATASET4_NAME))
    else f"🚨 '{DATASET4_NAME}' 데이터셋 폴더를 찾을 수 없습니다."
)
print(50 * "-")

🚀 데이터셋 폴더 존재 여부를 확인합니다...
✅ 'Acne-detection-9' 데이터셋 폴더를 찾았습니다.
✅ 'acne-detection-nv9i6-ddxoy-1' 데이터셋 폴더를 찾았습니다.
--------------------------------------------------


In [38]:
# --- 3. Define the final class list and inspect the source classes ---
# The final class ids are the positions in this alphabetically sorted list.
FINAL_CLASS_NAMES = [
    'blackheads', 'comedone', 'cyst', 'fore',
    'nodule', 'papule', 'pustule', 'whiteheads',
]
FINAL_CLASS_NAMES.sort()
final_class_set = {*FINAL_CLASS_NAMES}

def get_class_names(dataset_path):
    """Return the class names declared in ``<dataset_path>/data.yaml``.

    Args:
        dataset_path: Folder that is expected to contain a ``data.yaml`` file.

    Returns:
        A list of class names ordered by class id. The list is empty when the
        file is missing, empty, not a mapping, or has no ``names`` entry.
    """
    yaml_path = os.path.join(dataset_path, 'data.yaml')
    if not os.path.exists(yaml_path):
        return []
    with open(yaml_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # safe_load yields None for an empty document; anything that is not a
    # mapping cannot carry a 'names' entry either.
    if not isinstance(config, dict):
        return []
    names = config.get('names') or []
    if isinstance(names, dict):
        # 'names' may also be written as an {id: name} mapping; flatten it in
        # id order so that list positions can be used as class ids.
        # NOTE(review): assumes the ids are contiguous from 0 — confirm for
        # any dataset that uses the mapping form.
        return [names[class_id] for class_id in sorted(names)]
    return list(names)

# Resolve the folders of the two additional datasets and read their class lists.
src_path_d3, src_path_d4 = (
    os.path.join(BASE_DATA_PATH, folder_name)
    for folder_name in (DATASET3_NAME, DATASET4_NAME)
)
d3_names, d4_names = map(get_class_names, (src_path_d3, src_path_d4))
print(f"데이터셋 3 원본 클래스: {d3_names}")
print(f"데이터셋 4 원본 클래스: {d4_names}")
print(50 * "-")

데이터셋 3 원본 클래스: ['Pustula', 'acne fulminans', 'blackhead', 'fungal acne', 'nodules', 'papula', 'whitehead']
데이터셋 4 원본 클래스: ['Acne', 'Birthmark', 'Blackhead', 'Cysts', 'Milium', 'Papular', 'Purulent', 'Scars', 'Whitehead']
--------------------------------------------------


In [39]:
# --- 4. Build the class-id remapping tables (manual mapping applied) ---
# Final class name -> final class id (position in FINAL_CLASS_NAMES).
final_name_to_id = dict(zip(FINAL_CLASS_NAMES, range(len(FINAL_CLASS_NAMES))))

# Existing datasets 1 and 2: original class id -> final class id.
ORIGINAL_D1_NAMES = ['fore', 'papule', 'pustule', 'nodule', 'whiteheads', 'cyst', 'blackheads']
remap_d1 = dict(enumerate(final_name_to_id[class_name] for class_name in ORIGINAL_D1_NAMES))
ORIGINAL_D2_NAMES = ['comedone', 'pustule', 'papule', 'cyst', 'nodule']
remap_d2 = dict(enumerate(final_name_to_id[class_name] for class_name in ORIGINAL_D2_NAMES))

# Manual mapping rules: source class name -> final class name.
# Source classes that are not listed here have no entry in the mapping.
MANUAL_MAP_D3 = dict(
    blackhead='blackheads',
    nodules='nodule',
    papula='papule',
    Pustula='pustule',
    whitehead='whiteheads',
)
MANUAL_MAP_D4 = dict(
    Blackhead='blackheads',
    Cysts='cyst',
    Papular='papule',
    Whitehead='whiteheads',
)
print("✅ 수동 매핑 규칙이 적용되었습니다.")
print(f"데이터셋 3 매핑: {MANUAL_MAP_D3}")
print(f"데이터셋 4 매핑: {MANUAL_MAP_D4}")

# Turn the manual name mappings into id-conversion tables.
# Dataset 3: source name -> source id, source id -> final id, source id -> source name.
original_d3_name_to_id = dict(zip(d3_names, range(len(d3_names))))
remap_d3 = dict(
    (original_d3_name_to_id[source_name], final_name_to_id[MANUAL_MAP_D3[source_name]])
    for source_name in MANUAL_MAP_D3
    if source_name in original_d3_name_to_id  # ignore rules for names the dataset lacks
)
id_to_name_d3 = dict(zip(original_d3_name_to_id.values(), original_d3_name_to_id.keys()))

# Dataset 4: same three tables.
original_d4_name_to_id = dict(zip(d4_names, range(len(d4_names))))
remap_d4 = dict(
    (original_d4_name_to_id[source_name], final_name_to_id[MANUAL_MAP_D4[source_name]])
    for source_name in MANUAL_MAP_D4
    if source_name in original_d4_name_to_id  # ignore rules for names the dataset lacks
)
id_to_name_d4 = dict(zip(original_d4_name_to_id.values(), original_d4_name_to_id.keys()))


✅ 수동 매핑 규칙이 적용되었습니다.
데이터셋 3 매핑: {'blackhead': 'blackheads', 'nodules': 'nodule', 'papula': 'papule', 'Pustula': 'pustule', 'whitehead': 'whiteheads'}
데이터셋 4 매핑: {'Blackhead': 'blackheads', 'Cysts': 'cyst', 'Papular': 'papule', 'Whitehead': 'whiteheads'}


In [40]:
# --- 5. Merge the datasets and remap their labels ---
combined_path = os.path.join(BASE_DATA_PATH, COMBINED_DATASET_NAME)
src_path_d1, src_path_d2 = (
    os.path.join(BASE_DATA_PATH, folder_name)
    for folder_name in (DATASET1_NAME, DATASET2_NAME)
)

# Remove any output of a previous merge so the combined folder is rebuilt from scratch.
if os.path.exists(combined_path):
    shutil.rmtree(combined_path)
print(f"\n'{combined_path}' 폴더를 생성하고 4개 데이터셋 병합을 시작합니다...")

# Image extensions probed for each label file, in priority order.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _remap_label_lines(lines, remap_dict, keep_names, id_to_name_map):
    """Return the annotation lines that survive filtering, with class ids rewritten."""
    remapped = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue
        original_id = int(parts[0])
        if keep_names and id_to_name_map.get(original_id) not in keep_names:
            continue
        if original_id in remap_dict:
            parts[0] = str(remap_dict[original_id])
            remapped.append(' '.join(parts) + '\n')
    return remapped


def _index_images(image_dir):
    """Map each file stem in ``image_dir`` to one image file name, matching extensions case-insensitively."""
    best = {}
    for file_name in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in _IMAGE_EXTENSIONS:
            continue
        rank = _IMAGE_EXTENSIONS.index(ext)
        if stem not in best or rank < best[stem][0]:
            best[stem] = (rank, file_name)
    return {stem: file_name for stem, (_, file_name) in best.items()}


def _find_image_name(image_dir, stem, image_index):
    """Return the file name of the image belonging to ``stem``, or None when there is none."""
    # Exact probes first: same lookup order as the lowercase extension list.
    for ext in _IMAGE_EXTENSIONS:
        if os.path.exists(os.path.join(image_dir, stem + ext)):
            return stem + ext
    # Fallback for extensions written in another case (e.g. '.JPG').
    return image_index.get(stem)


def process_dataset(src_path, remap_dict, dest_path, classes_to_keep=None, id_to_name_map=None):
    """Copy one YOLO dataset into the combined dataset, rewriting class ids.

    For each of the 'train', 'valid' and 'test' splits that has an ``images``
    folder under ``src_path``, every ``labels/*.txt`` file is filtered and
    remapped. A label file and its image are written to ``dest_path`` only
    when at least one annotation survives and the image exists, so the
    destination never receives a label without its image.

    Args:
        src_path: Root folder of the source dataset.
        remap_dict: Source class id -> final class id. Annotations whose id is
            not a key are dropped.
        dest_path: Root folder of the combined dataset.
        classes_to_keep: Optional collection of source class names; when given
            (and non-empty), annotations of any other class are dropped.
        id_to_name_map: Source class id -> source class name. Required when
            ``classes_to_keep`` is given.

    Raises:
        ValueError: If ``classes_to_keep`` is given without ``id_to_name_map``,
            or (from ``int``) if a label line does not start with an integer.
    """
    if classes_to_keep and id_to_name_map is None:
        raise ValueError("id_to_name_map is required when classes_to_keep is given")
    keep_names = set(classes_to_keep) if classes_to_keep else None

    for split in ('train', 'valid', 'test'):
        src_image_dir = os.path.join(src_path, split, 'images')
        src_label_dir = os.path.join(src_path, split, 'labels')
        if not os.path.exists(src_image_dir):
            continue
        dest_image_dir = os.path.join(dest_path, split, 'images')
        dest_label_dir = os.path.join(dest_path, split, 'labels')
        os.makedirs(dest_image_dir, exist_ok=True)
        os.makedirs(dest_label_dir, exist_ok=True)

        image_index = _index_images(src_image_dir)
        labels_without_image = 0
        for label_filename in sorted(glob(os.path.join(src_label_dir, '*.txt'))):
            base_filename = os.path.basename(label_filename)
            with open(label_filename, 'r') as f_in:
                new_lines = _remap_label_lines(f_in, remap_dict, keep_names, id_to_name_map)
            if not new_lines:
                continue
            # Locate the image before writing anything, so a missing image
            # cannot leave an orphan label file in the combined dataset.
            image_name = _find_image_name(src_image_dir, os.path.splitext(base_filename)[0], image_index)
            if image_name is None:
                labels_without_image += 1
                continue
            with open(os.path.join(dest_label_dir, base_filename), 'w') as f_out:
                f_out.writelines(new_lines)
            shutil.copy(os.path.join(src_image_dir, image_name), os.path.join(dest_image_dir, image_name))
        if labels_without_image:
            print(f"⚠️ '{src_path}' [{split}]: 이미지가 없는 라벨 {labels_without_image}개를 건너뛰었습니다.")

# Merge the four datasets one after another into the combined folder.
print("1/4 - 기존 데이터셋 1 처리 중...")
process_dataset(src_path_d1, remap_d1, combined_path)

print("2/4 - 기존 데이터셋 2 처리 중...")
process_dataset(src_path_d2, remap_d2, combined_path)

# Datasets 3 and 4 additionally pass the manually mapped source class names
# together with the id -> name table of the dataset.
print("3/4 - 추가 데이터셋 3 처리 중...")
process_dataset(
    src_path_d3,
    remap_d3,
    combined_path,
    classes_to_keep=list(MANUAL_MAP_D3),
    id_to_name_map=id_to_name_d3,
)

print("4/4 - 추가 데이터셋 4 처리 중...")
process_dataset(
    src_path_d4,
    remap_d4,
    combined_path,
    classes_to_keep=list(MANUAL_MAP_D4),
    id_to_name_map=id_to_name_d4,
)

print("✅ 데이터 병합 완료!")
print(50 * "-")



'C:/Users/Admin/work space/2nd/acne_dataset_final_ALL' 폴더를 생성하고 4개 데이터셋 병합을 시작합니다...
1/4 - 기존 데이터셋 1 처리 중...
2/4 - 기존 데이터셋 2 처리 중...
3/4 - 추가 데이터셋 3 처리 중...
4/4 - 추가 데이터셋 4 처리 중...
✅ 데이터 병합 완료!
--------------------------------------------------


In [41]:
# --- 6. Write the final data.yaml ---
final_yaml_path = os.path.join(combined_path, 'data.yaml')
# Key order is kept as written here because the dump below uses sort_keys=False.
yaml_data = dict(
    train='train/images',
    val='valid/images',
    test='test/images',
    nc=len(FINAL_CLASS_NAMES),
    names=FINAL_CLASS_NAMES,
)
with open(final_yaml_path, 'w', encoding='utf-8') as f:
    yaml.dump(yaml_data, f, allow_unicode=True, sort_keys=False)
print(f"✅ 최종 '{final_yaml_path}' 파일 생성 완료!")
print(50 * "-")



✅ 최종 'C:/Users/Admin/work space/2nd/acne_dataset_final_ALL\data.yaml' 파일 생성 완료!
--------------------------------------------------


In [42]:
# --- 7. Fine-tune the final model ---
if not os.path.exists(PREVIOUS_MODEL_PATH):
    print(f"🚨 에러: 이전 모델 '{PREVIOUS_MODEL_PATH}'를 찾을 수 없습니다.")
else:
    print(f"🚀 이전 최강 모델 '{PREVIOUS_MODEL_PATH}'을 불러와 최종 학습을 시작합니다...")
    model = YOLO(PREVIOUS_MODEL_PATH)
    results = model.train(data=final_yaml_path, epochs=150, imgsz=640, batch=16, patience=50, project=BASE_DATA_PATH, name=TRAINING_NAME, workers=4, cache=True)

    # The run is started with exist_ok=False (the default), so when
    # '<project>/<name>' already exists the trainer saves into an incremented
    # folder instead. Ask the trainer where it actually saved rather than
    # rebuilding the path from TRAINING_NAME, which could point at the stale
    # weights of an earlier run.
    trainer = getattr(model, 'trainer', None)
    run_dir = str(getattr(trainer, 'save_dir', None) or os.path.join(BASE_DATA_PATH, TRAINING_NAME))
    print(f"\n🎉 최종 학습 완료! 결과 저장 경로: {run_dir}")

    # --- 8. Validate the final model on the test split ---
    print("\n🚀 테스트 데이터셋으로 최종 모델 성능을 검증합니다...")
    best_model_path = str(getattr(trainer, 'best', None) or os.path.join(run_dir, 'weights', 'best.pt'))
    if os.path.exists(best_model_path):
        model = YOLO(best_model_path)
        metrics = model.val(data=final_yaml_path, split='test', imgsz=640)
        print("\n--- 최종 성능 지표 ---")
        print(f"mAP50-95: {metrics.box.map:.4f}")
        print(f"mAP50: {metrics.box.map50:.4f}")
        print(f"mAP75: {metrics.box.map75:.4f}")
    else:
        # Report the missing checkpoint instead of skipping validation silently.
        print(f"🚨 에러: 학습된 모델 '{best_model_path}'를 찾을 수 없습니다.")

🚀 이전 최강 모델 'C:/Users/Admin/work space/2nd/acne_model_8_classes_retrained\weights\best.pt'을 불러와 최종 학습을 시작합니다...
New https://pypi.org/project/ultralytics/8.3.202 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.200  Python-3.13.7 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/Admin/work space/2nd/acne_dataset_final_ALL\data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=150, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=No

KeyboardInterrupt: 