In [None]:
# Mount Google Drive so the project folder below is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Switch the working directory to the project folder on Drive.
# NOTE: `%cd` is an IPython line magic; keep it on its own line (no trailing comment).
%cd '/content/drive/MyDrive/CV/25-2컴비'

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1aE_a_G0fPBK8xZhdBCIjPHz19X4kFBp1/25-2컴비


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
class LeafPreprocessor:
    """Preprocess plant-leaf images: denoise, remove the background, resize.

    Every method that takes an ``image`` expects the RGB array returned by
    :meth:`load_image` (OpenCV loads BGR; ``load_image`` converts to RGB).
    Background-removal methods return ``(result, mask)`` where ``result`` is
    the input with background pixels painted white.

    Mask conventions differ between methods and are kept as-is for
    backward compatibility:

    * ``remove_background_green_mask_v1`` / ``_v2`` return a 0/255 mask.
    * ``remove_background_grabcut`` / ``_hybrid`` return a 0/1 mask.
    """

    # Intermediate (width, height) at which denoising/segmentation is performed
    # before the final resize down to ``target_size``.
    WORKING_SIZE = (512, 512)

    # HSV (lower, upper) bounds passed to cv2.inRange.
    _GREEN_RANGE = ((25, 40, 40), (85, 255, 255))        # leaf green
    _LIGHT_GREEN_RANGE = ((35, 30, 30), (90, 255, 255))  # yellowish / bright green
    _YELLOW_RANGE = ((15, 40, 40), (35, 255, 255))       # wilted leaves, some diseases
    _BROWN_RANGE = ((0, 20, 20), (20, 200, 200))         # diseased / dried areas

    # pipeline name -> (denoise method, background-removal method name, run watershed?)
    _PIPELINES = {
        'simple': ('gaussian', 'remove_background_green_mask_v1', False),
        'simplev2': ('gaussian', 'remove_background_green_mask_v2', False),
        'grabcut': ('bilateral', 'remove_background_grabcut', False),
        'hybrid': ('bilateral', 'remove_background_hybrid', False),
        'advanced': ('bilateral', 'remove_background_green_mask_v2', True),
    }

    def __init__(self, target_size=(224, 224)):
        """
        Args:
            target_size: final output size handed to ``cv2.resize`` as its
                ``dsize`` argument (i.e. ``(width, height)``).
        """
        self.target_size = target_size

    # ------------------------------------------------------------------
    # Private helpers shared by the background-removal methods
    # ------------------------------------------------------------------
    @staticmethod
    def _color_mask(hsv, ranges):
        """Return the union (bitwise OR) of ``cv2.inRange`` masks for ``ranges``."""
        combined = None
        for lower, upper in ranges:
            band = cv2.inRange(hsv, np.array(lower), np.array(upper))
            combined = band if combined is None else cv2.bitwise_or(combined, band)
        return combined

    @staticmethod
    def _refine_mask(mask):
        """Clean a binary mask: close (2 iterations) then open (1 iteration), 5x5 kernel."""
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)
        return cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

    @staticmethod
    def _composite_on_white(image, mask_255):
        """Keep ``image`` where ``mask_255`` is non-zero; paint everything else white.

        Args:
            image: image to mask.
            mask_255: single-channel uint8 mask with values 0 or 255.
        """
        foreground = cv2.bitwise_and(image, image, mask=mask_255)
        white = np.full(image.shape, 255, dtype=np.uint8)
        # Restrict the white canvas to the background region only.
        white = cv2.bitwise_and(white, white, mask=cv2.bitwise_not(mask_255))
        return cv2.add(foreground, white)

    @staticmethod
    def _foreground_from_grabcut(labels):
        """Collapse GrabCut labels to a 0/1 mask (definite + probable foreground -> 1)."""
        is_background = (labels == cv2.GC_PR_BGD) | (labels == cv2.GC_BGD)
        return np.where(is_background, 0, 1).astype('uint8')

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load_image(self, image_path):
        """Read an image from disk and return it in RGB channel order.

        Raises:
            ValueError: if ``cv2.imread`` cannot read the file (returns None).
        """
        img = cv2.imread(str(image_path))
        if img is None:
            raise ValueError(f"Cannot load image: {image_path}")
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def remove_noise(self, image, method='gaussian'):
        """Denoise ``image``.

        Args:
            image: input image.
            method: one of 'gaussian', 'median', 'bilateral'. Any other value
                returns ``image`` unchanged.
        """
        if method == 'gaussian':  # general smoothing
            return cv2.GaussianBlur(image, (5, 5), 0)
        elif method == 'median':  # effective against salt-and-pepper noise
            return cv2.medianBlur(image, 5)
        elif method == 'bilateral':  # removes noise while preserving edges
            return cv2.bilateralFilter(image, 9, 75, 75)
        return image

    # Version 1
    def remove_background_green_mask_v1(self, image):
        """Remove the background by keeping only green pixels.

        Two overlapping HSV ranges are OR-ed together to capture several
        green tones.

        Returns:
            (result, mask): image on a white background, and the 0/255 mask.
        """
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        mask = self._color_mask(hsv, (self._GREEN_RANGE, self._LIGHT_GREEN_RANGE))
        mask = self._refine_mask(mask)
        return self._composite_on_white(image, mask), mask

    def remove_background_grabcut(self, image):
        """Remove the background with GrabCut initialised from a central rectangle.

        The rectangle starts at 10% of the width/height and spans 80% of each
        dimension.

        Returns:
            (result, mask): image on a white background, and the 0/1 mask.
        """
        labels = np.zeros(image.shape[:2], np.uint8)

        # Scratch model arrays required by cv2.grabCut.
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)

        h, w = image.shape[:2]
        rect = (int(w * 0.1), int(h * 0.1), int(w * 0.8), int(h * 0.8))

        cv2.grabCut(image, labels, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

        foreground = self._foreground_from_grabcut(labels)
        result = self._composite_on_white(image, foreground * 255)
        return result, foreground

    # Version 2
    def remove_background_green_mask_v2(self, image):
        """Colour-based background removal that also keeps yellow/brown lesions.

        Returns:
            (result, mask): image on a white background, and the 0/255 mask.
        """
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        mask = self._color_mask(
            hsv, (self._GREEN_RANGE, self._YELLOW_RANGE, self._BROWN_RANGE)
        )
        mask = self._refine_mask(mask)
        return self._composite_on_white(image, mask), mask

    def remove_background_hybrid(self, image):
        """Hybrid background removal: colour mask refined by GrabCut.

        Step 1: find a rough foreground region from green/yellow/brown colour.
        Step 2: refine it with GrabCut, seeded from that mask.

        If GrabCut raises ``cv2.error``, the colour mask from step 1 is used
        instead.

        Returns:
            (result, mask): image on a white background, and the 0/1 mask.
        """
        # Step 1: colour-based initial mask.
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        initial_mask = self._color_mask(
            hsv, (self._GREEN_RANGE, self._YELLOW_RANGE, self._BROWN_RANGE)
        )
        initial_mask = self._refine_mask(initial_mask)

        # Step 2: seed GrabCut with "probable" labels only, so it is free to
        # flip any pixel.
        labels = np.where(initial_mask > 0, cv2.GC_PR_FGD, cv2.GC_PR_BGD).astype('uint8')
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)

        # Fewer iterations than the rectangle variant, for speed.
        try:
            cv2.grabCut(image, labels, None, bgd_model, fgd_model, 3, cv2.GC_INIT_WITH_MASK)
            final_mask = self._foreground_from_grabcut(labels)
        except cv2.error:
            # Only OpenCV failures are tolerated; anything else (e.g.
            # KeyboardInterrupt) must propagate.
            final_mask = (initial_mask > 0).astype('uint8')

        result = self._composite_on_white(image, final_mask * 255)
        return result, final_mask

    def apply_otsu_threshold(self, image):
        """Binarise ``image`` with Otsu's threshold (3-D input is converted to gray first)."""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            gray = image

        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        return binary

    def watershed_segmentation(self, image):
        """Separate objects with the watershed algorithm.

        Returns:
            (result, markers): a copy of ``image`` with pixels where
            ``markers == -1`` painted red, and the marker array returned by
            ``cv2.watershed``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Remove small noise.
        kernel = np.ones((3, 3), np.uint8)
        opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)

        # Sure-background region.
        sure_bg = cv2.dilate(opening, kernel, iterations=3)

        # Sure-foreground region: pixels far from the mask boundary.
        dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
        _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
        sure_fg = np.uint8(sure_fg)

        # Region that is neither sure background nor sure foreground.
        unknown = cv2.subtract(sure_bg, sure_fg)

        # Label markers; shift by one so that 0 can be reserved for "unknown".
        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers + 1
        markers[unknown == 255] = 0

        markers = cv2.watershed(image, markers)

        result = image.copy()
        result[markers == -1] = [255, 0, 0]  # mark boundaries in red (RGB)

        return result, markers

    def preprocess_pipeline(self, image_path, method='simple'):
        """Run the full preprocessing pipeline on one image file.

        Args:
            image_path: path of the image to process.
            method: pipeline to run, one of:
                'simple'   - Gaussian blur + green mask v1
                'simplev2' - Gaussian blur + green mask v2
                'grabcut'  - bilateral filter + GrabCut
                'hybrid'   - bilateral filter + colour mask refined by GrabCut
                'advanced' - bilateral filter + green mask v2; additionally
                             stores a watershed visualisation in the results
                             (the returned image is the background-removed one).

        Returns:
            (processed_image, intermediate_results): the final image resized to
            ``self.target_size`` and a dict of intermediate images for
            visualisation.

        Raises:
            ValueError: if ``method`` is not a known pipeline (checked before
                any file is read), or if the image cannot be loaded.
        """
        if method not in self._PIPELINES:
            raise ValueError(
                f"Unknown preprocessing method {method!r}; "
                f"expected one of {sorted(self._PIPELINES)}"
            )
        denoise_method, remover_name, run_watershed = self._PIPELINES[method]

        # 1. Load image
        original = self.load_image(image_path)
        results = {'original': original.copy()}

        # 2. Resize to the larger working resolution
        resized = cv2.resize(original, self.WORKING_SIZE)
        results['resized'] = resized.copy()

        # 3. Remove noise
        denoised = self.remove_noise(resized, method=denoise_method)
        results['denoised'] = denoised.copy()

        # 4. Remove background
        bg_removed, mask = getattr(self, remover_name)(denoised)
        results['mask'] = mask
        results['bg_removed'] = bg_removed.copy()

        # 5. Optional watershed overlay (visualisation only)
        if run_watershed:
            watershed_result, _ = self.watershed_segmentation(bg_removed)
            results['watershed'] = watershed_result.copy()

        # 6. Final resize (model input size)
        final_resized = cv2.resize(bg_removed, self.target_size)
        results['final'] = final_resized

        return final_resized, results

    def visualize_results(self, results, save_path=None):
        """Show every image in ``results`` on a 3-column grid.

        Args:
            results: mapping of name -> image array (2-D arrays are drawn in
                grayscale), e.g. the dict returned by
                :meth:`preprocess_pipeline`.
            save_path: if given, the figure is also saved there.

        Raises:
            ValueError: if ``results`` is empty.
        """
        n_images = len(results)
        if n_images == 0:
            raise ValueError("results is empty; nothing to visualize")

        cols = 3
        rows = (n_images + cols - 1) // cols

        # squeeze=False always yields a 2-D array of axes, so a single-row
        # grid (1-3 images) is handled the same way as a multi-row grid.
        fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
        axes = axes.ravel()

        for ax, (name, img) in zip(axes, results.items()):
            if img.ndim == 2:  # grayscale image or mask
                ax.imshow(img, cmap='gray')
            else:
                ax.imshow(img)
            ax.set_title(name.replace('_', ' ').title())
            ax.axis('off')

        # Hide the unused cells of the grid.
        for ax in axes[n_images:]:
            ax.axis('off')

        plt.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=150, bbox_inches='tight')

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


### Run PlantDoc Segmentation

In [None]:
import os
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm
import json

def batch_preprocess_dataset(input_dir, output_dir, method='simple', target_size=(224, 224)):
    """Preprocess every image under ``input_dir`` and mirror it into ``output_dir``.

    Images are discovered recursively, run through
    ``LeafPreprocessor.preprocess_pipeline`` and written to the same relative
    path under ``output_dir``. Per-file failures are logged and counted, not
    raised. A ``preprocessing_stats.json`` summary is written to
    ``output_dir``.

    Args:
        input_dir: directory containing the original images.
        output_dir: directory that receives the processed images.
        method: pipeline name forwarded to ``preprocess_pipeline``
            ('simple', 'simplev2', 'grabcut', 'hybrid' or 'advanced').
        target_size: final image size passed to ``LeafPreprocessor``.

    Returns:
        dict with keys 'total', 'success', 'failed' and 'failed_files'.
    """
    input_root = Path(input_dir)
    output_root = Path(output_dir)

    preprocessor = LeafPreprocessor(target_size=target_size)

    # Create the output directory.
    output_root.mkdir(parents=True, exist_ok=True)

    # Compare suffixes case-insensitively: this also matches mixed-case
    # suffixes (e.g. '.Jpg') and cannot list the same file twice on a
    # case-insensitive filesystem.
    image_extensions = {'.jpg', '.jpeg', '.png'}
    image_files = sorted(
        path for path in input_root.rglob('*')
        if path.is_file() and path.suffix.lower() in image_extensions
    )

    print(f"Found {len(image_files)} images")

    # Running statistics.
    stats = {
        'total': len(image_files),
        'success': 0,
        'failed': 0,
        'failed_files': []
    }

    for img_path in tqdm(image_files, desc="Processing images"):
        try:
            processed, _ = preprocessor.preprocess_pipeline(str(img_path), method=method)

            # Preserve the path relative to the input root.
            output_path = output_root / img_path.relative_to(input_root)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # The pipeline works in RGB; OpenCV writes BGR.
            # cv2.imwrite reports failure by returning False rather than
            # raising, so check it to avoid counting a failed write as success.
            written = cv2.imwrite(str(output_path), cv2.cvtColor(processed, cv2.COLOR_RGB2BGR))
            if not written:
                raise IOError(f"cv2.imwrite failed for {output_path}")

            stats['success'] += 1

        except Exception as e:
            # Best-effort batch: record the failure and continue.
            print(f"\nError processing {img_path}: {e}")
            stats['failed'] += 1
            stats['failed_files'].append(str(img_path))

    # Persist the statistics; keep non-ASCII paths human-readable.
    with open(output_root / 'preprocessing_stats.json', 'w', encoding='utf-8') as f:
        json.dump(stats, f, indent=2, ensure_ascii=False)

    print(f"\n{'='*50}")
    print("Preprocessing completed!")
    print(f"Success: {stats['success']}/{stats['total']}")
    print(f"Failed: {stats['failed']}/{stats['total']}")
    print(f"Results saved to: {output_dir}")
    print(f"{'='*50}")

    return stats

In [None]:
# Run cell: preprocess the PlantDoc test split with the 'advanced' pipeline.
if __name__ == "__main__":
    print("\n\nBatch preprocessing")
    # Positional order is (input_dir, output_dir).
    batch_preprocess_dataset(
        "/content/drive/MyDrive/CV/25-2컴비/PlantDoc-Dataset/test",
        "/content/drive/MyDrive/CV/25-2컴비/PlantDoc-Dataset/processed_advanced_test",
        method='advanced',
    )



Batch preprocessing
Found 236 images


Processing images: 100%|██████████| 236/236 [01:39<00:00,  2.36it/s]


Preprocessing completed!
Success: 236/236
Failed: 0/236
Results saved to: /content/drive/MyDrive/CV/25-2컴비/PlantDoc-Dataset/processed_advanced_test





### Run PlantVillage Segmentation

In [None]:
import os
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm
import json

def batch_preprocess_dataset(input_dir, output_dir, method1='simple', method2='hybrid',target_size=(224, 224)):
    """Preprocess a dataset, choosing the pipeline per image from its folder name.

    NOTE: this redefines the ``batch_preprocess_dataset`` declared in an
    earlier cell with a different signature (``method1``/``method2`` instead
    of ``method``); once this cell has run, the earlier version is shadowed.

    Images are discovered recursively under ``input_dir`` and written to the
    same relative path under ``output_dir``. Per-file failures are logged and
    counted, not raised. A ``preprocessing_stats.json`` summary is written to
    ``output_dir``.

    Args:
        input_dir: directory containing the original images.
        output_dir: directory that receives the processed images.
        method1: pipeline used when the image's parent folder name contains
            "healthy" (case-insensitive).
        method2: pipeline used for every other image.
        target_size: final image size passed to ``LeafPreprocessor``.

    Returns:
        dict with keys 'total', 'success', 'failed' and 'failed_files'.
    """
    input_root = Path(input_dir)
    output_root = Path(output_dir)

    preprocessor = LeafPreprocessor(target_size=target_size)

    # Create the output directory.
    output_root.mkdir(parents=True, exist_ok=True)

    # Compare suffixes case-insensitively: this also matches mixed-case
    # suffixes (e.g. '.Jpg') and cannot list the same file twice on a
    # case-insensitive filesystem.
    image_extensions = {'.jpg', '.jpeg', '.png'}
    image_files = sorted(
        path for path in input_root.rglob('*')
        if path.is_file() and path.suffix.lower() in image_extensions
    )

    print(f"Found {len(image_files)} images")

    # Running statistics.
    stats = {
        'total': len(image_files),
        'success': 0,
        'failed': 0,
        'failed_files': []
    }

    for img_path in tqdm(image_files, desc="Processing images"):
        try:
            # Pick the pipeline from the immediate parent folder name.
            folder_name = img_path.parent.name.lower()
            applied_method = method1 if "healthy" in folder_name else method2

            processed, _ = preprocessor.preprocess_pipeline(
                str(img_path),
                method=applied_method
            )

            # Preserve the path relative to the input root.
            output_path = output_root / img_path.relative_to(input_root)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # The pipeline works in RGB; OpenCV writes BGR.
            # cv2.imwrite reports failure by returning False rather than
            # raising, so check it to avoid counting a failed write as success.
            written = cv2.imwrite(str(output_path), cv2.cvtColor(processed, cv2.COLOR_RGB2BGR))
            if not written:
                raise IOError(f"cv2.imwrite failed for {output_path}")

            stats['success'] += 1

        except Exception as e:
            # Best-effort batch: record the failure and continue.
            print(f"\nError processing {img_path}: {e}")
            stats['failed'] += 1
            stats['failed_files'].append(str(img_path))

    # Persist the statistics; keep non-ASCII paths human-readable.
    with open(output_root / 'preprocessing_stats.json', 'w', encoding='utf-8') as f:
        json.dump(stats, f, indent=2, ensure_ascii=False)

    print(f"\n{'='*50}")
    print("Preprocessing completed!")
    print(f"Success: {stats['success']}/{stats['total']}")
    print(f"Failed: {stats['failed']}/{stats['total']}")
    print(f"Results saved to: {output_dir}")
    print(f"{'='*50}")

    return stats

In [None]:
# Run cell: preprocess the PlantVillage test split
# ('simple' for "healthy" folders, 'hybrid' for all others).
if __name__ == "__main__":
    print("\n\nBatch preprocessing")
    # Positional order is (input_dir, output_dir).
    batch_preprocess_dataset(
        "/content/drive/MyDrive/CV/25-2컴비/PlantVillage/test",
        "/content/drive/MyDrive/CV/25-2컴비/PlantVillage/processed_advanced_test",
        method1='simple',
        method2='hybrid',
    )



Batch preprocessing
Found 2009 images


Processing images: 100%|██████████| 2009/2009 [51:04<00:00,  1.53s/it]


Preprocessing completed!
Success: 2009/2009
Failed: 0/2009
Results saved to: /content/drive/MyDrive/CV/25-2컴비/PlantVillage/processed_advanced_test



