In [326]:
import os
import glob
import random
import warnings
from typing import List, Tuple
from joblib import dump, load
import cv2
import numpy as np
from tqdm import tqdm

from skimage.feature import hog
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score
)

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

print("Libraries imported")

# Dataset layout: <DATA_DIR>/<split>/{images,labels}
DATA_DIR = '../data'
TRAIN_IMG_DIR, TRAIN_LBL_DIR = (
    os.path.join(DATA_DIR, 'train', sub) for sub in ('images', 'labels')
)
VAL_IMG_DIR, VAL_LBL_DIR = (
    os.path.join(DATA_DIR, 'valid', sub) for sub in ('images', 'labels')
)
TEST_IMG_DIR, TEST_LBL_DIR = (
    os.path.join(DATA_DIR, 'test', sub) for sub in ('images', 'labels')
)

Libraries imported


In [327]:
# ===== Candidate-box proposal modules: on/off switches =====
USE_PREPROC = False  # preprocessing (foreground mask) based proposals
USE_SLIDING = False  # sliding-window proposals
USE_EDGE = True      # edge / contour based proposals
USE_MSER = False     # MSER blob based proposals
USE_SEG = False      # segmentation based proposals (SLIC etc.)
USE_KP = False       # keypoint-cluster based proposals


In [328]:
# Parameters
# ============================
# Detection hyperparameters
DET_SCALES        = [0.75, 1.0, 1.25]   # sliding-window scales
DET_STRIDE_RATIO  = 0.5                 # stride as a fraction of the window size
DET_SCORE_THRESH  = 0.5                 # SVM decision_function threshold
DET_NMS_IOU_THR   = 0.3                 # IoU above which NMS removes a box
# ============================
# HOG parameters
# PATCH_SIZE is defined once here; every crop is resized to this size before HOG.
PATCH_SIZE          = (128, 128)   # (H, W)
HOG_ORIENTATIONS    = 9
HOG_PIXELS_PER_CELL = (8, 8)
HOG_CELLS_PER_BLOCK = (2, 2)
HOG_BLOCK_NORM      = 'L2-Hys'
HOG_TRANSFORM_SQRT  = True
# ============================
# SVM parameters
SVM_C            = 1.0
SVM_CLASS_WEIGHT = "balanced"   # or a dict such as {0: 1.0, 1: 1.5}
SVM_MAX_ITER     = 5000
SVM_RANDOM_STATE = 0
# ============================
# Binary class labels
BACKGROUND_LABEL = 0
INSECT_LABEL     = 1
# ============================
# Preprocessing parameters
BORDER_PX      = 13     # border width (px) used to sample background statistics
COLOR_SPACE    = "LAB"  # "LAB" recommended ("HSV" also supported)
COLOR_WEIGHT   = 1.6    # weight of the colour score (higher = foreground driven by colour distance)
EDGE_WEIGHT    = 2.75   # weight of the edge score (higher = foreground driven by contours)
GAUSS_KSIZE    = 5      # pre-blur kernel size (must be odd)
CC_CONNECT     = 8      # connectivity (4 or 8)
AREA_MIN_RATIO = 0.03   # minimum connected-component area (fraction of image) - removes specks
AREA_MAX_RATIO = 0.45   # maximum connected-component area - cuts large background blobs
OTSU_BIAS      = 0.00   # bias subtracted from the fused score before Otsu (try 0~0.2)
# ============================
# Data sampling parameters
TRAIN_MAX_IMAGES       = 3000    # None = use the whole train split
VAL_MAX_IMAGES         = None
TEST_MAX_IMAGES        = None
MAX_POS_PER_IMG        = 50      # maximum number of positive boxes per image
NEG_POS_RATIO          = 2.0     # negatives sampled per positive
NEG_IOU_THR            = 0.1     # IoU upper bound between a background box and any GT box
BG_MAX_ATTEMPTS_FACTOR = 50
# ============================
# Detection (sliding window + NMS) parameters
SW_SCALES        = (1.0, 1.5, 2.0, 3.0)
SW_STRIDE_RATIO  = 0.5
CLS_SCORE_THR    = 0.5   # SVM decision_function threshold
NMS_IOU_THR      = 0.5
EVAL_IOU_THR     = 0.5   # IoU used to decide a true positive



In [329]:
#YOLO format box reader

def read_yolo_boxes(label_path: str, img_w: int, img_h: int):
    """Read a YOLO-format label file and convert it to pixel boxes.

    Each usable line is ``cls xc yc w h`` with the four box values expressed
    as fractions of the image size. Lines with fewer than five fields are
    skipped.

    Args:
        label_path: Path of the ``.txt`` label file.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(boxes, labels)`` where ``boxes`` is a list of ``(x, y, w, h)``
        integer tuples (top-left corner plus size, clamped to the image) and
        ``labels`` is the list of integer class ids. Both lists are empty
        when the file does not exist.
    """
    boxes = []
    labels = []
    if not os.path.exists(label_path):
        return boxes, labels

    with open(label_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue

            cls = int(float(parts[0]))
            xc, yc, w, h = map(float, parts[1:5])

            bw = w * img_w
            bh = h * img_h
            x  = (xc - w / 2.0) * img_w
            y  = (yc - h / 2.0) * img_h

            # When the box overhangs the left/top edge, the overhanging part
            # must be cut off the size as well; otherwise clamping x/y to 0
            # would shift the box instead of cropping it.
            if x < 0:
                bw += x
            if y < 0:
                bh += y

            x = int(np.clip(x, 0, img_w - 1))
            y = int(np.clip(y, 0, img_h - 1))
            # Size is kept at >= 1 px and may not run past the right/bottom edge.
            bw = int(np.clip(bw, 1, img_w - x))
            bh = int(np.clip(bh, 1, img_h - y))

            boxes.append((x, y, bw, bh))
            labels.append(cls)
    return boxes, labels

#Image listing
def list_images(img_dir: str):
    """Return the sorted image paths found directly inside ``img_dir``.

    Files are matched by extension (jpg, jpeg, png, bmp in lower and upper
    case). Matches are collected in a set so that a file matched by both the
    lower- and upper-case pattern (possible when matching is
    case-insensitive) is listed only once.

    Args:
        img_dir: Directory to scan (non-recursive).

    Returns:
        Sorted list of unique matching paths. The count is also printed.
    """
    exts = ("*.jpg", "*.jpeg", "*.png", "*.bmp",
            "*.JPG", "*.JPEG", "*.PNG", "*.BMP")
    found = set()
    for ext in exts:
        found.update(glob.glob(os.path.join(img_dir, ext)))
    img_paths = sorted(found)
    print(f"[DEBUG] {img_dir} 에서 이미지 {len(img_paths)}개 발견")
    return img_paths

In [330]:
#patch generation + hog
def crop_resize_gray(bgr: np.ndarray,
                     box: Tuple[int, int, int, int],
                     size: Tuple[int, int] = PATCH_SIZE):
    """Crop ``box`` out of a BGR image, resize it and convert it to grayscale.

    Args:
        bgr: Source image; ``bgr.shape[:2]`` is read as (height, width).
        box: ``(x, y, w, h)`` region to crop; it is clamped to the image.
        size: Output size as ``(H, W)``.

    Returns:
        The grayscale patch, or ``None`` when the clamped box is empty.
    """
    left, top, box_w, box_h = box
    img_h, img_w = bgr.shape[:2]

    # Clamp the requested region to the image bounds.
    x_lo, y_lo = max(0, left), max(0, top)
    x_hi, y_hi = min(img_w, left + box_w), min(img_h, top + box_h)
    if x_lo >= x_hi or y_lo >= y_hi:
        return None

    patch = bgr[y_lo:y_hi, x_lo:x_hi]
    if patch.size == 0:
        return None

    # cv2.resize expects (width, height) while `size` is (H, W).
    out_h, out_w = size[0], size[1]
    resized = cv2.resize(patch, (out_w, out_h), interpolation=cv2.INTER_LINEAR)
    return cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)


def extract_hog_feature(gray: np.ndarray):
    """Compute the HOG descriptor of a grayscale patch as a float32 vector.

    All HOG settings come from the module-level ``HOG_*`` constants.
    """
    hog_settings = dict(
        orientations=HOG_ORIENTATIONS,
        pixels_per_cell=HOG_PIXELS_PER_CELL,
        cells_per_block=HOG_CELLS_PER_BLOCK,
        block_norm=HOG_BLOCK_NORM,
        transform_sqrt=HOG_TRANSFORM_SQRT,
        feature_vector=True,
    )
    descriptor = hog(gray, **hog_settings)
    return descriptor.astype(np.float32)


def sample_background_boxes(
    H: int,
    W: int,
    gt_boxes: List[Tuple[int,int,int,int]],
    num_neg: int,
    neg_iou_thr: float = NEG_IOU_THR,
    max_attempts_factor: int = BG_MAX_ATTEMPTS_FACTOR
):
    """Randomly sample background boxes that barely overlap any GT box.

    Box sides are drawn independently from ``[min_size, max_size)``, where
    ``min_size = max(16, min(H, W) // 10)`` and
    ``max_size = max(min_size + 1, min(H, W) // 2)``. A candidate is rejected
    when its IoU with any GT box exceeds ``neg_iou_thr``.

    Args:
        H, W: Image height and width.
        gt_boxes: Ground-truth boxes as ``(x, y, w, h)``.
        num_neg: Number of boxes wanted.
        neg_iou_thr: Maximum allowed IoU with a GT box.
        max_attempts_factor: Sampling stops after
            ``max_attempts_factor * num_neg`` attempts.

    Returns:
        List of at most ``num_neg`` ``(x, y, w, h)`` boxes; uses the global
        ``np.random`` state.
    """
    picked = []
    if num_neg <= 0:
        return picked

    # The side-length range depends only on the image size.
    short_side = min(H, W)
    min_size = max(16, short_side // 10)
    max_size = max(min_size + 1, short_side // 2)

    attempts_left = max_attempts_factor * num_neg
    while len(picked) < num_neg and attempts_left > 0:
        attempts_left -= 1

        bw = np.random.randint(min_size, max_size)
        bh = np.random.randint(min_size, max_size)
        x = np.random.randint(0, max(1, W - bw + 1))
        y = np.random.randint(0, max(1, H - bh + 1))
        candidate = (x, y, bw, bh)

        # Reject as soon as one GT box overlaps too much.
        if any(iou_xywh(candidate, g) > neg_iou_thr for g in gt_boxes):
            continue

        picked.append(candidate)

    return picked


def build_binary_dataset_for_split(
    img_dir: str,
    lbl_dir: str,
    split_name: str = "train",
    max_images: int = None,
    max_pos_per_img: int = MAX_POS_PER_IMG,
    neg_pos_ratio: float = NEG_POS_RATIO,
    neg_iou_thr: float = NEG_IOU_THR
):
    """Build a binary (insect vs. background) HOG dataset for one split.

    Positives are crops of the GT boxes; negatives are random background
    crops whose IoU with every GT box is <= ``neg_iou_thr``.

    Args:
        img_dir: Directory with the images.
        lbl_dir: Directory with the YOLO label files (same base names).
        split_name: Name used in the progress bar and log lines.
        max_images: If not None, only the first ``max_images`` (sorted) images are used.
        max_pos_per_img: If not None, cap on positive boxes taken per image.
        neg_pos_ratio: Negatives requested per positive; <= 0 disables negatives.
        neg_iou_thr: IoU bound forwarded to the background sampler.

    Returns:
        ``(X, y)``: float32 feature matrix and int32 label vector.

    Raises:
        RuntimeError: If no image is found or no sample could be collected.
    """
    img_paths = list_images(img_dir)
    if max_images is not None:
        img_paths = img_paths[:max_images]

    if not img_paths:
        raise RuntimeError(f"No images found in {img_dir}")

    X_list = []
    y_list = []

    def _collect(image, candidate_boxes, label):
        """Append the HOG feature of each croppable box; return how many were added."""
        added = 0
        for candidate in candidate_boxes:
            patch = crop_resize_gray(image, candidate)
            if patch is None:
                continue
            X_list.append(extract_hog_feature(patch))
            y_list.append(label)
            added += 1
        return added

    total_pos = 0
    total_neg = 0

    for img_path in tqdm(img_paths, desc=f"Build binary HOG dataset @ {split_name}"):
        bgr = cv2.imread(img_path)
        if bgr is None:
            continue
        H, W = bgr.shape[:2]

        stem = os.path.splitext(os.path.basename(img_path))[0]
        gt_boxes, _gt_labels = read_yolo_boxes(os.path.join(lbl_dir, stem + ".txt"), W, H)

        # Positives: GT boxes, optionally capped per image.
        pos_boxes = gt_boxes
        if max_pos_per_img is not None and len(pos_boxes) > max_pos_per_img:
            pos_boxes = pos_boxes[:max_pos_per_img]
        total_pos += _collect(bgr, pos_boxes, INSECT_LABEL)

        # Negatives: proportional to the positives; images without any
        # positive fall back to max_pos_per_img (or 10) as the base count.
        if neg_pos_ratio > 0:
            base_count = len(pos_boxes) or (max_pos_per_img or 10)
            bg_boxes = sample_background_boxes(
                H, W,
                gt_boxes,
                num_neg=int(base_count * neg_pos_ratio),
                neg_iou_thr=neg_iou_thr
            )
            total_neg += _collect(bgr, bg_boxes, BACKGROUND_LABEL)

    if not X_list:
        raise RuntimeError(f"No samples collected from {img_dir}. Check labels and paths.")

    X = np.vstack(X_list).astype(np.float32)
    y = np.array(y_list, dtype=np.int32)

    print(f"[INFO] {split_name} split: 총 샘플 수 = {len(y)}")
    print(f"[INFO]   - Positive(곤충=1): {total_pos}")
    print(f"[INFO]   - Negative(배경=0): {total_neg}")
    return X, y


def evaluate_patch_split(
    clf: LinearSVC,
    scaler: StandardScaler,
    X: np.ndarray,
    y: np.ndarray,
    split_name: str = "valid"
):
    """Print patch-level accuracy, classification report and confusion matrix.

    ``X`` is standardised with ``scaler`` and classified with ``clf``; the
    predictions are compared against ``y`` (0 = background, 1 = insect).
    Nothing is returned.
    """
    y_pred = clf.predict(scaler.transform(X))

    acc = accuracy_score(y, y_pred)
    print(f"\n[{split_name}] Accuracy: {acc * 100:.2f}%")

    print(f"\n[{split_name}] Classification report (0=background, 1=insect):")
    report = classification_report(
        y, y_pred,
        labels=[0, 1],
        target_names=["background", "insect"],
        digits=4
    )
    print(report)

    print(f"[{split_name}] Confusion matrix (rows=true, cols=pred):")
    matrix = confusion_matrix(y, y_pred, labels=[0, 1])
    print(matrix)

In [331]:
#Preprocessing-based box proposals
def _to_colorspace(bgr, mode="LAB"):
    mode = mode.upper()
    if mode == "LAB":
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB).astype(np.float32)
    elif mode == "HSV":
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
    else:
        raise ValueError("COLOR_SPACE must be 'LAB' or 'HSV'.")

def _border_stats(colimg, border_px):
    H, W = colimg.shape[:2]
    m = np.zeros((H, W), np.uint8)
    m[:border_px,:] = 1; m[-border_px:,:] = 1; m[:,:border_px] = 1; m[:,-border_px:] = 1
    samples = colimg[m>0].reshape(-1, colimg.shape[2])
    mu  = np.median(samples, axis=0).astype(np.float32)
    cov = np.cov(samples.T).astype(np.float32) if len(samples)>=10 else np.eye(colimg.shape[2], dtype=np.float32)
    return mu, cov

def _mahalanobis(X, mu, cov):
    C = cov.shape[0]
    inv = np.linalg.inv(cov + 1e-6*np.eye(C, dtype=np.float32))
    d = (X - mu).astype(np.float32)
    return np.sqrt(np.einsum('...i,ij,...j->...', d, inv, d)).astype(np.float32)

def _grad_mag(gray):
    """Sobel gradient magnitude of ``gray``, divided by its maximum (float32)."""
    grad_x, grad_y = (
        cv2.Sobel(gray, cv2.CV_32F, dx, dy, ksize=3)
        for dx, dy in ((1, 0), (0, 1))
    )
    magnitude = cv2.magnitude(grad_x, grad_y)
    # The small epsilon avoids dividing by zero on a flat image.
    magnitude = magnitude / (magnitude.max() + 1e-6)
    return magnitude.astype(np.float32)

def _norm01(x):
    x = x.astype(np.float32)
    mn, mx = float(x.min()), float(x.max())
    return np.zeros_like(x) if mx-mn < 1e-6 else (x-mn)/(mx-mn)

def make_fg_mask_simple(bgr):
    """Build a 0/255 foreground mask without texture cues or morphology.

    Steps:
      - Mahalanobis colour distance to the border (background) statistics,
        plus Sobel edge magnitude, fused with COLOR_WEIGHT / EDGE_WEIGHT.
      - Otsu threshold on the fused score.
      - Connected components whose area is outside
        [AREA_MIN_RATIO, AREA_MAX_RATIO] of the image area are discarded.
    """
    H, W = bgr.shape[:2]
    blurred = cv2.GaussianBlur(bgr, (GAUSS_KSIZE, GAUSS_KSIZE), 0)

    # Colour score: distance from the border colour distribution.
    col = _to_colorspace(blurred, COLOR_SPACE)
    mu, cov = _border_stats(col, BORDER_PX)
    n_channels = col.shape[2]
    color_score = _norm01(
        _mahalanobis(col.reshape(-1, n_channels), mu, cov).reshape(H, W)
    )

    # Edge score: normalised gradient magnitude.
    edge_score = _grad_mag(cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY))

    fused = _norm01(COLOR_WEIGHT*color_score + EDGE_WEIGHT*edge_score)
    if OTSU_BIAS != 0.0:
        fused = np.clip(fused - OTSU_BIAS, 0, 1)

    fused_u8 = (fused*255).astype(np.uint8)
    _, mask = cv2.threshold(fused_u8, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)

    # No morphology: only an area-based connected-component filter.
    num, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=int(CC_CONNECT))
    img_area = H*W
    areas = stats[1:, 4]  # fifth stats column is the component area; row 0 is skipped
    component_ids = np.arange(1, num)
    in_range = (areas >= AREA_MIN_RATIO*img_area) & (areas <= AREA_MAX_RATIO*img_area)

    out = np.zeros_like(mask)
    out[np.isin(labels, component_ids[in_range])] = 255
    return out

def propose_boxes_from_mask(
    mask,
    min_area_ratio=0.001,   # minimum component area relative to the image (noise cut)
    max_area_ratio=0.90,    # maximum component area relative to the image (big background blob cut)
    pad=4,                  # box padding (px)
    connectivity=8,         # connectivity (4 or 8)
    merge=True,             # whether overlapping boxes are merged
    merge_iou=0.30          # IoU threshold for merging
):
    """Turn a foreground mask into candidate boxes ``[(x, y, w, h), ...]``.

    - Bounding boxes are taken from the connected components of ``mask > 0``.
    - Components whose pixel area is outside
      ``[min_area_ratio, max_area_ratio]`` of the image area are dropped.
    - Each box is padded by ``pad`` pixels and clipped to the image.
    - Optionally, boxes with IoU >= ``merge_iou`` are merged into their
      common bounding box, repeated until nothing changes.

    Returns:
        Boxes sorted by area, largest first. Empty list for a ``None`` or
        empty mask.
    """
    if mask is None or mask.size == 0:
        return []

    # Binarise to 0/1 uint8 regardless of the input dtype.
    binm = (mask > 0).astype(np.uint8)

    H, W = binm.shape[:2]
    img_area = H * W
    if img_area == 0:
        return []

    num, labels, stats, _ = cv2.connectedComponentsWithStats(binm, connectivity=int(connectivity))
    boxes = []
    for i in range(1, num):  # label 0 is skipped (background)
        x, y, w, h, area = stats[i]
        if area < min_area_ratio * img_area or area > max_area_ratio * img_area:
            continue

        # Pad, then clip to the image (inclusive corner coordinates).
        x0 = max(0, x - pad)
        y0 = max(0, y - pad)
        x1 = min(W - 1, x + w + pad - 1)
        y1 = min(H - 1, y + h + pad - 1)

        xx, yy = int(x0), int(y0)
        ww, hh = int(max(1, x1 - x0 + 1)), int(max(1, y1 - y0 + 1))
        boxes.append((xx, yy, ww, hh))

    if not merge or len(boxes) <= 1:
        boxes.sort(key=lambda b: b[2]*b[3], reverse=True)
        return boxes

    # ---------------------------
    # Simple merge: fuse boxes with high IoU
    # ---------------------------
    def _xywh_to_xyxy(b):
        x, y, w, h = b
        return (x, y, x + w - 1, y + h - 1)

    def _xyxy_to_xywh(b):
        x1, y1, x2, y2 = b
        return (x1, y1, x2 - x1 + 1, y2 - y1 + 1)

    def _iou(a, b):
        ax1, ay1, ax2, ay2 = a
        bx1, by1, bx2, by2 = b
        iw = max(0, min(ax2, bx2) - max(ax1, bx1) + 1)
        ih = max(0, min(ay2, by2) - max(ay1, by1) + 1)
        inter = iw * ih
        if inter <= 0:
            return 0.0
        area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
        area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)
        return inter / float(area_a + area_b - inter + 1e-9)

    xyxy = [_xywh_to_xyxy(b) for b in boxes]
    changed = True
    # Repeat full passes until a pass performs no merge: a merged box grows
    # and may start overlapping boxes it did not overlap before.
    while changed and len(xyxy) > 1:
        changed = False
        new_xyxy = []
        used = [False] * len(xyxy)

        for i in range(len(xyxy)):
            if used[i]:
                continue
            merged = xyxy[i]
            used[i] = True
            for j in range(i + 1, len(xyxy)):
                if used[j]:
                    continue
                if _iou(merged, xyxy[j]) >= merge_iou:
                    # Union as the common bounding box.
                    x1 = min(merged[0], xyxy[j][0])
                    y1 = min(merged[1], xyxy[j][1])
                    x2 = max(merged[2], xyxy[j][2])
                    y2 = max(merged[3], xyxy[j][3])
                    merged = (x1, y1, x2, y2)
                    used[j] = True
                    changed = True
            new_xyxy.append(merged)
        xyxy = new_xyxy

    merged_boxes = [_xyxy_to_xywh(b) for b in xyxy]
    merged_boxes.sort(key=lambda b: b[2]*b[3], reverse=True)
    return merged_boxes

In [332]:
#슬라이딩 윈도우
def generate_sliding_window_boxes(
    img_h: int,
    img_w: int,
    base_size: Tuple[int, int] = PATCH_SIZE,
    scales: List[float] = DET_SCALES,
    stride_ratio: float = DET_STRIDE_RATIO
) -> List[Tuple[int, int, int, int]]:
    """Generate multi-scale sliding-window candidate boxes.

    For every scale the ``base_size`` window (H, W) is scaled and moved over
    the image with a stride of ``stride_ratio`` times the window size (at
    least 1 px). A window larger than the image still yields one box at the
    origin.

    Returns:
        ``[(x, y, w, h), ...]`` ordered by scale, then row, then column.
    """
    base_h, base_w = base_size
    windows = []

    for scale in scales:
        win_h = int(round(base_h * scale))
        win_w = int(round(base_w * scale))
        if min(win_h, win_w) <= 0:
            continue

        step_y = max(1, int(round(win_h * stride_ratio)))
        step_x = max(1, int(round(win_w * stride_ratio)))

        rows = range(0, max(1, img_h - win_h + 1), step_y)
        cols = range(0, max(1, img_w - win_w + 1), step_x)
        windows.extend((x, y, win_w, win_h) for y in rows for x in cols)

    return windows


In [333]:
#Edge / Contour 기반 proposal
def propose_boxes_from_edges(
    bgr: np.ndarray,
    min_area_ratio: float = 0.03,
    max_area_ratio: float = 0.45
) -> List[Tuple[int, int, int, int]]:
    """Propose boxes from Canny edges and their external contours.

    The bounding rectangle of each contour is kept when its area, relative
    to the image area, lies within ``[min_area_ratio, max_area_ratio]``.
    """
    H, W = bgr.shape[:2]
    img_area = H * W

    # Grayscale -> blur -> edge map.
    smoothed = cv2.GaussianBlur(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 50, 150)

    contours, _ = cv2.findContours(
        edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    proposals = []
    for rect in map(cv2.boundingRect, contours):
        x, y, w, h = rect
        ratio = (w * h) / float(img_area)
        if min_area_ratio <= ratio <= max_area_ratio:
            proposals.append((x, y, w, h))

    return proposals


In [334]:
#MSER 기반 proposal
def propose_boxes_from_mser(
    bgr: np.ndarray,
    min_area_ratio: float = 0.0005,
    max_area_ratio: float = 0.5
) -> List[Tuple[int, int, int, int]]:
    """Propose boxes from MSER regions of the grayscale image.

    The MSER min/max area is set from the given ratios of the image area;
    exact duplicate boxes are removed.
    """
    H, W = bgr.shape[:2]
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # Create the detector without keyword arguments and configure it
    # through its setters.
    detector = cv2.MSER_create()
    detector.setDelta(5)
    detector.setMinArea(int(min_area_ratio * H * W))
    detector.setMaxArea(int(max_area_ratio * H * W))

    regions, _ = detector.detectRegions(gray)

    # A set drops boxes that are exactly identical.
    unique_boxes = {
        (x, y, w, h) for (x, y, w, h) in map(cv2.boundingRect, regions)
    }
    return list(unique_boxes)


In [335]:
#세그멘테이션 기반 proposal (SLIC)
from skimage.segmentation import slic

def propose_boxes_from_segmentation(
    bgr: np.ndarray,
    n_segments: int = 200,
    compactness: float = 10.0,
    min_area_ratio: float = 0.001,
    max_area_ratio: float = 0.4
) -> List[Tuple[int, int, int, int]]:
    """Propose boxes from SLIC superpixels.

    The bounding box of every superpixel is kept when its box area, relative
    to the image area, lies within ``[min_area_ratio, max_area_ratio]``.
    """
    H, W = bgr.shape[:2]
    img_area = H * W

    segments = slic(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB),
        n_segments=n_segments,
        compactness=compactness,
        start_label=0
    )

    proposals = []
    for seg_id in np.unique(segments):
        rows, cols = np.where(segments == seg_id)
        if rows.size == 0:
            continue
        top, bottom = rows.min(), rows.max()
        left, right = cols.min(), cols.max()
        box_w = right - left + 1
        box_h = bottom - top + 1
        ratio = (box_w * box_h) / float(img_area)
        if min_area_ratio <= ratio <= max_area_ratio:
            proposals.append((int(left), int(top), int(box_w), int(box_h)))

    return proposals


In [336]:
#키포인트 클러스터링 기반 proposal
from sklearn.cluster import DBSCAN

def propose_boxes_from_keypoints(
    bgr: np.ndarray,
    max_corners: int = 500,
    quality_level: float = 0.01,
    min_distance: float = 5.0,
    eps: float = 25.0,
    min_samples: int = 5,
    min_area_ratio: float = 0.03,
    max_area_ratio: float = 0.45
) -> List[Tuple[int, int, int, int]]:
    """Propose boxes from DBSCAN clusters of corner keypoints.

    Corners come from ``cv2.goodFeaturesToTrack``; each non-noise cluster
    yields its bounding box, kept when the box area ratio is within
    ``[min_area_ratio, max_area_ratio]`` and then padded by 4 px (clipped to
    the image).
    """
    H, W = bgr.shape[:2]
    img_area = H * W
    pad = 4  # small padding around each cluster box

    corners = cv2.goodFeaturesToTrack(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY),
        maxCorners=max_corners,
        qualityLevel=quality_level,
        minDistance=min_distance
    )
    if corners is None or len(corners) == 0:
        return []

    points = corners.reshape(-1, 2)  # (N, 2)

    # Density-based clustering of the corner points.
    cluster_ids = DBSCAN(eps=eps, min_samples=min_samples).fit(points).labels_

    proposals = []
    for cid in np.unique(cluster_ids):
        if cid == -1:
            continue  # DBSCAN noise label

        members = points[cluster_ids == cid]
        col_vals, row_vals = members[:, 0], members[:, 1]
        left, right = col_vals.min(), col_vals.max()
        top, bottom = row_vals.min(), row_vals.max()

        # The area filter is applied to the unpadded extent.
        ratio = ((right - left + 1) * (bottom - top + 1)) / float(img_area)
        if ratio < min_area_ratio or ratio > max_area_ratio:
            continue

        px1 = max(0, int(left) - pad)
        py1 = max(0, int(top) - pad)
        px2 = min(W - 1, int(right) + pad)
        py2 = min(H - 1, int(bottom) + pad)
        proposals.append((px1, py1, px2 - px1 + 1, py2 - py1 + 1))

    return proposals


In [337]:
#IoU calculation
def iou_xywh(box1, box2):
    """Intersection-over-union of two ``(x, y, w, h)`` boxes.

    Coordinates are treated as inclusive pixel indices (a box of width ``w``
    covers columns ``x .. x + w - 1``). Returns 0.0 when the boxes do not
    overlap.
    """
    ax, ay, aw, ah = box1
    bx, by, bw, bh = box2

    # Inclusive bottom-right corners.
    a_right, a_bottom = ax + aw - 1, ay + ah - 1
    b_right, b_bottom = bx + bw - 1, by + bh - 1

    overlap_w = max(0, min(a_right, b_right) - max(ax, bx) + 1)
    overlap_h = max(0, min(a_bottom, b_bottom) - max(ay, by) + 1)
    inter = overlap_w * overlap_h
    if inter <= 0:
        return 0.0

    # The epsilon keeps the denominator non-zero.
    union = aw * ah + bw * bh - inter + 1e-9
    return inter / float(union)

In [338]:
#NMS 함수 (IoU 기준) final_boxes, final_scores return
def nms_xywh(
    boxes: List[Tuple[int, int, int, int]],
    scores: List[float],
    iou_thr: float = DET_NMS_IOU_THR
) -> Tuple[List[Tuple[int, int, int, int]], List[float]]:
    """Greedy non-maximum suppression for ``(x, y, w, h)`` boxes.

    The highest-scoring remaining box is kept and every other remaining box
    whose IoU with it is greater than ``iou_thr`` is discarded; this repeats
    until no box is left.

    Returns:
        ``(final_boxes, final_scores)`` in descending score order. Both are
        empty lists when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return [], []

    box_arr = np.array(boxes, dtype=np.float32)
    score_arr = np.array(scores, dtype=np.float32)

    pending = score_arr.argsort()[::-1]  # indices by descending score
    kept = []

    while len(pending) > 0:
        best = pending[0]
        kept.append(int(best))

        others = pending[1:]
        if len(others) == 0:
            break

        best_box = tuple(box_arr[best])
        overlaps = np.array(
            [iou_xywh(best_box, tuple(box_arr[j])) for j in others],
            dtype=np.float32
        )

        # Only boxes that do not overlap the kept one too much stay in play.
        pending = others[overlaps <= iou_thr]

    final_boxes = [tuple(box_arr[i].astype(int)) for i in kept]
    final_scores = [float(score_arr[i]) for i in kept]
    return final_boxes, final_scores


In [339]:
#후보 박스 생성
def get_candidate_boxes(
    bgr: np.ndarray,
    use_preproc: bool = True,
    use_sliding: bool = True,
    use_edge: bool = False,
    use_mser: bool = False,
    use_seg: bool = False,
    use_kp: bool = False
) -> List[Tuple[int, int, int, int]]:
    """Collect candidate boxes for one image from the enabled proposal modules.

    Modules (each toggled by its flag): preprocessing mask, sliding window,
    edge/contour, MSER, SLIC segmentation and keypoint clusters. Exact
    duplicate boxes are removed; the order of the result is that of a set.
    """
    H, W = bgr.shape[:2]

    def _from_preproc():
        return propose_boxes_from_mask(
            make_fg_mask_simple(bgr),
            min_area_ratio=0.001,
            max_area_ratio=0.9,
            pad=4,
            connectivity=8,
            merge=True,
            merge_iou=0.30
        )

    def _from_sliding():
        return generate_sliding_window_boxes(
            H, W,
            base_size=PATCH_SIZE,
            scales=SW_SCALES,
            stride_ratio=SW_STRIDE_RATIO
        )

    # (flag, producer) pairs, evaluated in this order.
    producers = (
        (use_preproc, _from_preproc),
        (use_sliding, _from_sliding),
        (use_edge, lambda: propose_boxes_from_edges(bgr)),
        (use_mser, lambda: propose_boxes_from_mser(bgr)),
        (use_seg, lambda: propose_boxes_from_segmentation(bgr)),
        (use_kp, lambda: propose_boxes_from_keypoints(bgr)),
    )

    all_boxes: List[Tuple[int, int, int, int]] = []
    for enabled, produce in producers:
        if enabled:
            all_boxes.extend(produce())

    # Drop boxes that are exactly identical.
    all_boxes = list({(x, y, w, h) for (x, y, w, h) in all_boxes})
    return all_boxes




In [340]:
#후보 박스 기반 hog,svm
def compute_scores_for_boxes(
    bgr: np.ndarray,
    boxes: List[Tuple[int, int, int, int]],
    scaler: StandardScaler,
    svm: LinearSVC
) -> Tuple[List[Tuple[int, int, int, int]], np.ndarray]:
    """Score candidate boxes with the HOG + SVM classifier.

    Each box is cropped, resized to PATCH_SIZE and converted to grayscale;
    its HOG feature is standardised with ``scaler`` and scored with
    ``svm.decision_function``. Boxes that cannot be cropped are dropped.

    Returns:
        ``(valid_boxes, scores)``; ``([], empty array)`` when no box is usable.
    """
    scored = []
    for box in boxes:
        gray = crop_resize_gray(bgr, box, size=PATCH_SIZE)
        if gray is not None:
            scored.append((box, extract_hog_feature(gray)))

    if not scored:
        return [], np.array([])

    valid_boxes: List[Tuple[int, int, int, int]] = [box for box, _ in scored]
    X = np.vstack([feat for _, feat in scored]).astype(np.float32)
    scores = svm.decision_function(scaler.transform(X))

    return valid_boxes, scores


In [341]:
#점수 기준 1차 필터링 + NMS
def filter_boxes_by_score(
    boxes: List[Tuple[int, int, int, int]],
    scores: np.ndarray,
    score_thr: float
) -> Tuple[List[Tuple[int, int, int, int]], np.ndarray]:
    """Keep only the boxes whose SVM score is >= ``score_thr``.

    Args:
        boxes: Candidate boxes.
        scores: One score per box (array or any array-like).
        score_thr: Minimum score to keep a box.

    Returns:
        ``(kept_boxes, kept_scores)`` in the original order, or
        ``([], empty array)`` when nothing passes or the input is empty.
    """
    # Accept plain lists as well as arrays.
    scores = np.asarray(scores)
    if len(boxes) == 0 or scores.size == 0:
        return [], np.array([])

    keep_idx = np.where(scores >= score_thr)[0]
    if len(keep_idx) == 0:
        return [], np.array([])

    boxes_pos = [boxes[i] for i in keep_idx]
    scores_pos = scores[keep_idx]
    return boxes_pos, scores_pos


In [342]:
#디텍션
def detect_in_image(
    bgr: np.ndarray,
    scaler: StandardScaler,
    svm: LinearSVC,
    use_preproc: bool = USE_PREPROC,
    use_sliding: bool = USE_SLIDING,
    use_edge: bool = USE_EDGE,
    use_mser: bool = USE_MSER,
    use_seg: bool = USE_SEG,
    use_kp: bool = USE_KP
):
    """Run the full detection pipeline on one BGR image.

    Pipeline:
      1) candidate boxes from the enabled proposal modules
      2) HOG + SVM score per candidate
      3) score filter with CLS_SCORE_THR
      4) NMS with NMS_IOU_THR

    Returns:
        ``(final_boxes, final_scores)``. When any stage yields nothing the
        result is ``([], empty array)``.
    """
    proposal_flags = dict(
        use_preproc=use_preproc,
        use_sliding=use_sliding,
        use_edge=use_edge,
        use_mser=use_mser,
        use_seg=use_seg,
        use_kp=use_kp,
    )

    # 1) Candidate generation.
    candidates = get_candidate_boxes(bgr, **proposal_flags)
    if len(candidates) == 0:
        return [], np.array([])

    # 2) Classifier scores for the candidates that could be cropped.
    scorable_boxes, raw_scores = compute_scores_for_boxes(
        bgr, candidates, scaler, svm
    )
    if len(scorable_boxes) == 0:
        return [], np.array([])

    # 3) First-pass filter on the classification threshold.
    confident_boxes, confident_scores = filter_boxes_by_score(
        scorable_boxes, raw_scores, CLS_SCORE_THR
    )
    if len(confident_boxes) == 0:
        return [], np.array([])

    # 4) Remove overlapping duplicates.
    return nms_xywh(confident_boxes, confident_scores, iou_thr=NMS_IOU_THR)


In [343]:
#mAP 계산 함수
import numpy as np

# --------------------------------------------------
# AP 계산용 헬퍼 (단일 클래스, IoU=EVAL_IOU_THR 고정)
# --------------------------------------------------
def compute_ap(scores: list[float], tp_flags: list[int], num_gt: int) -> float:
    """Average precision for a single class from scored detections.

    Args:
        scores: Confidence score of each detection.
        tp_flags: 1 if the detection is a true positive, 0 if a false positive.
        num_gt: Total number of ground-truth boxes.

    Returns:
        Area under the precision envelope, integrated over the points where
        recall increases. 0.0 when there is no GT or no detection.
    """
    if num_gt == 0 or len(scores) == 0:
        return 0.0

    # Rank detections by descending score.
    ranking = np.argsort(np.array(scores, dtype=np.float32))[::-1]
    hits = np.array(tp_flags, dtype=np.int32)[ranking]
    misses = 1 - hits

    hits_cum = np.cumsum(hits)
    misses_cum = np.cumsum(misses)

    recall    = hits_cum / (num_gt + 1e-9)
    precision = hits_cum / (hits_cum + misses_cum + 1e-9)

    # VOC style: replace each precision by the maximum precision at any
    # later rank (running maximum taken from the end).
    envelope = np.maximum.accumulate(precision[::-1])[::-1]

    # Sum rectangle areas wherever recall increases.
    ap = 0.0
    last_recall = 0.0
    for r, p in zip(recall, envelope):
        if r <= last_recall:
            continue
        ap += (r - last_recall) * p
        last_recall = r

    return float(ap)


In [344]:
#디텍터 평가 TP/FP/FN 기반 + AP/mAP 계산
def eval_detector_on_split(
    img_dir: str,
    lbl_dir: str,
    scaler: StandardScaler,
    svm: LinearSVC,
    split_name: str = "valid",
    max_images: int = None,
    det_kwargs: dict | None = None,
):
    """Evaluate the detector on a split: TP/FP/FN, precision/recall/F1 and AP.

    For each image, detections are visited in descending score order and
    matched to the not-yet-used GT box with the highest IoU; a match with
    IoU >= EVAL_IOU_THR counts as TP, anything else as FP. GT boxes left
    unmatched count as FN. Images that cannot be read are skipped.

    Args:
        img_dir: Directory with the images.
        lbl_dir: Directory with the YOLO label files (same base names).
        scaler: Feature scaler passed to the detector.
        svm: Classifier passed to the detector.
        split_name: Name used in the progress bar and the printed summary.
        max_images: If not None, only the first ``max_images`` images are used.
        det_kwargs: Extra keyword arguments forwarded to ``detect_in_image``.

    Returns:
        Dict with keys ``"TP"``, ``"FP"``, ``"FN"``, ``"precision"``,
        ``"recall"``, ``"f1"`` and ``"ap"``. The same numbers are printed.
    """
    if det_kwargs is None:
        det_kwargs = {}

    img_paths = list_images(img_dir)
    if max_images is not None:
        img_paths = img_paths[:max_images]

    TP = 0
    FP = 0
    FN = 0

    # For AP: score and TP flag of every detection over the whole split.
    all_scores = []
    all_tp_flags = []

    for img_path in tqdm(img_paths, desc=f"Eval detector @ {split_name}"):
        bgr = cv2.imread(img_path)
        if bgr is None:
            continue
        H, W = bgr.shape[:2]
        base = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(lbl_dir, base + ".txt")
        gt_boxes, _ = read_yolo_boxes(label_path, W, H)

        # Run detection, forwarding the caller's options.
        pred_boxes, pred_scores = detect_in_image(
            bgr,
            scaler,
            svm,
            **det_kwargs
        )

        used_gt = [False] * len(gt_boxes)

        # Within an image, match in descending score order.
        if len(pred_boxes) > 0:
            order = np.argsort(pred_scores)[::-1]
        else:
            order = []

        for idx in order:
            pb = pred_boxes[idx]
            score = float(pred_scores[idx])

            # Best still-unmatched GT box for this detection.
            best_iou = 0.0
            best_idx = -1
            for gi, gb in enumerate(gt_boxes):
                if used_gt[gi]:
                    continue
                iou = iou_xywh(pb, gb)
                if iou > best_iou:
                    best_iou = iou
                    best_idx = gi

            if best_iou >= EVAL_IOU_THR and best_idx >= 0:
                TP += 1
                used_gt[best_idx] = True
                is_tp = 1
            else:
                FP += 1
                is_tp = 0

            all_scores.append(score)
            all_tp_flags.append(is_tp)

        # Every GT box that was never matched is a miss.
        FN += used_gt.count(False)

    precision = TP / (TP + FP + 1e-9)
    recall    = TP / (TP + FN + 1e-9)
    f1        = 2 * precision * recall / (precision + recall + 1e-9)

    # Total number of GT boxes = TP + FN.
    total_gt = TP + FN
    ap = compute_ap(all_scores, all_tp_flags, total_gt)

    print(f"\n[{split_name}] Detection results (IoU >= {EVAL_IOU_THR}):")
    print(f"TP={TP}, FP={FP}, FN={FN}")
    print(f"Precision={precision:.4f}, Recall={recall:.4f}, F1-score={f1:.4f}")
    print(f"mAP@{EVAL_IOU_THR:.2f} (single-class AP) = {ap:.4f}")

    # Returned so that callers can compare configurations programmatically.
    return {
        "TP": TP,
        "FP": FP,
        "FN": FN,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "ap": ap,
    }


In [345]:
#훈련 끝난 뒤, 한 번에 여러 config를 돌리는 함수
from itertools import product

def run_detector_ablation(
    scaler: StandardScaler,
    svm: LinearSVC,
    img_dir: str = VAL_IMG_DIR,
    lbl_dir: str = VAL_LBL_DIR,
    max_images: int = 200,
    configs=None,
    split_name: str = "valid"
):
    """
    Evaluate the detector once per candidate-generation configuration.

    Each configuration is a ``(name, det_kwargs)`` pair. ``det_kwargs`` is
    forwarded unchanged to ``eval_detector_on_split`` and the run is labelled
    ``f"{split_name}_{name}"`` so the printed results can be told apart.

    Args:
        scaler: Scaler passed through to ``eval_detector_on_split``.
        svm: Classifier passed through to ``eval_detector_on_split``.
        img_dir: Image directory to evaluate on.
        lbl_dir: Label directory that belongs to ``img_dir``.
        max_images: Forwarded as ``max_images`` to ``eval_detector_on_split``.
        configs: Optional list of ``(name, det_kwargs)`` pairs to run. When
            ``None`` (default) the built-in list defined below is used. An
            empty list runs nothing.
        split_name: Prefix used in the label of every run. Defaults to
            ``"valid"``; pass e.g. ``"test"`` when ``img_dir``/``lbl_dir``
            point at another split so the printed header is not misleading.

    Returns:
        None. This function only prints a banner per configuration and
        delegates the evaluation to ``eval_detector_on_split``.
    """
    # Candidate-box modules, in the order of their `use_*` keyword flags.
    modules = ["preproc", "sliding", "edge", "mser", "seg", "kp"]

    def only(*enabled):
        """Return the full `use_*` flag dict with just `enabled` modules on."""
        return {f"use_{m}": (m in enabled) for m in modules}

    if configs is None:
        # Hand-picked combinations; uncomment a line to include it in the run.
        configs = [
            ("preproc_only", only("preproc")),
            # ("sliding_only", only("sliding")),
            # ("mser_only",    only("mser")),
            # ("seg_only",     only("seg")),
            # ("kp_only",      only("kp")),
            # ("preproc+mser", only("preproc", "mser")),
            # ("preproc+seg",  only("preproc", "seg")),
            # ("all_modules",  only(*modules)),
        ]

        # To sweep all 2^6 - 1 non-empty combinations instead, replace the
        # list above with the generated one below (uses itertools.product):
        # configs = []
        # for bits in product([0, 1], repeat=len(modules)):
        #     if not any(bits):
        #         continue  # skip the combination that enables no module
        #     enabled = [m for m, b in zip(modules, bits) if b]
        #     configs.append(("+".join(enabled), only(*enabled)))

    # Run every configuration in turn.
    for cfg_name, det_kwargs in configs:
        print("\n" + "=" * 60)
        print(f"=== Ablation config: {cfg_name} ===")
        print("  det_kwargs:", det_kwargs)
        print("=" * 60)

        eval_detector_on_split(
            img_dir,
            lbl_dir,
            scaler,
            svm,
            split_name=f"{split_name}_{cfg_name}",
            max_images=max_images,
            det_kwargs=det_kwargs
        )


In [346]:
# Main: train -> save -> detection evaluation

if __name__ == "__main__":
    # Seed Python's and NumPy's global RNGs before any dataset is built.
    random.seed(0)
    np.random.seed(0)

    # ----- 5-1. Build the patch datasets -----
    print("\n=== 1) Train binary HOG dataset 생성 ===")
    X_train, y_train = build_binary_dataset_for_split(
        TRAIN_IMG_DIR,
        TRAIN_LBL_DIR,
        split_name="train",
        max_images=TRAIN_MAX_IMAGES,
        max_pos_per_img=MAX_POS_PER_IMG,
        neg_pos_ratio=NEG_POS_RATIO,
        neg_iou_thr=NEG_IOU_THR
    )

    print("\n=== 2) Valid binary HOG dataset 생성 ===")
    X_val, y_val = build_binary_dataset_for_split(
        VAL_IMG_DIR,
        VAL_LBL_DIR,
        split_name="valid",
        max_images=VAL_MAX_IMAGES,
        max_pos_per_img=MAX_POS_PER_IMG,
        neg_pos_ratio=NEG_POS_RATIO,
        neg_iou_thr=NEG_IOU_THR
    )

    # A test patch dataset can be built the same way if needed:
    # print("\n=== 3) Test binary HOG dataset 생성 ===")
    # X_test, y_test = build_binary_dataset_for_split(
    #     TEST_IMG_DIR,
    #     TEST_LBL_DIR,
    #     split_name="test",
    #     max_images=TEST_MAX_IMAGES,
    #     max_pos_per_img=MAX_POS_PER_IMG,
    #     neg_pos_ratio=NEG_POS_RATIO,
    #     neg_iou_thr=NEG_IOU_THR
    # )

    # ----- 5-2. Train the SVM -----
    # The scaler is fitted on the training features only; the validation
    # features are passed raw to evaluate_patch_split together with the scaler.
    print("\n=== 4) StandardScaler 학습 ===")
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)

    print("\n=== 5) Linear SVM (이진 분류: insect vs background) 학습 ===")
    svm = LinearSVC(
        C=SVM_C,
        class_weight=SVM_CLASS_WEIGHT,
        max_iter=SVM_MAX_ITER,
        random_state=SVM_RANDOM_STATE
    )
    svm.fit(X_train_std, y_train)
    print("→ SVM 학습 완료")

    # ----- 5-3. Patch-level performance -----
    evaluate_patch_split(svm, scaler, X_train, y_train, split_name="train")
    evaluate_patch_split(svm, scaler, X_val,   y_val,   split_name="valid")

    # ----- 5-4. Save the model -----
    # Persist the fitted artifacts BEFORE the detection evaluation so that an
    # exception raised while running the detector cannot discard the trained
    # scaler and SVM.
    dump(scaler, "scaler_detector.joblib")
    dump(svm,    "svm_detector.joblib")
    print("\n[INFO] scaler_detector.joblib / svm_detector.joblib 저장 완료")

    # ----- 5-5. Detection-level ablation on the validation split -----
    print("\n=== 6) Detector ablation on VALID set ===")
    run_detector_ablation(
        scaler,
        svm,
        img_dir=VAL_IMG_DIR,
        lbl_dir=VAL_LBL_DIR,
        max_images=200
    )





=== 1) Train binary HOG dataset 생성 ===
[DEBUG] ../data/train/images 에서 이미지 11502개 발견


Build binary HOG dataset @ train: 100%|██████████| 3000/3000 [00:35<00:00, 83.98it/s] 


[INFO] train split: 총 샘플 수 = 13727
[INFO]   - Positive(곤충=1): 4577
[INFO]   - Negative(배경=0): 9150

=== 2) Valid binary HOG dataset 생성 ===
[DEBUG] ../data/valid/images 에서 이미지 1095개 발견


Build binary HOG dataset @ valid: 100%|██████████| 1095/1095 [00:10<00:00, 105.58it/s]


[INFO] valid split: 총 샘플 수 = 4023
[INFO]   - Positive(곤충=1): 1341
[INFO]   - Negative(배경=0): 2682

=== 4) StandardScaler 학습 ===

=== 5) Linear SVM (이진 분류: insect vs background) 학습 ===
→ SVM 학습 완료

[train] Accuracy: 100.00%

[train] Classification report (0=background, 1=insect):
              precision    recall  f1-score   support

  background     1.0000    1.0000    1.0000      9150
      insect     1.0000    1.0000    1.0000      4577

    accuracy                         1.0000     13727
   macro avg     1.0000    1.0000    1.0000     13727
weighted avg     1.0000    1.0000    1.0000     13727

[train] Confusion matrix (rows=true, cols=pred):
[[9150    0]
 [   0 4577]]

[valid] Accuracy: 82.28%

[valid] Classification report (0=background, 1=insect):
              precision    recall  f1-score   support

  background     0.8525    0.8878    0.8698      2682
      insect     0.7553    0.6928    0.7227      1341

    accuracy                         0.8228      4023
   macro avg    

Eval detector @ valid_preproc_only: 100%|██████████| 200/200 [00:05<00:00, 34.07it/s]


[valid_preproc_only] Detection results (IoU >= 0.5):
TP=109, FP=51, FN=177
Precision=0.6812, Recall=0.3811, F1-score=0.4888
mAP@0.50 (single-class AP) = 0.3010

[INFO] scaler_detector.joblib / svm_detector.joblib 저장 완료



