In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## 1. Segmentation Cross Entropy

In [2]:
def cross_entropy_2d(
    logits: torch.Tensor,
    target: torch.Tensor,
    ignore_index: int | None = None,
    reduction: str = "mean",
) -> torch.Tensor:
    """
    2D segmentation에 사용하는 Cross Entropy loss를 직접 구현.

    Args:
        logits: (B, C, H, W)
            - 각 픽셀에 대한 class별 logit 값
        target: (B, H, W), dtype=torch.long
            - 각 픽셀의 class index (0 ~ C-1)
        ignore_index: int or None
            - 이 값의 label은 loss 계산에서 제외 (ex. -1)
        reduction: "none" | "mean" | "sum"

    Returns:
        loss: scalar tensor or (B, H, W) (reduction="none"일 때)
    """
    B, C, H, W = logits.shape
    # (B, C, H, W) -> (B, H, W, C) -> (B*H*W, C)
    logits_flat = logits.permute(0, 2, 3, 1).reshape(-1, C)
    target_flat = target.view(-1)  # (B*H*W,)

    if ignore_index is not None:
        # 사용할 위치 mask
        valid_mask = target_flat != ignore_index
        logits_flat = logits_flat[valid_mask]            # (N_valid, C)
        target_flat = target_flat[valid_mask]            # (N_valid,)

    # numerical stable을 위해 log_softmax 사용
    log_probs = F.log_softmax(logits_flat, dim=1)        # (N_valid, C)
    # 각 sample의 정답 class의 log prob만 gather
    # target_flat: (N_valid,), unsqueeze -> (N_valid, 1)
    loss = -log_probs.gather(dim=1, index=target_flat.unsqueeze(1)).squeeze(1)  # (N_valid,)

    if reduction == "none":
        # 원래 (B, H, W) shape으로 되돌릴 수도 있지만,
        # 여기서는 일단 1D로만 반환
        return loss
    elif reduction == "mean":
        return loss.mean()
    elif reduction == "sum":
        return loss.sum()
    else:
        raise ValueError(f"Invalid reduction: {reduction}")



In [None]:
# Example inputs so this cell runs on a fresh kernel; shapes and values are arbitrary.
torch.manual_seed(0)
logits = torch.randn(2, 3, 8, 8)            # (B, C, H, W)
target = torch.randint(0, 3, (2, 8, 8))     # (B, H, W), class indices in [0, C-1]
target[:, 0, :] = -1                        # mark the first row of every image as "ignore"

loss_ce = cross_entropy_2d(logits, target, ignore_index=-1)
# logits: (B,C,H,W), target: (B,H,W)

## 2. Dice Loss (binary & multi-class)

In [3]:
def binary_dice_loss(
    logits: torch.Tensor,
    target: torch.Tensor,
    eps: float = 1e-6,
) -> torch.Tensor:
    """
    Dice loss for binary segmentation. Sigmoid is applied to `logits` internally.

    Args:
        logits: (B, 1, H, W) or (B, H, W)
            - per-pixel foreground logit
        target: (B, 1, H, W) or (B, H, W)
            - per-pixel ground truth (0 or 1); any dtype, may be non-contiguous
        eps:
            - small constant that keeps the ratio defined when both the
              prediction and the target are empty

    Returns:
        dice_loss: scalar tensor, 1 - (Dice coefficient averaged over the batch)
    """
    # Normalize both inputs to (B, 1, H, W)
    if logits.dim() == 3:
        logits = logits.unsqueeze(1)
    if target.dim() == 3:
        target = target.unsqueeze(1)

    probs = torch.sigmoid(logits)      # (B, 1, H, W)
    target = target.float()

    # (B, 1, H, W) -> (B, H*W). flatten() copies when needed, so a
    # non-contiguous target (e.g. a transposed mask) does not raise like view().
    probs_flat = probs.flatten(start_dim=1)
    target_flat = target.flatten(start_dim=1)

    intersection = (probs_flat * target_flat).sum(dim=1)        # (B,)
    denominator = probs_flat.sum(dim=1) + target_flat.sum(dim=1)  # (B,)

    dice = (2 * intersection + eps) / (denominator + eps)       # (B,)
    return 1 - dice.mean()


def multiclass_dice_loss(
    logits: torch.Tensor,
    target: torch.Tensor,
    ignore_index: int | None = None,
    eps: float = 1e-6,
) -> torch.Tensor:
    """
    Multi-class segmentation용 Dice loss.
    내부에서 softmax로 확률을 만들고, target은 one-hot으로 변환.

    Args:
        logits: (B, C, H, W)
            - 각 픽셀의 class별 logit
        target: (B, H, W), dtype=torch.long
            - 각 픽셀의 class index
        ignore_index: int or None
            - 이 class index는 dice에서 제외
        eps:
            - 분모가 0되는 상황 방지용 small constant

    Returns:
        dice_loss: scalar tensor (모든 class 평균 Dice loss)
    """
    B, C, H, W = logits.shape

    # (B, C, H, W) -> softmax -> (B, C, H, W)
    probs = F.softmax(logits, dim=1)

    # target: (B, H, W) -> one-hot: (B, H, W, C) -> (B, C, H, W)
    target_onehot = F.one_hot(
        target.clamp(min=0), num_classes=C
    ).permute(0, 3, 1, 2).float()  # 음수(ignore_index 방지용 clamp)

    # ignore_index가 있는 경우, 해당 위치를 모두 0으로 mask 처리
    if ignore_index is not None:
        ignore_mask = (target == ignore_index).unsqueeze(1)  # (B,1,H,W)
        target_onehot[ignore_mask.expand_as(target_onehot)] = 0.0
        probs = probs.masked_fill(ignore_mask.expand_as(probs), 0.0)

    # (B, C, H, W) -> (B, C, -1)
    probs_flat = probs.view(B, C, -1)
    target_flat = target_onehot.view(B, C, -1)

    intersection = (probs_flat * target_flat).sum(dim=2)        # (B, C)
    union = probs_flat.sum(dim=2) + target_flat.sum(dim=2)      # (B, C)

    dice = (2 * intersection + eps) / (union + eps)             # (B, C)
    # batch, class 평균
    dice_loss = 1 - dice.mean()
    return dice_loss

In [None]:
# Example inputs so this cell runs on a fresh kernel; shapes and values are arbitrary.
torch.manual_seed(0)

## Binary dice
bin_logits = torch.randn(2, 1, 8, 8)                 # (B, 1, H, W)
bin_target = torch.randint(0, 2, (2, 1, 8, 8))       # (B, 1, H, W), values in {0, 1}
loss_dice_bin = binary_dice_loss(bin_logits, bin_target)
# bin_logits: (B,1,H,W), bin_target: (B,1,H,W)

## Multi-class Dice
logits = torch.randn(2, 3, 8, 8)                     # (B, C, H, W)
target = torch.randint(0, 3, (2, 8, 8))              # (B, H, W)
target[:, 0, :] = -1                                 # pixels to ignore
loss_dice_mc = multiclass_dice_loss(logits, target, ignore_index=-1)
# logits: (B,C,H,W), target: (B,H,W)


## 3. Segmentation IoU / mIoU metric

In [4]:
def compute_binary_iou(
    logits: torch.Tensor,
    target: torch.Tensor,
    threshold: float = 0.5,
    eps: float = 1e-6,
) -> torch.Tensor:
    """
    IoU for binary segmentation.

    Args:
        logits: (B, 1, H, W) or (B, H, W)
            - foreground logit
        target: (B, 1, H, W) or (B, H, W)
            - ground truth (0 or 1); any dtype, may be non-contiguous
        threshold:
            - a pixel is positive when sigmoid(logits) >= threshold
        eps:
            - keeps the ratio defined when prediction and target are both empty
              (such a sample scores eps/eps = 1)

    Returns:
        mean_iou: scalar tensor (IoU averaged over the batch)
    """
    # Normalize both inputs to (B, 1, H, W)
    if logits.dim() == 3:
        logits = logits.unsqueeze(1)
    if target.dim() == 3:
        target = target.unsqueeze(1)

    pred = (torch.sigmoid(logits) >= threshold).float()  # hard 0/1 mask
    target = target.float()

    # (B, 1, H, W) -> (B, H*W). flatten() copies when needed, so a
    # non-contiguous target does not raise like view().
    pred_flat = pred.flatten(start_dim=1)
    target_flat = target.flatten(start_dim=1)

    intersection = (pred_flat * target_flat).sum(dim=1)                    # (B,)
    union = pred_flat.sum(dim=1) + target_flat.sum(dim=1) - intersection   # (B,)
    iou = (intersection + eps) / (union + eps)                             # (B,)
    return iou.mean()


def compute_multiclass_iou(
    logits: torch.Tensor,
    target: torch.Tensor,
    num_classes: int,
    ignore_index: int | None = None,
    eps: float = 1e-6,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Multi-class segmentation IoU / mIoU 계산.

    Args:
        logits: (B, C, H, W)
            - class별 logit
        target: (B, H, W), dtype=torch.long
            - GT class index
        num_classes: int
            - class 개수 C
        ignore_index: int or None
            - 이 label은 metric 계산에서 제외
        eps:
            - 0으로 나누기 방지

    Returns:
        iou_per_class: (C,) tensor
        miou: scalar tensor (유효 class만 평균)
    """
    # pred: (B, H, W), 예측 class index
    pred = logits.argmax(dim=1)  # (B, H, W)

    # ignore_index가 있으면 해당 위치는 빼버린다.
    if ignore_index is not None:
        mask = target != ignore_index  # (B,H,W)
        pred = pred[mask]
        target = target[mask]
    else:
        mask = torch.ones_like(target, dtype=torch.bool)
        pred = pred[mask]
        target = target[mask]

    # 이제 pred, target: (N,) 이고 각 값은 [0, num_classes-1]
    iou_per_class = pred.new_zeros(num_classes, dtype=torch.float32)

    for c in range(num_classes):
        pred_c = pred == c  # (N,)
        target_c = target == c  # (N,)

        intersection = (pred_c & target_c).sum().float()
        union = (pred_c | target_c).sum().float()

        if union == 0:
            # 아예 등장하지 않은 class -> NaN 대신 0으로 두고, 나중에 평균에서 제외해도 됨.
            iou = torch.tensor(0.0, device=logits.device)
        else:
            iou = (intersection + eps) / (union + eps)

        iou_per_class[c] = iou

    # union이 0인 class는 제외하고 평균내고 싶다면:
    valid_classes = iou_per_class > 0  # union==0이면 0이므로 자동 제외
    if valid_classes.any():
        miou = iou_per_class[valid_classes].mean()
    else:
        miou = torch.tensor(0.0, device=logits.device)

    return iou_per_class, miou

In [4]:
# Example inputs so this cell runs on a fresh kernel; shapes and values are arbitrary.
torch.manual_seed(0)

## 1. Binary IoU
bin_logits = torch.randn(2, 1, 8, 8)                 # (B, 1, H, W)
bin_target = torch.randint(0, 2, (2, 1, 8, 8))       # (B, 1, H, W), values in {0, 1}
iou_bin = compute_binary_iou(bin_logits, bin_target)

## 2. Multi-class iou / miou
C = 3                                                # number of classes
logits = torch.randn(2, C, 8, 8)                     # (B, C, H, W)
target = torch.randint(0, C, (2, 8, 8))              # (B, H, W)
target[:, 0, :] = -1                                 # pixels to ignore
iou_per_class, miou = compute_multiclass_iou(logits, target, num_classes=C, ignore_index=-1)

## 4. Focal loss

In [5]:

def focal_loss(
    logits: torch.Tensor,
    target: torch.Tensor,
    gamma: float = 2.0,
    alpha: float | None = None,
    reduction: str = "mean",
) -> torch.Tensor:
    """
    Multi-class classification/segmentation용 Focal Loss (CrossEntropy 기반).

    Args:
        logits: (B, C, ...)  (예: (B,C,H,W)도 가능)
            - class별 logit
        target: (B, ...) dtype=torch.long
            - 정답 class index (0 ~ C-1)
        gamma:
            - focusing parameter (보통 2.0)
        alpha:
            - class imbalance weighting (scalar)
              (보통 0.25 등, 여기서는 모든 class에 동일하게 적용)
        reduction: "none" | "mean" | "sum"

    Returns:
        focal_loss: scalar tensor 또는 sample별 loss
    """
    # logits: (B, C, ...) -> (N, C)
    num_classes = logits.size(1)
    logits_flat = logits.permute(0, *range(2, logits.dim()), 1).reshape(-1, num_classes)
    target_flat = target.view(-1)

    # log_softmax & softmax
    log_probs = F.log_softmax(logits_flat, dim=1)        # (N, C)
    probs = log_probs.exp()                              # (N, C)

    # 정답 class의 p_t, log_p_t
    # target_flat: (N,) -> (N,1)
    target_flat_clamped = target_flat.clamp(min=0)       # 음수 방지용
    log_p_t = log_probs.gather(dim=1, index=target_flat_clamped.unsqueeze(1)).squeeze(1)  # (N,)
    p_t = probs.gather(dim=1, index=target_flat_clamped.unsqueeze(1)).squeeze(1)          # (N,)

    # focal term
    focal_term = (1 - p_t) ** gamma                      # (N,)

    loss = -focal_term * log_p_t                         # (N,)

    # alpha가 주어지면 weight 곱해줌
    if alpha is not None:
        loss = alpha * loss

    if reduction == "none":
        return loss.view_as(target)
    elif reduction == "mean":
        return loss.mean()
    elif reduction == "sum":
        return loss.sum()
    else:
        raise ValueError(f"Invalid reduction: {reduction}")

In [None]:
# Example inputs so this cell runs on a fresh kernel; shapes and values are arbitrary.
torch.manual_seed(0)
logits = torch.randn(2, 3, 8, 8)            # (B, C, H, W)
target = torch.randint(0, 3, (2, 8, 8))     # (B, H, W), class indices in [0, C-1]

loss_focal = focal_loss(logits, target, gamma=2.0, alpha=0.25)

## 5. Bounding box IoU / GIoU

In [6]:
def box_area(boxes: torch.Tensor) -> torch.Tensor:
    """
    Area of axis-aligned boxes.

    Args:
        boxes: (N, 4) = (x1, y1, x2, y2)

    Returns:
        area: (N,)
    """
    # clamp so that degenerate boxes (x2 < x1 or y2 < y1) get area 0, not a negative value
    width = (boxes[:, 2] - boxes[:, 0]).clamp(min=0)
    height = (boxes[:, 3] - boxes[:, 1]).clamp(min=0)
    return width * height


def _as_float_boxes(boxes: torch.Tensor) -> torch.Tensor:
    """Return `boxes` unchanged if floating point, otherwise cast to the default float dtype."""
    if boxes.is_floating_point():
        return boxes
    return boxes.to(torch.get_default_dtype())


def _pairwise_inter_union(
    boxes1: torch.Tensor, boxes2: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """Pairwise intersection and union areas, each (N, M), for (N, 4) and (M, 4) boxes."""
    area1 = box_area(boxes1)  # (N,)
    area2 = box_area(boxes2)  # (M,)

    # Broadcast (N, 1, 2) against (1, M, 2) to get the corners of every pairwise overlap.
    top_left = torch.max(boxes1[:, None, 0:2], boxes2[None, :, 0:2])      # (N, M, 2)
    bottom_right = torch.min(boxes1[:, None, 2:4], boxes2[None, :, 2:4])  # (N, M, 2)

    # Disjoint boxes give a negative extent; clamp it to an empty overlap.
    overlap_wh = (bottom_right - top_left).clamp(min=0)                   # (N, M, 2)
    inter = overlap_wh[..., 0] * overlap_wh[..., 1]                       # (N, M)

    union = area1[:, None] + area2[None, :] - inter                       # (N, M)
    return inter, union


def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Pairwise IoU matrix between two sets of axis-aligned boxes.

    Args:
        boxes1: (N, 4) = (x1, y1, x2, y2)
        boxes2: (M, 4) = (x1, y1, x2, y2)
            Integer tensors are accepted and converted to floating point.

    Returns:
        iou: (N, M) floating-point tensor
            - iou[i, j] = IoU(boxes1[i], boxes2[j])
    """
    boxes1 = _as_float_boxes(boxes1)
    boxes2 = _as_float_boxes(boxes2)

    inter, union = _pairwise_inter_union(boxes1, boxes2)
    # clamp guards against 0/0 when both boxes are degenerate
    return inter / union.clamp(min=1e-6)


def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Pairwise Generalized IoU between two sets of axis-aligned boxes.

    GIoU = IoU - (|C| - |U|) / |C|
    C: smallest axis-aligned box enclosing both boxes, U: union of the two boxes.

    Args:
        boxes1: (N, 4) = (x1, y1, x2, y2)
        boxes2: (M, 4) = (x1, y1, x2, y2)
            Integer tensors are accepted and converted to floating point.

    Returns:
        giou: (N, M) floating-point tensor
    """
    boxes1 = _as_float_boxes(boxes1)
    boxes2 = _as_float_boxes(boxes2)

    # Intersection and union are computed once and reused for both terms.
    inter, union = _pairwise_inter_union(boxes1, boxes2)
    iou = inter / union.clamp(min=1e-6)                                   # (N, M)

    # Enclosing box C of every pair
    enclose_tl = torch.min(boxes1[:, None, 0:2], boxes2[None, :, 0:2])    # (N, M, 2)
    enclose_br = torch.max(boxes1[:, None, 2:4], boxes2[None, :, 2:4])    # (N, M, 2)
    enclose_wh = (enclose_br - enclose_tl).clamp(min=0)
    area_c = enclose_wh[..., 0] * enclose_wh[..., 1]                      # (N, M)

    # Penalty: fraction of the enclosing box not covered by the union
    return iou - (area_c - union) / area_c.clamp(min=1e-6)

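$$\mathrm{GIoU} = \mathrm{IoU} - \frac{|C \setminus U|}{|C|}$$

where $C$ is the smallest axis-aligned box enclosing both boxes and $U$ is their union.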


In [None]:
# Example inputs so this cell runs on a fresh kernel; values are arbitrary (x1, y1, x2, y2) boxes.
boxes1 = torch.tensor([[0.0, 0.0, 2.0, 2.0],
                       [1.0, 1.0, 4.0, 4.0]])      # (N, 4)
boxes2 = torch.tensor([[1.0, 1.0, 3.0, 3.0],
                       [0.0, 0.0, 2.0, 2.0],
                       [5.0, 5.0, 6.0, 6.0]])      # (M, 4)

iou_mat = box_iou(boxes1, boxes2)         # boxes1: (N,4), boxes2: (M,4)
giou_mat = generalized_box_iou(boxes1, boxes2)

## 6. Average Precision

In [7]:
import torch
import numpy as np

def compute_ap(recall, precision):
    """
    Average Precision: area under the interpolated precision-recall curve.

    This is the "all-point interpolation" scheme (as used by PASCAL VOC 2010+):
    precision is first replaced by its monotonically non-increasing envelope,
    then the area is summed as rectangles over every recall step. It is not a
    trapezoidal integration of the raw curve.

    Args:
        recall: 1D array-like of cumulative recall values, in ranking order
        precision: 1D array-like of cumulative precision values, same length

    Returns:
        ap: float (numpy scalar)
    """
    # Sentinels so the curve spans recall 0 -> 1
    mrec = np.concatenate(([0.0], np.asarray(recall, dtype=float), [1.0]))
    mpre = np.concatenate(([1.0], np.asarray(precision, dtype=float), [0.0]))

    # Precision envelope: each point takes the max precision found at any
    # higher recall, i.e. a running maximum taken from right to left.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Sum (recall step) * (envelope precision) at the points where recall changes
    idx = np.flatnonzero(mrec[1:] != mrec[:-1])
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])


def compute_map(pred_boxes, pred_scores, gt_boxes, iou_thresh=0.5):
    """
    PASCAL VOC style AP at a single IoU threshold (single class, single set of boxes).

    Matching rule: predictions are visited in descending score order; each one
    is compared with the GT box it overlaps most. It is a true positive when
    that IoU is >= iou_thresh and the GT box has not been claimed yet,
    otherwise a false positive.

    Args:
        pred_boxes: (N_pred, 4) array-like, (x1, y1, x2, y2)
        pred_scores: (N_pred,) array-like of confidence scores (need not be sorted)
        gt_boxes: (N_gt, 4) array-like, (x1, y1, x2, y2)
        iou_thresh: minimum IoU for a match

    Returns:
        ap: float. 0.0 when there are no GT boxes (recall is undefined then)
            or no predictions.
    """
    pred_boxes = np.asarray(pred_boxes, dtype=np.float64).reshape(-1, 4)
    pred_scores = np.asarray(pred_scores, dtype=np.float64).reshape(-1)
    gt_boxes = np.asarray(gt_boxes, dtype=np.float64).reshape(-1, 4)

    num_pred = len(pred_boxes)
    num_gt = len(gt_boxes)

    # Without GT boxes recall would be 0/0 and the argmax below would fail.
    if num_gt == 0:
        return 0.0

    # Sort by descending score; a stable sort keeps tied scores in input order.
    order = np.argsort(-pred_scores, kind="stable")
    pred_boxes = pred_boxes[order]

    tp = np.zeros(num_pred)
    fp = np.zeros(num_pred)
    detected = np.zeros(num_gt, dtype=bool)  # whether each GT box is already matched

    if num_pred > 0:
        # One (N_pred, N_gt) IoU matrix instead of a box_iou call per prediction
        iou_matrix = box_iou(torch.tensor(pred_boxes), torch.tensor(gt_boxes)).numpy()

        for i, ious in enumerate(iou_matrix):
            best_gt = int(ious.argmax())
            if ious[best_gt] >= iou_thresh and not detected[best_gt]:
                tp[i] = 1
                detected[best_gt] = True
            else:
                fp[i] = 1

    # Cumulative TP / FP along the ranking
    tp_cum = np.cumsum(tp)
    fp_cum = np.cumsum(fp)

    recall = tp_cum / num_gt
    # tp_cum + fp_cum equals the rank (>= 1), so the epsilon is not needed for
    # safety; it is kept so results stay identical to previously recorded runs.
    precision = tp_cum / (tp_cum + fp_cum + 1e-6)

    return compute_ap(recall, precision)

In [8]:
# Example: a single image with a single class

import numpy as np

# Predicted boxes (x1, y1, x2, y2)
pred_boxes = np.array([
    [10, 10, 30, 30],
    [15, 15, 40, 40],
    [60, 60, 90, 90],
])

# Confidence score of each predicted box
pred_scores = np.array([0.9, 0.6, 0.3])

# Ground-truth boxes
gt_boxes = np.array([
    [12, 12, 28, 28],
    [50, 50, 80, 80],
])

# Only the first prediction reaches IoU >= 0.5 with a GT box
# (IoU with the first GT box = 256 / 400 = 0.64); the other two are false positives.
ap_05 = compute_map(pred_boxes, pred_scores, gt_boxes, iou_thresh=0.5)
print("AP@0.5:", ap_05)


AP@0.5: 0.49999950000050003


In [9]:
# How to get mAP@0.5 over several images?
# NOTE(review): this cell is a template, not runnable as-is. `num_images` is not
# defined in any visible cell, and the `...` placeholders must be replaced with
# real per-image arrays before compute_map can be called.
# NOTE(review): this averages one AP per image. Dataset-level AP is usually computed
# by pooling detections from all images before building the PR curve — confirm which
# definition is intended.
aps = []
for img_idx in range(num_images):
    pred_boxes = ...  # predictions for image img_idx
    pred_scores = ...
    gt_boxes = ...

    ap_05 = compute_map(pred_boxes, pred_scores, gt_boxes, iou_thresh=0.5)
    aps.append(ap_05)

mAP_05 = np.mean(aps)
print("mAP@0.5 over dataset:", mAP_05)

In [10]:
## COCO-style AP averaged over IoU thresholds 0.50:0.95 (step 0.05)

import numpy as np

def compute_map_50_95(pred_boxes, pred_scores, gt_boxes):
    """
    COCO-style AP@[0.50:0.95]: the AP returned by compute_map(), averaged over
    the ten IoU thresholds 0.50, 0.55, ..., 0.95.

    Args:
        pred_boxes: (N_pred, 4) numpy array, (x1, y1, x2, y2)
        pred_scores: (N_pred,) numpy array
        gt_boxes: (N_gt, 4) numpy array

    Returns:
        mAP_50_95: scalar (float), mean of the ten AP values
        aps: dict {iou_threshold: AP}, keyed by the thresholds rounded to 2 decimals
    """
    # Build each threshold from an integer step and round it. np.arange with a
    # float step accumulates error (e.g. 0.6000000000000001), which would reject
    # a box whose IoU is exactly 0.60 and makes the dict keys hard to look up.
    iou_thresholds = [round(0.50 + 0.05 * step, 2) for step in range(10)]

    aps = {
        thr: compute_map(pred_boxes, pred_scores, gt_boxes, iou_thresh=thr)
        for thr in iou_thresholds
    }

    # Mean AP over the 10 IoU thresholds
    mAP_50_95 = np.mean(list(aps.values()))
    return mAP_50_95, aps


In [11]:
# Predicted boxes (x1, y1, x2, y2)
pred_boxes = np.array([
    [10, 10, 30, 30],
    [15, 15, 40, 40],
    [60, 60, 90, 90],
])

# Confidence score of each predicted box
pred_scores = np.array([0.9, 0.6, 0.3])

# Ground-truth boxes
gt_boxes = np.array([
    [12, 12, 28, 28],
    [50, 50, 80, 80],
])

mAP_50_95, aps = compute_map_50_95(pred_boxes, pred_scores, gt_boxes)

# Report the averaged value, then the AP at each individual IoU threshold
print("mAP 50:95 =", mAP_50_95)
for thr, ap in aps.items():
    print(f"AP@IoU={thr:.2f} = {ap}")

mAP 50:95 = 0.14999985000015
AP@IoU=0.50 = 0.49999950000050003
AP@IoU=0.55 = 0.49999950000050003
AP@IoU=0.60 = 0.49999950000050003
AP@IoU=0.65 = 0.0
AP@IoU=0.70 = 0.0
AP@IoU=0.75 = 0.0
AP@IoU=0.80 = 0.0
AP@IoU=0.85 = 0.0
AP@IoU=0.90 = 0.0
AP@IoU=0.95 = 0.0
