Hybrid_model : ConvNext-FPN Mask2Former 만 사용, Gaussian Distance 사용, mAP 0 뜸. F1, precision, recall 은 올라감. 근데 전혀 못 맞춰서 실패

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR
from torch.cuda.amp import GradScaler, autocast

from pathlib import Path
from PIL import Image
import numpy as np
import json
from tqdm import tqdm
from types import SimpleNamespace
import os

# --- 프로젝트 경로 추가 및 모듈 임포트 ---
import sys
sys.path.append('.')

from models.hybrid_model import HybridUnifiedModel
from utils.criterion import SetCriterion
from utils.hungarian_matcher import HungarianMatcher
from torchmetrics.detection import MeanAveragePrecision
from torchmetrics.classification import MulticlassJaccardIndex, MulticlassF1Score, MulticlassPrecision, MulticlassRecall
from pycocotools import mask as mask_utils

# --- 최종 설정 ---
class FinalConfig:
    DATA_ROOT = Path('./data/master_dataset')
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    EPOCHS = 50
    LR = 3e-5
    WEIGHT_DECAY = 1e-4
    BATCH_SIZE = 4
    NUM_WORKERS = 0
    GRADIENT_CLIP_VAL = 1.0
    WARMUP_EPOCHS = 5

    MODEL = SimpleNamespace(
        BACKBONE=SimpleNamespace(NAME='ConvNeXt-Tiny'),
        FPN=SimpleNamespace(OUT_CHANNELS=256),
        HEAD_B=SimpleNamespace(
            FEAT_CHANNELS=256,
            OUT_CHANNELS=256,
            NUM_CLASSES=4,
            QUERIES_PER_CLASS=75,
            DEC_LAYERS=6
        )
    )
    LOSS = SimpleNamespace(
        CLASS_WEIGHTS=[0.5, 1.5, 1.0, 1.3],
        EOS_COEF=0.1
    )
config = FinalConfig()

# --- 데이터셋 클래스 및 로더 ---
class UnifiedDataset(Dataset):
    def __init__(self, image_infos, annotations_map, images_dir, transform=None):
        self.image_infos = image_infos
        self.annotations_map = annotations_map
        self.images_dir = images_dir
        self.transform = transform
        self.cat_map = {1: 0, 2: 1, 3: 2, 4: 3}

    def __len__(self):
        return len(self.image_infos)

    def __getitem__(self, idx):
        img_info = self.image_infos[idx]
        img_id = img_info['id']
        image = Image.open(self.images_dir / img_info['file_name']).convert('RGB')
        original_w, original_h = image.size
        
        target = {}
        masks, labels = [], []
        annotations = self.annotations_map.get(img_id, [])
        for ann in annotations:
            if not ann.get('segmentation') or not ann['segmentation'][0] or len(ann['segmentation'][0]) < 6: continue
            rle = mask_utils.frPyObjects([ann['segmentation'][0]], original_h, original_h)
            mask = mask_utils.decode(rle)
            if mask.ndim == 3: mask = np.max(mask, axis=2)
            masks.append(mask)
            labels.append(self.cat_map[ann['category_id']])

        if self.transform: image = self.transform(image)
        
        target['labels'] = torch.tensor(labels, dtype=torch.int64)
        
        # --- [핵심 수정] 마스크 타입을 다시 float32로 변경 ---
        if masks:
            target['masks'] = torch.from_numpy(np.stack(masks)).to(torch.float32)
        else:
            target['masks'] = torch.zeros((0, original_h, original_w), dtype=torch.float32)
            
        return image, target

def collate_fn(batch): return tuple(zip(*batch))

# --- 데이터 로딩 실행 ---
print("--- Loading Master Dataset ---")
with open(config.DATA_ROOT / 'master_annotations.json', 'r') as f: master_data = json.load(f)
images_info = master_data['images']
annotations_map = {}
for ann in master_data['annotations']:
    img_id = ann['image_id']
    if img_id not in annotations_map: annotations_map[img_id] = []
    annotations_map[img_id].append(ann)

transform = transforms.Compose([
    transforms.Resize((640, 640)), transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
full_dataset = UnifiedDataset(images_info, annotations_map, config.DATA_ROOT / 'images', transform)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=config.NUM_WORKERS, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=config.NUM_WORKERS, collate_fn=collate_fn)
print("✅ DataLoaders created!")

# --- 모델, 손실함수, 옵티마이저 초기화 ---
print("--- Initializing Hybrid Unified Model ---")
model = HybridUnifiedModel(config).to(config.DEVICE)
matcher = HungarianMatcher(num_classes=config.MODEL.HEAD_B.NUM_CLASSES)
weight_dict = {'loss_ce': 2.0, 'loss_mask': 5.0, 'loss_dice': 5.0}
criterion = SetCriterion(
    num_classes=config.MODEL.HEAD_B.NUM_CLASSES, matcher=matcher, weight_dict=weight_dict,
    eos_coef=config.LOSS.EOS_COEF, losses=['labels', 'masks'], class_weights=config.LOSS.CLASS_WEIGHTS
).to(config.DEVICE)
optimizer = AdamW(model.parameters(), lr=config.LR, weight_decay=config.WEIGHT_DECAY)
scaler = GradScaler()
warmup_scheduler = LinearLR(optimizer, start_factor=0.1, total_iters=config.WARMUP_EPOCHS)
main_scheduler = CosineAnnealingLR(optimizer, T_max=config.EPOCHS - config.WARMUP_EPOCHS, eta_min=1e-7)
lr_scheduler = SequentialLR(optimizer, schedulers=[warmup_scheduler, main_scheduler], milestones=[config.WARMUP_EPOCHS])
print("✅ Initialization complete.")

--- Loading Master Dataset ---
✅ DataLoaders created!
--- Initializing Hybrid Unified Model ---
✅ Initialization complete.


  scaler = GradScaler()


In [17]:
def train_epoch(model, criterion, dataloader, optimizer, device, epoch):
    model.train()
    criterion.train()
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{config.EPOCHS} [Train]")
    for images, targets in pbar:
        images = torch.stack(images).to(device)
        targets_gpu = [{k: v.to(device) for k, v in t.items()} for t in targets]
        with autocast():
            outputs = model(images)
            loss_dict = criterion(outputs, targets_gpu)
            weighted_loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)
        optimizer.zero_grad()
        scaler.scale(weighted_loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.GRADIENT_CLIP_VAL)
        scaler.step(optimizer)
        scaler.update()
        pbar.set_postfix({'loss': f'{weighted_loss.item():.4f}'})

def validate(model, criterion, dataloader, device):
    """
    모델 검증 함수
    Args:
        model: 평가할 모델
        criterion: 손실 함수 (matcher 접근용)
        dataloader: 검증 데이터 로더
        device: 디바이스 (cuda/cpu)
    Returns:
        metrics: 평가 지표 딕셔너리
    """
    model.eval()
    
    # --- 평가 지표 객체 초기화 ---
    num_classes = config.MODEL.HEAD_B.NUM_CLASSES
    
    # 1. mAP (Detection/Segmentation 표준)
    map_metric = MeanAveragePrecision(iou_type="segm")
    
    # 2. IoU (Jaccard Index) - 클래스별 평균
    iou_metric = MulticlassJaccardIndex(num_classes=num_classes, average='macro').to(device)
    
    # 3. Precision, Recall, F1-score - 클래스별 평균
    precision_metric = MulticlassPrecision(num_classes=num_classes, average='macro').to(device)
    recall_metric = MulticlassRecall(num_classes=num_classes, average='macro').to(device)
    f1_metric = MulticlassF1Score(num_classes=num_classes, average='macro').to(device)
    
    # 메트릭 업데이트 여부 추적
    metrics_updated = False
    
    with torch.no_grad():
        pbar = tqdm(dataloader, desc="[Valid]")
        for images, targets in pbar:
            images = torch.stack(images).to(device)
            
            with autocast():
                outputs = model(images)
            
            # --- mAP 계산용 데이터 준비 ---
            preds_cpu = {k: v.cpu() for k, v in outputs.items() if torch.is_tensor(v)}
            
            preds_for_map = []
            for i in range(len(targets)):
                scores, labels = F.softmax(preds_cpu['pred_logits'][i], dim=-1).max(-1)
                # boolean 마스크를 uint8로 변환
                masks_uint8 = (torch.sigmoid(preds_cpu['pred_masks'][i]) > 0.5).to(torch.uint8)
                preds_for_map.append(dict(masks=masks_uint8, scores=scores, labels=labels))

            targets_for_map = []
            for t in targets:
                target_dict = {
                    'labels': t['labels'].cpu(),
                    # 정답 마스크도 uint8 타입으로 변환
                    'masks': t['masks'].cpu().to(torch.uint8)
                }
                targets_for_map.append(target_dict)

            map_metric.update(preds_for_map, targets_for_map)
            
            # --- IoU, P, R, F1 계산용 데이터 준비 ---
            indices = criterion.matcher(outputs, [{k: v.to(device) for k, v in t.items()} for t in targets])
            
            if any(len(i[0]) > 0 for i in indices):
                idx = criterion._get_src_permutation_idx(indices)
                pred_logits = outputs['pred_logits'][idx]
                target_labels = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]).to(device)

                # 매칭된 결과로 지표 업데이트
                iou_metric.update(pred_logits, target_labels)
                precision_metric.update(pred_logits, target_labels)
                recall_metric.update(pred_logits, target_labels)
                f1_metric.update(pred_logits, target_labels)
                metrics_updated = True

    # 최종 결과 집계
    try:
        map_results = map_metric.compute()
        map_val = map_results.get('map', torch.tensor(0.0))
        map_50_val = map_results.get('map_50', torch.tensor(0.0))
        
        # tensor인지 확인하고 item() 호출
        if torch.is_tensor(map_val):
            map_val = map_val.item()
        if torch.is_tensor(map_50_val):
            map_50_val = map_50_val.item()
    except:
        print("⚠️ Warning: mAP computation failed")
        map_val = 0.0
        map_50_val = 0.0
    
    # 다른 메트릭들 계산
    if metrics_updated:
        try:
            iou_val = iou_metric.compute().item()
        except:
            iou_val = 0.0
        
        try:
            precision_val = precision_metric.compute().item()
        except:
            precision_val = 0.0
            
        try:
            recall_val = recall_metric.compute().item()
        except:
            recall_val = 0.0
            
        try:
            f1_val = f1_metric.compute().item()
        except:
            f1_val = 0.0
    else:
        # 메트릭이 한 번도 업데이트되지 않은 경우
        print("⚠️ Warning: No valid predictions matched with targets")
        iou_val = precision_val = recall_val = f1_val = 0.0
    
    metrics = {
        'mAP': map_val,
        'mAP_50': map_50_val,
        'iou': iou_val,
        'precision': precision_val,
        'recall': recall_val,
        'f1_score': f1_val
    }
    
    return metrics



In [18]:
# --- 메인 학습 루프 (완전한 버전) ---
print("\n--- 🚀 Starting Hybrid Unified Model Training 🚀 ---")
best_map = 0.0
for epoch in range(config.EPOCHS):
    # 학습
    train_epoch(model, criterion, train_loader, optimizer, config.DEVICE, epoch)
    
    # 검증 - criterion도 전달
    val_metrics = validate(model, criterion, val_loader, config.DEVICE)
    
    # 메트릭 출력
    val_map = val_metrics['mAP']
    val_map_50 = val_metrics['mAP_50']
    
    print(f"\nEpoch {epoch+1}/{config.EPOCHS}:")
    print(f"  Val mAP: {val_map:.4f} | mAP@.50: {val_map_50:.4f}")
    print(f"  IoU: {val_metrics['iou']:.4f} | F1: {val_metrics['f1_score']:.4f}")
    print(f"  Precision: {val_metrics['precision']:.4f} | Recall: {val_metrics['recall']:.4f}")
    
    # 스케줄러 업데이트
    lr_scheduler.step()
    
    # 베스트 모델 저장
    if val_map > best_map:
        best_map = val_map
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_map': best_map,
            'metrics': val_metrics
        }, 'hybrid_unified_best_model.pth')
        print(f"✨ New best model saved with mAP: {best_map:.4f}")
        
print(f"\n--- 🎉 Training Complete ---")
print(f"Best mAP achieved: {best_map:.4f}")


--- 🚀 Starting Hybrid Unified Model Training 🚀 ---


  with autocast():
Epoch 1/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=6.2026] 
  with autocast():
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.24it/s]



Epoch 1/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2091 | F1: 0.3120
  Precision: 0.5245 | Recall: 0.3580


Epoch 2/50 [Train]: 100%|██████████| 655/655 [06:44<00:00,  1.62it/s, loss=4.4183]
[Valid]: 100%|██████████| 164/164 [00:27<00:00,  5.90it/s]



Epoch 2/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.1826 | F1: 0.2692
  Precision: 0.5048 | Recall: 0.4049


Epoch 3/50 [Train]: 100%|██████████| 655/655 [06:50<00:00,  1.59it/s, loss=5.5379] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.22it/s]



Epoch 3/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.1881 | F1: 0.2836
  Precision: 0.4199 | Recall: 0.3226


Epoch 4/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=7.5676] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.25it/s]



Epoch 4/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2325 | F1: 0.3702
  Precision: 0.3975 | Recall: 0.4046


Epoch 5/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=7.3938] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 5/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2449 | F1: 0.3594
  Precision: 0.3797 | Recall: 0.3659


Epoch 6/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=5.6607] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.24it/s]



Epoch 6/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2083 | F1: 0.3375
  Precision: 0.4139 | Recall: 0.3514


Epoch 7/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=7.6219] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 7/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.1485 | F1: 0.2508
  Precision: 0.2549 | Recall: 0.2555


Epoch 8/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=1.6495] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.16it/s]



Epoch 8/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.5350 | F1: 0.6613
  Precision: 0.7152 | Recall: 0.6734


Epoch 9/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=8.3342] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.25it/s]



Epoch 9/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2372 | F1: 0.3705
  Precision: 0.3753 | Recall: 0.3774


Epoch 10/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=8.4895] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 10/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.1817 | F1: 0.2878
  Precision: 0.3134 | Recall: 0.3108


Epoch 11/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=6.6898] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.23it/s]



Epoch 11/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2125 | F1: 0.3346
  Precision: 0.3524 | Recall: 0.3694


Epoch 12/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=6.8351]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 12/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2624 | F1: 0.4101
  Precision: 0.4102 | Recall: 0.4305


Epoch 13/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=3.0545] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 13/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.4192 | F1: 0.5474
  Precision: 0.5901 | Recall: 0.5601


Epoch 14/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=5.5454] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 14/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.3298 | F1: 0.4791
  Precision: 0.5163 | Recall: 0.4669


Epoch 15/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=4.6245] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 15/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2632 | F1: 0.3969
  Precision: 0.4319 | Recall: 0.4411


Epoch 16/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=3.5817] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.24it/s]



Epoch 16/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2821 | F1: 0.4246
  Precision: 0.4830 | Recall: 0.4619


Epoch 17/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=5.5957] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 17/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.3289 | F1: 0.4791
  Precision: 0.5083 | Recall: 0.4745


Epoch 18/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=6.3058] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 18/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2441 | F1: 0.3887
  Precision: 0.3833 | Recall: 0.4037


Epoch 19/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=6.4287]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.24it/s]



Epoch 19/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2672 | F1: 0.4094
  Precision: 0.4222 | Recall: 0.4047


Epoch 20/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.8159]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.18it/s]



Epoch 20/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.3709 | F1: 0.5085
  Precision: 0.5606 | Recall: 0.4992


Epoch 21/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=3.1577] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.17it/s]



Epoch 21/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2017 | F1: 0.3039
  Precision: 0.4411 | Recall: 0.3209


Epoch 22/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=6.1988] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.25it/s]



Epoch 22/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2004 | F1: 0.3184
  Precision: 0.3700 | Recall: 0.3254


Epoch 23/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=5.7442] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.25it/s]



Epoch 23/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2610 | F1: 0.4028
  Precision: 0.4508 | Recall: 0.3927


Epoch 24/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=7.8434] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.19it/s]



Epoch 24/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.4425 | F1: 0.6028
  Precision: 0.6265 | Recall: 0.5929


Epoch 25/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.64it/s, loss=4.1192] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.22it/s]



Epoch 25/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.4028 | F1: 0.5536
  Precision: 0.5901 | Recall: 0.5553


Epoch 26/50 [Train]: 100%|██████████| 655/655 [06:40<00:00,  1.63it/s, loss=7.1493]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.27it/s]



Epoch 26/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.2344 | F1: 0.3619
  Precision: 0.4128 | Recall: 0.5061


Epoch 27/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.1421] 
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 27/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.5240 | F1: 0.6627
  Precision: 0.6980 | Recall: 0.6544


Epoch 28/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.2596]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 28/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.6021 | F1: 0.7278
  Precision: 0.7288 | Recall: 0.7386


Epoch 29/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=3.8005]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 29/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.5740 | F1: 0.6938
  Precision: 0.6882 | Recall: 0.7015


Epoch 30/50 [Train]: 100%|██████████| 655/655 [06:42<00:00,  1.63it/s, loss=2.6286]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.18it/s]



Epoch 30/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7203 | F1: 0.8270
  Precision: 0.8210 | Recall: 0.8349


Epoch 31/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.0859]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 31/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7126 | F1: 0.8214
  Precision: 0.8566 | Recall: 0.8001


Epoch 32/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=0.7915]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 32/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7115 | F1: 0.8187
  Precision: 0.8352 | Recall: 0.8067


Epoch 33/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.3643]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.19it/s]



Epoch 33/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.6853 | F1: 0.7962
  Precision: 0.8456 | Recall: 0.7789


Epoch 34/50 [Train]: 100%|██████████| 655/655 [06:42<00:00,  1.63it/s, loss=1.2871]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 34/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7036 | F1: 0.8082
  Precision: 0.8777 | Recall: 0.7844


Epoch 35/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=0.6665]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 35/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7083 | F1: 0.8156
  Precision: 0.8578 | Recall: 0.8027


Epoch 36/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=0.7545]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 36/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7415 | F1: 0.8422
  Precision: 0.8593 | Recall: 0.8314


Epoch 37/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.2581]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 37/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.6948 | F1: 0.8039
  Precision: 0.8300 | Recall: 0.7882


Epoch 38/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=4.5561]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.20it/s]



Epoch 38/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7011 | F1: 0.8083
  Precision: 0.8569 | Recall: 0.7863


Epoch 39/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.3546]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.14it/s]



Epoch 39/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7302 | F1: 0.8338
  Precision: 0.8909 | Recall: 0.8082


Epoch 40/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.2932]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.16it/s]



Epoch 40/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7032 | F1: 0.8124
  Precision: 0.8338 | Recall: 0.7976


Epoch 41/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.6223]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 41/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7305 | F1: 0.8330
  Precision: 0.8736 | Recall: 0.8115


Epoch 42/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.3673]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.17it/s]



Epoch 42/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7534 | F1: 0.8522
  Precision: 0.8617 | Recall: 0.8456


Epoch 43/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.4409]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.19it/s]



Epoch 43/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7506 | F1: 0.8502
  Precision: 0.8616 | Recall: 0.8430


Epoch 44/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=2.1028]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 44/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7722 | F1: 0.8658
  Precision: 0.8782 | Recall: 0.8561


Epoch 45/50 [Train]: 100%|██████████| 655/655 [06:44<00:00,  1.62it/s, loss=3.0336]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.11it/s]



Epoch 45/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7540 | F1: 0.8509
  Precision: 0.8600 | Recall: 0.8441


Epoch 46/50 [Train]: 100%|██████████| 655/655 [06:42<00:00,  1.63it/s, loss=1.1953]
[Valid]: 100%|██████████| 164/164 [00:27<00:00,  6.04it/s]



Epoch 46/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7244 | F1: 0.8313
  Precision: 0.8324 | Recall: 0.8304


Epoch 47/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.1622]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.19it/s]



Epoch 47/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7392 | F1: 0.8415
  Precision: 0.8538 | Recall: 0.8342


Epoch 48/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=3.0448]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.21it/s]



Epoch 48/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7470 | F1: 0.8474
  Precision: 0.8647 | Recall: 0.8369


Epoch 49/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=1.6188]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.15it/s]



Epoch 49/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7545 | F1: 0.8522
  Precision: 0.8670 | Recall: 0.8433


Epoch 50/50 [Train]: 100%|██████████| 655/655 [06:41<00:00,  1.63it/s, loss=0.5522]
[Valid]: 100%|██████████| 164/164 [00:26<00:00,  6.19it/s]



Epoch 50/50:
  Val mAP: 0.0000 | mAP@.50: 0.0000
  IoU: 0.7533 | F1: 0.8514
  Precision: 0.8700 | Recall: 0.8407

--- 🎉 Training Complete ---
Best mAP achieved: 0.0000


In [21]:
print("--- 🚀 Starting Single Batch Overfit Test ---")

# 1. 학습 데이터 로더에서 딱 한 개의 배치만 가져오기
try:
    single_batch = next(iter(train_loader))
    print("✅ Single batch loaded successfully.")
except Exception as e:
    print(f"❌ Failed to load a batch: {e}")

if 'single_batch' in locals():
    images, targets = single_batch
    images = torch.stack(images).to(config.DEVICE)
    targets_gpu = [{k: v.to(config.DEVICE) for k, v in t.items()} for t in targets]

    # 2. 모델과 옵티마이저를 새로 초기화 (깨끗한 상태에서 시작)
    model_test = HybridUnifiedModel(config).to(config.DEVICE)
    optimizer_test = AdamW(model_test.parameters(), lr=config.LR)
    criterion_test = criterion.to(config.DEVICE)
    scaler_test = GradScaler()
    model_test.train()
    
    print("\n--- Starting to overfit on the single batch for 200 iterations ---")
    # 3. 동일한 배치로 200번 학습 시도
    for i in tqdm(range(200)):
        with autocast():
            outputs = model_test(images)
            loss_dict = criterion_test(outputs, targets_gpu)
            weighted_loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        optimizer_test.zero_grad()
        scaler_test.scale(weighted_loss).backward()
        scaler_test.step(optimizer_test)
        scaler_test.update()
        
        if (i + 1) % 10 == 0:
            print(f"Iteration {i+1}/200 -> Loss: {weighted_loss.item():.4f}")

--- 🚀 Starting Single Batch Overfit Test ---
✅ Single batch loaded successfully.


  scaler_test = GradScaler()



--- Starting to overfit on the single batch for 200 iterations ---


  with autocast():
  5%|▌         | 10/200 [00:05<01:48,  1.75it/s]

Iteration 10/200 -> Loss: 10.7047


 10%|█         | 20/200 [00:11<01:42,  1.76it/s]

Iteration 20/200 -> Loss: 9.3961


 15%|█▌        | 30/200 [00:17<01:36,  1.77it/s]

Iteration 30/200 -> Loss: 8.4513


 20%|██        | 40/200 [00:22<01:30,  1.77it/s]

Iteration 40/200 -> Loss: 9.1292


 25%|██▌       | 50/200 [00:28<01:24,  1.77it/s]

Iteration 50/200 -> Loss: 9.2957


 30%|███       | 60/200 [00:34<01:19,  1.77it/s]

Iteration 60/200 -> Loss: 7.6857


 35%|███▌      | 70/200 [00:39<01:13,  1.77it/s]

Iteration 70/200 -> Loss: 7.8645


 40%|████      | 80/200 [00:45<01:08,  1.76it/s]

Iteration 80/200 -> Loss: 8.9995


 45%|████▌     | 90/200 [00:51<01:03,  1.74it/s]

Iteration 90/200 -> Loss: 8.0684


 50%|█████     | 100/200 [00:57<00:58,  1.71it/s]

Iteration 100/200 -> Loss: 7.2103


 55%|█████▌    | 110/200 [01:02<00:51,  1.75it/s]

Iteration 110/200 -> Loss: 7.3659


 60%|██████    | 120/200 [01:08<00:45,  1.76it/s]

Iteration 120/200 -> Loss: 8.4996


 65%|██████▌   | 130/200 [01:14<00:39,  1.76it/s]

Iteration 130/200 -> Loss: 7.8203


 70%|███████   | 140/200 [01:19<00:34,  1.76it/s]

Iteration 140/200 -> Loss: 6.9739


 75%|███████▌  | 150/200 [01:25<00:28,  1.76it/s]

Iteration 150/200 -> Loss: 7.4296


 80%|████████  | 160/200 [01:31<00:22,  1.75it/s]

Iteration 160/200 -> Loss: 8.8397


 85%|████████▌ | 170/200 [01:36<00:17,  1.76it/s]

Iteration 170/200 -> Loss: 7.3558


 90%|█████████ | 180/200 [01:42<00:11,  1.75it/s]

Iteration 180/200 -> Loss: 7.2570


 95%|█████████▌| 190/200 [01:48<00:05,  1.76it/s]

Iteration 190/200 -> Loss: 8.6877


100%|██████████| 200/200 [01:53<00:00,  1.75it/s]

Iteration 200/200 -> Loss: 8.6085





Unified_model : ConvNext-FPN Mask2Former 만 사용, Gaussian Distance 사용.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR
from torch.cuda.amp import GradScaler, autocast

from pathlib import Path
from PIL import Image
import numpy as np
import json
from tqdm import tqdm
from types import SimpleNamespace
import os

# --- 프로젝트 경로 추가 및 모듈 임포트 ---
import sys
sys.path.append('.')

from models.unified_model import UnifiedModel
from utils.criterion import SetCriterion
from utils.hungarian_matcher import HungarianMatcher
from torchmetrics.detection import MeanAveragePrecision
from torchmetrics.classification import MulticlassJaccardIndex, MulticlassF1Score, MulticlassPrecision, MulticlassRecall
from pycocotools import mask as mask_utils

print("--- 🚀 Starting Single Batch Overfit Test (Simple Model) ---")

# 1. 학습 데이터 로더에서 딱 한 개의 배치만 가져오기
try:
    single_batch = next(iter(train_loader))
    print("✅ Single batch loaded successfully.")
except Exception as e:
    print(f"❌ Failed to load a batch: {e}")

if 'single_batch' in locals():
    images, targets = single_batch
    images = torch.stack(images).to(config.DEVICE)
    targets_gpu = [{k: v.to(config.DEVICE) for k, v in t.items()} for t in targets]

    # 2. 모델과 옵티마이저를 새로 초기화 (깨끗한 상태에서 시작)
    model_test = UnifiedModel(config).to(config.DEVICE)
    optimizer_test = AdamW(model_test.parameters(), lr=config.LR)
    criterion_test = criterion.to(config.DEVICE)
    scaler_test = GradScaler()
    model_test.train()
    
    print("\n--- Starting to overfit on the single batch for 200 iterations ---")
    # 3. 동일한 배치로 200번 학습 시도
    for i in tqdm(range(200)):
        with autocast():
            outputs = model_test(images)
            loss_dict = criterion_test(outputs, targets_gpu)
            weighted_loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        optimizer_test.zero_grad()
        scaler_test.scale(weighted_loss).backward()
        scaler_test.step(optimizer_test)
        scaler_test.update()
        
        if (i + 1) % 10 == 0:
            print(f"Iteration {i+1}/200 -> Loss: {weighted_loss.item():.4f}")

--- Loading the best model and visualizing predictions ---


  model.load_state_dict(torch.load('hybrid_unified_best_model.pth'))


FileNotFoundError: [Errno 2] No such file or directory: 'hybrid_unified_best_model.pth'