In [None]:
# ===== 셀 1: 환경 설정 및 Import =====
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import json
from tqdm import tqdm
import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# 프로젝트 경로 추가
import sys
sys.path.append('.')

# 모델 import
from models.unified.unified_model import UnifiedModel
from models.heads.mask2former_damage_head import Mask2FormerLoss
from utils.dataset import UnifiedDamageDataset, create_dataloaders
from utils.evaluate import ModelEvaluator

# Environment report: PyTorch version and CUDA availability, plus the name and
# total memory of device 0 when CUDA is usable.
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 before being labelled "GB"
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# ===== Cell 2: Mask2Former settings =====
class Config:
    """Static settings for the Mask2Former damage-head experiment.

    Every value is a class attribute, so it can be read from the class or
    from an instance. Evaluating the class body has two side effects: it
    creates ``save_dir`` on disk and stamps ``experiment_name`` with the
    current local time.
    """

    # Data roots, kept separate per head
    blade_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\blade_data')  # for Head-A
    damage_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\multilabeled_data_augmented')  # for Head-B

    blade_checkpoint = 'best_unified_blade_model.pth'

    # Model type
    model_type = 'mask2former'

    # Base model settings
    backbone_type, use_fpn = 'tiny', True
    num_blade_classes, num_damage_classes = 2, 3

    # Mask2Former-specific loader settings
    batch_size = 2  # small, to save memory
    accumulate_grad_batches = 2  # gradient accumulation
    num_workers = 0

    # Mask2Former head settings
    mask2former_config = dict(
        num_queries=100,  # start small
        hidden_dim=256,
        num_heads=8,
        dec_layers=3,  # start with few layers
        dropout=0.1,
    )

    # Optimisation
    epochs = 30
    learning_rate = 1e-5  # Mask2Former uses a small lr
    weight_decay = 0.05
    gradient_clip = 0.01  # tight gradient clipping

    # Mixed precision training
    use_amp = True

    # Training strategy
    freeze_blade_initially, unfreeze_epoch = True, 15

    # Loss weights
    blade_loss_weight, aux_loss_weight = 1.0, 0.4

    # Miscellaneous
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    save_dir = Path('outputs_mask2former')
    save_dir.mkdir(exist_ok=True)

    experiment_name = f"mask2former_{datetime.now().strftime('%Y%m%d_%H%M%S')}"


config = Config()
print(f"Experiment: {config.experiment_name}")
print(f"Model Type: {config.model_type}")
print(f"Batch Size: {config.batch_size} x {config.accumulate_grad_batches} = {config.batch_size * config.accumulate_grad_batches}")
print(f"Blade Data: {config.blade_data_root}")
print(f"Damage Data: {config.damage_data_root}")

In [None]:
# ===== Cell 3: Build the data loaders =====
print("데이터로더 생성 중...")

train_loader, valid_loader, test_loader = create_dataloaders(
    blade_data_root=config.blade_data_root,
    damage_data_root=config.damage_data_root,
    batch_size=config.batch_size,
    num_workers=config.num_workers,
    model_type='mask2former'
)

print(f"✅ Train: {len(train_loader)} batches")
print(f"✅ Valid: {len(valid_loader)} batches")
print(f"✅ Test: {len(test_loader)} batches")

# Inspect the first training batch only: print the shape of every tensor entry
# and the length of every list entry. `batch` stays bound after the `break`.
for batch in train_loader:
    print(f"\n데이터 샘플:")
    for key, value in batch.items():
        if torch.is_tensor(value):
            print(f"  {key}: {value.shape}")
        elif isinstance(value, list):
            print(f"  {key}: {len(value)} items")
    break

In [None]:
# ===== Cell 4: Build the Mask2Former model =====
print("Mask2Former 모델 생성 중...")

# Constructor arguments collected up front. The blade checkpoint is passed
# only when the file exists on disk; otherwise None is passed instead.
_model_kwargs = dict(
    backbone_type=config.backbone_type,
    num_blade_classes=config.num_blade_classes,
    num_damage_classes=config.num_damage_classes,
    pretrained_backbone=True,
    blade_checkpoint=config.blade_checkpoint if Path(config.blade_checkpoint).exists() else None,
    freeze_blade=config.freeze_blade_initially,
    use_fpn=config.use_fpn,
    damage_head_type='mask2former',
    damage_head_config=config.mask2former_config,
)
model = UnifiedModel(**_model_kwargs)
model = model.to(config.device)

# Count all parameters and the trainable subset in one pass
total_params = 0
trainable_params = 0
for _param in model.parameters():
    _count = _param.numel()
    total_params += _count
    if _param.requires_grad:
        trainable_params += _count

print(f"✅ Total parameters: {total_params/1e6:.2f}M")
print(f"✅ Trainable parameters: {trainable_params/1e6:.2f}M")
print(f"✅ Blade head frozen: {config.freeze_blade_initially}")
print(f"✅ Damage head type: Mask2Former")
print(f"  - Queries: {config.mask2former_config['num_queries']}")
print(f"  - Decoder layers: {config.mask2former_config['dec_layers']}")

In [None]:
# ===== Cell 5 (redefined): autocast-safe loss =====
class SimpleLoss(nn.Module):
    """Weighted sum of a blade segmentation loss and a multi-label damage loss.

    The blade term is cross-entropy between ``outputs['blade']`` and
    ``batch['blade_mask']``, scaled by ``config.blade_loss_weight``. The
    multi-label term is ``BCEWithLogitsLoss`` between ``outputs['multilabel']``
    and ``batch['multilabel']``, scaled by ``config.ml_loss_weight`` when the
    config defines it and by 2.0 otherwise. A term is skipped when its key is
    missing from either dict.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.blade_ce = nn.CrossEntropyLoss()
        self.ml_loss = nn.BCEWithLogitsLoss()  # the only multi-label loss used
        # 2.0 was the previously hard-coded weight; a config may override it.
        self.ml_loss_weight = getattr(config, 'ml_loss_weight', 2.0)

    @staticmethod
    def _zero_total(outputs):
        """Return a scalar zero tensor on the device of the first tensor in *outputs*."""
        for value in outputs.values():
            if torch.is_tensor(value):
                return torch.zeros((), device=value.device)
        return torch.zeros(())

    def forward(self, outputs, batch):
        """Compute the combined loss.

        Args:
            outputs: dict of model outputs; ``'blade'`` and ``'multilabel'``
                are read when present.
            batch: dict of targets; ``'blade_mask'`` and ``'multilabel'`` are
                read when present.

        Returns:
            ``(total_loss, losses)``. ``losses`` holds the unweighted
            ``'blade'`` / ``'ml'`` terms that were computed plus ``'total'``.
            ``total_loss`` is always a tensor; it is a gradient-free zero when
            no term could be computed.
        """
        losses = {}
        total_loss = None

        # Blade loss
        if 'blade' in outputs and 'blade_mask' in batch:
            blade_logits = outputs['blade']
            blade_target = batch['blade_mask']
            # A target with one dimension fewer than the logits holds class
            # indices, which CrossEntropyLoss only accepts as int64.
            if blade_target.dim() == blade_logits.dim() - 1:
                blade_target = blade_target.long()
            losses['blade'] = self.blade_ce(blade_logits, blade_target)
            total_loss = losses['blade'] * self.config.blade_loss_weight

        # Multi-label loss: always BCEWithLogitsLoss
        if 'multilabel' in outputs and 'multilabel' in batch:
            # Work in float32 so the inverse sigmoid below keeps its precision
            # even when the model output is half precision under autocast.
            ml_output = outputs['multilabel'].float()
            # NOTE(review): whether the head already applied a sigmoid is
            # guessed from the value range; logits that all happen to fall in
            # [0, 1] would be treated as probabilities. Confirm what the head emits.
            if ml_output.min() >= 0 and ml_output.max() <= 1:
                # Invert the sigmoid; eps clamps the input away from 0 and 1.
                ml_output = torch.logit(ml_output, eps=1e-6)

            # BCEWithLogitsLoss rejects integer targets.
            ml_target = batch['multilabel'].to(dtype=ml_output.dtype)
            losses['ml'] = self.ml_loss(ml_output, ml_target)
            ml_term = losses['ml'] * self.ml_loss_weight
            total_loss = ml_term if total_loss is None else total_loss + ml_term

        if total_loss is None:
            # Keep the return type stable so callers can still call .item().
            total_loss = self._zero_total(outputs)

        losses['total'] = total_loss
        return total_loss, losses

# Loss instance used by the training and validation cells below.
criterion = SimpleLoss(config)
print("✅ SimpleLoss ready - autocast safe")

In [None]:
# ===== Cell 6: Optimizer =====
# One parameter group per sub-module, each with its own learning rate:
#   backbone    -> 0.1 x base lr
#   damage head -> base lr
#   blade head  -> 0.5 x base lr, only when it is trainable from the start
#
# The earlier version appended one group per damage-head parameter and then
# passed `param_groups[:2]` to AdamW, so only the backbone and the FIRST
# damage-head parameter were ever optimised. Every group is now passed.
#
# NOTE(review): only `backbone`, `damage_head` and `blade_head` are registered.
# If the model owns other trainable sub-modules (e.g. an FPN neck), they are
# not optimised here - confirm against UnifiedModel.
param_groups = [
    {
        'params': list(model.backbone.parameters()),
        'lr': config.learning_rate * 0.1,
        'name': 'backbone',
    },
    {
        'params': list(model.damage_head.parameters()),
        'lr': config.learning_rate,
        'name': 'damage_head',
    },
]

if not config.freeze_blade_initially:
    param_groups.append({
        'params': list(model.blade_head.parameters()),
        'lr': config.learning_rate * 0.5,
        'name': 'blade_head',
    })

optimizer = torch.optim.AdamW(param_groups, weight_decay=config.weight_decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs, eta_min=1e-7)
scaler = GradScaler() if config.use_amp else None

print(f"✅ Optimizer ready with {len(optimizer.param_groups)} groups")

In [None]:
# ===== Cell 7: Improved training functions =====
def train_epoch(model, train_loader, criterion, optimizer, scheduler, scaler, config, epoch):
    """Run one training epoch with gradient accumulation and optional AMP.

    Args:
        model: called as ``model(batch['image'])``; its dict output may hold
            ``'blade'`` and ``'multilabel'`` entries, which feed the metrics.
        train_loader: sized iterable of dict batches.
        criterion: called as ``criterion(outputs, batch)``; must return
            ``(loss, loss_dict)``.
        optimizer: stepped every ``config.accumulate_grad_batches`` batches,
            and once more on the final batch so a trailing partial window is
            not lost.
        scheduler: stepped once, after the last batch.
        scaler: a ``GradScaler``, or ``None`` to train without loss scaling.
        config: must provide ``device``, ``use_amp``,
            ``accumulate_grad_batches``, ``gradient_clip`` and ``epochs``.
        epoch: zero-based epoch index, used for the progress-bar label.

    Returns:
        dict with the epoch means ``'total'`` (loss), ``'blade_iou'`` and
        ``'damage_f1'``. All three are 0 when the loader yields no batches.
    """
    model.train()
    # Start clean: gradients left from a previous epoch must not leak into
    # this epoch's first optimizer step.
    optimizer.zero_grad()

    accum_steps = config.accumulate_grad_batches
    total_batches = len(train_loader)

    metrics = {
        'loss': 0,
        'blade_iou': 0,
        'damage_f1': 0,
        'num_batches': 0
    }

    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs}')

    for batch_idx, batch in enumerate(pbar):
        # Move tensors, and tensors inside lists, to the target device
        for key in batch:
            if torch.is_tensor(batch[key]):
                batch[key] = batch[key].to(config.device)
            elif isinstance(batch[key], list):
                batch[key] = [item.to(config.device) if torch.is_tensor(item) else item
                              for item in batch[key]]

        with autocast(enabled=config.use_amp):
            outputs = model(batch['image'])
            loss, _ = criterion(outputs, batch)
            # Scale so the accumulated gradient averages over the window
            loss = loss / accum_steps

        # Backward
        if scaler:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        # Step at every window boundary and on the last batch; without the
        # latter, a trailing partial window would never be applied.
        window_full = (batch_idx + 1) % accum_steps == 0
        last_batch = (batch_idx + 1) == total_batches
        if window_full or last_batch:
            if scaler:
                # Gradients must be unscaled before their norm is clipped
                scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.gradient_clip)
            if scaler:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

        # Running metrics (no gradients needed)
        with torch.no_grad():
            # Blade IoU over foreground (label > 0); for 0/1 masks this equals
            # the bitwise form and it also works for non-integer masks.
            if 'blade' in outputs and 'blade_mask' in batch:
                pred_fg = outputs['blade'].argmax(1) > 0
                true_fg = batch['blade_mask'] > 0
                intersection = (pred_fg & true_fg).float().sum()
                union = (pred_fg | true_fg).float().sum()
                metrics['blade_iou'] += (intersection / (union + 1e-6)).item()

            # Damage F1, pooled over all samples and classes in the batch
            if 'multilabel' in outputs and 'multilabel' in batch:
                ml_output = outputs['multilabel']
                # Values already inside [0, 1] are taken to be probabilities
                if ml_output.min() >= 0 and ml_output.max() <= 1:
                    pred = (ml_output > 0.5).float()
                else:
                    pred = (torch.sigmoid(ml_output) > 0.5).float()

                target = batch['multilabel'].float()
                tp = (pred * target).sum()
                fp = (pred * (1 - target)).sum()
                fn = ((1 - pred) * target).sum()

                metrics['damage_f1'] += (2 * tp / (2 * tp + fp + fn + 1e-6)).item()

        # Undo the accumulation scaling so the reported loss is per batch
        batch_loss = loss.item() * accum_steps
        metrics['loss'] += batch_loss
        metrics['num_batches'] += 1

        # Update progress bar
        avg_blade_iou = metrics['blade_iou'] / max(1, metrics['num_batches'])
        avg_damage_f1 = metrics['damage_f1'] / max(1, metrics['num_batches'])

        pbar.set_postfix({
            'loss': f"{batch_loss:.4f}",
            'blade_iou': f"{avg_blade_iou:.3f}",
            'damage_f1': f"{avg_damage_f1:.3f}",
            'lr': f"{optimizer.param_groups[0]['lr']:.6f}"
        })

    scheduler.step()

    # Guard against an empty loader
    denom = max(1, metrics['num_batches'])
    return {
        'total': metrics['loss'] / denom,
        'blade_iou': metrics['blade_iou'] / denom,
        'damage_f1': metrics['damage_f1'] / denom
    }


def validate_epoch(model, valid_loader, criterion, config):
    """Evaluate the model on the validation loader.

    Args:
        model: called as ``model(batch['image'])``; its dict output may hold
            ``'blade'`` and ``'multilabel'`` entries.
        valid_loader: iterable of dict batches.
        criterion: called as ``criterion(outputs, batch)``; must return
            ``(loss, loss_dict)``.
        config: must provide ``device``. ``num_damage_classes`` sizes the
            per-class counters and falls back to 3 when absent.

    Returns:
        dict with ``'loss'`` and ``'blade_iou'`` (means over batches),
        ``'damage_f1'`` (mean of the per-class F1 scores) and the NumPy arrays
        ``'per_class_f1'``, ``'precision'`` and ``'recall'``. The means are 0
        when the loader yields no batches.
    """
    model.eval()

    num_classes = getattr(config, 'num_damage_classes', 3)
    metrics = {
        'loss': 0,
        'blade_iou': 0,
        'damage_tp': torch.zeros(num_classes, device=config.device),
        'damage_fp': torch.zeros(num_classes, device=config.device),
        'damage_fn': torch.zeros(num_classes, device=config.device),
        'num_batches': 0
    }

    with torch.no_grad():
        for batch in tqdm(valid_loader, desc='Validation'):
            # Move tensors, and tensors inside lists, to the target device
            for key in batch:
                if torch.is_tensor(batch[key]):
                    batch[key] = batch[key].to(config.device)
                elif isinstance(batch[key], list):
                    batch[key] = [item.to(config.device) if torch.is_tensor(item) else item
                                  for item in batch[key]]

            outputs = model(batch['image'])
            loss, _ = criterion(outputs, batch)
            # float() accepts scalar tensors as well as plain numbers
            metrics['loss'] += float(loss)

            # Blade IoU over foreground (label > 0); equals the bitwise form
            # for 0/1 masks and also works for non-integer masks.
            if 'blade' in outputs and 'blade_mask' in batch:
                pred_fg = outputs['blade'].argmax(1) > 0
                true_fg = batch['blade_mask'] > 0
                intersection = (pred_fg & true_fg).float().sum()
                union = (pred_fg | true_fg).float().sum()
                metrics['blade_iou'] += (intersection / (union + 1e-6)).item()

            # Per-class TP / FP / FN counts, summed over the batch dimension
            if 'multilabel' in outputs and 'multilabel' in batch:
                ml_output = outputs['multilabel']
                # Values already inside [0, 1] are taken to be probabilities
                if ml_output.min() >= 0 and ml_output.max() <= 1:
                    pred = (ml_output > 0.5).float()
                else:
                    pred = (torch.sigmoid(ml_output) > 0.5).float()

                target = batch['multilabel'].float()
                metrics['damage_tp'] += (pred * target).sum(dim=0)
                metrics['damage_fp'] += (pred * (1 - target)).sum(dim=0)
                metrics['damage_fn'] += ((1 - pred) * target).sum(dim=0)

            metrics['num_batches'] += 1

    # F1 from the counts accumulated over the whole loader
    precision = metrics['damage_tp'] / (metrics['damage_tp'] + metrics['damage_fp'] + 1e-6)
    recall = metrics['damage_tp'] / (metrics['damage_tp'] + metrics['damage_fn'] + 1e-6)
    f1 = 2 * precision * recall / (precision + recall + 1e-6)

    # Guard against an empty loader
    denom = max(1, metrics['num_batches'])
    return {
        'loss': metrics['loss'] / denom,
        'blade_iou': metrics['blade_iou'] / denom,
        'damage_f1': f1.mean().item(),
        'per_class_f1': f1.cpu().numpy(),
        'precision': precision.cpu().numpy(),
        'recall': recall.cpu().numpy()
    }

print("✅ Training functions with full metrics ready")

In [None]:
# ===== Cell 8: Improved training loop =====
# Trains for config.epochs epochs, unfreezes the blade head at
# config.unfreeze_epoch, records per-epoch metrics in `history`, and saves a
# checkpoint whenever the combined validation score improves.
print("="*60)
print("Mask2Former 통합 학습 시작")
print("="*60)

history = {
    'train_loss': [], 'val_loss': [],
    'train_blade_iou': [], 'val_blade_iou': [],
    'train_damage_f1': [], 'val_damage_f1': []
}
best_score = 0
best_epoch = 0

# Display names for the per-class report, in class-index order
damage_types = ['Crack', 'Nick', 'Tear']

for epoch in range(config.epochs):
    print(f"\nEpoch {epoch+1}/{config.epochs}")
    print("-" * 50)

    if epoch == config.unfreeze_epoch and config.freeze_blade_initially:
        print("🔓 Unfreezing Blade Head for fine-tuning")
        # Materialise once: the same parameters are un-frozen and registered
        blade_params = list(model.blade_head.parameters())
        for param in blade_params:
            param.requires_grad = True
        blade_lr = config.learning_rate * 0.1
        optimizer.add_param_group({
            'params': blade_params,
            'lr': blade_lr,
            'initial_lr': blade_lr,
            'name': 'blade_head'
        })
        # The scheduler recorded its base learning rates when it was built, so
        # it does not know about the new group. Register it so the schedule
        # covers the blade head as well.
        if hasattr(scheduler, 'base_lrs'):
            scheduler.base_lrs.append(blade_lr)

    # Training
    train_metrics = train_epoch(model, train_loader, criterion, optimizer,
                                scheduler, scaler, config, epoch)

    # Validation
    val_metrics = validate_epoch(model, valid_loader, criterion, config)

    # Record history
    history['train_loss'].append(train_metrics['total'])
    history['train_blade_iou'].append(train_metrics['blade_iou'])
    history['train_damage_f1'].append(train_metrics['damage_f1'])
    history['val_loss'].append(val_metrics['loss'])
    history['val_blade_iou'].append(val_metrics['blade_iou'])
    history['val_damage_f1'].append(val_metrics['damage_f1'])

    # Combined model-selection score: 40% blade IoU, 60% damage F1
    combined_score = 0.4 * val_metrics['blade_iou'] + 0.6 * val_metrics['damage_f1']

    # Print detailed summary
    print(f"\n📊 Epoch {epoch+1} Summary:")
    print(f"  Train:")
    print(f"    Loss: {train_metrics['total']:.4f}")
    print(f"    Blade IoU: {train_metrics['blade_iou']:.4f}")
    print(f"    Damage F1: {train_metrics['damage_f1']:.4f}")
    print(f"  Valid:")
    print(f"    Loss: {val_metrics['loss']:.4f}")
    print(f"    Blade IoU: {val_metrics['blade_iou']:.4f}")
    print(f"    Damage F1: {val_metrics['damage_f1']:.4f}")

    # Per-class F1; zipping with the names avoids an IndexError if the metric
    # arrays ever hold more entries than there are names.
    for damage_name, f1, p, r in zip(damage_types,
                                     val_metrics['per_class_f1'],
                                     val_metrics['precision'],
                                     val_metrics['recall']):
        print(f"    {damage_name}: F1={f1:.3f}, P={p:.3f}, R={r:.3f}")

    print(f"  Combined Score: {combined_score:.4f}")

    # Save best model
    if combined_score > best_score:
        best_score = combined_score
        best_epoch = epoch

        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Scheduler / scaler state make the checkpoint resumable
            'scheduler_state_dict': scheduler.state_dict(),
            'scaler_state_dict': scaler.state_dict() if scaler else None,
            'best_score': best_score,
            'val_blade_iou': val_metrics['blade_iou'],
            'val_damage_f1': val_metrics['damage_f1']
        }

        torch.save(checkpoint, config.save_dir / f'{config.experiment_name}_best.pth')
        print(f"  ✅ Best model saved! (Score: {best_score:.4f})")

print("\n" + "="*60)
print(f"학습 완료! Best epoch: {best_epoch+1}, Best score: {best_score:.4f}")
print("="*60)

20250928 다시 시작

In [None]:
# ===== 셀 1: 환경 설정 및 Import =====
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import json
from tqdm import tqdm
import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# 프로젝트 경로 추가
import sys
sys.path.append('.')

# 모델 import
from models.unified.unified_model import UnifiedModel
from models.heads.mask2former_damage_head import Mask2FormerLoss
from utils.dataset import UnifiedDamageDataset, create_dataloaders
from utils.evaluate import ModelEvaluator

# Environment report: PyTorch version and CUDA availability, plus the name and
# total memory of device 0 when CUDA is usable.
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 before being labelled "GB"
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# Put this at the very top (even if the module was imported before)
import importlib
import sys

# Module reload: drop the cached entry, if there is one, so that the import
# below runs the module again.
sys.modules.pop('models.heads.mask2former_damage_head', None)

from models.heads.mask2former_damage_head import Mask2FormerDamageHead

In [None]:
# Quick check, run directly in a notebook cell
import torch

# Test data: four random 256-channel square maps with sides 160, 80, 40, 20
batch_size = 2
dummy_features = [
    torch.randn(batch_size, 256, side, side)
    for side in (160, 80, 40, 20)
]

# Shapes only
print("Input feature shapes:")
for i, f in enumerate(dummy_features):
    print(f"  Level {i}: {f.shape}")
    elements = f.shape[2] * f.shape[3]
    print(f"    -> {f.shape[2]}x{f.shape[3]} = {elements} spatial elements")

# Expected mask shape: assumed to follow the first (largest) level
expected_mask_h, expected_mask_w = dummy_features[0].shape[-2:]
print(f"\nExpected mask shape: [batch, queries, {expected_mask_h}, {expected_mask_w}]")
print(f"Total elements per mask: {expected_mask_h * expected_mask_w}")

In [None]:
# Work around the ConvNeXtFPN problem by testing the head directly

# 1. Dummy features for a 640x640 input: one 256-channel map per stride
batch_size = 2
dummy_features = [
    torch.randn(batch_size, 256, 640 // stride, 640 // stride)
    for stride in (4, 8, 16, 32)  # -> 160, 80, 40, 20
]

print("=== 640x640 입력 → Backbone 예상 출력 ===")
for i, f in enumerate(dummy_features):
    h, w = f.shape[-2:]
    print(f"Level {i}: {f.shape} -> {h}x{w} = {h*w} elements")

# 2. Exercise the Mask2Former head on its own
from models.heads.mask2former_damage_head import Mask2FormerDamageHead

mask2former_head = Mask2FormerDamageHead(
    in_channels=256,
    num_classes=3,
    num_queries=100,
    hidden_dim=256,
    num_heads=8,
    dec_layers=3,
    dropout=0.1
)

# Message printed for each recognised mask element count (height * width)
_resolution_notes = {
    640000: "    ⚠️ 800x800 발견! (문제의 원인)",
    409600: "    ✓ 640x640 (정상)",
    102400: "    ⚠️ 320x320",
    40000: "    ⚠️ 200x200",
    25600: "    ⚠️ 160x160",
}

print("\n=== Mask2Former Head 테스트 ===")
with torch.no_grad():
    try:
        # Forward through the head; `batch` comes from an earlier cell
        outputs = mask2former_head(dummy_features, blade_mask=batch.get('blade_mask'))

        print("출력:")
        for key, value in outputs.items():
            if not torch.is_tensor(value):
                continue
            print(f"  {key}: {value.shape}")

            if key != 'pred_masks':
                continue
            h, w = value.shape[-2:]
            total = h * w
            print(f"    -> {h}x{w} = {total} elements")

            if total in _resolution_notes:
                print(_resolution_notes[total])

    except Exception as e:
        print(f"에러: {e}")

# 3. Compare with the target
print("\n=== Target 크기 확인 ===")
if 'instance_masks' in batch:
    print(f"instance_masks: {batch['instance_masks'][0].shape}")
    h, w = batch['instance_masks'][0].shape[-2:]
    print(f"  -> {h}x{w} = {h*w} elements")

In [1]:
# ===== 셀 1: Import 및 설정 =====
import torch
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path
import sys
sys.path.append('.')

from utils.hungarian_loss import HungarianLoss
from utils.hungarian_matcher import HungarianMatcherFixed
from utils.dataset import create_dataloaders

# ===== Cell 2: Load data =====
blade_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\blade_data')
damage_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\multilabeled_data_augmented')

_loader_kwargs = dict(
    blade_data_root=blade_data_root,
    damage_data_root=damage_data_root,
    batch_size=2,
    num_workers=0,
    model_type='mask2former',
)
train_loader, valid_loader, test_loader = create_dataloaders(**_loader_kwargs)

# Define `batch` here (important: the cells below read it)
batch = next(iter(train_loader))

# Summarise every entry: shape for tensors, length for anything else
_batch_summary = {}
for _name, _entry in batch.items():
    _batch_summary[_name] = _entry.shape if torch.is_tensor(_entry) else len(_entry)
print("데이터 Shape:", _batch_summary)

# ===== Cell 3: SimplePixelDecoder definition =====
class SimplePixelDecoder(nn.Module):
    """Minimal pixel-decoder stand-in: a 1x1 convolution on the first feature map.

    Args:
        in_channels: channel count of ``features[0]`` (default 256).
        out_channels: channel count of the returned mask features (default 256).
    """

    def __init__(self, in_channels=256, out_channels=256):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, features):
        """Return ``(mask_features, multi_scale_features)``.

        ``mask_features`` is the 1x1 convolution of ``features[0]`` and keeps
        its spatial size (no upsampling yet). ``multi_scale_features`` is the
        first three entries of ``features``, passed through unchanged.
        """
        mask_features = self.conv(features[0])
        return mask_features, features[:3]

# ===== Cell 4: Mask2Former smoke test =====
from models.heads.mask2former_damage_head import Mask2FormerDamageHead

# Build the head
mask2former = Mask2FormerDamageHead(
    in_channels=256,
    num_classes=3,
    num_queries=100
)

# Replace the MSDeformAttnPixelDecoder with the lightweight stand-in
mask2former.pixel_decoder = SimplePixelDecoder()

# Dummy features
dummy_features = [
    torch.randn(2, 256, 160, 160),
    torch.randn(2, 256, 80, 80),
    torch.randn(2, 256, 40, 40),
    torch.randn(2, 256, 20, 20)
]

# Forward pass
with torch.no_grad():
    outputs = mask2former(dummy_features)

print("\n=== Mask2Former 출력 ===")
for k, v in outputs.items():
    if torch.is_tensor(v):
        print(f"{k}: {v.shape}")
        if k == 'pred_masks':
            h, w = v.shape[-2:]
            print(f"  -> {h}x{w} = {h*w} elements")

# ===== Cell 5: Hungarian loss test =====
print("\n=== Hungarian Loss 테스트 ===")

# Note: this rebinds the notebook-level name `criterion`
criterion = HungarianLoss(num_classes=3)

try:
    loss, loss_dict = criterion(outputs, batch)
    print(f"✅ 성공!")
    print(f"Total loss: {loss.item():.4f}")
    print("Loss components:", {k: v.item() if torch.is_tensor(v) else v for k, v in loss_dict.items()})
except Exception as e:
    print(f"❌ 에러: {e}")

    # Debugging information
    print("\n디버깅 정보:")
    print(f"outputs keys: {outputs.keys()}")
    print(f"batch keys: {batch.keys()}")

    if 'instance_masks' in batch:
        print(f"instance_masks[0]: {batch['instance_masks'][0].shape}")
        print(f"pred_masks: {outputs['pred_masks'].shape}")

# ===== Cell 6: Check whether the size problem is resolved =====
print("\n=== 크기 매칭 확인 ===")
pred_h, pred_w = outputs['pred_masks'].shape[-2:]
tgt_h, tgt_w = batch['instance_masks'][0].shape[-2:]

print(f"Prediction: {pred_h}x{pred_w}")
print(f"Target: {tgt_h}x{tgt_w}")

# Compare both dimensions; checking the height alone would report a match
# when only the widths differ.
if (pred_h, pred_w) != (tgt_h, tgt_w):
    # Report the height factor, or the width factor when the heights agree
    scale = tgt_h / pred_h if pred_h != tgt_h else tgt_w / pred_w
    print(f"⚠️ 크기 불일치 - {scale}배 업샘플링 필요")
else:
    print("✅ 크기 일치!")

데이터 Shape: {'image': torch.Size([2, 3, 640, 640]), 'blade_mask': torch.Size([2, 640, 640]), 'multilabel': torch.Size([2, 3]), 'instance_masks': 2, 'instance_labels': 2}
mask_embed shape: torch.Size([2, 100, 256])
mask_pred shape: torch.Size([2, 100, 160, 160])
mask_pred spatial: H=160, W=160
Total elements per mask: 25600
⚠️ Unexpected resolution: 25600 elements

=== Mask2Former 출력 ===
pred_logits: torch.Size([2, 100, 4])
pred_masks: torch.Size([2, 100, 160, 160])
  -> 160x160 = 25600 elements
multilabel: torch.Size([2, 3])

=== Hungarian Loss 테스트 ===
✅ 성공!
Total loss: 8.9148
Loss components: {'ce': 1.6377153396606445, 'mask': 0.734769880771637, 'dice': 0.9827669858932495}

=== 크기 매칭 확인 ===
Prediction: 160x160
Target: 640x640
⚠️ 크기 불일치 - 4.0배 업샘플링 필요


In [3]:
# ===== Jupyter Notebook test: improved Mask2Former =====

# Cell 1: reload the module file
import importlib
import sys

# Remove the cached module, if present, so that the import below runs the
# module again.
sys.modules.pop('models.heads.mask2former_damage_head', None)

# Import again
from models.heads.mask2former_damage_head import Mask2FormerDamageHead

# Cell 2: build the model and test it
import torch
import torch.nn as nn

# Mask2Former head (improved version)
_head_kwargs = dict(
    in_channels=256,
    num_classes=3,
    num_queries=100,  # reduced
    hidden_dim=256,
    num_heads=8,
    dec_layers=3,  # reduced
    dropout=0.1,
)
mask2former = Mask2FormerDamageHead(**_head_kwargs)

_param_total = sum(p.numel() for p in mask2former.parameters())
print("모델 파라미터 수:", _param_total / 1e6, "M")

# Cell 3: forward pass on dummy data (256-channel maps, sides 160/80/40/20)
dummy_features = [torch.randn(2, 256, side, side) for side in (160, 80, 40, 20)]

with torch.no_grad():
    outputs = mask2former(dummy_features)

print("\n=== 개선된 Mask2Former 출력 ===")
for k, v in outputs.items():
    if not torch.is_tensor(v):
        continue
    print(f"{k}: {v.shape}")
    if k != 'pred_masks':
        continue
    h, w = v.shape[-2:]
    print(f"  -> Resolution: {h}x{w} = {h*w} pixels")
    if h*w == 409600:
        print("  ✅ 640x640 달성!")

# Cell 4: test with real data
from utils.dataset import create_dataloaders
from pathlib import Path

blade_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\blade_data')
damage_data_root = Path(r'C:\EngineBladeAI\EngineInspectionAI_MS\data\multilabeled_data_augmented')

train_loader, _, _ = create_dataloaders(
    blade_data_root=blade_data_root,
    damage_data_root=damage_data_root,
    batch_size=2,
    num_workers=0,
    model_type='mask2former'
)

batch = next(iter(train_loader))

# Cell 5: Hungarian loss test
from utils.hungarian_loss import HungarianLoss

# Note: this rebinds the notebook-level name `criterion`
criterion = HungarianLoss(num_classes=3)

# Predictions still come from the dummy features; only the targets are real
with torch.no_grad():
    outputs = mask2former(dummy_features)

try:
    loss, loss_dict = criterion(outputs, batch)
    print("\n=== Hungarian Loss 결과 ===")
    print(f"Total loss: {loss.item():.4f}")
    for k, v in loss_dict.items():
        # float() accepts scalar tensors and plain numbers alike, so a
        # non-tensor entry no longer turns a successful loss into an "error"
        print(f"  {k}: {float(v):.4f}")
    print("✅ Hungarian matching 성공!")
except Exception as e:
    print(f"❌ 에러: {e}")

# Cell 6: check memory usage
if torch.cuda.is_available():
    # Move the head and a copy of the dummy features onto the GPU
    mask2former = mask2former.cuda()
    dummy_features_cuda = list(map(lambda feat: feat.cuda(), dummy_features))

    # Reset the peak counter after the transfers, then run one forward pass
    torch.cuda.reset_peak_memory_stats()
    with torch.no_grad():
        outputs = mask2former(dummy_features_cuda)

    _peak_bytes = torch.cuda.max_memory_allocated()
    memory_used = _peak_bytes / 1e9
    print(f"\n메모리 사용량: {memory_used:.2f} GB")

모델 파라미터 수: 24.690439 M

=== 개선된 Mask2Former 출력 ===
pred_logits: torch.Size([2, 100, 4])
pred_masks: torch.Size([2, 100, 640, 640])
  -> Resolution: 640x640 = 409600 pixels
  ✅ 640x640 달성!
multilabel: torch.Size([2, 3])

=== Hungarian Loss 결과 ===
Total loss: 4.8094
  ce: 0.7768
  mask: 0.2529
  dice: 0.9958
✅ Hungarian matching 성공!

메모리 사용량: 2.39 GB
