In [3]:
# import
import json
import os
import platform
import random
import shutil
import subprocess
import sys
from collections import Counter, OrderedDict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pytorch_lightning as pl
import seaborn as sns
import torch
import torchvision
import transformers
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import DataLoader
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision import transforms as T
from tqdm import tqdm
from transformers import DetrImageProcessor, DetrForObjectDetection


In [None]:
# Project root discovery

# Locate the root only once and keep it fixed afterwards
def find_project_root(marker_filename=".project-root"):
    """Return the closest directory at or above the CWD that contains the marker file.

    The search starts at the absolute current working directory and climbs one
    parent at a time until a regular file named ``marker_filename`` is found.

    Args:
        marker_filename: Name of the file whose presence marks the project root.

    Returns:
        Absolute path (str) of the first directory that contains the marker.

    Raises:
        FileNotFoundError: If the top of the filesystem is reached without a match.
    """
    candidate = os.path.abspath(os.getcwd())
    while not os.path.isfile(os.path.join(candidate, marker_filename)):
        parent = os.path.dirname(candidate)
        # dirname() of the filesystem root is the root itself: nothing left to climb.
        if parent == candidate:
            raise FileNotFoundError(f"Could not find {marker_filename} in any parent directory.")
        candidate = parent
    return candidate
        
# Create path/output folders (meant to be used after find_project_root() has been called)
def ensure_dir(path):
    """Create directory ``path`` together with any missing parents.

    An already existing directory is left untouched (``exist_ok=True``).

    Args:
        path: Directory path to create.
    """
    os.makedirs(path, exist_ok=True)

# Resolved once, when this cell runs; every later path is built relative to it.
PROJECT_ROOT = find_project_root()


def get_project_path(*paths):
    """Join ``paths`` onto ``PROJECT_ROOT`` and return the resulting path string.

    Args:
        *paths: Path components appended, in order, below the project root.

    Returns:
        The joined path as produced by ``os.path.join``.
    """
    segments = (PROJECT_ROOT, *paths)
    return os.path.join(*segments)

In [None]:
# Header banner
print("=" * 60, "EXPERIMENTAL ENVIRONMENT", "=" * 60, sep="\n")

# Hardware information
print("\n[Hardware]")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"  Device: {device}")

# GPU details are only queried when CUDA is usable (device index 0).
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is divided by 1024**3 so the value is reported in GB.
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(
        f"  GPU: {gpu_name}",
        f"  GPU Memory: {gpu_memory:.1f} GB",
        f"  CUDA Version: {torch.version.cuda}",
        sep="\n",
    )
    if torch.backends.cudnn.is_available():
        print(f"  cuDNN Version: {torch.backends.cudnn.version()}")

# CPU information
print(
    f"  CPU: {platform.processor()}",
    f"  CPU Cores: {os.cpu_count()}",
    sep="\n",
)

# Software information
print("\n[Software]")
print(
    f"  OS: {platform.system()} {platform.release()}",
    f"  Python: {platform.python_version()}",
    f"  PyTorch: {torch.__version__}",
    f"  Torchvision: {torchvision.__version__}",
    f"  Transformers: {transformers.__version__}",
    f"  PyTorch Lightning: {pl.__version__}",
    sep="\n",
)

# Footer banner (leading empty argument reproduces the blank separator line)
print("", "=" * 60, "Environment check completed", "=" * 60, sep="\n")

In [None]:
# Experiment Management & Logging

# ============================================================
# Cell: Experiment Manager
# ============================================================

class ExperimentManager:
    """Create the directory layout for one experiment run and persist its config/results.

    On construction the manager derives a timestamp-based experiment id, creates
    ``{save_dir}/{arch_name}/{dataset_name}/{experiment_id}`` with the
    sub-directories ``config``, ``checkpoints``, ``tensorboard`` and ``results``,
    and writes the config plus a small metadata record through ``save_config``.

    NOTE(review): ``save_config`` and ``ConfigDict`` are not defined in the visible
    cells; they must exist in the notebook namespace before this class is used.
    ``datetime`` must be imported as ``from datetime import datetime``.
    """

    def __init__(self, config: "ConfigDict"):
        """Set up directories and save the experiment info.

        Args:
            config: Experiment configuration. The attributes read here are
                ``experiment.save_dir``, ``model.arch_name``,
                ``data.dataset_name`` and ``data.num_classes``.
        """
        self.config = config

        # Take the timestamp once so the experiment id and the metadata
        # 'created_at' field always describe the same instant.
        self._created_at = datetime.now()
        self.experiment_id = self._created_at.strftime("%Y%m%d_%H%M%S")

        # Experiment directory layout: {save_dir}/{model}/{dataset}/{exp_id}
        self.exp_dir = (
            Path(config.experiment.save_dir)
            / config.model.arch_name
            / config.data.dataset_name
            / self.experiment_id
        )

        # Sub-directories
        self.config_dir = self.exp_dir / "config"
        self.checkpoint_dir = self.exp_dir / "checkpoints"
        self.tensorboard_dir = self.exp_dir / "tensorboard"
        self.results_dir = self.exp_dir / "results"

        # Create the directories, then store config and metadata inside them.
        self._create_directories()
        self._save_experiment_info()

    def _create_directories(self):
        """Create every experiment sub-directory (parents included, existing ones kept)."""
        for directory in (
            self.config_dir,
            self.checkpoint_dir,
            self.tensorboard_dir,
            self.results_dir,
        ):
            directory.mkdir(parents=True, exist_ok=True)

    def _save_experiment_info(self):
        """Write ``config.yaml`` and ``metadata.yaml`` into the config directory."""
        # Config
        save_config(self.config, self.config_dir / "config.yaml")

        # Metadata
        metadata = {
            'experiment_id': self.experiment_id,
            'created_at': self._created_at.strftime("%Y-%m-%d %H:%M:%S"),
            'exp_dir': str(self.exp_dir),
            'model': self.config.model.arch_name,
            'dataset': self.config.data.dataset_name,
            'num_classes': self.config.data.num_classes,
        }
        save_config(metadata, self.config_dir / "metadata.yaml")

    def save_results(self, results: Dict[str, Any]):
        """Write ``results`` to ``results.yaml`` inside the results directory.

        Args:
            results: Mapping of result names to values, passed to ``save_config``.
        """
        save_config(results, self.results_dir / "results.yaml")

    def print_info(self):
        """Print a summary of the experiment id, config highlights and directories."""
        print(f"\n{'='*70}")
        print("🧪 EXPERIMENT SETUP")
        print(f"{'='*70}")
        print(f"  Experiment ID:   {self.experiment_id}")
        print(f"  Name:            {self.config.experiment.name}")
        print(f"  Model:           {self.config.model.arch_name}")
        print(f"  Dataset:         {self.config.data.dataset_name} "
              f"({self.config.data.num_classes} classes)")
        print(f"  Classes:         {', '.join(self.config.data.class_names)}")
        print("")
        print("  📁 Directories:")
        print(f"     Root:         {self.exp_dir}")
        print(f"     Config:       {self.config_dir}")
        print(f"     Checkpoints:  {self.checkpoint_dir}")
        print(f"     TensorBoard:  {self.tensorboard_dir}")
        print(f"     Results:      {self.results_dir}")
        print(f"{'='*70}\n")

# Confirmation message for this cell; the text was mis-decoded UTF-8 and is restored here.
print("✅ Experiment Manager 로드 완료")

In [None]:
# ==========================
# DETR dataset class definition
# ==========================

class CocoDetection(torchvision.datasets.CocoDetection):
    """Custom COCO detection dataset that returns samples encoded for DETR training.

    Each item is read through ``torchvision.datasets.CocoDetection``, optionally
    colour/sharpness-augmented, and then encoded by the supplied image processor.
    """

    def __init__(
        self,
        img_folder: str,
        ann_file: str,
        imageprocessor: DetrImageProcessor,
        train: bool = True
    ):
        """Initialise the dataset.

        Args:
            img_folder: Image directory handed to the torchvision base class.
            ann_file: COCO annotation file handed to the torchvision base class.
            imageprocessor: Processor used to encode every image/annotation pair.
            train: When True, photometric augmentation is applied in ``__getitem__``.
        """
        super().__init__(img_folder, ann_file)

        self.imageprocessor = imageprocessor
        self.train = train

        # Training-only augmentation. Only colour and sharpness are altered, so
        # the bounding boxes do not need to be adjusted.
        if train:
            self.augment = T.Compose([
                T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
                T.RandomAdjustSharpness(sharpness_factor=1.5, p=0.2),
            ])
        else:
            self.augment = None

    def __getitem__(self, idx: int):
        """Return ``(pixel_values, target)`` for sample ``idx``.

        Args:
            idx: Index into the dataset.

        Returns:
            A tuple of the processor's ``pixel_values`` with the leading batch
            axis removed and the first entry of the processor's ``labels``.
        """
        # Read the image and its COCO-format annotations from the base class.
        img, target = super().__getitem__(idx)

        # Augment the image during training only.
        if self.augment is not None:
            img = self.augment(img)

        # Wrap the annotations in the dict layout passed to the processor.
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.imageprocessor(images=img, annotations=target, return_tensors="pt")

        # Drop only the leading batch axis. A bare squeeze() would also remove
        # any other size-1 dimension and silently change the tensor's rank.
        # NOTE(review): assumes dim 0 of pixel_values is the size-1 batch axis — confirm.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]

        return pixel_values, target

In [None]:
# Data Loader

# =========================
# DataLoader creation
# =========================

# Initialise the DETR ImageProcessor
imageprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")


def create_collate_fn(imageprocessor: DetrImageProcessor):
    """Build a batch collate function that pads images through ``imageprocessor``.

    Args:
        imageprocessor: Processor whose ``pad`` method is applied to the images
            of every batch.

    Returns:
        A function mapping a batch of ``(pixel_values, target)`` items to a dict
        with the keys ``pixel_values``, ``pixel_mask`` and ``labels``.
    """

    def collate_fn(batch):
        # Split each sample into its image tensor and its target.
        images, targets = [], []
        for sample in batch:
            images.append(sample[0])
            targets.append(sample[1])

        # Let the processor pad the images and build the matching mask.
        padded = imageprocessor.pad(images, return_tensors="pt")

        return dict(
            pixel_values=padded['pixel_values'],
            pixel_mask=padded['pixel_mask'],
            labels=targets,
        )

    return collate_fn
# WLS 

In [None]:
# DETR Model 

# =========================================
# Load the DETR model (pretrained weights)
# =========================================

import pytorch_lightning as pl
from torchmetrics.detection.mean_ap import MeanAveragePrecision

class Detr(pl.LightningModule):
    """PyTorch Lightning wrapper for fine-tuning a pretrained DETR detector.

    Wraps ``DetrForObjectDetection`` loaded from ``facebook/detr-resnet-50``,
    logs the losses returned by the model for training and validation, and
    accumulates a ``MeanAveragePrecision`` metric over each validation epoch.

    Args:
        num_labels: Number of classes passed to ``from_pretrained``.
        lr: Learning rate for parameters whose name does not contain "backbone".
        lr_backbone: Learning rate for parameters whose name contains "backbone".
        weight_decay: Weight decay handed to AdamW.
        score_threshold: Predictions are kept for the mAP metric only when their
            score is strictly greater than this value.
    """

    def __init__(
        self,
        num_labels: int,
        lr: float = 1e-4,
        lr_backbone: float = 1e-5,
        weight_decay: float = 1e-4,
        score_threshold: float = 0.5,
    ):
        super().__init__()
        # Makes the constructor arguments available through self.hparams.
        self.save_hyperparameters()

        self.model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-50",
            num_labels=num_labels,
            ignore_mismatched_sizes=True,
        )

        self.map_metric = MeanAveragePrecision(
            box_format="cxcywh", iou_type="bbox", class_metrics=True
        )

    def forward(self, pixel_values, pixel_mask=None):
        """Run the wrapped model without labels and return its outputs."""
        return self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

    def _forward_with_labels(self, batch):
        """Run one labelled forward pass; return ``(outputs, labels)``.

        The label tensors are moved to the module's device first. ``outputs``
        carries the loss as well as the predictions, so callers needing both
        do not have to run the model a second time.
        """
        labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]
        outputs = self.model(
            pixel_values=batch["pixel_values"],
            pixel_mask=batch.get("pixel_mask"),
            labels=labels,
        )
        return outputs, labels

    def common_step(self, batch, batch_idx):
        """Compute the loss for ``batch``; return ``(loss, loss_dict)``."""
        outputs, _ = self._forward_with_labels(batch)
        return outputs.loss, outputs.loss_dict

    def training_step(self, batch, batch_idx):
        """Log the total and per-component training losses and return the total."""
        loss, loss_dict = self.common_step(batch, batch_idx)
        # The batch dict holds a list of label dicts, so the batch size is
        # passed explicitly instead of being inferred by Lightning.
        batch_size = batch["pixel_values"].shape[0]
        self.log("train_loss", loss, prog_bar=True, batch_size=batch_size)
        for k, v in loss_dict.items():
            self.log(f"train_{k}", v.item(), batch_size=batch_size)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation losses and feed the predictions to the mAP metric.

        A single forward pass supplies both the loss and the logits/boxes; the
        previous implementation ran the model twice per validation batch.
        """
        outputs, labels = self._forward_with_labels(batch)
        loss = outputs.loss
        batch_size = batch["pixel_values"].shape[0]

        self.log("val_loss", loss, prog_bar=True, batch_size=batch_size)
        for k, v in outputs.loss_dict.items():
            self.log(f"val_{k}", v.item(), batch_size=batch_size)

        # mAP accumulation (tensors are detached inside _update_map).
        self._update_map(outputs, labels)
        return loss

    def _update_map(self, outputs, labels):
        """Convert model outputs and labels to per-image dicts and update the mAP metric."""
        # Softmax over the class axis, then drop the last class column.
        # NOTE(review): presumably the last index is DETR's "no object" class — confirm.
        probs = outputs.logits.softmax(-1)[..., :-1]
        scores, pred_labels = probs.max(-1)
        pred_boxes = outputs.pred_boxes

        preds = []
        targets = []

        for i in range(pred_boxes.shape[0]):
            # NOTE(review): thresholding before mAP discards low-score predictions;
            # confirm this is intended for the reported metric.
            keep = scores[i] > self.hparams.score_threshold
            preds.append({
                "boxes": pred_boxes[i][keep].detach().cpu(),
                "scores": scores[i][keep].detach().cpu(),
                "labels": pred_labels[i][keep].detach().cpu(),
            })
            # NOTE(review): assumes target boxes use the same cxcywh convention
            # as the predicted boxes — verify against the image processor output.
            targets.append({
                "boxes": labels[i]["boxes"].detach().cpu(),
                "labels": labels[i]["class_labels"].detach().cpu(),
            })

        if preds:
            self.map_metric.update(preds, targets)

    def on_validation_epoch_end(self):
        """Log every scalar entry of the computed mAP metrics, then reset the metric."""
        metrics = self.map_metric.compute()
        for k, v in metrics.items():
            # Only 0-dim tensors are logged; non-scalar entries are skipped.
            if torch.is_tensor(v) and v.ndim == 0:
                self.log(f"val_{k}", v, prog_bar=True)
        self.map_metric.reset()

    def configure_optimizers(self):
        """Return AdamW with a separate learning rate for "backbone" parameters."""
        trainable = [(n, p) for n, p in self.named_parameters() if p.requires_grad]
        param_dicts = [
            {"params": [p for n, p in trainable if "backbone" not in n]},
            {"params": [p for n, p in trainable if "backbone" in n],
             "lr": self.hparams.lr_backbone},
        ]
        return torch.optim.AdamW(
            param_dicts, lr=self.hparams.lr, weight_decay=self.hparams.weight_decay
        )


In [None]:
# ============================================================
# Cell: unified experiment run function
# ============================================================

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger

def run_experiment(config: "ConfigDict"):
    """Run one complete training experiment driven by ``config``.

    Steps: create the experiment directories, seed everything, build the image
    processor, datasets, dataloaders and model, configure callbacks, loggers
    and the trainer, fit, then store and print a result summary.

    NOTE(review): ``create_dataset_from_config`` and ``ConfigDict`` are not
    defined in the visible cells; they must exist in the notebook namespace.

    Args:
        config: Experiment configuration. This function reads the
            ``experiment``, ``model``, ``data`` and ``trainer`` sections.

    Returns:
        Tuple ``(exp_manager, trainer, model)``.
    """

    # 1. Create the Experiment Manager
    exp_manager = ExperimentManager(config)
    exp_manager.print_info()

    # 2. Set the seed (reproducibility)
    pl.seed_everything(config.experiment.seed)
    print(f" Seed set to: {config.experiment.seed}\n")

    # 3. Initialise the ImageProcessor
    imageprocessor = DetrImageProcessor.from_pretrained(
        config.model.pretrained_path
    )
    print(f" ImageProcessor loaded: {config.model.pretrained_path}\n")

    # 4. Create the datasets (through the dataset registry helper)
    print(" Creating datasets...")
    train_dataset = create_dataset_from_config(
        config.data.dataset_name, "train", imageprocessor, config
    )
    val_dataset = create_dataset_from_config(
        config.data.dataset_name, "val", imageprocessor, config
    )
    print(f"   Train: {len(train_dataset)} samples")
    print(f"   Val:   {len(val_dataset)} samples\n")

    # 5. Create the dataloaders
    print(" Creating dataloaders...")
    collate_fn = create_collate_fn(imageprocessor)

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.data.batch_size,
        shuffle=True,
        num_workers=config.data.num_workers,
        collate_fn=collate_fn
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=config.data.batch_size,
        shuffle=False,
        num_workers=config.data.num_workers,
        collate_fn=collate_fn
    )
    print(f"   Train batches: {len(train_loader)}")
    print(f"   Val batches:   {len(val_loader)}\n")

    # 6. Create the model
    print(" Creating model...")
    model = Detr(
        num_labels=config.model.num_labels,
        lr=config.model.learning_rate,
        lr_backbone=config.model.lr_backbone,
        weight_decay=config.model.weight_decay,
        score_threshold=config.model.score_threshold
    )
    print(f"   Model: DETR with {config.model.num_labels} classes\n")

    # 7. Configure the callbacks
    checkpoint_callback = ModelCheckpoint(
        dirpath=str(exp_manager.checkpoint_dir),
        filename='best-{epoch:02d}-{val_loss:.2f}',
        save_top_k=config.trainer.checkpoint.save_top_k,
        monitor=config.trainer.checkpoint.monitor,
        mode=config.trainer.checkpoint.mode,
        save_last=config.trainer.checkpoint.save_last
    )

    early_stop_callback = EarlyStopping(
        monitor=config.trainer.early_stopping.monitor,
        patience=config.trainer.early_stopping.patience,
        mode=config.trainer.early_stopping.mode
    )

    # 8. Configure the loggers
    tensorboard_logger = TensorBoardLogger(
        save_dir=str(exp_manager.tensorboard_dir),
        name="",
        version=""
    )

    csv_logger = CSVLogger(
        save_dir=str(exp_manager.exp_dir),
        name="logs"
    )

    # 9. Configure the trainer
    trainer = pl.Trainer(
        max_epochs=config.trainer.max_epochs,
        accelerator=config.trainer.accelerator,
        devices=config.trainer.devices,
        precision=config.trainer.precision,
        log_every_n_steps=10,
        accumulate_grad_batches=config.trainer.accumulate_grad_batches,
        gradient_clip_val=config.trainer.gradient_clip_val,
        callbacks=[checkpoint_callback, early_stop_callback],
        logger=[tensorboard_logger, csv_logger],
        enable_progress_bar=True
    )

    # 10. Print the training configuration
    effective_batch = config.data.batch_size * config.trainer.accumulate_grad_batches
    print(f"{'='*70}")
    print(f" TRAINING CONFIGURATION")
    print(f"{'='*70}")
    print(f"  Max Epochs:          {config.trainer.max_epochs}")
    print(f"  Batch Size:          {config.data.batch_size}")
    print(f"  Gradient Accum:      {config.trainer.accumulate_grad_batches}")
    print(f"  Effective Batch:     {effective_batch}")
    print(f"  Learning Rate:       {config.model.learning_rate}")
    print(f"  LR Backbone:         {config.model.lr_backbone}")
    print(f"  Precision:           {config.trainer.precision}")
    print(f"  Early Stop Patience: {config.trainer.early_stopping.patience}")
    print(f"{'='*70}\n")

    # 11. Start training
    print(" Starting training...\n")
    trainer.fit(model, train_loader, val_loader)

    # 12. Save the results
    # best_model_score may be None when no checkpoint was scored. Compare with
    # `is not None` so that a legitimate score of 0.0 is not turned into None.
    best_score = checkpoint_callback.best_model_score
    best_score_value = float(best_score) if best_score is not None else None
    results = {
        'experiment_id': exp_manager.experiment_id,
        'best_checkpoint': str(checkpoint_callback.best_model_path),
        'best_val_loss': best_score_value,
        'total_epochs': trainer.current_epoch,
        'config_path': str(exp_manager.config_dir / "config.yaml")
    }
    exp_manager.save_results(results)

    # 13. Completion message
    # Formatting None with ':.4f' raises TypeError, so fall back to a placeholder.
    best_score_text = f"{best_score_value:.4f}" if best_score_value is not None else "N/A"
    print(f"\n{'='*70}")
    print(f" TRAINING COMPLETED!")
    print(f"{'='*70}")
    print(f"  Experiment ID:   {exp_manager.experiment_id}")
    print(f"  Best Checkpoint: {checkpoint_callback.best_model_path}")
    print(f"  Best Val Loss:   {best_score_text}")
    print(f"")
    print(f"   All results saved in:")
    print(f"     {exp_manager.exp_dir}")
    print(f"")
    print(f"   View TensorBoard:")
    print(f"     tensorboard --logdir {exp_manager.tensorboard_dir}")
    print(f"{'='*70}\n")

    return exp_manager, trainer, model

# Confirmation message for this cell; the text was mis-decoded UTF-8 and is restored here.
print("✅ 통합 실험 함수 로드 완료")

In [None]:
# Reproduce a saved experiment

# Load the config
# NOTE(review): nothing is actually loaded here. This bare expression only
# references `config`, which is not defined in the visible cells and must
# already exist in the kernel, otherwise this cell raises NameError.
config 

# Re-run the experiment with the same settings
exp_manager, trainer, model = run_experiment(config)

In [None]:
# Load the model from a checkpoint
# NOTE(review): the checkpoint path is hard-coded and relative to the current
# working directory; presumably a placeholder to be replaced with a real run's
# best checkpoint — confirm before executing.

model = Detr.load_from_checkpoint(
    "exp/detr/TomatOD_COCO_3/20250101_123456/checkpoints/best-epoch=10-val_loss=1.23.ckpt",
    num_labels=3
)