<a href="https://colab.research.google.com/github/BrianChuan/TAICA_Computer-Vision/blob/main/HW2_Parking_Lot_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 匯入必要套件＆定義資料路徑
> Import Libraries & Define Data Path
- 模型：Faster R-CNN model
- 任務：多類別物件偵測 (停車場)

In [None]:
import os
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import pandas as pd

# ------------------------------
# 1. Dataset paths (Kaggle environment)
# ------------------------------
BASE_PATH = '/kaggle/input/parking-lot/CVPDL_hw2/CVPDL_hw2/'
# Training images, training annotation (.txt) files, and test images all
# live in sub-folders of BASE_PATH.
TRAIN_IMG_DIR, TRAIN_TXT_DIR, TEST_IMG_DIR = [
    os.path.join(BASE_PATH, relative)
    for relative in ('train/img', 'train/txt', 'test/img')
]

# ------------------------------
# 2. Class definitions
# ------------------------------
# Class ids in the annotation files: 0: car, 1: hov, 2: person, 3: motorcycle
# Class ids inside the model:        1: car, 2: hov, 3: person, 4: motorcycle
# (id 0 is reserved for the background class)
CLASSES = ['car', 'hov', 'person', 'motorcycle']
NUM_CLASSES = 1 + len(CLASSES)  # one extra slot for background

for banner in (f"✅ Base path: {BASE_PATH}", f"✅ Number of classes: {NUM_CLASSES}"):
    print(banner)

In [ ]:
!pip install albumentations -q

# Dataset 實作
- 功能：讀取圖片與對應的 `.txt` 標註檔案，並轉換成訓練用的 Tensor 格式。

In [None]:
# ------------------------------
# 3. Dataset (修改為 ParkingLotDataset)
# ------------------------------
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_train_transforms():
    """Build the albumentations pipeline applied to training samples.

    The pipeline randomly flips the image horizontally (p=0.5), randomly
    perturbs brightness/contrast (p=0.2), normalizes with the ImageNet
    mean/std, and converts the result to a tensor. Bounding boxes are
    declared in ``pascal_voc`` format with their classes carried in the
    ``labels`` field.
    """
    box_spec = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    steps = [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps, bbox_params=box_spec)

class ParkingLotDataset(Dataset):
    """Detection dataset pairing ``.png`` images with per-image ``.txt`` annotations.

    Every annotation line is read as ``class_id x y w h`` (whitespace
    separated), where ``x, y`` is treated as the top-left corner. Boxes are
    converted to ``[x_min, y_min, x_max, y_max]`` and class ids are shifted by
    +1 so that id 0 stays free for the background class.

    Args:
        img_dir: Directory containing the ``.png`` images.
        txt_dir: Directory containing ``<image stem>.txt`` annotation files.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with the same three keys (e.g. an albumentations ``Compose``).
    """

    def __init__(self, img_dir, txt_dir, transforms=None):
        self.img_dir = img_dir
        self.txt_dir = txt_dir
        self.transforms = transforms
        # Sorted so that a given index always maps to the same file.
        self.imgs = sorted(f for f in os.listdir(img_dir) if f.endswith('.png'))

    def __len__(self):
        return len(self.imgs)

    @staticmethod
    def _parse_annotations(txt_path):
        """Parse one annotation file into ``(boxes, labels)`` plain lists.

        Lines with fewer than five fields, or with non-positive width/height,
        are skipped. A missing file yields two empty lists.
        """
        boxes, labels = [], []
        if not os.path.exists(txt_path):
            return boxes, labels

        with open(txt_path) as f:
            for line in f:
                fields = line.split()
                if len(fields) < 5:
                    continue

                class_id = int(fields[0])
                x, y, w, h = map(float, fields[1:5])
                if w <= 0 or h <= 0:
                    continue

                # (x, y, w, h) -> (x_min, y_min, x_max, y_max)
                boxes.append([x, y, x + w, y + h])
                # File ids 0-3 become model ids 1-4 (0 is background).
                labels.append(class_id + 1)
        return boxes, labels

    @staticmethod
    def _clip_boxes(boxes, labels, width, height):
        """Clip boxes to the image rectangle and drop those left with no area.

        Box-aware augmentation libraries typically reject coordinates outside
        the image, so annotations that spill over the border are trimmed
        instead of aborting the whole epoch.
        """
        max_x, max_y = float(width), float(height)
        kept_boxes, kept_labels = [], []
        for (x_min, y_min, x_max, y_max), label in zip(boxes, labels):
            x_min = min(max(x_min, 0.0), max_x)
            y_min = min(max(y_min, 0.0), max_y)
            x_max = min(max(x_max, 0.0), max_x)
            y_max = min(max(y_max, 0.0), max_y)
            if x_max > x_min and y_max > y_min:
                kept_boxes.append([x_min, y_min, x_max, y_max])
                kept_labels.append(label)
        return kept_boxes, kept_labels

    def _load_sample(self, idx):
        """Load sample ``idx``; return ``None`` when it ends up with no boxes."""
        img_name = self.imgs[idx]
        img_path = os.path.join(self.img_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # The annotation file shares the image's stem.
        txt_name = os.path.splitext(img_name)[0] + '.txt'
        boxes, labels = self._parse_annotations(os.path.join(self.txt_dir, txt_name))

        height, width = img.shape[:2]
        boxes, labels = self._clip_boxes(boxes, labels, width, height)
        if not boxes:
            return None

        if self.transforms:
            # Hand the transform plain Python lists (not torch tensors), and
            # test emptiness with len() so the check works whether the
            # transform returns a list or an array.
            transformed = self.transforms(image=img, bboxes=boxes, labels=labels)
            if len(transformed['bboxes']) == 0:
                return None
            img = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
        }
        return img, target, img_name

    def __getitem__(self, idx):
        """Return ``(image, target, image_name)`` for ``idx``.

        ``target`` is a dict with ``"boxes"`` (float32, N x 4) and ``"labels"``
        (int64, N). If the requested image has no usable boxes, the following
        images are tried in order, wrapping around at the end.

        Raises:
            OSError: If an image file cannot be decoded.
            RuntimeError: If a full pass over the dataset finds no sample
                with at least one box.
        """
        total = len(self.imgs)
        # Bounded scan instead of open-ended recursion: at most one full pass.
        for offset in range(total):
            sample = self._load_sample((idx + offset) % total)
            if sample is not None:
                return sample
        raise RuntimeError("No image with at least one valid bounding box was found.")


# 將 Dataset 物件包裝成 DataLoader

In [None]:
# ------------------------------
# 4. DataLoader + Validation Split
# ------------------------------

def get_val_transforms():
    """Build the deterministic albumentations pipeline for validation samples.

    Only ImageNet mean/std normalization and tensor conversion are applied
    (no random augmentation). Bounding boxes are declared in ``pascal_voc``
    format with their classes carried in the ``labels`` field.
    """
    box_spec = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    steps = [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps, bbox_params=box_spec)

# Base dataset without transforms; used here only to size the split.
full_dataset = ParkingLotDataset(TRAIN_IMG_DIR, TRAIN_TXT_DIR, transforms=None)

# 80/20 train/validation split.
n_total = len(full_dataset)
n_val = int(0.2 * n_total)
n_train = n_total - n_val

# The two subsets need DIFFERENT transforms, so each one wraps its own dataset
# instance. Subsets produced by random_split share a single underlying
# dataset object; assigning `.dataset.transforms` through both of them leaves
# only the last assignment in effect, which would silently train on the
# validation pipeline (no augmentation).
shuffled_indices = torch.randperm(n_total).tolist()
train_dataset = torch.utils.data.Subset(
    ParkingLotDataset(TRAIN_IMG_DIR, TRAIN_TXT_DIR, transforms=get_train_transforms()),
    shuffled_indices[:n_train],
)
val_dataset = torch.utils.data.Subset(
    ParkingLotDataset(TRAIN_IMG_DIR, TRAIN_TXT_DIR, transforms=get_val_transforms()),
    shuffled_indices[n_train:],
)

def collate_fn(batch):
    """Collate detection samples without stacking them.

    ``None`` entries are discarded, then the remaining per-sample tuples are
    transposed into one tuple per field. If nothing remains, a pair of empty
    tensors is returned instead.
    """
    samples = [sample for sample in batch if sample is not None]
    if samples:
        return tuple(zip(*samples))
    return (torch.empty(0), torch.empty(0))

# Both loaders share batch size, collation, and worker settings; only the
# training loader shuffles its samples.
_loader_common = dict(batch_size=4, collate_fn=collate_fn, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_common)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_common)

train_size, val_size = len(train_dataset), len(val_dataset)
print(f"Train size: {train_size} | Val size: {val_size}")

# 模型設定

In [None]:
# ------------------------------
# 5. Faster R-CNN + pretrained weights
# ------------------------------
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Start from the COCO-pretrained detector.
model = fasterrcnn_resnet50_fpn_v2(weights="COCO_V1")

# Swap the box-classification head for one sized to NUM_CLASSES, reusing the
# input width of the existing head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

model = model.to(device)
print(f"Model moved to {device}")

# 參數設定

In [None]:
# ------------------------------
# 6. Training configuration
# ------------------------------
from torch.optim.lr_scheduler import CosineAnnealingLR

num_epochs = 10
MODEL_SAVE_PATH = 'best_parking_lot_model.pth'  # checkpoint file (Kaggle output path)
best_map = -1.0  # highest validation mAP seen so far

# Optimize only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.AdamW(params, lr=1e-4, weight_decay=1e-4)

# Cosine decay of the learning rate over the whole run, down to eta_min.
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-6)

In [ ]:
!pip install torchmetrics -q

# (訓練＋驗證＋模型儲存)迴圈

In [None]:
# ------------------------------
# 7. Training + validation loop
# ------------------------------
from tqdm.notebook import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision


def _batch_to_device(images, annotations):
    """Move a batch of image tensors and their target dicts onto `device`."""
    moved_images = [image.to(device) for image in images]
    moved_targets = [
        {key: value.to(device) for key, value in annotation.items()}
        for annotation in annotations
    ]
    return moved_images, moved_targets


for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    # ---- Training pass ----
    model.train()
    train_loss_sum = 0.0
    for imgs, targets, _ in tqdm(train_loader, desc="Training"):
        imgs, targets = _batch_to_device(imgs, targets)

        # In training mode the model returns a dict of loss terms; the
        # optimization objective is their sum.
        loss_dict = model(imgs, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        train_loss_sum += losses.item()

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    avg_train_loss = train_loss_sum / len(train_loader)
    print(f"Train Loss: {avg_train_loss:.4f}")

    # ---- Validation pass ----
    # A fresh metric object per epoch so results do not accumulate across epochs.
    metric = MeanAveragePrecision(box_format='xyxy').to(device)
    model.eval()
    with torch.no_grad():
        for imgs, targets, _ in tqdm(val_loader, desc="Validating"):
            imgs, targets = _batch_to_device(imgs, targets)
            predictions = model(imgs)
            metric.update(predictions, targets)

    results = metric.compute()
    val_map = results['map'].item()
    print(f"Validation mAP: {val_map:.4f}")

    # ---- Checkpointing (validation mAP is the criterion) ----
    # Written as `not (a > b)` so a NaN mAP is treated as "no improvement".
    if not (val_map > best_map):
        print(f"Validation mAP did not improve from {best_map:.4f}.")
        continue

    print(f"Validation mAP Improved ({best_map:.4f} -> {val_map:.4f}). Saving model...")
    best_map = val_map
    torch.save(model.state_dict(), MODEL_SAVE_PATH)

# 預測與提交

In [ ]:
# ------------------------------
# 8. Prediction & submission
# ------------------------------
# Training and validation images pass through A.Normalize (ImageNet mean/std)
# before reaching the model, so test images must receive the same
# preprocessing. Feeding tensors that are only scaled to [0, 1] would evaluate
# the network on a different input distribution than it was fine-tuned on.
# NOTE(review): torchvision detection models also normalize inputs internally;
# if A.Normalize is ever removed from the training transforms, remove the
# normalization here as well so both stages stay in sync.
TEST_MEAN = [0.485, 0.456, 0.406]
TEST_STD = [0.229, 0.224, 0.225]
SCORE_THRESHOLD = 0.3  # tunable confidence threshold

# Load the best checkpoint. map_location lets a checkpoint saved on a GPU be
# restored when the current session only has a CPU.
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
model.eval()

test_imgs = sorted(os.listdir(TEST_IMG_DIR))
predictions = []

with torch.no_grad():
    for img_name in tqdm(test_imgs, desc="Testing"):
        img_path = os.path.join(TEST_IMG_DIR, img_name)
        img = Image.open(img_path).convert("RGB")
        img_tensor = F.normalize(F.to_tensor(img), mean=TEST_MEAN, std=TEST_STD).to(device)
        pred = model([img_tensor])[0]

        # The image id is the numeric file stem.
        img_id = int(os.path.splitext(img_name)[0])

        # Convert each result tensor to Python values once instead of
        # iterating over tensor elements one by one.
        scores = pred['scores'].tolist()
        labels = pred['labels'].tolist()
        boxes = pred['boxes'].tolist()

        parts = []
        for score, label, box in zip(scores, labels, boxes):
            if score < SCORE_THRESHOLD:
                continue

            # Model labels (1-4) map back to file labels (0-3).
            class_id = label - 1

            x_min, y_min, x_max, y_max = box
            w, h = x_max - x_min, y_max - y_min
            parts.append(f"{score:.6f} {x_min:.2f} {y_min:.2f} {w:.2f} {h:.2f} {class_id}")

        pred_str = " ".join(parts)
        predictions.append([img_id, pred_str])

submission = pd.DataFrame(predictions, columns=['Image_ID', 'PredictionString'])
submission.to_csv('submission.csv', index=False)
print("\n✅ Submission saved: submission.csv")