<a href="https://colab.research.google.com/github/DannyNguyen69/conveyor-belt/blob/model/SSD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Print the GPU attached to this runtime (device name, driver/CUDA version, memory use).
!nvidia-smi

Fri Sep 12 04:59:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   53C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
### 1. Mount Google Drive ###

from google.colab import drive

# Expose Google Drive under /content/gdrive; ROOT_DIR below points into this mount.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
### 2. Define root directory ###

# Dataset folder inside the mounted Drive.
# NOTE(review): none of the cells visible here read ROOT_DIR — they train on the
# VOC 2012 sample instead. Confirm whether this dataset is meant to replace VOC.
ROOT_DIR = '/content/gdrive/MyDrive/data/bang_chuyen/bang_chuyen_final.v2i.yolov8'

In [4]:
!pip install torch
!pip install torchvision



In [3]:
import torch
import torchvision
from torchvision.models.detection import ssd300_vgg16
from torchvision.models.detection.ssd import SSDClassificationHead
from torchvision.datasets import VOCDetection
import torchvision.transforms as T
import matplotlib.pyplot as plt

In [None]:
# 2. Data loader (VOC sample)
# ==========================
def collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples.

    Detection targets differ in size from sample to sample, so they are kept as
    tuples instead of being stacked into a single tensor.
    """
    return tuple(zip(*batch))


# Every image is resized to 300x300 and converted to a tensor.
transform = T.Compose([T.Resize((300, 300)), T.ToTensor()])

# Arguments shared by the train and val splits.
_voc_kwargs = dict(year="2012", download=True, transform=transform)
train_dataset = VOCDetection("./data", image_set="train", **_voc_kwargs)
val_dataset = VOCDetection("./data", image_set="val", **_voc_kwargs)

# Only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=4, shuffle=True, collate_fn=collate_detection_batch
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=4, shuffle=False, collate_fn=collate_detection_batch
)

In [None]:
# 1. Load SSD model
# ==========================
def get_model(num_classes):
    """Build a pretrained SSD300-VGG16 with a fresh classification head.

    Args:
        num_classes: Number of output classes, including the background class.

    Returns:
        The SSD model whose classification head has been replaced by a newly
        initialised head that predicts ``num_classes`` classes. The backbone and
        the box-regression head keep their pretrained weights.
    """
    # `pretrained=True` is deprecated in torchvision; `weights="DEFAULT"` selects
    # the same pretrained checkpoint through the current API.
    model = ssd300_vgg16(weights="DEFAULT")

    # SSDClassificationHead does not expose `in_channels` / `num_anchors`
    # attributes, so recover them from what the model does expose: the input
    # channels of the existing per-feature-map conv layers and the number of
    # default boxes per location reported by the anchor generator.
    old_head = model.head.classification_head
    in_channels = [conv.in_channels for conv in old_head.module_list]
    num_anchors = model.anchor_generator.num_anchors_per_location()

    # Swap in a head sized for the requested number of classes (background included).
    model.head.classification_head = SSDClassificationHead(
        in_channels, num_anchors, num_classes
    )
    return model

In [None]:
# 3. Train function
# ==========================
def _voc_to_detection_targets(images, targets, device):
    """Convert VOC annotation dicts into the target dicts the detector consumes.

    Boxes in the annotation are expressed in the pixel coordinates of the original
    image. When the annotation carries a ``size`` entry, the boxes are rescaled to
    the size of the (already resized) image tensor so that they line up with it.

    Args:
        images: Sequence of image tensors whose last two dims are (height, width).
        targets: Sequence of dicts with an ``"annotation"`` entry, one per image.
        device: Device on which the box/label tensors are created.

    Returns:
        A list with one ``{"boxes", "labels"}`` dict per image.
    """
    converted = []
    for img, target in zip(images, targets):
        annotation = target["annotation"]
        objs = annotation["object"]
        if not isinstance(objs, list):  # a single object may not be wrapped in a list
            objs = [objs]

        # Scale factors from original image size to the tensor actually fed to the model.
        scale_x = scale_y = 1.0
        size = annotation.get("size")
        if size:
            orig_w, orig_h = float(size["width"]), float(size["height"])
            if orig_w > 0 and orig_h > 0:
                img_h, img_w = img.shape[-2:]
                scale_x, scale_y = img_w / orig_w, img_h / orig_h

        boxes, labels = [], []
        for obj in objs:
            bbox = obj["bndbox"]
            boxes.append([
                float(bbox["xmin"]) * scale_x, float(bbox["ymin"]) * scale_y,
                float(bbox["xmax"]) * scale_x, float(bbox["ymax"]) * scale_y,
            ])
            labels.append(1)  # demo: every object is treated as class 1

        converted.append({
            # reshape keeps the (N, 4) layout even when there are no objects
            "boxes": torch.tensor(boxes, dtype=torch.float32, device=device).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64, device=device),
        })
    return converted


def _run_epoch(model, loader, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean summed loss per batch.

    Parameters are updated only when ``optimizer`` is given.
    """
    total_loss = 0.0
    for images, targets in loader:
        images = [img.to(device) for img in images]
        target_list = _voc_to_detection_targets(images, targets, device)

        loss_dict = model(images, target_list)
        losses = sum(loss for loss in loss_dict.values())
        total_loss += losses.item()

        if optimizer is not None:
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

    # max(..., 1) avoids a ZeroDivisionError on an empty loader
    return total_loss / max(len(loader), 1)


def train_model(model, train_loader, val_loader, num_epochs=5, lr=0.001, device="cuda"):
    """Train a detection model with SGD and record per-epoch losses.

    Args:
        model: Detector that returns a dict of losses when called as
            ``model(images, targets)`` in training mode.
        train_loader: Iterable of ``(images, targets)`` batches used for training.
        val_loader: Iterable of ``(images, targets)`` batches used for validation.
        num_epochs: Number of passes over ``train_loader``.
        lr: SGD learning rate (momentum is fixed at 0.9).
        device: Device the model and batches are moved to.

    Returns:
        A dict with the lists ``"train_loss"`` and ``"val_loss"`` (one value per
        epoch) and ``"mAP"``, which is left empty because mAP is not computed.
    """
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    history = {"train_loss": [], "val_loss": [], "mAP": []}

    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        avg_train_loss = _run_epoch(model, train_loader, device, optimizer)
        history["train_loss"].append(avg_train_loss)

        # ---- Validation (loss only; mAP is skipped for speed) ----
        # torchvision detection models return losses only in training mode; in
        # eval mode they return detections, so the loss dict would not exist.
        # The model therefore stays in train mode here and gradients are disabled.
        # Caveat: any dropout/batch-norm layers in `model` also stay in training
        # behaviour during this pass.
        with torch.no_grad():
            avg_val_loss = _run_epoch(model, val_loader, device)
        history["val_loss"].append(avg_val_loss)

        # Leave the model in eval mode between epochs, as callers previously observed.
        model.eval()

        print(f"Epoch [{epoch+1}/{num_epochs}]  Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    return history

# ==========================
# 4. Plot training curves
# ==========================
def plot_training_curves(history, save_path=None):
    """Draw the train and validation loss curves against the epoch number.

    Args:
        history: Dict holding the per-epoch lists ``"train_loss"`` and ``"val_loss"``.
        save_path: When truthy, the figure is also written to this path.
    """
    n_epochs = len(history["train_loss"])
    x_axis = range(1, n_epochs + 1)

    # (history key, matplotlib format string, legend label) for each curve
    curves = (
        ("train_loss", "b-", "Train Loss"),
        ("val_loss", "r-", "Val Loss"),
    )

    plt.figure(figsize=(8, 5))
    for key, fmt, label in curves:
        plt.plot(x_axis, history[key], fmt, label=label)

    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training & Validation Loss")
    plt.legend()

    # Save before show(), while the figure is still the current one.
    if save_path:
        plt.savefig(save_path)
    plt.show()

# ==========================
# 5. Run training
# ==========================
num_classes = 2  # background + 1 demo class

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Hyper-parameters for this run, gathered in one place.
train_settings = {"num_epochs": 5, "lr": 0.001, "device": device}

model = get_model(num_classes)
history = train_model(model, train_loader, val_loader, **train_settings)

# Plot the loss curves and keep a copy on disk.
plot_training_curves(history, save_path="ssd_training.png")

# Persist only the learned weights (state dict), not the whole module.
trained_weights = model.state_dict()
torch.save(trained_weights, "ssd_model.pth")
print("✅ Training xong, model saved tại ssd_model.pth")