In [14]:
import os
import random
import xml.etree.ElementTree as ET

from PIL import Image
import GPUtil
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.ops import box_iou
from torchvision.models.detection import ssd300_vgg16
from torchvision.models import VGG16_Weights
from tqdm import tqdm


In [2]:
# 1) Reproducibility & GPU selection
SEED = 42
# Seed every random number generator this notebook touches: Python's `random`,
# NumPy, PyTorch on CPU and PyTorch on all CUDA devices.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Ask GPUtil for (at most) one GPU id, ordered by memory.
available = GPUtil.getAvailable(order='memory', limit=1)
# GPUtil's ids and PyTorch's CUDA indices are not guaranteed to agree (for
# example when CUDA_VISIBLE_DEVICES hides or reorders devices), so only accept
# an id that PyTorch can actually address. An out-of-range id would otherwise
# crash in `get_device_name` below instead of falling back to the CPU.
if (
    available
    and torch.cuda.is_available()
    and available[0] < torch.cuda.device_count()
):
    gpu_id = available[0]
    device = torch.device(f"cuda:{gpu_id}")
    print(f"🏎 Training on GPU #{gpu_id}: {torch.cuda.get_device_name(gpu_id)}")
else:
    device = torch.device("cpu")
    print("⚠️  No GPU available, using CPU instead")

🏎 Training on GPU #0: NVIDIA GeForce RTX 5090


NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90 compute_37.
If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [7]:
# 2) Paths
# Everything lives under <cwd>/CANS: the two raw class folders plus an
# "augmented" folder that receives the generated images and annotations.
ROOT = os.path.join(os.getcwd(), "CANS")
DEF_DIR, NON_DIR, AUG_DIR = (
    os.path.join(ROOT, sub) for sub in ("defect", "non", "augmented")
)
IMG_OUT, ANN_OUT = (
    os.path.join(AUG_DIR, sub) for sub in ("images", "annotations")
)
# Create both output folders up front; existing folders are left untouched.
os.makedirs(IMG_OUT, exist_ok=True)
os.makedirs(ANN_OUT, exist_ok=True)

In [8]:
# 3) Helper: write Pascal-VOC XML
def write_voc_xml(fn, size, boxes, labels, dest):
    """Write a Pascal-VOC style annotation file.

    Args:
        fn: Image file name stored in the <filename> element.
        size: Sequence whose first two items are written as <width> and
            <height>; <depth> is always written as "3".
        boxes: Iterable of 4-item sequences written as xmin, ymin, xmax, ymax.
        labels: Iterable of class names, paired with `boxes` by position.
        dest: Path of the XML file to write.
    """
    root = ET.Element("annotation")
    ET.SubElement(root, "filename").text = fn

    size_node = ET.SubElement(root, "size")
    for tag, value in (("width", size[0]), ("height", size[1]), ("depth", 3)):
        ET.SubElement(size_node, tag).text = str(value)

    # One <object> per (box, label) pair; zip stops at the shorter sequence.
    corner_tags = ("xmin", "ymin", "xmax", "ymax")
    for coords, class_name in zip(boxes, labels):
        obj_node = ET.SubElement(root, "object")
        ET.SubElement(obj_node, "name").text = class_name
        bbox_node = ET.SubElement(obj_node, "bndbox")
        for position, tag in enumerate(corner_tags):
            ET.SubElement(bbox_node, tag).text = str(coords[position])

    ET.ElementTree(root).write(dest)

In [9]:
# 4) Preprocess & Augment
def _save_image_with_annotation(image, out_name, label, size):
    """Save `image` to IMG_OUT and write its whole-image VOC annotation to ANN_OUT."""
    image.save(os.path.join(IMG_OUT, out_name))
    # splitext swaps only the real extension; str.replace would also rewrite an
    # identical substring that happens to occur earlier in the file name.
    xml_name = os.path.splitext(out_name)[0] + ".xml"
    write_voc_xml(
        fn     = out_name,
        size   = size,
        boxes  = [[0, 0, size[0], size[1]]],  # single box covering the full image
        labels = [label],
        dest   = os.path.join(ANN_OUT, xml_name)
    )


def preprocess_and_augment(src_dir, label, num_aug=3, size=(300,300)):
    """Resize every image in `src_dir` and save it plus flipped copies.

    For each .jpg/.jpeg/.png file (case-insensitive) the image is converted to
    RGB, resized to `size`, and written to IMG_OUT as "<label>_<name><ext>"
    together with a VOC XML in ANN_OUT whose single box spans the whole image.
    Then `num_aug` horizontally flipped copies are written as
    "<label>_<name>_flip<i><ext>", each with its own XML.

    Args:
        src_dir: Directory containing the raw images of one class.
        label: Class name used as file-name prefix and annotation label.
        num_aug: Number of flipped copies to write per image.
        size: Target (width, height) passed to `Image.resize`.
    """
    for imgfn in os.listdir(src_dir):
        if not imgfn.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        # Context manager closes the source file handle right after decoding;
        # convert() returns an independent in-memory image.
        with Image.open(os.path.join(src_dir, imgfn)) as src:
            im = src.convert("RGB").resize(size)
        base, ext = os.path.splitext(imgfn)

        # Original
        _save_image_with_annotation(im, f"{label}_{base}{ext}", label, size)

        # Horizontal flips
        # NOTE(review): a horizontal flip is deterministic, so all `num_aug`
        # copies are identical images under different names. Output is kept
        # as-is for compatibility; consider randomised augmentations instead.
        flipped = im.transpose(Image.FLIP_LEFT_RIGHT)  # loop-invariant, computed once
        for i in range(num_aug):
            _save_image_with_annotation(
                flipped, f"{label}_{base}_flip{i}{ext}", label, size
            )

# Run preprocessing
# Each raw class folder is processed with its annotation label; three flipped
# copies are written per source image.
for class_dir, class_label in ((DEF_DIR, "defect"), (NON_DIR, "non-defect")):
    preprocess_and_augment(class_dir, class_label, num_aug=3)


In [12]:
# 5) Dataset & DataLoader
class VOCDataset(Dataset):
    """Detection dataset pairing images with Pascal-VOC XML annotations.

    Every image file in `img_dir` (.jpg/.jpeg/.png, case-insensitive) is
    expected to have an annotation named "<image stem>.xml" in `ann_dir`.

    Each item is `(image, target)` where `image` is whatever `tf` returns (or
    the RGB PIL image when `tf` is None) and `target` is a dict with
    "boxes" (float32, shape (N, 4), xmin/ymin/xmax/ymax) and "labels"
    (int64, shape (N,)). Tensors are returned on the CPU; the training loop is
    responsible for moving them to the compute device.
    """

    # torchvision detection models reserve label 0 for background, so the real
    # classes must start at 1 (the model is built with num_classes=3).
    DEFECT_LABEL     = 1
    NON_DEFECT_LABEL = 2
    # Same extensions the preprocessing step writes.
    IMG_EXTS = (".jpg", ".jpeg", ".png")

    def __init__(self, img_dir, ann_dir, tf=None):
        self.img_dir = img_dir
        self.ann_dir = ann_dir
        self.tf      = tf
        # Sorted so that index -> file is stable across runs and platforms
        # (os.listdir order is arbitrary), which the seeded shuffle relies on.
        self.files   = sorted(
            f for f in os.listdir(img_dir) if f.lower().endswith(self.IMG_EXTS)
        )

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fn = self.files[idx]
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(os.path.join(self.img_dir, fn)) as src:
            im = src.convert("RGB")
        if self.tf:
            im = self.tf(im)

        # splitext handles any extension/case; replacing ".jpg" would leave
        # names such as "x.JPG" or "x.png" unchanged and parse the wrong file.
        xml_path = os.path.join(self.ann_dir, os.path.splitext(fn)[0] + ".xml")
        root = ET.parse(xml_path).getroot()
        boxes, labels = [], []
        for obj in root.findall("object"):
            b = obj.find("bndbox")
            boxes.append(
                [int(b.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
            )
            lbl = obj.find("name").text
            # Anything that is not "defect" is treated as the non-defect class.
            labels.append(
                self.DEFECT_LABEL if lbl == "defect" else self.NON_DEFECT_LABEL
            )

        return (
            im,
            {
                # reshape keeps the (N, 4) layout even when there are no objects
                "boxes":  torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
                "labels": torch.tensor(labels, dtype=torch.int64)
            }
        )

def _collate_detection_batch(batch):
    """Turn a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*batch))


# Convert PIL images to tensors, then normalise each channel with the given
# per-channel mean and standard deviation.
tf = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

dataset = VOCDataset(IMG_OUT, ANN_OUT, tf)
# Targets are dicts with a variable number of boxes, so samples are kept as
# tuples rather than stacked into one tensor.
data_loader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=4,
    collate_fn=_collate_detection_batch,
)

In [16]:
# 6) Model Initialization
# SSD300 with a pretrained VGG16 backbone; no pretrained detection weights are
# loaded (weights=None). The model is moved to the selected device right away.
model = ssd300_vgg16(
    num_classes=3,
    weights_backbone=VGG16_Weights.DEFAULT,    # ← valid enum alias for IMAGENET1K_V1
    weights=None,
).to(device)
first_param = next(model.parameters())
print("Model on device:", first_param.device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\USER/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [00:21<00:00, 25.5MB/s] 


Model on device: cuda:0


In [17]:
# 7) Optimizer & Scheduler
# SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=5e-4,
    momentum=0.9,
    weight_decay=5e-4,
)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=5,
    gamma=0.1,
)

In [None]:
# 8) mAP@0.5 Utility
def calc_map(preds, targs, thr=0.5):
    """Mean Average Precision at a single IoU threshold.

    For every class that occurs in the ground truth, all detections of that
    class are ranked by score across the whole set of images and greedily
    matched to ground-truth boxes of the same class and image. A detection is
    a true positive when its best IoU with a not-yet-matched ground-truth box
    exceeds `thr`; each ground-truth box can be matched only once, so
    duplicate detections count as false positives. Average precision is the
    area under the monotone precision envelope (all-point interpolation), and
    the result is the mean over classes.

    Images without detections still contribute their ground-truth boxes, so
    missed objects lower recall instead of being ignored.

    Args:
        preds: Tuple `(boxes, scores, labels)` of per-image tensor lists.
        targs: Tuple `(boxes, labels)` of per-image ground-truth tensor lists.
        thr: IoU threshold a match has to exceed.

    Returns:
        mAP as a float in [0, 1]; 0.0 when there is no ground truth.
    """
    p_b, p_s, p_l = preds
    t_b, t_l      = targs

    # Recall is only defined for classes that have ground-truth boxes.
    classes = sorted({int(c) for tl in t_l for c in tl.tolist()})

    aps = []
    for cls in classes:
        gt_per_img, dets, n_gt = [], [], 0
        for img_idx, (pb, ps, pl, tb, tl) in enumerate(zip(p_b, p_s, p_l, t_b, t_l)):
            cls_gt = tb[tl == cls]
            gt_per_img.append(cls_gt)
            n_gt += cls_gt.size(0)
            sel = pl == cls
            dets.extend(
                (score, img_idx, box)
                for score, box in zip(ps[sel].tolist(), pb[sel])
            )
        if n_gt == 0:
            continue

        # Highest-confidence detections get first pick of the ground truth.
        dets.sort(key=lambda det: det[0], reverse=True)
        used = [
            torch.zeros(g.size(0), dtype=torch.bool, device=g.device)
            for g in gt_per_img
        ]
        hits = []
        for _, img_idx, box in dets:
            cls_gt = gt_per_img[img_idx]
            is_tp = False
            if cls_gt.size(0) > 0:
                ious = box_iou(box.unsqueeze(0), cls_gt)[0]
                # Already-claimed ground-truth boxes cannot be matched again.
                ious[used[img_idx]] = -1.0
                best = int(ious.argmax())
                if ious[best].item() > thr:
                    used[img_idx][best] = True
                    is_tp = True
            hits.append(is_tp)

        # Precision/recall after each ranked detection.
        tp_cum, recalls, precisions = 0, [], []
        for rank, is_tp in enumerate(hits, start=1):
            tp_cum += int(is_tp)
            recalls.append(tp_cum / n_gt)
            precisions.append(tp_cum / rank)

        # Make precision monotonically non-increasing (precision envelope).
        for i in range(len(precisions) - 2, -1, -1):
            precisions[i] = max(precisions[i], precisions[i + 1])

        # Area under the precision-recall curve.
        ap, prev_recall = 0.0, 0.0
        for recall, precision in zip(recalls, precisions):
            ap += (recall - prev_recall) * precision
            prev_recall = recall
        aps.append(ap)

    return sum(aps) / len(aps) if aps else 0.0

In [18]:
# 9) Training Loop
NUM_EPOCHS = 15
for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    running_loss = 0.0
    # Per-image detections and ground truth collected over the epoch; they
    # feed the metric printed at the end of the epoch.
    pred_boxes, pred_scores, pred_labels = [], [], []
    gt_boxes, gt_labels = [], []

    progress = tqdm(data_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}")
    for images, batch_targets in progress:
        # Make sure the whole batch lives on the compute device.
        images = [image.to(device) for image in images]
        for target in batch_targets:
            target.update(
                boxes=target["boxes"].to(device),
                labels=target["labels"].to(device),
            )

        # Called with targets, the model returns a dict of loss terms; their
        # sum is the quantity that gets optimised.
        losses = model(images, batch_targets)
        batch_loss = sum(losses.values())
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

        # Re-run the same batch in eval mode to obtain detections, keeping
        # only those scored above 0.3, then switch back to train mode.
        model.eval()
        with torch.no_grad():
            for detection, target in zip(model(images), batch_targets):
                confident = detection["scores"] > 0.3
                pred_boxes.append(detection["boxes"][confident].cpu())
                pred_scores.append(detection["scores"][confident].cpu())
                pred_labels.append(detection["labels"][confident].cpu())
                gt_boxes.append(target["boxes"].cpu())
                gt_labels.append(target["labels"].cpu())
        model.train()

    lr_scheduler.step()
    avg_loss = running_loss / len(data_loader)
    epoch_preds = (pred_boxes, pred_scores, pred_labels)
    epoch_truth = (gt_boxes, gt_labels)
    mAP50 = calc_map(epoch_preds, epoch_truth) * 100
    print(f"→ Epoch {epoch}: Loss={avg_loss:.4f}, mAP@0.5={mAP50:.1f}%")

Epoch 1/15: 100%|██████████| 860/860 [02:21<00:00,  6.10it/s] 


NameError: name 'calc_map' is not defined

In [None]:
# 10) Save Model
# Only the parameters (state dict) are stored, next to the notebook's working
# directory.
save_path = os.path.join(os.getcwd(), "ssd_cans.pth")
trained_weights = model.state_dict()
torch.save(trained_weights, save_path)
print("Model weights saved to:", save_path)