In [1]:
!pip install torch torchvision tqdm scikit-learn matplotlib pillow

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import torch

# Dataset root. Set the CAR_DAMAGE_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell; the original local
# path is kept as the default.
DATA_DIR = os.environ.get(
    "CAR_DAMAGE_DATA_DIR",
    r"C:\Users\acer\Desktop\dl project\car-damage-dataset\data1a",
)
TRAIN_DIR = os.path.join(DATA_DIR, "training")
VAL_DIR   = os.path.join(DATA_DIR, "validation")

# Training hyper-parameters.
BATCH_SIZE = 16
NUM_EPOCHS = 3
LEARNING_RATE = 1e-4
IMAGE_SIZE = 224   # side length of the square crop fed to the network
NUM_WORKERS = 2    # forwarded to DataLoader(num_workers=...)

# Seed torch's global RNG so repeated runs start from the same random state.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE


device(type='cpu')

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Per-channel mean / std used to normalise inputs (the standard ImageNet
# statistics that torchvision's pretrained models are documented to expect).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)

# Training pipeline: random crop / flip / small rotation as augmentation.
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(8),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: deterministic resize (10% larger than the crop) + centre crop.
val_tf = transforms.Compose([
    transforms.Resize(int(IMAGE_SIZE * 1.1)),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    normalize,
])

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = datasets.ImageFolder(VAL_DIR,   transform=val_tf)

# ImageFolder derives label indices from the sub-folder names of each root.
# If the two splits disagree, validation labels would not mean the same thing
# as training labels and the reported accuracy would be meaningless.
assert train_ds.class_to_idx == val_ds.class_to_idx, (
    f"class folders differ between splits: "
    f"{train_ds.class_to_idx} vs {val_ds.class_to_idx}"
)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                      num_workers=NUM_WORKERS)
val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=NUM_WORKERS)

# Class names in label-index order; reused when building the model head and
# when decoding predictions.
class_names = train_ds.classes
class_names


['00-damage', '01-whole']

In [4]:
import torch.nn as nn
from torchvision import models

def build_model(num_classes=2, hidden_dim=256, dropout=0.3):
    """Build a ResNet-50 with a frozen pretrained backbone and a new trainable head.

    Args:
        num_classes: number of output logits of the new classifier head.
        hidden_dim: width of the hidden layer in the head.
        dropout: dropout probability applied between the two head layers.

    Returns:
        The ResNet-50 module whose ``fc`` layer has been replaced by
        ``Linear -> ReLU -> Dropout -> Linear``. Only the head's parameters
        have ``requires_grad=True``.
    """
    # torchvision >= 0.13 deprecates ``pretrained=`` in favour of ``weights=``.
    # IMAGENET1K_V1 is the checkpoint ``pretrained=True`` used to load, so the
    # resulting weights are unchanged. Older releases keep the legacy call.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet50(pretrained=True)

    # freeze backbone
    for param in model.parameters():
        param.requires_grad = False

    # replace last layer; freshly created layers default to requires_grad=True,
    # so the head is the only trainable part
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, hidden_dim),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim, num_classes)
    )
    return model

# One output logit per class folder found in the training set.
model = build_model(num_classes=len(class_names))
# Move the parameters to the selected device (Module.to returns the module itself).
model = model.to(DEVICE)
model




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
import time
import copy
from tqdm import tqdm

# Loss over the raw logits produced by the classifier head.
criterion = nn.CrossEntropyLoss()

# Only parameters that still require gradients are handed to Adam.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=LEARNING_RATE,
)

# Multiply the learning rate by 0.1 every 5 scheduler steps; it stays constant
# for runs that make fewer than 5 steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=5)

def train_model(model, train_dl, val_dl, loss_fn=None, optim=None,
                lr_scheduler=None, num_epochs=None, device=None):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a training pass over ``train_dl`` followed by an
    evaluation pass over ``val_dl`` and prints the loss / accuracy of each.
    The weights of the epoch with the highest validation accuracy are kept
    and restored into ``model`` before returning.

    Args:
        model: network to train; updated in place.
        train_dl: loader yielding ``(images, labels)`` batches for training.
            Must expose ``.dataset`` with a length.
        val_dl: loader yielding ``(images, labels)`` batches for validation.
            Must expose ``.dataset`` with a length.
        loss_fn: callable ``(outputs, labels) -> loss``. Defaults to the
            notebook-level ``criterion``.
        optim: optimizer stepping the trainable parameters. Defaults to the
            notebook-level ``optimizer`` (and then ``lr_scheduler`` defaults
            to the notebook-level ``scheduler``).
        lr_scheduler: object with a ``step()`` method, called once per epoch.
            If a custom ``optim`` is given without a scheduler, none is used.
        num_epochs: number of epochs. Defaults to ``NUM_EPOCHS``.
        device: device the batches are moved to. Defaults to ``DEVICE``.

    Returns:
        The same ``model`` object, with the best weights loaded.
    """
    loss_fn = criterion if loss_fn is None else loss_fn
    if optim is None:
        # The notebook-level scheduler is bound to the notebook-level
        # optimizer, so it is only a sensible default together with it.
        optim = optimizer
        if lr_scheduler is None:
            lr_scheduler = scheduler
    num_epochs = NUM_EPOCHS if num_epochs is None else num_epochs
    device = DEVICE if device is None else device

    best_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")

        for phase, dl in (("train", train_dl), ("val", val_dl)):
            is_train = phase == "train"
            # NOTE(review): train mode applies to every sub-module, including
            # any BatchNorm layers of a frozen backbone - confirm that updating
            # their running statistics is intended.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            for images, labels in tqdm(dl, desc=phase):
                images, labels = images.to(device), labels.to(device)

                if is_train:
                    optim.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(images)
                    preds = outputs.argmax(dim=1)
                    loss = loss_fn(outputs, labels)

                    if is_train:
                        loss.backward()
                        optim.step()

                # loss is a batch mean; weight it by batch size so the epoch
                # average stays correct when the last batch is smaller.
                running_loss += loss.item() * images.size(0)
                # .item() keeps the counters as plain Python numbers instead
                # of device tensors.
                running_corrects += (preds == labels).sum().item()

            n_samples = len(dl.dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            # save best model
            if not is_train and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_wts = copy.deepcopy(model.state_dict())

        if lr_scheduler is not None:
            lr_scheduler.step()

    print(f"\nBest accuracy: {best_acc:.4f}")
    model.load_state_dict(best_wts)
    return model

# Run training; the returned model carries the best-validation weights.
model = train_model(model=model, train_dl=train_dl, val_dl=val_dl)



Epoch 1/3


train: 100%|██████████| 115/115 [09:32<00:00,  4.98s/it]


train Loss: 0.4926 Acc: 0.7810


val: 100%|██████████| 29/29 [02:21<00:00,  4.90s/it]


val Loss: 0.5481 Acc: 0.6978

Epoch 2/3


train: 100%|██████████| 115/115 [09:18<00:00,  4.85s/it]


train Loss: 0.4019 Acc: 0.8234


val: 100%|██████████| 29/29 [02:10<00:00,  4.50s/it]


val Loss: 0.3670 Acc: 0.8196

Epoch 3/3


train: 100%|██████████| 115/115 [11:38<00:00,  6.07s/it]


train Loss: 0.3816 Acc: 0.8217


val: 100%|██████████| 29/29 [02:42<00:00,  5.61s/it]


val Loss: 0.3301 Acc: 0.8478

Best accuracy: 0.8478


In [6]:
MODEL_PATH = "car_damage_binary_resnet50.pth"

# Store the weights together with the class list, so the index -> name
# mapping is saved alongside the model.
torch.save(
    dict(model_state=model.state_dict(), classes=class_names),
    MODEL_PATH,
)

MODEL_PATH


'car_damage_binary_resnet50.pth'

In [7]:
from PIL import Image

def predict_image(img_path, model, classes=None):
    """Classify a single image file and return the predicted class name.

    Args:
        img_path: path (or file object) accepted by ``PIL.Image.open``.
        model: trained classifier; switched to eval mode as a side effect.
        classes: optional sequence of class names indexed by label id, e.g.
            the ``"classes"`` entry of a saved checkpoint. Defaults to the
            notebook-level ``class_names``.

    Returns:
        The entry of ``classes`` at the index of the highest-scoring output.
    """
    labels = class_names if classes is None else classes

    model.eval()

    # The context manager releases the file handle even if decoding fails;
    # convert() returns an independent image that outlives the handle.
    with Image.open(img_path) as img:
        rgb = img.convert("RGB")

    # Reuse the validation pipeline instead of rebuilding a copy of it, so
    # inference preprocessing cannot drift from what the model was evaluated on.
    batch = val_tf(rgb).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        pred = model(batch).argmax(dim=1)

    return labels[pred.item()]

# Example (uses an image from the validation split configured above):
# predict_image(os.path.join(VAL_DIR, "00-damage", "example.jpg"), model)
