In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
import time

# 1. Tiền xử lý + Augmentation (giúp tránh overfitting)
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# 2. Dataset + DataLoader
train_dataset = datasets.ImageFolder("dataset/train", transform=train_transform)
val_dataset   = datasets.ImageFolder("dataset/val",   transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# 3. Load pretrained ResNet18 (fine-tune toàn bộ model)
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 3)  # 3 class

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 4. Loss, optimizer, scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

# 5. Train + Validation loop
best_acc = 0.0
for epoch in range(15):  
    start_time = time.time()

    # --- Train ---
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / len(train_dataset)
    train_acc = correct / total

    # --- Validation ---
    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)

            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

    val_loss /= len(val_dataset)
    val_acc = correct / total

    scheduler.step()

    elapsed = time.time() - start_time
    print(f"Epoch {epoch+1:02d} | "
          f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f} | "
          f"Time: {elapsed:.1f}s")

    # Lưu model tốt nhất
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "best_resnet18.pth")
        print("✅ Saved new best model")

print("Training done! Best Val Acc:", best_acc)


Epoch 01 | Train Loss: 1.1048, Acc: 0.4242 | Val Loss: 1.4710, Acc: 0.7000 | Time: 11.0s
✅ Saved new best model
Epoch 02 | Train Loss: 0.2705, Acc: 0.9091 | Val Loss: 2.1072, Acc: 0.7000 | Time: 17.0s
Epoch 03 | Train Loss: 0.0919, Acc: 0.9697 | Val Loss: 2.5074, Acc: 0.7000 | Time: 14.8s
Epoch 04 | Train Loss: 0.0672, Acc: 0.9697 | Val Loss: 2.8135, Acc: 0.6000 | Time: 13.6s
Epoch 05 | Train Loss: 0.0848, Acc: 0.9394 | Val Loss: 2.6938, Acc: 0.7000 | Time: 12.7s
Epoch 06 | Train Loss: 0.0594, Acc: 0.9697 | Val Loss: 2.2710, Acc: 0.7000 | Time: 12.3s
Epoch 07 | Train Loss: 0.0478, Acc: 0.9697 | Val Loss: 1.9186, Acc: 0.7000 | Time: 12.5s
Epoch 08 | Train Loss: 0.0570, Acc: 0.9697 | Val Loss: 1.5269, Acc: 0.7000 | Time: 12.7s
Epoch 09 | Train Loss: 0.0500, Acc: 1.0000 | Val Loss: 1.2805, Acc: 0.9000 | Time: 12.6s
✅ Saved new best model
Epoch 10 | Train Loss: 0.0490, Acc: 0.9697 | Val Loss: 1.1996, Acc: 0.8000 | Time: 12.8s
Epoch 11 | Train Loss: 0.0495, Acc: 0.9697 | Val Loss: 1.2679, A

In [4]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

# Load lại model
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 3)  # 3 classes: nothing, pallet_empty, pallet_loaded

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load("best_resnet18.pth", map_location=device))
model = model.to(device)
model.eval()




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [5]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [6]:
# Danh sách class phải giống với thư mục bạn train
class_names = ["nothing", "pallet_empty", "pallet_loaded"]

def predict_image(image_path):
    image = Image.open(image_path).convert("RGB")
    img = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(img)
        _, predicted = outputs.max(1)

    return class_names[predicted.item()]

In [7]:
print(predict_image(r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test\empty_pallet\0019.jpg"))
print(predict_image(r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test\loaded_pallet\0020.jpg"))

nothing
pallet_empty


In [13]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1) Lấy đúng thứ tự class từ train
train_dir = r"D:\Lab\AGV\AI\Vision\Pallet\dataset\train"  # đổi đúng đường dẫn của bạn
class_names = datasets.ImageFolder(train_dir).classes
print("Class order used by the model:", class_names)  # kiểm tra

# 2) Khởi tạo model đúng số lớp và load trọng số
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model.load_state_dict(torch.load("best_resnet18.pth", map_location=device))
model = model.to(device)
model.eval()

# 3) Tiền xử lý phải GIỐNG lúc train
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

def predict_image(path):
    img = Image.open(path).convert("RGB")
    x = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(x)
        probs = torch.softmax(logits, dim=1)[0]
        pred = probs.argmax().item()
    return class_names[pred], {cls: float(probs[i]) for i, cls in enumerate(class_names)}

# Test lại
print(predict_image(r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test\loaded_pallet\0021.jpg"))
print(predict_image(r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test\loaded_pallet\0020.jpg"))
print(predict_image(r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test\nothing\0007.jpg"))

Class order used by the model: ['empty_pallet', 'loaded_pallet', 'nothing']
('loaded_pallet', {'empty_pallet': 5.1589373470051214e-05, 'loaded_pallet': 0.9790634512901306, 'nothing': 0.02088490128517151})
('loaded_pallet', {'empty_pallet': 0.059533264487981796, 'loaded_pallet': 0.7327089905738831, 'nothing': 0.20775775611400604})
('loaded_pallet', {'empty_pallet': 5.444834641821217e-06, 'loaded_pallet': 0.9966446161270142, 'nothing': 0.0033499787095934153})


In [11]:
from torch.utils.data import DataLoader
test_dir = r"D:\Lab\AGV\AI\Vision\Pallet\dataset\test"
test_ds = datasets.ImageFolder(test_dir, transform=transform)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

correct, total = 0, 0
conf = torch.zeros(len(class_names), len(class_names), dtype=torch.int64)
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x).argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
        for t, p in zip(y.view(-1), pred.view(-1)):
            conf[t, p] += 1

print("Test Acc:", correct/total)
print("Confusion matrix (rows=true, cols=pred):\n", conf.cpu().numpy())


Test Acc: 0.8
Confusion matrix (rows=true, cols=pred):
 [[2 0 0]
 [0 2 0]
 [0 1 0]]
