In [None]:
!pip install xmltodict


In [None]:
import os
import xmltodict
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np


In [None]:
class VOCDataset(Dataset):
    """Object-detection dataset backed by Pascal-VOC style XML annotations.

    Every ``.jpg`` file found in ``image_dir`` is paired with the ``.xml`` file
    of the same base name in ``annotation_dir``. Each item is an
    ``(image, target)`` pair where ``target`` is a dict with:

    * ``boxes``    -- float32 tensor of shape ``(N, 4)`` as ``[xmin, ymin, xmax, ymax]``
    * ``labels``   -- int64 tensor of shape ``(N,)``
    * ``image_id`` -- int64 tensor holding the dataset index

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        annotation_dir: Directory containing the matching ``.xml`` files.
        classes: Ordered list of class names. A box's label is the position of
            its ``name`` in this list plus ``label_offset``.
        transforms: Optional callable applied to the PIL image only; the boxes
            are not transformed.
        label_offset: Integer added to every class index. Defaults to ``0``,
            which keeps the original labelling.
            NOTE(review): torchvision detection models treat label 0 as
            background, so with the default offset the first entry of
            ``classes`` is trained as background. Pass ``label_offset=1`` to
            avoid that, and subtract the same offset wherever predicted labels
            are used to index ``classes``.
    """

    def __init__(self, image_dir, annotation_dir, classes, transforms=None, label_offset=0):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transforms = transforms
        self.classes = classes
        self.label_offset = label_offset
        # os.listdir returns entries in arbitrary order; sorting makes an index
        # (and therefore image_id) always refer to the same file.
        self.image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))

    def _build_target(self, ann, idx):
        """Turn a parsed ``annotation`` mapping into the detection target dict.

        Args:
            ann: Mapping parsed from the XML ``<annotation>`` element.
            idx: Dataset index of the image, stored as ``image_id``.

        Returns:
            Dict with ``boxes`` (float32, ``(N, 4)``), ``labels`` (int64,
            ``(N,)``) and ``image_id``. ``N`` is 0 when the annotation has no
            valid objects.

        Raises:
            ValueError: If an object's name is not in ``self.classes``.
        """
        # An image without any <object> has no "object" key (or an empty value).
        objs = (ann or {}).get("object") or []
        # A single <object> is parsed as a mapping rather than a list.
        if not isinstance(objs, list):
            objs = [objs]

        boxes, labels = [], []
        for obj in objs:
            label = self.classes.index(obj["name"]) + self.label_offset
            bbox = obj["bndbox"]

            xmin = float(bbox["xmin"])
            ymin = float(bbox["ymin"])
            xmax = float(bbox["xmax"])
            ymax = float(bbox["ymax"])

            # Skip invalid boxes (zero or negative width/height).
            if xmax <= xmin or ymax <= ymin:
                print(f"⚠️ Bỏ bbox lỗi: {[xmin, ymin, xmax, ymax]} ở ảnh {self.image_files[idx]}")
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label)

        # reshape keeps the (N, 4) layout even when no box survived (N == 0).
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(labels, dtype=torch.int64)
        return {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}

    def __getitem__(self, idx):
        """Load the image at ``idx`` and its annotation as ``(image, target)``."""
        file_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, file_name)
        # Swap only the extension; str.replace would also rewrite ".jpg"
        # occurring elsewhere in the file name.
        ann_path = os.path.join(self.annotation_dir, os.path.splitext(file_name)[0] + ".xml")
        img = Image.open(img_path).convert("RGB")

        with open(ann_path) as f:
            ann = xmltodict.parse(f.read())["annotation"]

        target = self._build_target(ann, idx)

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``image_dir``."""
        return len(self.image_files)


In [None]:
def get_fasterrcnn_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box-predictor head.

    Args:
        num_classes: Number of outputs of the replacement head. The training
            cell in this notebook passes ``len(classes) + 1``.

    Returns:
        The detector with ``roi_heads.box_predictor`` swapped for a
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=` -- confirm against the installed version.
    detector = fasterrcnn_resnet50_fpn(pretrained=True)

    # The replacement head must accept the same feature width as the old one.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classes = ["MR", "NC", "WF"]
data_dir = "/kaggle/input/yellow-sticky-traps-dataset"  # change this to your dataset's name

transforms = T.Compose([T.ToTensor()])

# Each split keeps its images and XML annotations in the same folder.
train_dir = f"{data_dir}/train"
valid_dir = f"{data_dir}/valid"
trainset = VOCDataset(train_dir, train_dir, classes, transforms)
validset = VOCDataset(valid_dir, valid_dir, classes, transforms)


def collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``(images, targets)`` tuples without stacking."""
    return tuple(zip(*batch))


trainloader = DataLoader(trainset, batch_size=2, shuffle=True, collate_fn=collate_detection_batch)
validloader = DataLoader(validset, batch_size=1, shuffle=False, collate_fn=collate_detection_batch)

model = get_fasterrcnn_model(num_classes=len(classes) + 1)
model.to(device)

# Only parameters that require gradients are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9)

num_epochs = 50
loss_history = []

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for images, targets in trainloader:
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in target.items()} for target in targets]

        # In training mode the model returns a dict of losses; optimise their sum.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    # Mean loss per batch for this epoch.
    avg_loss = total_loss / len(trainloader)
    loss_history.append(avg_loss)
    print(f"Epoch {epoch+1} - Loss: {avg_loss:.4f}")

torch.save(model.state_dict(), "fasterrcnn_model.pth")
print("✅ Mô hình đã được lưu.")


In [None]:
# Plot the per-epoch training loss, save it to disk, then display it.
fig, ax = plt.subplots()
ax.plot(loss_history, marker='o', label="Loss")
ax.set_title("Training Loss over Epochs")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid(True)
ax.legend()
fig.savefig("loss_curve.png")
plt.show()

# Temporary, approximate accuracy: predicted labels are compared with the
# ground-truth labels of the same image position by position (boxes are not
# matched by overlap), so this is only a rough proxy for detection quality.
correct = 0
total = 0
model.eval()

with torch.no_grad():
    for images, targets in validloader:
        images = [image.to(device) for image in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):
            pred_labels = output['labels'].cpu().numpy()
            true_labels = target['labels'].cpu().numpy()

            # Only positions present in both arrays can be compared.
            overlap = min(len(pred_labels), len(true_labels))
            correct += int(np.sum(pred_labels[:overlap] == true_labels[:overlap]))
            # Every ground-truth box counts, including those with no prediction.
            total += len(true_labels)

accuracy = 100 * correct / total if total > 0 else 0
print(f"✅ Approximate Accuracy: {accuracy:.2f}%")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Collect (true label, predicted label) pairs over the validation set and print
# a classification report plus a confusion matrix.
# NOTE(review): pairs are formed by position within each image, not by box
# overlap, so these numbers are only a rough approximation.
model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for images, targets in validloader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for i in range(len(outputs)):
            pred_labels = outputs[i]['labels'].cpu().numpy()
            true_labels = targets[i]['labels'].cpu().numpy()

            # Keep the same number of entries on both sides (avoids a length mismatch).
            min_len = min(len(pred_labels), len(true_labels))
            y_true.extend(true_labels[:min_len])
            y_pred.extend(pred_labels[:min_len])

# Label ids are indices into `classes`, as produced by VOCDataset. Passing them
# explicitly keeps the report aligned with `target_names` even when a class is
# missing from the collected pairs or the detector emits an extra label id;
# without it, classification_report raises on the size mismatch.
class_ids = list(range(len(classes)))

print("📋 Classification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=classes, zero_division=0))

print("📊 Confusion Matrix:")
# Rows/columns follow `class_ids`; pairs involving any other label id are left out.
print(confusion_matrix(y_true, y_pred, labels=class_ids))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, precision_recall_fscore_support

def plot_full_evaluation(model, dataloader, classes):
    """Print summary metrics and plot a confusion matrix and per-class scores.

    Predicted and ground-truth labels of each image are paired by position
    (the first ``min(len(pred), len(true))`` entries of each), not by box
    overlap, so the reported numbers are only a rough approximation of
    detection quality.

    Uses the notebook-level ``device`` to move images before inference.

    Args:
        model: Detection model; it is switched to eval mode.
        dataloader: Iterable yielding ``(images, targets)`` batches where each
            target has a ``labels`` tensor.
        classes: Ordered class names; label ids are treated as indices into it.

    Returns:
        None. Metrics are printed and figures are shown.
    """
    class_ids = list(range(len(classes)))

    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for output, target in zip(outputs, targets):
                pred_labels = output['labels'].cpu().numpy()
                true_labels = target['labels'].cpu().numpy()

                min_len = min(len(pred_labels), len(true_labels))
                y_true.extend(true_labels[:min_len])
                y_pred.extend(pred_labels[:min_len])

    if not y_true:
        # Nothing to score (empty loader or no predictions at all).
        print("⚠️ No prediction / ground-truth label pairs to evaluate.")
        return

    # === 1. Summary: Precision / Recall / F1 / Accuracy ===
    print("📊 Precision / Recall / F1-score / Accuracy:")
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro', zero_division=0)
    acc = accuracy_score(y_true, y_pred)
    print(f"  ✅ Accuracy:  {acc:.4f}")
    print(f"  🎯 Precision: {precision:.4f}")
    print(f"  📥 Recall:    {recall:.4f}")
    print(f"  💡 F1-score:  {f1:.4f}")

    # === 2. Confusion Matrix ===
    # from_predictions creates its own figure unless an axes is supplied, which
    # would leave a blank figure behind; draw on our axes instead. Explicit
    # `labels` keeps the matrix the same size as `display_labels`.
    fig, ax = plt.subplots(figsize=(6, 5))
    ConfusionMatrixDisplay.from_predictions(
        y_true, y_pred, labels=class_ids, display_labels=classes,
        cmap='Blues', values_format='d', ax=ax)
    ax.set_title("Confusion Matrix")
    ax.grid(False)
    plt.show()

    # === 3. Per-class bar chart ===
    pr, rc, f1_class, _ = precision_recall_fscore_support(y_true, y_pred, labels=class_ids, zero_division=0)
    x = range(len(classes))
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(x, pr, width=0.2, label="Precision", align='center')
    ax.bar([i + 0.2 for i in x], rc, width=0.2, label="Recall", align='center')
    ax.bar([i + 0.4 for i in x], f1_class, width=0.2, label="F1-score", align='center')
    # Centre each tick under the middle bar of its group.
    ax.set_xticks([i + 0.2 for i in x])
    ax.set_xticklabels(classes)
    ax.set_ylabel("Score")
    ax.set_title("Precision / Recall / F1 per Class")
    ax.legend()
    fig.tight_layout()
    plt.show()

# ✅ Run the evaluation on the validation/test set
plot_full_evaluation(model, validloader, classes)  # or testloader if you prefer


In [None]:
import random
import matplotlib.patches as patches
from PIL import Image

def visualize_prediction(model, dataset, class_names, score_threshold=0.5):
    """Show the model's detections on one randomly chosen image of ``dataset``.

    Uses the notebook-level ``device`` to move the image before inference.

    Args:
        model: Detection model; it is switched to eval mode.
        dataset: Indexable dataset returning ``(image_tensor, target)``; the
            image is permuted from channel-first to channel-last for display.
        class_names: Sequence of names indexed by predicted label id.
        score_threshold: Detections scoring below this value are not drawn.
            Defaults to ``0.5``.

    Returns:
        None. The figure is shown.
    """
    model.eval()

    # Pick one random image from the dataset.
    idx = random.randint(0, len(dataset) - 1)
    image, _ = dataset[idx]  # only the image is needed, not the ground truth

    # Move the image to the device and add a batch dimension.
    input_tensor = image.to(device).unsqueeze(0)

    with torch.no_grad():
        outputs = model(input_tensor)

    output = outputs[0]
    boxes = output['boxes'].cpu().numpy()
    labels = output['labels'].cpu().numpy()
    scores = output['scores'].cpu().numpy()

    # Show the image with its bounding boxes.
    image_np = image.permute(1, 2, 0).cpu().numpy()

    fig, ax = plt.subplots(1, figsize=(8, 8))
    ax.imshow(image_np)
    for box, label, score in zip(boxes, labels, scores):
        if score < score_threshold:
            continue  # skip low-score detections
        xmin, ymin, xmax, ymax = box
        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=2, edgecolor='lime', facecolor='none')
        ax.add_patch(rect)

        # The detector head can emit a label id beyond the end of class_names;
        # fall back to the raw id instead of raising IndexError.
        label = int(label)
        if 0 <= label < len(class_names):
            name = class_names[label]
        else:
            name = f"class {label}"
        ax.text(xmin, ymin - 5, f"{name} ({score:.2f})",
                color='yellow', fontsize=10, weight='bold',
                bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.2'))
    plt.title("📷 Dự đoán từ mô hình Faster R-CNN")
    plt.axis('off')
    plt.show()

# ✅ Show the prediction for a random image. No `testset` is created in the
# cells above, so the validation set is used here.
visualize_prediction(model, validset, classes)
