In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from pycocotools.coco import COCO
import os
from PIL import Image

In [2]:
# Set device to CPU
device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Custom COCO Dataset class
class COCODataset(Dataset):
    def __init__(self, root, annotation, transforms=None, max_images=None):
        self.root = root
        self.coco = COCO(annotation)
        self.ids = list(self.coco.imgToAnns.keys())
        if max_images is not None:
            self.ids = self.ids[:max_images]
        self.transforms = transforms

    def __getitem__(self, index):
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)
        img_info = coco.loadImgs(img_id)[0]

        # Load image
        img_path = os.path.join(self.root, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")

        # Extract bounding boxes and labels
        boxes = []
        labels = []
        for ann in anns:
            bbox = ann['bbox']
            boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            labels.append(ann['category_id'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {'boxes': boxes, 'labels': labels}

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.ids)


# Paths to dataset
train_data_dir = '/home/ubuntu_qa/codevs/Computer vision/Kien～先生/Cloud/MS COCO dataset/train2017'
train_annotation_file = '/home/ubuntu_qa/codevs/Computer vision/Kien～先生/Cloud/MS COCO dataset/annotations/instances_train2017.json'

val_data_dir = '/home/ubuntu_qa/codevs/Computer vision/Kien～先生/Cloud/MS COCO dataset/val2017'
val_annotation_file = '/home/ubuntu_qa/codevs/Computer vision/Kien～先生/Cloud/MS COCO dataset/annotations/instances_val2017.json'

# Create datasets
train_dataset = COCODataset(train_data_dir, train_annotation_file)
val_dataset = COCODataset(val_data_dir, val_annotation_file)


loading annotations into memory...
Done (t=0.63s)
creating index...
index created!
loading annotations into memory...
Done (t=0.61s)
creating index...
index created!


In [4]:
# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")


Number of training samples: 5000
Number of validation samples: 4952


In [5]:
for i, (images, targets) in enumerate(train_loader):
    # Chuyển dữ liệu sang GPU
    images = [img.to(device) for img in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # Kiểm tra thiết bị
    for img in images:
        print(f"Image device: {img.device}")
    for t in targets:
        print(f"Boxes device: {t['boxes'].device}")
        print(f"Labels device: {t['labels'].device}")


AttributeError: 'Image' object has no attribute 'to'

In [None]:
# Load Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)
# model.to(device)

# Replace the head for COCO classes
num_classes = 91  # COCO has 80 classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

print(model)


In [38]:
# print(f"Model is on device: {next(model.parameters()).device}")


In [None]:
# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, targets in train_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()

        # Backward pass
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
    lr_scheduler.step()
