In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn
import torchvision.transforms as T
from PIL import Image
import xml.etree.ElementTree as ET
from tqdm.notebook import tqdm
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# Đường dẫn tới thư mục train và val
train_root = "/kaggle/input/yoloooooooooo/dataset_80.20_full/train"
val_root = "/kaggle/input/yoloooooooooo/dataset_80.20_full/val"


# Lấy đường dẫn tới thư mục images và labels từ root
def get_img_label_paths(root):
    return os.path.join(root, "images"), os.path.join(root, "labels")

# Hàm đọc XML và trả về labels và boxes
def parse_yolo_format(txt_path, img_width, img_height):
    with open(txt_path, 'r') as f:
        lines = f.readlines()
    labels, boxes = [], []

    for line in lines:
        data = line.strip().split()
        class_id = int(data[0])
        x_center, y_center, width, height = map(float, data[1:])
        # Convert normalized YOLO format to absolute coordinates
        x_min = (x_center - width / 2) * img_width
        y_min = (y_center - height / 2) * img_height
        x_max = (x_center + width / 2) * img_width
        y_max = (y_center + height / 2) * img_height

        # Ensure width and height are greater than 0
        if x_max - x_min <= 0 or y_max - y_min <= 0:
            continue

        labels.append(class_id)
        boxes.append([x_min, y_min, x_max, y_max])

    return labels, boxes


class TrafficSignDataset(Dataset):
    def __init__(self, img_root, label_root, transforms=None):
        self.img_root = img_root
        self.label_root = label_root
        self.transforms = transforms

        self.imgs = sorted(os.listdir(img_root))
        self.labels = sorted(os.listdir(label_root))

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_root, self.imgs[idx])
        label_path = os.path.join(self.label_root, self.labels[idx])
    
        img = Image.open(img_path).convert("RGB")
        img_width, img_height = img.size

        labels, boxes = parse_yolo_format(label_path, img_width, img_height)
        
        boxes_tensor = torch.tensor(boxes, dtype=torch.float32)

        target = {
            "boxes": boxes_tensor,
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([idx]),
            "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
            "iscrowd": torch.zeros((len(labels),), dtype=torch.int64)
        }
        
        if self.transforms:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        return len(self.imgs)

def get_faster_rcnn_model(num_classes):
    model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
    # Get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

num_classes = 50   # 49 classes + background
batch_size = 24
num_epochs = 100

train_img_root, train_label_root = get_img_label_paths(train_root)
val_img_root, val_label_root = get_img_label_paths(val_root)

train_dataset = TrafficSignDataset(train_img_root, train_label_root, transforms=T.ToTensor())
val_dataset = TrafficSignDataset(val_img_root, val_label_root, transforms=T.ToTensor())

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)), num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)), num_workers=4)

model = get_faster_rcnn_model(num_classes)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
accumulation_steps = 4
model.train()
loss_values = []

#for epoch in tqdm(range(num_epochs), desc="Epochs"):
#    epoch_loss = 0
#    num_batches = 0

 #   for images, targets in tqdm(train_loader, desc="Batches", leave=False):
 #       images = list(image.to(device) for image in images)
 #       targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
 #       loss_dict = model(images, targets)
 #       losses = sum(loss for loss in loss_dict.values())
  #      
#
 #       epoch_loss += losses.item()
 #       num_batches += 1
#
 #       # Gradient accumulation
  #      losses.backward()
   #     
    #    if (num_batches % accumulation_steps) == 0:
     #       optimizer.step()
     #       optimizer.zero_grad()
#
 #   average_epoch_loss = epoch_loss / num_batches
  ##  loss_values.append(average_epoch_loss)
   ## print(f"Epoch {epoch+1}/{num_epochs}, Loss: {average_epoch_loss:.4f}")
#
 #   # Save the model
  #  torch.save(model.state_dict(), f"faster_epoch_{epoch}.pth")

Downloading: "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth
100%|██████████| 74.2M/74.2M [00:00<00:00, 206MB/s] 


In [2]:
checkpoint_path = "/kaggle/working/faster_epoch_95.pth" 

# Tải lại trạng thái mô hình
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Đặt lại device và chế độ train cho mô hình
model.to(device)
model.train()

# Bắt đầu vòng lặp đào tạo từ epoch thứ 44
for epoch in tqdm(range(96, num_epochs), desc="Epochs"):
    epoch_loss = 0
    num_batches = 0

    for images, targets in tqdm(train_loader, desc="Batches", leave=False):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Xóa gradients hiện có
        optimizer.zero_grad()

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Chia losses cho số lượng bước tích lũy gradient
        losses = losses / accumulation_steps
        losses.backward()
        
        if (num_batches % accumulation_steps) == 0:
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss += losses.item() * accumulation_steps  # Nhân lại để có tổng loss cho batch
        num_batches += 1

    average_epoch_loss = epoch_loss / num_batches
    loss_values.append(average_epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {average_epoch_loss:.4f}")

    # Lưu mô hình sau mỗi epoch
    torch.save(model.state_dict(), f"faster_epoch_{epoch}.pth")

Epochs:   0%|          | 0/4 [00:00<?, ?it/s]

Batches:   0%|          | 0/778 [00:00<?, ?it/s]

  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),


Epoch 97/100, Loss: 0.2606


Batches:   0%|          | 0/778 [00:00<?, ?it/s]

  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),


Epoch 98/100, Loss: 0.2611


Batches:   0%|          | 0/778 [00:00<?, ?it/s]

  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),


Epoch 99/100, Loss: 0.2482


Batches:   0%|          | 0/778 [00:00<?, ?it/s]

  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),
  "area": torch.tensor(((boxes_tensor[:, 2] - boxes_tensor[:, 0]) * (boxes_tensor[:, 3] - boxes_tensor[:, 1])), dtype=torch.float32),


Epoch 100/100, Loss: 0.2537
