In [None]:
# For tips on running notebooks in Google Colab, see
# https://pytorch.org/tutorials/beginner/colab
%matplotlib inline
!pip install torchvision==0.16
!pip install pycocotools

# Import các thư viện cần thiết

In [None]:
import matplotlib.pyplot as plt
from torchvision.io import read_image
import pandas as pd
import os
import torch

from torchvision.io import read_image
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
from torchvision.transforms import v2 as T
import ast
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import FasterRCNN
import torch.nn as nn
import utils
from engine import train_one_epoch, evaluate

In [None]:
os.system("wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py")
os.system("wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py")
os.system("wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py")
os.system("wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py")
os.system("wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py")

# Xem qua ảnh trong bộ dataset

In [None]:
# Load one training image and show it in the left half of a wide figure.
sample_image_path = "/kaggle/input/face-recognition/mnt/md0/projects/sami-hackathon/private/data/47540406.jpg"
image = read_image(sample_image_path)

preview_fig = plt.figure(figsize=(16, 8))
preview_ax = preview_fig.add_subplot(121)
preview_ax.set_title("Image")
# read_image returns channels-first data; matplotlib expects channels-last.
preview_ax.imshow(image.permute(1, 2, 0))

In [None]:
# Load the label table and print how many rows (bounding boxes) each image file has.
df = pd.read_csv("/kaggle/input/labels/labels.csv")
rows_per_file = df['file_name'].value_counts()
print(rows_per_file)

# Dataset for FaceAnalysis

In [None]:
class FaceDetectDataset(torch.utils.data.Dataset):
    """Face-detection dataset backed by a ``labels.csv`` file and an image folder.

    ``labels.csv`` is read from ``label_root`` and must provide a ``file_name``
    column and a ``bbox`` column. Each ``bbox`` cell is parsed with
    ``ast.literal_eval`` and used as ``[x, y, width, height]``.

    An image with several boxes occupies several CSV rows. The rows are grouped
    by file name once, at construction time, so that

    * every image is exposed as exactly one sample (previously an image with
      ``k`` boxes appeared ``k`` times, each copy carrying all ``k`` boxes, so
      copies could end up on both sides of a train/test split), and
    * ``__getitem__`` is a dictionary lookup instead of a scan over the whole
      DataFrame for every sample.

    Args:
        label_root: Directory containing ``labels.csv``.
        data_root: Directory containing the image files named in ``file_name``.
        transforms: Optional callable applied as ``transforms(img, target)``;
            pass ``None`` to skip it.
    """

    def __init__(self, label_root, data_root, transforms):
        self.label_root = label_root
        self.data_root = data_root
        self.transforms = transforms
        self.df = pd.read_csv(
            os.path.join(self.label_root, "labels.csv"),
            converters={'bbox': ast.literal_eval}
        )
        # Distinct file names in first-appearance order (dict keys keep insertion order).
        self.imgs = list(dict.fromkeys(self.df['file_name']))
        self.img_counts = self.df['file_name'].value_counts()

        # file name -> list of [x_min, y_min, x_max, y_max], converted from
        # the CSV's [x, y, width, height] layout.
        self._boxes_by_file = {}
        for file_name, bbox in zip(self.df['file_name'], self.df['bbox']):
            xyxy = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            self._boxes_by_file.setdefault(file_name, []).append(xyxy)

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the ``idx``-th distinct image.

        ``target`` is a dict with the keys ``boxes`` (XYXY ``BoundingBoxes``),
        ``labels`` (all ones: a single foreground class), ``image_id`` (``idx``),
        ``area`` and ``iscrowd`` (all zeros).
        """
        file_name = self.imgs[idx]
        img = read_image(os.path.join(self.data_root, file_name))

        boxes = torch.tensor(self._boxes_by_file[file_name], dtype=torch.float32)
        num_objs = boxes.shape[0]

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors:
        img = tv_tensors.Image(img)

        target = {
            "boxes": tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img)),
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of distinct images listed in ``labels.csv``."""
        return len(self.imgs)

# Hàm sử dụng để transform ảnh và tạo model

In [None]:
def get_transform(train):
    """Build the v2 transform pipeline applied to each (image, target) sample.

    When ``train`` is truthy a random horizontal flip (p=0.5) is prepended;
    in every case the sample is converted to scaled float and then to pure
    tensors.
    """
    augmentations = [T.RandomHorizontalFlip(0.5)] if train else []
    conversions = [T.ToDtype(torch.float, scale=True), T.ToPureTensor()]
    return T.Compose(augmentations + conversions)

def get_model(num_classes):
    """Return a Faster R-CNN (ResNet-50 FPN) whose box head predicts ``num_classes`` classes.

    The detector is created with the "DEFAULT" weights; only its box
    predictor is swapped for a freshly initialised ``FastRCNNPredictor``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # The new head must accept the same feature width the old classifier consumed.
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

# Test thử model

In [None]:
# Smoke test: build the model and a training DataLoader, then run one
# train-mode forward pass on a real batch and one eval-mode forward pass on
# random tensors.
model = get_model(num_classes= 2)
dataset = FaceDetectDataset(data_root='/kaggle/input/face-recognition/mnt/md0/projects/sami-hackathon/private/data',
                            label_root='/kaggle/input/labels',
                            transforms= get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn
)

# Take a single batch; the model is called with a list of images and a list of target dicts.
images, targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
# Train-mode forward pass (model.eval() has not been called yet).
# NOTE(review): torchvision documents that detection models return only the
# dict of losses in training mode, not detections - confirm against the printed output.
output = model(images, targets)
print(output)

# For inference
model.eval()
# Two random images of different sizes, passed as a list.
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)  # Returns predictions
print(predictions[0])

# Train model

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Two classes: background (0) and face (1, the only label the dataset emits).
num_classes = 2

# Locations and split settings, defined once instead of repeated per dataset.
DATA_ROOT = '/kaggle/input/face-recognition/mnt/md0/projects/sami-hackathon/private/data'
LABEL_ROOT = '/kaggle/input/labels'
NUM_TEST_SAMPLES = 50
SPLIT_SEED = 0

# Same data twice: the training copy gets augmentation, the test copy does not.
dataset = FaceDetectDataset(data_root=DATA_ROOT,
                            label_root=LABEL_ROOT,
                            transforms=get_transform(train=True))
dataset_test = FaceDetectDataset(data_root=DATA_ROOT,
                                 label_root=LABEL_ROOT,
                                 transforms=get_transform(train=False))

# Split into train and test sets. The permutation is seeded so the held-out
# samples are the same on every run; with an unseeded permutation a model
# resumed from a checkpoint is evaluated on samples it may already have
# trained on.
# NOTE(review): the checkpoint loaded below may have been produced with a
# different split - the metrics are only a clean hold-out estimate if it was
# trained with this same seed.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(dataset), generator=split_generator).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-NUM_TEST_SAMPLES])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-NUM_TEST_SAMPLES:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn
)

# get the model using our helper function
model = get_model(num_classes)
# model= nn.DataParallel(model)
# move model to the right device
model.to(device)

# Resume from previously trained weights. map_location remaps the stored
# tensors onto `device`, so a checkpoint saved on a GPU still loads when the
# notebook falls back to the CPU.
model.load_state_dict(torch.load("/kaggle/input/modelweights/model.pth", map_location=device))

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005
)

# and a learning rate scheduler
# (step_size=3 is larger than num_epochs below, so the decay never triggers in this run)
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

# let's train it just for 2 epochs
num_epochs = 2

for epoch in range(num_epochs):
    # train for one epoch, printing every 100 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

print("That's it!")

# Lưu lại weights của model

In [None]:
# Persist only the model parameters (state_dict) to model_v2.pth in the working directory.
torch.save(model.state_dict(), "model_v2.pth")

# Epochs: training and evaluation logs

In [None]:
# Epoch: [0]  [   0/3815]  eta: 4:36:36  lr: 0.000010  loss: 0.5750 (0.5750)  loss_classifier: 0.4692 (0.4692)  loss_box_reg: 0.0623 (0.0623)  loss_objectness: 0.0331 (0.0331)  loss_rpn_box_reg: 0.0105 (0.0105)  time: 4.3504  data: 3.0938  max mem: 12776
# Epoch: [0]  [ 100/3815]  eta: 1:33:05  lr: 0.000509  loss: 0.2066 (0.2898)  loss_classifier: 0.0835 (0.1500)  loss_box_reg: 0.1089 (0.0929)  loss_objectness: 0.0057 (0.0395)  loss_rpn_box_reg: 0.0046 (0.0074)  time: 1.4545  data: 0.0383  max mem: 12776
# Epoch: [0]  [ 200/3815]  eta: 1:29:41  lr: 0.001009  loss: 0.1267 (0.2307)  loss_classifier: 0.0380 (0.1039)  loss_box_reg: 0.0869 (0.0984)  loss_objectness: 0.0029 (0.0230)  loss_rpn_box_reg: 0.0022 (0.0054)  time: 1.5065  data: 0.0382  max mem: 12776
# Epoch: [0]  [ 300/3815]  eta: 1:27:18  lr: 0.001508  loss: 0.0768 (0.1941)  loss_classifier: 0.0234 (0.0817)  loss_box_reg: 0.0535 (0.0913)  loss_objectness: 0.0003 (0.0164)  loss_rpn_box_reg: 0.0014 (0.0047)  time: 1.4689  data: 0.0370  max mem: 12776
# Epoch: [0]  [ 400/3815]  eta: 1:24:22  lr: 0.002008  loss: 0.0787 (0.1717)  loss_classifier: 0.0248 (0.0687)  loss_box_reg: 0.0525 (0.0859)  loss_objectness: 0.0004 (0.0130)  loss_rpn_box_reg: 0.0010 (0.0041)  time: 1.4734  data: 0.0363  max mem: 12776
# Epoch: [0]  [ 500/3815]  eta: 1:21:39  lr: 0.002507  loss: 0.0714 (0.1546)  loss_classifier: 0.0224 (0.0603)  loss_box_reg: 0.0479 (0.0801)  loss_objectness: 0.0001 (0.0106)  loss_rpn_box_reg: 0.0010 (0.0035)  time: 1.5449  data: 0.0376  max mem: 12776
# Epoch: [0]  [ 600/3815]  eta: 1:19:19  lr: 0.003007  loss: 0.0640 (0.1422)  loss_classifier: 0.0205 (0.0545)  loss_box_reg: 0.0457 (0.0754)  loss_objectness: 0.0001 (0.0090)  loss_rpn_box_reg: 0.0008 (0.0032)  time: 1.5345  data: 0.0353  max mem: 12776
# Epoch: [0]  [ 700/3815]  eta: 1:17:01  lr: 0.003506  loss: 0.0601 (0.1317)  loss_classifier: 0.0176 (0.0497)  loss_box_reg: 0.0398 (0.0713)  loss_objectness: 0.0001 (0.0079)  loss_rpn_box_reg: 0.0008 (0.0029)  time: 1.4229  data: 0.0391  max mem: 12776
# Epoch: [0]  [ 800/3815]  eta: 1:14:44  lr: 0.004006  loss: 0.0590 (0.1249)  loss_classifier: 0.0178 (0.0463)  loss_box_reg: 0.0384 (0.0689)  loss_objectness: 0.0001 (0.0070)  loss_rpn_box_reg: 0.0008 (0.0027)  time: 1.4270  data: 0.0375  max mem: 12776
# Epoch: [0]  [ 900/3815]  eta: 1:12:32  lr: 0.004505  loss: 0.0640 (0.1187)  loss_classifier: 0.0184 (0.0435)  loss_box_reg: 0.0446 (0.0665)  loss_objectness: 0.0002 (0.0063)  loss_rpn_box_reg: 0.0008 (0.0025)  time: 1.5618  data: 0.0363  max mem: 12776
# Epoch: [0]  [1000/3815]  eta: 1:10:12  lr: 0.005000  loss: 0.0550 (0.1126)  loss_classifier: 0.0184 (0.0410)  loss_box_reg: 0.0331 (0.0636)  loss_objectness: 0.0001 (0.0057)  loss_rpn_box_reg: 0.0006 (0.0023)  time: 1.6032  data: 0.0373  max mem: 12776
# Epoch: [0]  [1100/3815]  eta: 1:07:39  lr: 0.005000  loss: 0.0517 (0.1079)  loss_classifier: 0.0139 (0.0389)  loss_box_reg: 0.0370 (0.0615)  loss_objectness: 0.0000 (0.0052)  loss_rpn_box_reg: 0.0005 (0.0022)  time: 1.4623  data: 0.0382  max mem: 12776
# Epoch: [0]  [1200/3815]  eta: 1:05:09  lr: 0.005000  loss: 0.0571 (0.1035)  loss_classifier: 0.0158 (0.0370)  loss_box_reg: 0.0387 (0.0595)  loss_objectness: 0.0002 (0.0049)  loss_rpn_box_reg: 0.0007 (0.0021)  time: 1.5879  data: 0.0379  max mem: 12776
# Epoch: [0]  [1300/3815]  eta: 1:02:40  lr: 0.005000  loss: 0.0463 (0.0997)  loss_classifier: 0.0142 (0.0354)  loss_box_reg: 0.0290 (0.0577)  loss_objectness: 0.0001 (0.0046)  loss_rpn_box_reg: 0.0006 (0.0020)  time: 1.4295  data: 0.0369  max mem: 12776
# Epoch: [0]  [1400/3815]  eta: 1:00:07  lr: 0.005000  loss: 0.0458 (0.0968)  loss_classifier: 0.0147 (0.0342)  loss_box_reg: 0.0311 (0.0564)  loss_objectness: 0.0002 (0.0043)  loss_rpn_box_reg: 0.0005 (0.0019)  time: 1.4136  data: 0.0395  max mem: 12776
# Epoch: [0]  [1500/3815]  eta: 0:57:30  lr: 0.005000  loss: 0.0533 (0.0942)  loss_classifier: 0.0150 (0.0331)  loss_box_reg: 0.0357 (0.0552)  loss_objectness: 0.0002 (0.0041)  loss_rpn_box_reg: 0.0007 (0.0019)  time: 1.3769  data: 0.0372  max mem: 12776
# Epoch: [0]  [1600/3815]  eta: 0:55:04  lr: 0.005000  loss: 0.0395 (0.0913)  loss_classifier: 0.0099 (0.0319)  loss_box_reg: 0.0268 (0.0538)  loss_objectness: 0.0001 (0.0038)  loss_rpn_box_reg: 0.0004 (0.0018)  time: 1.5360  data: 0.0365  max mem: 12776
# Epoch: [0]  [1700/3815]  eta: 0:52:35  lr: 0.005000  loss: 0.0391 (0.0890)  loss_classifier: 0.0112 (0.0310)  loss_box_reg: 0.0272 (0.0527)  loss_objectness: 0.0001 (0.0036)  loss_rpn_box_reg: 0.0005 (0.0017)  time: 1.5256  data: 0.0374  max mem: 12776
# Epoch: [0]  [1800/3815]  eta: 0:50:04  lr: 0.005000  loss: 0.0426 (0.0869)  loss_classifier: 0.0129 (0.0301)  loss_box_reg: 0.0314 (0.0517)  loss_objectness: 0.0003 (0.0035)  loss_rpn_box_reg: 0.0005 (0.0017)  time: 1.4562  data: 0.0373  max mem: 12776
# Epoch: [0]  [1900/3815]  eta: 0:47:33  lr: 0.005000  loss: 0.0390 (0.0849)  loss_classifier: 0.0116 (0.0293)  loss_box_reg: 0.0283 (0.0507)  loss_objectness: 0.0001 (0.0033)  loss_rpn_box_reg: 0.0004 (0.0016)  time: 1.5403  data: 0.0386  max mem: 12776
# Epoch: [0]  [2000/3815]  eta: 0:45:05  lr: 0.005000  loss: 0.0368 (0.0832)  loss_classifier: 0.0119 (0.0286)  loss_box_reg: 0.0253 (0.0499)  loss_objectness: 0.0001 (0.0032)  loss_rpn_box_reg: 0.0005 (0.0016)  time: 1.4208  data: 0.0372  max mem: 12776
# Epoch: [0]  [2100/3815]  eta: 0:42:32  lr: 0.005000  loss: 0.0387 (0.0815)  loss_classifier: 0.0130 (0.0279)  loss_box_reg: 0.0263 (0.0490)  loss_objectness: 0.0001 (0.0030)  loss_rpn_box_reg: 0.0005 (0.0015)  time: 1.3692  data: 0.0374  max mem: 12776
# Epoch: [0]  [2200/3815]  eta: 0:40:03  lr: 0.005000  loss: 0.0366 (0.0798)  loss_classifier: 0.0121 (0.0272)  loss_box_reg: 0.0256 (0.0482)  loss_objectness: 0.0001 (0.0029)  loss_rpn_box_reg: 0.0005 (0.0015)  time: 1.5452  data: 0.0376  max mem: 12776
# Epoch: [0]  [2300/3815]  eta: 0:37:32  lr: 0.005000  loss: 0.0330 (0.0783)  loss_classifier: 0.0111 (0.0267)  loss_box_reg: 0.0217 (0.0473)  loss_objectness: 0.0002 (0.0028)  loss_rpn_box_reg: 0.0003 (0.0015)  time: 1.5216  data: 0.0373  max mem: 12776
# Epoch: [0]  [2400/3815]  eta: 0:35:02  lr: 0.005000  loss: 0.0405 (0.0772)  loss_classifier: 0.0122 (0.0263)  loss_box_reg: 0.0270 (0.0467)  loss_objectness: 0.0001 (0.0027)  loss_rpn_box_reg: 0.0004 (0.0014)  time: 1.5120  data: 0.0384  max mem: 12776
# Epoch: [0]  [2500/3815]  eta: 0:32:35  lr: 0.005000  loss: 0.0332 (0.0759)  loss_classifier: 0.0094 (0.0258)  loss_box_reg: 0.0244 (0.0460)  loss_objectness: 0.0001 (0.0026)  loss_rpn_box_reg: 0.0003 (0.0014)  time: 1.5113  data: 0.0360  max mem: 12776
# Epoch: [0]  [2600/3815]  eta: 0:30:07  lr: 0.005000  loss: 0.0319 (0.0746)  loss_classifier: 0.0109 (0.0253)  loss_box_reg: 0.0205 (0.0454)  loss_objectness: 0.0001 (0.0026)  loss_rpn_box_reg: 0.0003 (0.0014)  time: 1.4930  data: 0.0365  max mem: 12776
# Epoch: [0]  [2700/3815]  eta: 0:27:39  lr: 0.005000  loss: 0.0371 (0.0735)  loss_classifier: 0.0109 (0.0249)  loss_box_reg: 0.0266 (0.0448)  loss_objectness: 0.0001 (0.0025)  loss_rpn_box_reg: 0.0005 (0.0013)  time: 1.4832  data: 0.0366  max mem: 12776
# Epoch: [0]  [2800/3815]  eta: 0:25:11  lr: 0.005000  loss: 0.0340 (0.0724)  loss_classifier: 0.0113 (0.0245)  loss_box_reg: 0.0226 (0.0442)  loss_objectness: 0.0001 (0.0024)  loss_rpn_box_reg: 0.0005 (0.0013)  time: 1.5101  data: 0.0380  max mem: 12776
# Epoch: [0]  [2900/3815]  eta: 0:22:41  lr: 0.005000  loss: 0.0369 (0.0715)  loss_classifier: 0.0108 (0.0241)  loss_box_reg: 0.0232 (0.0437)  loss_objectness: 0.0001 (0.0023)  loss_rpn_box_reg: 0.0004 (0.0013)  time: 1.4242  data: 0.0358  max mem: 12776
# Epoch: [0]  [3000/3815]  eta: 0:20:11  lr: 0.005000  loss: 0.0376 (0.0705)  loss_classifier: 0.0104 (0.0237)  loss_box_reg: 0.0261 (0.0432)  loss_objectness: 0.0001 (0.0023)  loss_rpn_box_reg: 0.0005 (0.0013)  time: 1.4944  data: 0.0354  max mem: 12776
# Epoch: [0]  [3100/3815]  eta: 0:17:43  lr: 0.005000  loss: 0.0345 (0.0697)  loss_classifier: 0.0110 (0.0234)  loss_box_reg: 0.0238 (0.0428)  loss_objectness: 0.0001 (0.0022)  loss_rpn_box_reg: 0.0003 (0.0013)  time: 1.5090  data: 0.0366  max mem: 12776
# Epoch: [0]  [3200/3815]  eta: 0:15:14  lr: 0.005000  loss: 0.0366 (0.0688)  loss_classifier: 0.0104 (0.0231)  loss_box_reg: 0.0224 (0.0423)  loss_objectness: 0.0000 (0.0022)  loss_rpn_box_reg: 0.0003 (0.0012)  time: 1.5429  data: 0.0380  max mem: 12776
# Epoch: [0]  [3300/3815]  eta: 0:12:45  lr: 0.005000  loss: 0.0365 (0.0680)  loss_classifier: 0.0103 (0.0227)  loss_box_reg: 0.0247 (0.0419)  loss_objectness: 0.0001 (0.0021)  loss_rpn_box_reg: 0.0005 (0.0012)  time: 1.4599  data: 0.0388  max mem: 12776
# Epoch: [0]  [3400/3815]  eta: 0:10:16  lr: 0.005000  loss: 0.0332 (0.0672)  loss_classifier: 0.0089 (0.0225)  loss_box_reg: 0.0228 (0.0415)  loss_objectness: 0.0001 (0.0021)  loss_rpn_box_reg: 0.0004 (0.0012)  time: 1.5232  data: 0.0372  max mem: 12776
# Epoch: [0]  [3500/3815]  eta: 0:07:48  lr: 0.005000  loss: 0.0370 (0.0665)  loss_classifier: 0.0110 (0.0222)  loss_box_reg: 0.0255 (0.0411)  loss_objectness: 0.0001 (0.0020)  loss_rpn_box_reg: 0.0004 (0.0012)  time: 1.5947  data: 0.0373  max mem: 12776
# Epoch: [0]  [3600/3815]  eta: 0:05:19  lr: 0.005000  loss: 0.0369 (0.0657)  loss_classifier: 0.0124 (0.0219)  loss_box_reg: 0.0214 (0.0407)  loss_objectness: 0.0001 (0.0020)  loss_rpn_box_reg: 0.0003 (0.0012)  time: 1.4863  data: 0.0362  max mem: 12776
# Epoch: [0]  [3700/3815]  eta: 0:02:50  lr: 0.005000  loss: 0.0375 (0.0650)  loss_classifier: 0.0111 (0.0216)  loss_box_reg: 0.0237 (0.0403)  loss_objectness: 0.0001 (0.0019)  loss_rpn_box_reg: 0.0005 (0.0011)  time: 1.5034  data: 0.0389  max mem: 12776
# Epoch: [0]  [3800/3815]  eta: 0:00:22  lr: 0.005000  loss: 0.0380 (0.0643)  loss_classifier: 0.0103 (0.0214)  loss_box_reg: 0.0250 (0.0399)  loss_objectness: 0.0001 (0.0019)  loss_rpn_box_reg: 0.0004 (0.0011)  time: 1.4792  data: 0.0370  max mem: 12776
# Epoch: [0]  [3814/3815]  eta: 0:00:01  lr: 0.005000  loss: 0.0345 (0.0642)  loss_classifier: 0.0115 (0.0214)  loss_box_reg: 0.0240 (0.0398)  loss_objectness: 0.0002 (0.0019)  loss_rpn_box_reg: 0.0004 (0.0011)  time: 1.4308  data: 0.0350  max mem: 12776
# Epoch: [0] Total time: 1:34:25 (1.4850 s / it)
# creating index...
# index created!
# Test:  [ 0/50]  eta: 0:01:15  model_time: 0.5408 (0.5408)  evaluator_time: 0.0304 (0.0304)  time: 1.5029  data: 0.9208  max mem: 12776
# Test:  [49/50]  eta: 0:00:00  model_time: 0.1157 (0.1473)  evaluator_time: 0.0063 (0.0085)  time: 0.1627  data: 0.0124  max mem: 12776
# Test: Total time: 0:00:10 (0.2019 s / it)
# Averaged stats: model_time: 0.1157 (0.1473)  evaluator_time: 0.0063 (0.0085)
# Accumulating evaluation results...
# DONE (t=0.02s).
# IoU metric: bbox
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.846
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.965
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.664
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.867
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.620
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.789
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.863
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.671
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.884

In [None]:
# Epoch: [0]  [   0/3815]  eta: 5:40:04  lr: 0.000010  loss: 0.0482 (0.0482)  loss_classifier: 0.0128 (0.0128)  loss_box_reg: 0.0320 (0.0320)  loss_objectness: 0.0028 (0.0028)  loss_rpn_box_reg: 0.0005 (0.0005)  time: 5.3485  data: 2.3902  max mem: 4980
# Epoch: [0]  [ 100/3815]  eta: 0:52:30  lr: 0.000509  loss: 0.0299 (0.0363)  loss_classifier: 0.0094 (0.0120)  loss_box_reg: 0.0180 (0.0234)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8295  data: 0.0361  max mem: 7566
# Epoch: [0]  [ 200/3815]  eta: 0:49:31  lr: 0.001009  loss: 0.0304 (0.0345)  loss_classifier: 0.0093 (0.0112)  loss_box_reg: 0.0219 (0.0226)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7791  data: 0.0386  max mem: 7567
# Epoch: [0]  [ 300/3815]  eta: 0:47:25  lr: 0.001508  loss: 0.0267 (0.0332)  loss_classifier: 0.0085 (0.0107)  loss_box_reg: 0.0179 (0.0217)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0004)  time: 0.7831  data: 0.0374  max mem: 7580
# Epoch: [0]  [ 400/3815]  eta: 0:46:00  lr: 0.002008  loss: 0.0254 (0.0325)  loss_classifier: 0.0078 (0.0104)  loss_box_reg: 0.0195 (0.0214)  loss_objectness: 0.0000 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0004)  time: 0.8053  data: 0.0363  max mem: 8301
# Epoch: [0]  [ 500/3815]  eta: 0:44:22  lr: 0.002507  loss: 0.0300 (0.0330)  loss_classifier: 0.0096 (0.0104)  loss_box_reg: 0.0196 (0.0219)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0004)  time: 0.7881  data: 0.0366  max mem: 8301
# Epoch: [0]  [ 600/3815]  eta: 0:42:48  lr: 0.003007  loss: 0.0299 (0.0331)  loss_classifier: 0.0096 (0.0104)  loss_box_reg: 0.0181 (0.0220)  loss_objectness: 0.0000 (0.0002)  loss_rpn_box_reg: 0.0003 (0.0004)  time: 0.7799  data: 0.0375  max mem: 8301
# Epoch: [0]  [ 700/3815]  eta: 0:41:30  lr: 0.003506  loss: 0.0342 (0.0335)  loss_classifier: 0.0095 (0.0104)  loss_box_reg: 0.0233 (0.0224)  loss_objectness: 0.0001 (0.0002)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8029  data: 0.0393  max mem: 8301
# Epoch: [0]  [ 800/3815]  eta: 0:40:12  lr: 0.004006  loss: 0.0271 (0.0338)  loss_classifier: 0.0078 (0.0104)  loss_box_reg: 0.0188 (0.0227)  loss_objectness: 0.0000 (0.0002)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8359  data: 0.0394  max mem: 8301
# Epoch: [0]  [ 900/3815]  eta: 0:38:49  lr: 0.004505  loss: 0.0292 (0.0338)  loss_classifier: 0.0092 (0.0105)  loss_box_reg: 0.0195 (0.0227)  loss_objectness: 0.0001 (0.0002)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7757  data: 0.0408  max mem: 8303
# Epoch: [0]  [1000/3815]  eta: 0:37:26  lr: 0.005000  loss: 0.0376 (0.0343)  loss_classifier: 0.0083 (0.0106)  loss_box_reg: 0.0244 (0.0230)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0005 (0.0005)  time: 0.7923  data: 0.0363  max mem: 8303
# Epoch: [0]  [1100/3815]  eta: 0:36:05  lr: 0.005000  loss: 0.0334 (0.0345)  loss_classifier: 0.0095 (0.0107)  loss_box_reg: 0.0208 (0.0231)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.8237  data: 0.0382  max mem: 8303
# Epoch: [0]  [1200/3815]  eta: 0:34:41  lr: 0.005000  loss: 0.0279 (0.0344)  loss_classifier: 0.0087 (0.0107)  loss_box_reg: 0.0190 (0.0230)  loss_objectness: 0.0000 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7455  data: 0.0416  max mem: 8303
# Epoch: [0]  [1300/3815]  eta: 0:33:20  lr: 0.005000  loss: 0.0367 (0.0347)  loss_classifier: 0.0114 (0.0107)  loss_box_reg: 0.0242 (0.0232)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8035  data: 0.0369  max mem: 8303
# Epoch: [0]  [1400/3815]  eta: 0:32:01  lr: 0.005000  loss: 0.0296 (0.0346)  loss_classifier: 0.0093 (0.0107)  loss_box_reg: 0.0213 (0.0231)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7964  data: 0.0384  max mem: 8303
# Epoch: [0]  [1500/3815]  eta: 0:30:40  lr: 0.005000  loss: 0.0281 (0.0347)  loss_classifier: 0.0083 (0.0108)  loss_box_reg: 0.0200 (0.0232)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0005)  time: 0.7836  data: 0.0388  max mem: 8303
# Epoch: [0]  [1600/3815]  eta: 0:29:19  lr: 0.005000  loss: 0.0295 (0.0349)  loss_classifier: 0.0081 (0.0108)  loss_box_reg: 0.0187 (0.0233)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7345  data: 0.0391  max mem: 8303
# Epoch: [0]  [1700/3815]  eta: 0:27:56  lr: 0.005000  loss: 0.0307 (0.0351)  loss_classifier: 0.0088 (0.0108)  loss_box_reg: 0.0206 (0.0235)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7649  data: 0.0376  max mem: 8303
# Epoch: [0]  [1800/3815]  eta: 0:26:37  lr: 0.005000  loss: 0.0286 (0.0350)  loss_classifier: 0.0084 (0.0108)  loss_box_reg: 0.0196 (0.0235)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8076  data: 0.0350  max mem: 8303
# Epoch: [0]  [1900/3815]  eta: 0:25:17  lr: 0.005000  loss: 0.0253 (0.0348)  loss_classifier: 0.0069 (0.0107)  loss_box_reg: 0.0174 (0.0234)  loss_objectness: 0.0000 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0005)  time: 0.8044  data: 0.0344  max mem: 8303
# Epoch: [0]  [2000/3815]  eta: 0:23:59  lr: 0.005000  loss: 0.0331 (0.0349)  loss_classifier: 0.0100 (0.0107)  loss_box_reg: 0.0217 (0.0234)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8342  data: 0.0346  max mem: 8303
# Epoch: [0]  [2100/3815]  eta: 0:22:40  lr: 0.005000  loss: 0.0286 (0.0349)  loss_classifier: 0.0097 (0.0107)  loss_box_reg: 0.0184 (0.0234)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7971  data: 0.0340  max mem: 8303
# Epoch: [0]  [2200/3815]  eta: 0:21:19  lr: 0.005000  loss: 0.0293 (0.0347)  loss_classifier: 0.0082 (0.0107)  loss_box_reg: 0.0200 (0.0233)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7738  data: 0.0382  max mem: 8303
# Epoch: [0]  [2300/3815]  eta: 0:19:59  lr: 0.005000  loss: 0.0348 (0.0349)  loss_classifier: 0.0102 (0.0107)  loss_box_reg: 0.0233 (0.0234)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7840  data: 0.0353  max mem: 8303
# Epoch: [0]  [2400/3815]  eta: 0:18:40  lr: 0.005000  loss: 0.0391 (0.0351)  loss_classifier: 0.0098 (0.0108)  loss_box_reg: 0.0279 (0.0235)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.8004  data: 0.0377  max mem: 8303
# Epoch: [0]  [2500/3815]  eta: 0:17:21  lr: 0.005000  loss: 0.0313 (0.0351)  loss_classifier: 0.0087 (0.0108)  loss_box_reg: 0.0213 (0.0235)  loss_objectness: 0.0002 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.8043  data: 0.0367  max mem: 8303
# Epoch: [0]  [2600/3815]  eta: 0:16:02  lr: 0.005000  loss: 0.0308 (0.0349)  loss_classifier: 0.0094 (0.0107)  loss_box_reg: 0.0202 (0.0234)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7700  data: 0.0386  max mem: 8303
# Epoch: [0]  [2700/3815]  eta: 0:14:41  lr: 0.005000  loss: 0.0268 (0.0347)  loss_classifier: 0.0083 (0.0107)  loss_box_reg: 0.0174 (0.0233)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7783  data: 0.0363  max mem: 8303
# Epoch: [0]  [2800/3815]  eta: 0:13:22  lr: 0.005000  loss: 0.0307 (0.0346)  loss_classifier: 0.0094 (0.0107)  loss_box_reg: 0.0208 (0.0232)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7832  data: 0.0346  max mem: 8303
# Epoch: [0]  [2900/3815]  eta: 0:12:03  lr: 0.005000  loss: 0.0341 (0.0346)  loss_classifier: 0.0082 (0.0107)  loss_box_reg: 0.0227 (0.0232)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0005)  time: 0.8156  data: 0.0383  max mem: 8303
# Epoch: [0]  [3000/3815]  eta: 0:10:44  lr: 0.005000  loss: 0.0346 (0.0348)  loss_classifier: 0.0102 (0.0108)  loss_box_reg: 0.0240 (0.0233)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7929  data: 0.0357  max mem: 8303
# Epoch: [0]  [3100/3815]  eta: 0:09:25  lr: 0.005000  loss: 0.0317 (0.0347)  loss_classifier: 0.0099 (0.0107)  loss_box_reg: 0.0203 (0.0232)  loss_objectness: 0.0002 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7600  data: 0.0382  max mem: 8303
# Epoch: [0]  [3200/3815]  eta: 0:08:06  lr: 0.005000  loss: 0.0307 (0.0347)  loss_classifier: 0.0102 (0.0107)  loss_box_reg: 0.0202 (0.0232)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7697  data: 0.0360  max mem: 8303
# Epoch: [0]  [3300/3815]  eta: 0:06:47  lr: 0.005000  loss: 0.0282 (0.0346)  loss_classifier: 0.0080 (0.0107)  loss_box_reg: 0.0190 (0.0232)  loss_objectness: 0.0000 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0005)  time: 0.8271  data: 0.0373  max mem: 8303
# Epoch: [0]  [3400/3815]  eta: 0:05:28  lr: 0.005000  loss: 0.0270 (0.0345)  loss_classifier: 0.0077 (0.0107)  loss_box_reg: 0.0194 (0.0231)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7703  data: 0.0366  max mem: 8303
# Epoch: [0]  [3500/3815]  eta: 0:04:08  lr: 0.005000  loss: 0.0241 (0.0344)  loss_classifier: 0.0080 (0.0107)  loss_box_reg: 0.0172 (0.0230)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7804  data: 0.0375  max mem: 8303
# Epoch: [0]  [3600/3815]  eta: 0:02:49  lr: 0.005000  loss: 0.0292 (0.0343)  loss_classifier: 0.0086 (0.0106)  loss_box_reg: 0.0200 (0.0230)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7968  data: 0.0349  max mem: 8303
# Epoch: [0]  [3700/3815]  eta: 0:01:30  lr: 0.005000  loss: 0.0302 (0.0343)  loss_classifier: 0.0083 (0.0106)  loss_box_reg: 0.0197 (0.0229)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0002 (0.0005)  time: 0.7514  data: 0.0389  max mem: 8303
# Epoch: [0]  [3800/3815]  eta: 0:00:11  lr: 0.005000  loss: 0.0272 (0.0343)  loss_classifier: 0.0081 (0.0106)  loss_box_reg: 0.0192 (0.0229)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0003 (0.0005)  time: 0.7898  data: 0.0372  max mem: 8303
# Epoch: [0]  [3814/3815]  eta: 0:00:00  lr: 0.005000  loss: 0.0293 (0.0343)  loss_classifier: 0.0094 (0.0106)  loss_box_reg: 0.0208 (0.0229)  loss_objectness: 0.0001 (0.0003)  loss_rpn_box_reg: 0.0004 (0.0005)  time: 0.7795  data: 0.0337  max mem: 8303
# Epoch: [0] Total time: 0:50:13 (0.7899 s / it)
# creating index...
# index created!
# Test:  [ 0/50]  eta: 0:00:59  model_time: 0.2925 (0.2925)  evaluator_time: 0.0252 (0.0252)  time: 1.1811  data: 0.8524  max mem: 8303
# Test:  [49/50]  eta: 0:00:00  model_time: 0.0684 (0.0997)  evaluator_time: 0.0067 (0.0075)  time: 0.1138  data: 0.0148  max mem: 8303
# Test: Total time: 0:00:07 (0.1593 s / it)
# Averaged stats: model_time: 0.0684 (0.0997)  evaluator_time: 0.0067 (0.0075)
# Accumulating evaluation results...
# DONE (t=0.02s).
# IoU metric: bbox
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.872
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.978
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.872
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.860
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.892
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.892
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.892

# Test thử khả năng phát hiện khuôn mặt của model bằng cách tạo bounding box trên ảnh

In [None]:
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

num_classes = 2
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# get the model using our helper function
model = get_model(num_classes)
# move model to the right device
model.to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("/kaggle/input/modelweight/model_v2.pth", map_location=device))

image = read_image("/kaggle/input/face-public-test/public_test/102495577.jpg")
eval_transform = get_transform(train=False)

model.eval()
with torch.no_grad():
    x = eval_transform(image)
    # convert RGBA -> RGB and move to device
    x = x[:3, ...].to(device)
    predictions = model([x, ])
    pred = predictions[0]


# Rescale the original image to the full 0-255 range and drop any alpha
# channel so it can be used as the drawing canvas.
image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
image = image[:3, ...]
# One caption per detection; there is a single foreground class, so only the score varies.
pred_labels = [f"face: {score:.3f}" for score in pred["scores"]]
pred_boxes = pred["boxes"]
if len(pred_boxes) > 0:
    print(pred_boxes[0].tolist())
    output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red", font="/kaggle/input/boxesfont/Roboto-Bold.ttf",font_size=20)
else:
    # No detection: indexing pred_boxes[0] would raise IndexError, so show the plain image.
    print("No face detected")
    output_image = image


plt.figure(figsize=(12, 12))
plt.imshow(output_image.permute(1, 2, 0))

# Chạy model trên tập test dataset

In [None]:
import matplotlib.pyplot as plt

from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

num_classes = 2
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# get the model using our helper function
model = get_model(num_classes)
# model= nn.DataParallel(model)
# move model to the right device
model.to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("/kaggle/input/modelweight/model_v2.pth", map_location=device))
# Loop-invariant setup: switch to inference mode and build the transform once.
model.eval()
eval_transform = get_transform(train=False)

TEST_IMAGE_DIR = "/kaggle/input/face-public-test/public_test"
# os.listdir returns entries in arbitrary order; sort so the output table is reproducible.
filelist = sorted(os.listdir(TEST_IMAGE_DIR))

# Column-oriented accumulator: one row per predicted box.
img_dict = {
    "file_name": [],
    "bbox": []
}
for filename in filelist:
    image = read_image(f"{TEST_IMAGE_DIR}/{filename}")

    with torch.no_grad():
        x = eval_transform(image)
        # convert RGBA -> RGB and move to device
        x = x[:3, ...].to(device)
        predictions = model([x, ])
        pred = predictions[0]

    pred_boxes = pred["boxes"]
    # Convert each predicted [x_min, y_min, x_max, y_max] box to integer
    # [x, y, width, height], the layout the training labels use.
    for x_min, y_min, x_max, y_max in pred_boxes.tolist():
        img_dict["file_name"].append(filename)
        img_dict["bbox"].append([int(x_min), int(y_min), int(abs(x_max - x_min)), int(abs(y_max - y_min))])

df = pd.DataFrame(img_dict)
display(df)