In [1]:
from model import YOLOv3
from library import *
from config import learning_rate, ANCHORS, device, s
from utils import load_checkpoint, convert_cells_to_bboxes, plot_image, plot_custom_image
from dataset import Dataset
from augment import test_transform
from loss import YOLOLoss
from metrics import nms, mean_average_precision
import multiprocessing

In [2]:
# Check point file
checkpoint_file = "save model/checkpoint_custom_final.pth.tar"
# Setting the load_model to True
load_model = True
  
# Defining the model, optimizer, loss function and scaler
model = YOLOv3(num_classes=3).to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
loss_fn = YOLOLoss()
scaler = torch.cuda.amp.GradScaler()
multiprocessing.freeze_support()
# Loading the checkpoint
if load_model:
    load_checkpoint(checkpoint_file, model, optimizer, learning_rate)

print("Hoan thanh checkpoint model")

==> Loading checkpoint
Hoan thanh checkpoint model


In [3]:
# Defining the test dataset and data loader
test_dataset = Dataset(
    csv_file="custom data/test.csv",
    image_dir="custom data/dataset_resized/dataset_resized/Img",
    label_dir="custom data/dataset_resized/dataset_resized/Label",
    anchors=ANCHORS,
    transform=test_transform,
    num_classes=3
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = 8,
    num_workers = 2,
    shuffle = False,
)

In [None]:
# Getting a sample image from the test data loader
pred_boxes = []
for (x, y) in test_loader:
    x = x.to(device)
    with torch.no_grad():
        # Getting the model predictions
        output = model(x)
        # Getting the bounding boxes from the predictions
        bboxes = [[] for _ in range(x.shape[0])]
        anchors = (
                torch.tensor(ANCHORS)
                    * torch.tensor(s).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
                ).to(device)
        # Getting bounding boxes for each scale
        for i in range(3):
            batch_size, A, S, _, _ = output[i].shape
            anchor = anchors[i]
            boxes_scale_i = convert_cells_to_bboxes(
                                output[i], anchor, s=S, is_predictions=True
                            )
            for idx, (box) in enumerate(boxes_scale_i):
                bboxes[idx] += box
    model.train()
    # Create figure and axes
    # Plotting the image with bounding boxes for each image in the batch
    for i in range(batch_size):
        # Applying non-max suppression to remove overlapping bounding boxes
        nms_boxes = nms(bboxes[i], iou_threshold=0.3, threshold=0.6)
        pred_boxes.append(nms_boxes)
        plot_custom_image(x[i].permute(1,2,0).detach().cpu(), nms_boxes)
    

In [16]:
pred_boxes[1][1]

[0.0,
 0.7091950178146362,
 0.470702588558197,
 0.3809179365634918,
 0.44609999656677246,
 0.38880646228790283]

In [None]:
target_boxes = []
# Creating a dataset object
dataset = Dataset(
    csv_file="custom data/test.csv",
    image_dir="custom data/dataset_resized/dataset_resized/Img",
    label_dir="custom data/dataset_resized/dataset_resized/Label",
    grid_sizes=[13, 26, 52],
    anchors=ANCHORS,
    transform=test_transform
)

# Creating a dataloader object
loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=1,
    shuffle=True,
)
# Defining the grid size and the scaled anchors
GRID_SIZE = [13, 26, 52]
scaled_anchors = torch.tensor(ANCHORS) / (
    1 / torch.tensor(GRID_SIZE).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)

# Getting a batch from the dataloader
for(x, y) in loader:
    # Getting the boxes coordinates from the labels
    # and converting them into bounding boxes without scaling
    boxes = []
    for i in range(y[0].shape[1]):
        anchor = scaled_anchors[i]
        boxes += convert_cells_to_bboxes(
                y[i], is_predictions=False, s=y[i].shape[2], anchors=anchor
                )[0]

    # Applying non-maximum suppression
    boxes = nms(boxes, iou_threshold=1, threshold=0.7)
    target_boxes.append(boxes)
    # Plotting the image with the bounding boxes
    plot_custom_image(x[0].permute(1,2,0).to("cpu"), boxes)

In [86]:
img_pred_boxes = pred_boxes[0]
for box in img_pred_boxes:
    print(box)
    break

[0.0, 0.6059927344322205, 0.39271607995033264, 0.3791066110134125, 0.38310161232948303, 0.2940683364868164]


In [108]:
import torch
from metrics import iou
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20, num_img=35
):
    """
    Calculates mean average precision 

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar as pred_boxes except all the correct ones 
        iou_threshold (float): threshold where predicted bboxes is correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        float: mAP value across all classes given a specific IoU threshold 
    """

    # list storing all AP for respective classes
    average_precisions = []
    k = 0
    # used for numerical stability later on
    epsilon = 1e-6
    for i in range(num_img):
        img_pred_boxes = pred_boxes[i]
        img_true_boxes = true_boxes[i]
        print("Target boxes", img_true_boxes)
        for c in range(num_classes):
            detections = []
            ground_truths = []

            # Go through all predictions and targets,
            # and only add the ones that belong to the
            # current class c
            for detection in img_pred_boxes:
                if detection[0] == c:
                    detections.append(detection)

            for true_box in img_true_boxes:
                if true_box[0] == c:
                    ground_truths.append(true_box)
            if detections == [] or ground_truths == []:
                continue
            print("Detection", detections)
            print("Ground truth", ground_truths)
            k+=1
            # find the amount of bboxes for each training example
            # Counter here finds how many ground truth bboxes we get
            # for each training example, so let's say img 0 has 3,
            # img 1 has 5 then we will obtain a dictionary with:
            # amount_bboxes = {0:3, 1:5}
            amount_bboxes = {}
            for i in range(num_img):
                amount_bboxes[i] = 1
            # We then go through each key, val in this dictionary
            # and convert to the following (w.r.t same example):
            # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
            for key, val in amount_bboxes.items():
                amount_bboxes[key] = torch.zeros(val)

            # sort by box probabilities which is index 2
            detections.sort(key=lambda x: x[1], reverse=True)
            TP = torch.zeros((len(detections)))
            FP = torch.zeros((len(detections)))
            total_true_bboxes = len(ground_truths)
            
            # If none exists for this class then we can safely skip
            if total_true_bboxes == 0:
                continue

            for detection_idx, detection in enumerate(detections):
                # Only take out the ground_truths that have the same
                # training idx as detection
                ground_truth_img = [bbox for bbox in ground_truths]

                num_gts = len(ground_truth_img)
                best_iou = 0
                best_gt_idx = 0
                for idx, gt in enumerate(ground_truth_img):
                    iou_metric = iou(
                        torch.tensor(detection[2:]),
                        torch.tensor(gt[2:]),
                    )

                    if iou_metric > best_iou:
                        best_iou = iou_metric
                        best_gt_idx = idx

                if best_iou > iou_threshold:
                    # only detect ground truth detection once
                    if amount_bboxes[i][best_gt_idx] == 0:
                        # true positive and add this bounding box to seen
                        TP[detection_idx] = 1
                        amount_bboxes[i][best_gt_idx] = 1
                    else:
                        FP[detection_idx] = 1

                # if IOU is lower then the detection is a false positive
                else:
                    FP[detection_idx] = 1

            TP_cumsum = torch.cumsum(TP, dim=0)
            FP_cumsum = torch.cumsum(FP, dim=0)
            recalls = TP_cumsum / (total_true_bboxes + epsilon)
            precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
            precisions = torch.cat((torch.tensor([1]), precisions))
            recalls = torch.cat((torch.tensor([0]), recalls))
            # torch.trapz for numerical integration
            average_precisions.append(torch.trapz(precisions, recalls))
    print("k la", k)
    return sum(average_precisions) / len(average_precisions)

In [109]:
mean_average_precision(pred_boxes, target_boxes, num_classes=2,num_img=35)

Target boxes [[1.0, 1.0, 0.37620192766189575, 0.37860578298568726, 0.286057710647583, 0.5600962042808533]]
Target boxes [[0.0, 1.0, 0.46514424681663513, 0.4146634638309479, 0.7475962042808533, 0.5264423489570618]]
Detection [[0.0, 0.7336120009422302, 0.39415493607521057, 0.39217254519462585, 0.4283025860786438, 0.33205217123031616], [0.0, 0.7091950178146362, 0.470702588558197, 0.3809179365634918, 0.44609999656677246, 0.38880646228790283], [0.0, 0.6922194361686707, 0.49094563722610474, 0.3764212429523468, 0.3973250389099121, 0.4430702328681946], [0.0, 0.6351394653320312, 0.5077136754989624, 0.3731922507286072, 0.3621458411216736, 0.40502721071243286]]
Ground truth [[0.0, 1.0, 0.46514424681663513, 0.4146634638309479, 0.7475962042808533, 0.5264423489570618]]
Target boxes [[0.0, 1.0, 0.4531250298023224, 0.41826924681663513, 0.7139423489570618, 0.5240384936332703]]
Target boxes [[1.0, 1.0, 0.2343750149011612, 0.4302884638309479, 0.4062500298023224, 0.6826923489570618]]
Target boxes [[0.0, 1

tensor(0.1786)

In [84]:
map = calculate_mAP(pred_boxes, target_boxes, num_classes=3, num_img=len(target_boxes))

In [107]:
a= [[0.0, 0.7336120009422302, 0.39415493607521057, 0.39217254519462585, 0.4283025860786438, 0.33205217123031616], [0.0, 0.7091950178146362, 0.470702588558197, 0.3809179365634918, 0.44609999656677246, 0.38880646228790283], [0.0, 0.6922194361686707, 0.49094563722610474, 0.3764212429523468, 0.3973250389099121, 0.4430702328681946], [0.0, 0.6351394653320312, 0.5077136754989624, 0.3731922507286072, 0.3621458411216736, 0.40502721071243286]]
print(np.asarray(a).shape)

(4, 6)


In [76]:
map

[]

In [56]:
a = []
if a == []:
    print("ok")

ok
