In [1]:
from script.model_localization import pyramid, sliding_window, iou_bbox, non_maximum_supperssion, visualize_bbox
from script.model_classification import SVMObjectClassifier
from sklearn.model_selection import train_test_split
from script.dataset import load_vehicle_dataset, load_traffic_signboard_dataset_test
from script.feature_source import FeatureExtracter
from script.training import training_model
from script.slider import Slider
import numpy as np
import argparse
import time
import cv2
import os

In [2]:
def iou(box1, box2):
    """Compute the Intersection over Union (IoU) of two bounding boxes.

    The ``+ 1`` terms below treat the corner coordinates as inclusive, so a
    box whose two corners coincide has an area of 1.

    Args:
        box1: First box as a 4-item sequence ``(x1, y1, x2, y2)``.
        box2: Second box in the same format.

    Returns:
        float: Intersection area divided by union area. Returns 0.0 when the
        union area is not positive (degenerate boxes) instead of raising
        ``ZeroDivisionError``.
    """
    x1, y1, x2, y2 = box1
    x1g, y1g, x2g, y2g = box2

    # Corners of the intersection rectangle.
    xi1 = max(x1, x1g)
    yi1 = max(y1, y1g)
    xi2 = min(x2, x2g)
    yi2 = min(y2, y2g)

    # Clamp each side at 0 so disjoint boxes give an empty intersection.
    inter_area = max(0, xi2 - xi1 + 1) * max(0, yi2 - yi1 + 1)

    box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)
    box2_area = (x2g - x1g + 1) * (y2g - y1g + 1)

    # Union = sum of both areas minus the part counted twice.
    union_area = box1_area + box2_area - inter_area
    if union_area <= 0:
        # Degenerate input (e.g. x2 == x1 - 1): nothing meaningful to overlap.
        return 0.0
    return inter_area / float(union_area)

def evaluate_detection(pred_boxes, pred_labels, gt_boxes, gt_labels, iou_threshold=0.5):
    """
    Evaluate precision and recall for one set of detections.

    Each prediction is matched to the not-yet-matched ground-truth box with
    the same label and the highest IoU (at least ``iou_threshold``). Taking
    the best overlap rather than the first qualifying box stops a loosely
    overlapping prediction from claiming a ground truth that a later
    prediction fits, which would otherwise create a spurious FP and FN.

    Args:
        pred_boxes (list of list): Predicted bounding boxes [x1, y1, x2, y2].
        pred_labels (list): Predicted labels, parallel to ``pred_boxes``.
        gt_boxes (list of list): Ground truth bounding boxes [x1, y1, x2, y2].
        gt_labels (list): Ground truth labels, parallel to ``gt_boxes``.
        iou_threshold (float): Minimum IoU for a prediction to count as a match.

    Returns:
        precision (float), recall (float). Each is 0 when its denominator
        (TP + FP, respectively TP + FN) is 0.

    Raises:
        AssertionError: If a box list and its label list differ in length.
    """
    assert len(pred_boxes) == len(pred_labels)
    assert len(gt_boxes) == len(gt_labels)

    TP = 0
    FP = 0

    # detected_gt[i] becomes True once ground truth i has been claimed, so a
    # second prediction on the same object counts as a false positive.
    detected_gt = [False] * len(gt_boxes)

    for pred_box, pred_label in zip(pred_boxes, pred_labels):
        best_index = None
        best_overlap = 0.0
        for i, (gt_box, gt_label) in enumerate(zip(gt_boxes, gt_labels)):
            if detected_gt[i] or pred_label != gt_label:
                continue
            overlap = iou(pred_box, gt_box)
            # Strict '>' keeps the earliest ground truth on ties.
            if overlap >= iou_threshold and (best_index is None or overlap > best_overlap):
                best_index = i
                best_overlap = overlap
        if best_index is None:
            FP += 1
        else:
            detected_gt[best_index] = True
            TP += 1

    # Every ground truth that no prediction claimed is a miss.
    FN = detected_gt.count(False)

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0

    return precision, recall

In [3]:
# Keyword arguments forwarded to FeatureExtracter. The trailing comments
# appear to list alternative values for each setting.
sourcer_params = {
  'spatial_size': (32, 32), # (16, 16), (32, 32), (64, 64)
  'orientations': 9,        # 6 - 12
  'pixels_per_cell': 8,     # 8, 16
  'cells_per_block': 2,     # 1, 2
  'transform_sqrt': True,
  'block_norm': 'L2',
  'hog_visualize': False
}
exist_path = './save_model/model.pkl'
save_path = './save_model'
feature_extracter = FeatureExtracter(**sourcer_params)
model = SVMObjectClassifier(C=0.3)
model.set_feature_extracter(feature_extracter)
# Check the single `exist_path` definition instead of rebuilding the same
# path by string concatenation.
if os.path.exists(exist_path):
    print('Loading model...')
    model.load(save_path)
else:
    # No cell in this view trains the classifier, so a missing file would
    # otherwise go unnoticed until the evaluation cells run.
    print(f'WARNING: no saved model found at {exist_path}; the classifier was not loaded.')

Loading model...


In [4]:
import random
# Fixed seed so the same evaluation subset is drawn on every run.
random.seed(42)

images, labels, bounding_boxes = load_traffic_signboard_dataset_test()

# Evaluate on at most 50 randomly chosen images. The cap keeps random.sample
# from raising ValueError when the dataset holds fewer than 50 images.
index_range = list(range(len(images)))
random_index = random.sample(index_range, min(50, len(index_range)))

# Parallel lists: entry k of each list belongs to the same sampled image.
test_images = [images[index] for index in random_index]
gt_boxes = [bounding_boxes[index] for index in random_index]
gt_labels = [labels[index] for index in random_index]


In [18]:
from tqdm import tqdm
# One entry per test image: a list of boxes and a parallel list of labels.
pred_boxes = []
pred_labels = []

# Window sizes tried on every image; loop-invariant, so defined once.
windowSize = [(20, 20), (40, 40), (60, 60), (100, 100)]

for image in tqdm(test_images):
    predict_bbox = []
    for window_size in windowSize:
        # A new Slider is still built per image and window size, as before.
        # NOTE(review): 5 is presumably the step size and 0.995 the score
        # threshold - confirm against script.slider.
        slider = Slider(model, window_size, 5, scale=1.75, visualize=False)
        predict_bbox += slider.predict(image, 0.995)
    # NOTE(review): 0.2 is presumably the overlap threshold used by NMS - confirm.
    predict_bbox = non_maximum_supperssion(predict_bbox, 0.2)
    # Each detection is split into its first four items and its fifth item,
    # which the evaluation cell compares against ground-truth boxes and labels.
    pred_boxes.append([box[:4] for box in predict_bbox])
    pred_labels.append([box[4] for box in predict_bbox])


100%|██████████| 50/50 [11:34<00:00, 13.89s/it]


In [19]:
# Example of the arguments evaluate_detection takes for a single image:
#   pred_boxes = [[50, 50, 100, 100], [30, 30, 60, 60]]
#   pred_labels = ['cat', 'dog']
#   gt_boxes = [[48, 48, 98, 98], [29, 29, 58, 58]]
#   gt_labels = ['cat', 'dog']

# Accumulate per-image precision/recall, then average over images.
sum_precision = 0
sum_recall = 0
for pred_box, pred_label, gt_box, gt_label in zip(pred_boxes, pred_labels, gt_boxes, gt_labels):
    precision, recall = evaluate_detection(pred_box, pred_label, gt_box, gt_label)
    sum_precision += precision
    sum_recall += recall

# Guard the averages so an empty prediction list reports 0 instead of
# raising ZeroDivisionError.
num_images = len(pred_boxes)
average_precision = sum_precision / num_images if num_images else 0.0
average_recall = sum_recall / num_images if num_images else 0.0

# F1 is undefined when precision and recall are both 0; report 0 in that case.
pr_sum = average_precision + average_recall
f1_score = 2 * average_precision * average_recall / pr_sum if pr_sum > 0 else 0.0
print(f"Average Precision: {average_precision}, Average Recall: {average_recall}, F1 Score: {f1_score}")

Average Precision: 0.5633333333333334, Average Recall: 0.5733333333333333, F1 Score: 0.5682893450635386


In [None]:
import os
import cv2
import glob
import time
import numpy as np
import xml.etree.ElementTree as ET
# Parse a single annotation file and collect its boxes and labels.
dataset_dir = './dataset/traffic_sign_board'
image_dir = os.path.join(dataset_dir, 'images')
annotation_dir = os.path.join(dataset_dir, 'annotations')

file_path = os.path.join(annotation_dir, 'road0.xml')
tree = ET.parse(file_path)
root = tree.getroot()

# The annotation names its image file; load that image from image_dir.
image_file = root.find('filename').text
image_path = os.path.join(image_dir, image_file)
img = cv2.imread(image_path)
image_label = []
bounding_box = []
adding = False  # not used in this part of the cell
label = None
# The loop variable is `obj`, not `object`: rebinding the builtin `object`
# at notebook scope would break it for every cell executed afterwards.
for obj in root.findall('object'):
    label = obj.find('name').text
    # 'trafficlight' objects are left out of the collected boxes and labels.
    if label == 'trafficlight':
        continue
    xmin = int(obj.find('bndbox/xmin').text)
    ymin = int(obj.find('bndbox/ymin').text)
    xmax = int(obj.find('bndbox/xmax').text)
    ymax = int(obj.find('bndbox/ymax').text)
    bounding_box.append([xmin, ymin, xmax, ymax])
    image_label.append(label)
# NOTE(review): this prints the name of the last object visited, even if it
# was skipped above - confirm whether image_label was intended.
print(label)