In [1]:
import os
import cv2
import torch
import numpy as np
from src.utils import *
from src.yolo_net import Yolo
import xml.etree.ElementTree as ET

In [2]:
# Object category names, in alphabetical order; handed to post_processing
# when the network output is decoded.
CLASSES = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

In [3]:
# Directory that holds the ground-truth annotation files (parsed as XML below).
annotation_path = '/mnt/YOLOv1/dataset/VOC2007test/Annotations'
# Every entry of that directory, sorted by name so the processing order is stable.
annotations = os.listdir(annotation_path)
annotations.sort()

In [6]:
# Convert every XML annotation into a plain-text ground-truth file:
# one line per object, formatted as "<name> <xmin> <ymin> <xmax> <ymax>".
# NOTE(review): the <difficult> flag of each object is not exported — confirm
# whether the downstream mAP tool should be told about difficult objects.
print('Start making ground-truth!')
ground_truth_dir = 'mAP/input/ground-truth'
# Create the output folder up front so open() cannot fail on a fresh checkout.
os.makedirs(ground_truth_dir, exist_ok=True)
for annotation in annotations:
    # splitext keeps dots inside the file stem intact (split('.')[0] would truncate it).
    stem, extension = os.path.splitext(annotation)
    if extension.lower() != '.xml':
        # Stray non-XML directory entries would make ET.parse raise.
        continue
    tree = ET.parse(os.path.join(annotation_path, annotation))
    # Plain 'w' is enough: the file is only written, never read back.
    with open(os.path.join(ground_truth_dir, stem + '.txt'), 'w') as f:
        for obj in tree.findall('object'):
            name = str(obj.find('name').text)
            bndbox = obj.find('bndbox')
            # Coordinates are copied verbatim as text, in xmin/ymin/xmax/ymax order.
            coords = [str(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            f.write(' '.join([name] + coords) + '\n')
print('Finish making ground-truth!')

Start making ground-truth!
Finish making ground-truth!


In [3]:
# Run the trained detector over every test image and write one text file per
# image: one line per detection, formatted as
# "<class> <confidence> <xmin> <ymin> <xmax> <ymax>" in original-image pixels.
print('Start making detection-results!')
image_size = 448
images_path = '/mnt/YOLOv1/dataset/VOC2007test/JPEGImages'
images = sorted(os.listdir(images_path))
detection_results_dir = 'mAP/input/detection-results'
# Create the output folder up front so open() cannot fail on a fresh checkout.
os.makedirs(detection_results_dir, exist_ok=True)

# Choose the device once so the model and its inputs always live on the same
# one; the cell then also runs on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Yolo(len(CLASSES))
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
model.load_state_dict(torch.load('trained_models/yolov2_voc_weights1.pth', map_location=device))
model.to(device)
model.eval()

for img in images:
    image_path = os.path.join(images_path, img)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable / non-image file by returning None.
        print('Skip ' + img + ' (not a readable image)!')
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width = image.shape[:2]
    image = cv2.resize(image, (image_size, image_size))
    # HWC -> CHW, then add a leading batch axis of size 1.
    image = np.transpose(np.array(image, dtype=np.float32), (2, 0, 1))
    image = np.ascontiguousarray(image[None, :, :, :])
    # Scale factors from original resolution to network input resolution;
    # dividing by them below maps boxes back onto the original image.
    width_ratio = float(image_size) / width
    height_ratio = float(image_size) / height
    data = torch.from_numpy(image).to(device)
    with torch.no_grad():
        logits = model(data)
        # NOTE(review): 0.35 and 0.5 are presumably the confidence and NMS
        # thresholds — confirm against src.utils.post_processing.
        predictions = post_processing(logits, image_size, CLASSES, model.anchors, 0.35, 0.5)
    # An empty result means no detections; otherwise take the entry for the
    # single image in the batch.
    predictions = predictions[0] if len(predictions) != 0 else []
    # splitext keeps dots inside the file stem intact (split('.')[0] would truncate it).
    stem = os.path.splitext(img)[0]
    with open(os.path.join(detection_results_dir, stem + '.txt'), 'w') as f:
        for pred in predictions:
            # pred is used as (x, y, w, h, confidence, class name); the box is
            # rescaled to the original image and clipped to its borders.
            xmin = int(max(pred[0] / width_ratio, 0))
            ymin = int(max(pred[1] / height_ratio, 0))
            xmax = int(min((pred[0] + pred[2]) / width_ratio, width))
            ymax = int(min((pred[1] + pred[3]) / height_ratio, height))
            f.write(' '.join((pred[5], '{0:.2f}'.format(pred[4]), str(xmin), str(ymin), str(xmax), str(ymax))) + '\n')
    print('Done ' + img + '!')
print('Finish making detection-results!')

Start making detection-results!
Done 000001.jpg!
Done 000002.jpg!
Done 000003.jpg!
Done 000004.jpg!
Done 000006.jpg!
Done 000008.jpg!
Done 000010.jpg!
Done 000011.jpg!
Done 000013.jpg!
Done 000014.jpg!
Done 000015.jpg!
Done 000018.jpg!
Done 000022.jpg!
Done 000025.jpg!
Done 000027.jpg!
Done 000028.jpg!
Done 000029.jpg!
Done 000031.jpg!
Done 000037.jpg!
Done 000038.jpg!
Done 000040.jpg!
Done 000043.jpg!
Done 000045.jpg!
Done 000049.jpg!
Done 000053.jpg!
Done 000054.jpg!
Done 000055.jpg!
Done 000056.jpg!
Done 000057.jpg!
Done 000058.jpg!
Done 000059.jpg!
Done 000062.jpg!
Done 000067.jpg!
Done 000068.jpg!
Done 000069.jpg!
Done 000070.jpg!
Done 000071.jpg!
Done 000074.jpg!
Done 000075.jpg!
Done 000076.jpg!
Done 000079.jpg!
Done 000080.jpg!
Done 000082.jpg!
Done 000084.jpg!
Done 000085.jpg!
Done 000086.jpg!
Done 000087.jpg!
Done 000088.jpg!
Done 000090.jpg!
Done 000092.jpg!
Done 000094.jpg!
Done 000096.jpg!
Done 000097.jpg!
Done 000098.jpg!
Done 000100.jpg!
Done 000103.jpg!
Done 000105.jpg!