In [1]:
# https://github.com/ultralytics/yolov5
# https://github.com/VisDrone/VisDrone-Dataset
import cv2
import os
import xml.etree.ElementTree as ET

In [2]:
# Dataset split to convert; the value is substituted into every path below.
input_type = 'val'

# Source folders holding the VisDrone images and their annotation files.
input_img_folder = f'F:/VISDRONE/object-detection-images/VisDrone2019-DET-{input_type}/VisDrone2019-DET-{input_type}/images'
input_ann_folder = f'F:/VISDRONE/object-detection-images/VisDrone2019-DET-{input_type}/VisDrone2019-DET-{input_type}/annotations'
# Destination folders (relative to the working directory) for labels and images.
output_ann_folder = f'yolov5-master/{input_type}_data/labels'
output_img_folder = f'yolov5-master/{input_type}_data/images'

# Create both destination folders up front; existing folders are left untouched.
for _out_dir in (output_img_folder, output_ann_folder):
    os.makedirs(_out_dir, exist_ok=True)

image_list = os.listdir(input_img_folder)
annotation_list = os.listdir(input_ann_folder)

# Category id (as a string key) -> class name. Insertion order matters: the
# position of a name among the values is used as its numeric class id later on.
label_dict = {
    str(category_id): class_name
    for category_id, class_name in enumerate((
        "Ignore",
        "Pedestrian",
        "People",
        "Bicycle",
        "Car",
        "Van",
        "Truck",
        "Tricycle",
        "Awning-tricycle",
        "Bus",
        "Motor",
        "Others",
    ))
}

# Line thickness and per-category colour triples for optional bounding-box
# visualisation (presumably meant for OpenCV drawing calls -- confirm before use).
thickness = 2
colors = {
    str(category_id): colour
    for category_id, colour in enumerate((
        (0, 0, 0),
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (0, 255, 255),
        (255, 0, 255),
        (128, 0, 0),
        (128, 128, 0),
        (0, 128, 0),
        (128, 0, 128),
        (0, 0, 128),
    ))
}

# Running total of converted image/annotation pairs.
count = 0


In [3]:

def object_string(label, bbox):
    """Render one ``<object>`` element of a Pascal-VOC style annotation.

    Args:
        label: Value placed inside ``<name>``; it is formatted with ``str``
            semantics, so ``None`` is rendered as the text ``None``.
        bbox: Indexable with at least four items, read as
            ``(xmin, ymin, xmax, ymax)``.

    Returns:
        The XML fragment as a string. It starts with a newline and ends with a
        newline followed by a single tab, so fragments can be concatenated.
    """
    xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3]
    # The empty first piece and the lone-tab last piece reproduce the leading
    # newline and the trailing "\n\t" of the fragment.
    pieces = [
        '',
        '\t<object>',
        f'\t\t<name>{label}</name>',
        '\t\t<pose>Unspecified</pose>',
        '\t\t<truncated>0</truncated>',
        '\t\t<difficult>0</difficult>',
        '\t\t<bndbox>',
        f'\t\t\t<xmin>{xmin}</xmin>',
        f'\t\t\t<ymin>{ymin}</ymin>',
        f'\t\t\t<xmax>{xmax}</xmax>',
        f'\t\t\t<ymax>{ymax}</ymax>',
        '\t\t</bndbox>',
        '\t</object>',
        '\t',
    ]
    return '\n'.join(pieces)

In [4]:
# Convert every VisDrone annotation file into a Pascal-VOC style XML file in
# the output label folder and write the matching image to the output image folder.
for annotation in annotation_list:
    annotation_path = os.path.join(os.getcwd(), input_ann_folder, annotation)
    file_stem = annotation.split('.txt')[0]
    xml_path = os.path.join(os.getcwd(), output_ann_folder, file_stem + '.xml')
    img_file = file_stem + '.jpg'
    img_path = os.path.join(os.getcwd(), input_img_folder, img_file)
    output_img_path = os.path.join(os.getcwd(), output_img_folder, img_file)

    img = cv2.imread(img_path)
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising, so check before touching img.shape.
    if img is None:
        print('[WARN] Could not read image {}; skipping'.format(img_path))
        continue
    # skip rotated images (width smaller than height)
    if img.shape[1] < img.shape[0]:
        continue

    # Collect the XML text in pieces and join once at the end.
    xml_parts = [(
        '\n<annotation>\n'
        '\t<folder>annotations</folder>\n'
        '\t<filename>{}</filename>\n'
        '\t<path>{}</path>\n'
        '\t<source>\n'
        '\t\t<database>Unknown</database>\n'
        '\t</source>\n'
        '\t<size>\n'
        '\t\t<width>{}</width>\n'
        '\t\t<height>{}</height>\n'
        '\t\t<depth>{}</depth>\n'
        '\t</size>\n'
        '\t<segmented>0</segmented>'
    ).format(img_file, img_path, img.shape[1], img.shape[0], img.shape[2])]

    # The context manager closes the annotation file even if a line fails to parse.
    with open(annotation_path, 'r') as annotation_file:
        for line in annotation_file:
            # Blank lines (e.g. a trailing newline at end of file) carry no box.
            if not line.strip():
                continue
            fields = line.strip('\n').split(',')
            # Fields 0-3 are read as left, top, width, height; field 5 is the
            # category key looked up in label_dict.
            left, top, box_w, box_h = (int(value) for value in fields[:4])
            bbox = (left, top, left + box_w, top + box_h)
            # Unknown categories yield label None, which is written as "None".
            label = label_dict.get(fields[5])
            xml_parts.append(object_string(label, bbox))

    cv2.imwrite(output_img_path, img)
    xml_parts.append('</annotation>')
    with open(xml_path, 'w') as xml_file:
        xml_file.write(''.join(xml_parts))
    count += 1
    print('[INFO] Completed {} image(s) and annotation(s) pair'.format(count))

[INFO] Completed 1 image(s) and annotation(s) pair
[INFO] Completed 2 image(s) and annotation(s) pair
[INFO] Completed 3 image(s) and annotation(s) pair
[INFO] Completed 4 image(s) and annotation(s) pair
[INFO] Completed 5 image(s) and annotation(s) pair
[INFO] Completed 6 image(s) and annotation(s) pair
[INFO] Completed 7 image(s) and annotation(s) pair
[INFO] Completed 8 image(s) and annotation(s) pair
[INFO] Completed 9 image(s) and annotation(s) pair
[INFO] Completed 10 image(s) and annotation(s) pair
[INFO] Completed 11 image(s) and annotation(s) pair
[INFO] Completed 12 image(s) and annotation(s) pair
[INFO] Completed 13 image(s) and annotation(s) pair
[INFO] Completed 14 image(s) and annotation(s) pair
[INFO] Completed 15 image(s) and annotation(s) pair
[INFO] Completed 16 image(s) and annotation(s) pair
[INFO] Completed 17 image(s) and annotation(s) pair
[INFO] Completed 18 image(s) and annotation(s) pair
[INFO] Completed 19 image(s) and annotation(s) pair
[INFO] Completed 20 i

In [5]:

def convert(size, box):
    """Turn a corner-style box into a centre/size box scaled by the image size.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)`` -- note the x pair comes first.

    Returns:
        ``(x_center, y_center, width, height)``, with the x values multiplied by
        ``1 / image_width`` and the y values by ``1 / image_height``.
    """
    # Reciprocals are taken once and multiplied in (rather than dividing) so the
    # floating-point results match a multiply-by-inverse computation exactly.
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    centre_x = (box[0] + box[1]) / 2.0
    centre_y = (box[2] + box[3]) / 2.0
    span_x = box[1] - box[0]
    span_y = box[3] - box[2]

    return (
        centre_x * scale_x,
        centre_y * scale_y,
        span_x * scale_x,
        span_y * scale_y,
    )


In [6]:
def convert_annotation(dir_path, output_path, xml_file_path):
    """Convert one Pascal-VOC style XML annotation into a plain-text label file.

    Reads ``<dir_path>/<stem>.xml`` and writes ``<output_path>/<stem>.txt``,
    where ``<stem>`` is the base name of ``xml_file_path`` without extension.
    Every kept object becomes one line: the class id followed by the four
    values returned by ``convert``, separated by single spaces.

    An object is dropped when its ``<name>`` is not one of the values of
    ``label_dict`` or when its ``<difficult>`` flag equals 1.

    Args:
        dir_path: Folder containing the XML file.
        output_path: Folder that receives the ``.txt`` file.
        xml_file_path: Path (or bare name) of the XML file; only its base name
            is used.
    """
    basename = os.path.basename(xml_file_path)
    basename_no_ext = os.path.splitext(basename)[0]

    # Parse before creating the output so a missing or malformed XML does not
    # leave an empty label file behind; the handle is closed right after parsing.
    with open(dir_path + '/' + basename_no_ext + '.xml') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Class id = position of the name among label_dict's values; build the
    # lookup list once instead of once per object.
    class_names = list(label_dict.values())

    with open(output_path + '/' + basename_no_ext + '.txt', 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            # Skip unknown classes and objects flagged as difficult.
            if cls not in class_names or int(difficult) == 1:
                continue
            cls_id = class_names.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box as (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " +
                           " ".join([str(a) for a in bb]) + '\n')


In [7]:
# Convert every XML annotation in the label folder to a .txt label file and
# delete the XML once it has been converted.
dir_path = output_ann_folder

full_dir_path = os.path.join(os.getcwd(), dir_path)
for xml_file_name in os.listdir(full_dir_path):
    # Only .xml files are conversion inputs. Anything else in the folder
    # (already-written .txt labels, caches, stray files) must be neither parsed
    # nor deleted.
    if not xml_file_name.endswith('.xml'):
        continue
    xml_file_path = os.path.join(full_dir_path, xml_file_name)
    convert_annotation(full_dir_path, full_dir_path, xml_file_path)
    print("done: " + xml_file_path)
    # Remove the intermediate XML only after its label file was written.
    os.remove(xml_file_path)

print("Finished processing: " + dir_path)

done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_02999_d_0000005.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_03499_d_0000006.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_03999_d_0000007.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_04527_d_0000008.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_05249_d_0000009.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_05499_d_0000010.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_05999_d_0000011.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_07999_d_0000012.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_data/labels\0000001_08414_d_0000013.xml
done: e:\College\Sem-7\Major-Project\visdrone\yolov5-master/val_