In [1]:
import os

In [2]:
# clone the YOLOv5 repo
#os.system('git clone https://github.com/ultralytics/yolov5')

In [3]:
#pip install -r yolov5\\requirements.txt

In [4]:
import glob
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import zipfile
from pathlib import Path
import numpy as np
import shutil

In [5]:
# Object classes to keep; a class's position in this list is the numeric
# id written to the label files.
classes = [
    'with_mask',
    'without_mask',
    'mask_weared_incorrect',
]

In [6]:
# Dataset location: the archive is read from <r_dir>/<d_file>.
img_format = ".png"     # extension appended to a label's base name to find its image
d_file = "archive.zip"  # file name of the zipped dataset inside r_dir
r_dir = "datasets"      # root folder holding the archive and everything derived from it

In [7]:
# Everything for this dataset lives under <r_dir>/mask.
D_dir = os.path.join(r_dir, "mask")

# Input folders (presumably populated by extracting the archive - confirm
# against the archive layout).
img_directory = os.path.join(D_dir, "images")
lb_dir = os.path.join(D_dir, "annotations")

# Folders written by this notebook: converted label files and the two splits.
p_l_dir = os.path.join(D_dir, "processed_annotations")
train_data_directory = os.path.join(D_dir, "train")
validation_data_directory = os.path.join(D_dir, "valid")

In [8]:
# Unpack <r_dir>/<d_file> into the dataset folder D_dir.
archive_path = join(r_dir, d_file)
with zipfile.ZipFile(archive_path) as archive:
    archive.extractall(path=D_dir)

In [9]:
# Create the split folders (each with images/ and labels/ inside) and the
# folder for converted annotations. Existing folders are left untouched.
for directory in (
    train_data_directory,
    validation_data_directory,
    train_data_directory + "/images",
    train_data_directory + "/labels",
    validation_data_directory + "/images",
    validation_data_directory + "/labels",
    p_l_dir,
):
    Path(directory).mkdir(parents=True, exist_ok=True)

In [10]:
def convert(size, box):
    """Turn a pixel-space box into centre/extent values scaled by the image size.

    Args:
        size: sequence whose first two items are the image width and height.
        box: sequence ordered as (xmin, xmax, ymin, ymax).

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and the box width/height, with
        horizontal values multiplied by ``1/width`` and vertical values by
        ``1/height``.

    Raises:
        ZeroDivisionError: if the width or height in ``size`` is zero.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    # The centre is shifted by one pixel before scaling (presumably to move
    # from 1-based to 0-based pixel coordinates - confirm against the data).
    centre_x = ((x_min + x_max) / 2.0 - 1) * scale_x
    centre_y = ((y_min + y_max) / 2.0 - 1) * scale_y
    extent_x = (x_max - x_min) * scale_x
    extent_y = (y_max - y_min) * scale_y
    return (centre_x, centre_y, extent_x, extent_y)

In [11]:
def convert_annotation(classes, input_path, output_path):
    """Convert one VOC-style XML annotation file into a text label file.

    The XML is expected to contain a ``size`` element (``width``/``height``)
    and any number of ``object`` elements, each with ``name``, an optional
    ``difficult`` flag and a ``bndbox`` (``xmin``/``xmax``/``ymin``/``ymax``).
    For every kept object one line ``<class index> <x> <y> <w> <h>`` is
    written, where the four numbers come from ``convert``.

    Objects are skipped when their name is not in ``classes`` or when they are
    flagged ``difficult == 1``. A missing or empty ``difficult`` tag is treated
    as "not difficult" instead of raising.

    Args:
        classes: list of class names; the index of a name is its class id.
        input_path: path of the XML annotation file to read.
        output_path: directory that receives ``<input base name>.txt``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML cannot be parsed. The
            label file is not created in that case.
    """
    basename_no_ext = os.path.splitext(os.path.basename(input_path))[0]

    # Hand the path to ElementTree so it opens the file in binary mode, honours
    # the encoding declared in the XML, and closes the file itself.
    root = ET.parse(input_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_lines = []
    for obj in root.iter('object'):
        difficult_node = obj.find('difficult')
        difficult = 0
        if difficult_node is not None and difficult_node.text and difficult_node.text.strip():
            difficult = int(difficult_node.text)

        cls = obj.find('name').text
        if cls not in classes or difficult == 1:
            continue

        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # Order expected by convert(): xmin, xmax, ymin, ymax.
        b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
        bb = convert((w, h), b)
        label_lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    # Write only after parsing succeeded, and let the context manager close
    # the handle even if the write fails.
    with open(output_path + "/" + basename_no_ext + '.txt', 'w') as out_file:
        out_file.writelines(label_lines)

In [12]:
# Every XML annotation file found directly inside lb_dir.
paths = glob.glob(f"{lb_dir}/*.xml")

In [13]:
# Write one label file into p_l_dir for each annotation in paths,
# with a progress bar over the whole run.
for annotation_xml in tqdm(paths):
    convert_annotation(classes, input_path=annotation_xml, output_path=p_l_dir)

100%|███████████████████████████████████████████████████████████████████████████████| 853/853 [00:01<00:00, 428.71it/s]


In [14]:
# Sort the matches: glob's ordering depends on the file system, and the
# seeded index-based split that follows is only repeatable when the list
# order is fixed.
label_files = sorted(glob.glob(p_l_dir + '/*.txt'))

In [15]:
# Split positions in label_files 80/20 into train and validation; the fixed
# random_state makes the shuffle repeatable for a given list length.
all_indices = np.arange(len(label_files))
train_indices, valid_indices = train_test_split(
    all_indices,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [16]:
# Label files selected for the training split, in the order of train_indices.
train_labels = [label_files[position] for position in train_indices]

In [17]:
# Label files selected for the validation split, in the order of valid_indices.
valid_labels = [label_files[position] for position in valid_indices]

In [18]:
# Move every training label, and the image sharing its base name, into
# train/labels and train/images respectively.
train_label_dir = join(train_data_directory, "labels")
train_image_dir = join(train_data_directory, "images")
for label_path in train_labels:
    stem = os.path.splitext(os.path.basename(label_path))[0]
    image_name = stem + img_format

    shutil.move(label_path, join(train_label_dir, stem + ".txt"))
    shutil.move(join(img_directory, image_name), join(train_image_dir, image_name))


In [19]:
# Move every validation label, and the image sharing its base name, into
# valid/labels and valid/images respectively.
valid_label_dir = join(validation_data_directory, "labels")
valid_image_dir = join(validation_data_directory, "images")
for label_path in valid_labels:
    stem = os.path.splitext(os.path.basename(label_path))[0]
    image_name = stem + img_format

    shutil.move(label_path, join(valid_label_dir, stem + ".txt"))
    shutil.move(join(img_directory, image_name), join(valid_image_dir, image_name))