# Import stuff

In [1]:
%matplotlib notebook

import cv2
from matplotlib import pyplot as plt
import skimage.io
import numpy as np
import os
from shutil import copyfile
from tqdm.notebook import tqdm as tqdm
import scipy
import xml.etree.ElementTree as ET

# Define Paths

In [None]:
# Root folder of the train/val data. The loader reads <path>/groundtruth/*.txt,
# <path>/groundtruth_voc/*.xml and <path>images/*.png, so keep the trailing slash.
path_trainval = "/path/to/images/"
# Root folder of the test data (same layout as the train/val folder).
path_test = "/path/to/images/"
# Name of the validation output folder. It is created with os.mkdir, so it must not exist yet.
folder1 = "darknet_val"
# Name of the training output folder. It is created with os.mkdir, so it must not exist yet.
folder2 = "darknet_train"
# Name of the test output folder. It is created with os.mkdir, so it must not exist yet.
folder3 = "darknet_test"
# Train/val split passed to load_images_from_folder: roughly one image in every
# `split` is used for validation (e.g. split = 10 -> about 10%).
split = 10

# Load images (Train/Val)

In [6]:
# Darknet class ids kept by the cleaning step, mapped to their new ids.
# Lines with any other class id are dropped (per the dataset note: crowd and
# military vehicle).
_DARKNET_CLASS_REMAP = {"1": "0", "2": "1", "3": "2"}


def _clean_darknet_labels(source_path, destination_path):
    """Write a cleaned copy of a darknet label file and return how many boxes were kept.

    Each line is matched on its complete first token (the class id), so an id
    such as "10" is not mistaken for class "1". The rest of the line is copied
    unchanged.

    NOTE: This remapping is specific to the original dataset; adapt or bypass
    it if you use a different dataset.
    """
    kept = 0
    with open(source_path, "r") as source, open(destination_path, "w") as destination:
        for line in source:
            stripped = line.lstrip()
            fields = stripped.split(None, 1)
            if not fields:
                # blank line
                continue
            new_id = _DARKNET_CLASS_REMAP.get(fields[0])
            if new_id is None:
                continue
            destination.write(new_id + stripped[len(fields[0]):])
            kept += 1
    return kept


def _count_voc_classes(xml_path):
    """Return (soldiers, civilians) counted over the top-level <object> entries of a VOC file."""
    soldiers = 0
    civilians = 0
    for obj in ET.parse(xml_path).getroot().findall("object"):
        label = obj.findtext("name")
        if label == "soldier":
            soldiers += 1
        elif label == "civilian":
            civilians += 1
    return soldiers, civilians


def _export_sample(index, image, clean_label_path, voc_xml_path,
                   out_dir, darknet_image_dir, darknet_list, voc_list):
    """Register one sample in both list files and copy its labels and image into *out_dir*."""
    name = str(index)
    darknet_list.write(darknet_image_dir + name + ".jpg\n")
    voc_list.write(name + "\n")
    copyfile(clean_label_path, os.path.join(out_dir, "groundtruth", name + ".txt"))
    copyfile(voc_xml_path, os.path.join(out_dir, "groundtruth_voc", name + ".xml"))
    # cv2.imwrite is deliberately avoided: per the original author's note,
    # darknet could not load the JPEGs written by OpenCV. scipy.misc.imsave,
    # used previously, no longer exists in current SciPy releases.
    # NOTE(review): cv2.imread returns channels in BGR order while this writer
    # treats them as RGB, so colour images are stored with red/blue swapped.
    # This matches the previous output - confirm whether it is intended.
    skimage.io.imsave(os.path.join(out_dir, "images", name + ".jpg"), image)


def load_images_from_folder(path, split):
    """Convert the labelled data under *path* into darknet/VOC training and validation sets.

    Reads ``<path>/groundtruth/*.txt`` (darknet labels), the matching
    ``<path>/images/<name>.png`` and ``<path>/groundtruth_voc/<name>.xml``.
    A sample is skipped (and counted as non-valid) when its label file is
    empty, its image cannot be read, or no box survives the label cleaning.
    Cleaned labels are written next to the originals as ``<name>_clean.txt``.

    Valid samples are written to ``folder2`` (training) and ``folder1``
    (validation) under the name ``<index>`` where index is the position of the
    label file in the directory listing.

    Args:
        path: Root folder of the dataset.
        split: Number of consecutive valid samples sent to training before one
            sample is sent to validation (so 1 in ``split + 1`` is validation).

    Returns:
        Tuple ``(valid, non_valid)`` with the number of exported and skipped samples.

    Raises:
        ValueError: If *split* is smaller than 1.
        OSError: If one of the output folders already exists.
    """
    if split < 1:
        raise ValueError("split must be >= 1, got %r" % (split,))

    # os.mkdir (not makedirs): an already existing output folder aborts the run
    # instead of silently mixing old and new files.
    for out_dir in (folder1, folder2):
        os.mkdir(out_dir)
        for sub_dir in ("images", "groundtruth", "groundtruth_voc"):
            os.mkdir(os.path.join(out_dir, sub_dir))

    # os.path.join makes the input paths work with or without a trailing slash.
    gt_dir = os.path.join(path, "groundtruth")
    voc_dir = os.path.join(path, "groundtruth_voc")
    image_dir = os.path.join(path, "images")

    darknet_train_dir = "/caa/Homes01/mburges/CVSP-Object-Detection-Historical-Videos/darknet/x64/Release/data/img_train/"
    darknet_val_dir = "/caa/Homes01/mburges/CVSP-Object-Detection-Historical-Videos/darknet/x64/Release/data/img_val/"

    # we want to iterate through the darknet groundtruths
    label_files = os.listdir(gt_dir)
    print(len(label_files))

    valid = 0
    non_valid = 0
    since_last_val = 0  # training samples written since the last validation sample
    soldier_train = 0
    soldier_val = 0
    civilian_train = 0
    civilian_val = 0

    with open(os.path.join(folder2, "train.txt"), "w") as train_txt, \
            open(os.path.join(folder1, "val.txt"), "w") as val_txt, \
            open(os.path.join(folder2, "trainval_voc.txt"), "w") as voc_train_txt, \
            open(os.path.join(folder1, "val_voc.txt"), "w") as voc_val_txt:
        for x, single_path in enumerate(tqdm(label_files)):
            # only darknet label files are of interest
            if not single_path.endswith(".txt"):
                continue

            label_path = os.path.join(gt_dir, single_path)
            if os.stat(label_path).st_size == 0:
                non_valid += 1
                continue

            stem = single_path[:-4]
            image = cv2.imread(os.path.join(image_dir, stem + ".png"))
            # cv2.imread signals a missing/unreadable image by returning None
            if image is None:
                non_valid += 1
                continue

            clean_path = os.path.join(gt_dir, stem + "_clean.txt")
            if _clean_darknet_labels(label_path, clean_path) == 0:
                non_valid += 1
                continue

            valid += 1
            voc_path = os.path.join(voc_dir, stem + ".xml")
            soldiers, civilians = _count_voc_classes(voc_path)

            if since_last_val < split:
                soldier_train += soldiers
                civilian_train += civilians
                since_last_val += 1
                _export_sample(x, image, clean_path, voc_path,
                               folder2, darknet_train_dir, train_txt, voc_train_txt)
            else:
                soldier_val += soldiers
                civilian_val += civilians
                since_last_val = 0
                _export_sample(x, image, clean_path, voc_path,
                               folder1, darknet_val_dir, val_txt, voc_val_txt)

    print (soldier_train, soldier_val, civilian_train, civilian_val, valid, non_valid)
    return valid, non_valid

# Build the training and validation sets; the function returns the (valid, non_valid) counts.
x = load_images_from_folder(path_trainval, split)

28187


Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"



(73, 0, 247, 0, 132, 28055)


# Load images (Test)

In [None]:
# Darknet class ids kept by the cleaning step, mapped to their new ids.
# Lines with any other class id are dropped (per the dataset note: crowd and
# military vehicle).
_DARKNET_CLASS_REMAP = {"1": "0", "2": "1", "3": "2"}


def _clean_darknet_labels(source_path, destination_path):
    """Write a cleaned copy of a darknet label file and return how many boxes were kept.

    Each line is matched on its complete first token (the class id), so an id
    such as "10" is not mistaken for class "1". The rest of the line is copied
    unchanged.

    NOTE: This remapping is specific to the original dataset; adapt or bypass
    it if you use a different dataset.
    """
    kept = 0
    with open(source_path, "r") as source, open(destination_path, "w") as destination:
        for line in source:
            stripped = line.lstrip()
            fields = stripped.split(None, 1)
            if not fields:
                # blank line
                continue
            new_id = _DARKNET_CLASS_REMAP.get(fields[0])
            if new_id is None:
                continue
            destination.write(new_id + stripped[len(fields[0]):])
            kept += 1
    return kept


def _count_voc_classes(xml_path):
    """Return (soldiers, civilians) counted over the top-level <object> entries of a VOC file."""
    soldiers = 0
    civilians = 0
    for obj in ET.parse(xml_path).getroot().findall("object"):
        label = obj.findtext("name")
        if label == "soldier":
            soldiers += 1
        elif label == "civilian":
            civilians += 1
    return soldiers, civilians


def _export_sample(index, image, clean_label_path, voc_xml_path,
                   out_dir, darknet_image_dir, darknet_list, voc_list):
    """Register one sample in both list files and copy its labels and image into *out_dir*."""
    name = str(index)
    darknet_list.write(darknet_image_dir + name + ".jpg\n")
    voc_list.write(name + "\n")
    copyfile(clean_label_path, os.path.join(out_dir, "groundtruth", name + ".txt"))
    copyfile(voc_xml_path, os.path.join(out_dir, "groundtruth_voc", name + ".xml"))
    # cv2.imwrite is deliberately avoided: per the original author's note,
    # darknet could not load the JPEGs written by OpenCV. scipy.misc.imsave,
    # used previously, no longer exists in current SciPy releases.
    # NOTE(review): cv2.imread returns channels in BGR order while this writer
    # treats them as RGB, so colour images are stored with red/blue swapped.
    # This matches the previous output - confirm whether it is intended.
    skimage.io.imsave(os.path.join(out_dir, "images", name + ".jpg"), image)


def load_images_from_folder_test(path):
    """Convert the labelled data under *path* into the darknet/VOC test set.

    Reads ``<path>/groundtruth/*.txt`` (darknet labels), the matching
    ``<path>/images/<name>.png`` and ``<path>/groundtruth_voc/<name>.xml``.
    A sample is skipped (and counted as non-valid) when its label file is
    empty, its image cannot be read, or no box survives the label cleaning.
    Cleaned labels are written next to the originals as ``<name>_clean.txt``.

    All outputs (``test.txt``, ``test_voc.txt``, images and both kinds of
    ground truth) are written to ``folder3``; each sample is named ``<index>``
    where index is the position of the label file in the directory listing.

    Args:
        path: Root folder of the test dataset.

    Returns:
        Tuple ``(valid, non_valid)`` with the number of exported and skipped samples.

    Raises:
        OSError: If the output folder already exists.
    """
    # os.mkdir (not makedirs): an already existing output folder aborts the run
    # instead of silently mixing old and new files.
    os.mkdir(folder3)
    for sub_dir in ("images", "groundtruth", "groundtruth_voc"):
        os.mkdir(os.path.join(folder3, sub_dir))

    # os.path.join makes the input paths work with or without a trailing slash.
    gt_dir = os.path.join(path, "groundtruth")
    voc_dir = os.path.join(path, "groundtruth_voc")
    image_dir = os.path.join(path, "images")

    # NOTE(review): the test list points at ".../img_train/"; this is kept
    # unchanged from the original - confirm whether a dedicated test image
    # folder was intended.
    darknet_test_dir = "/caa/Homes01/mburges/CVSP-Object-Detection-Historical-Videos/darknet/x64/Release/data/img_train/"

    # we want to iterate through the darknet groundtruths
    label_files = os.listdir(gt_dir)
    print(len(label_files))

    valid = 0
    non_valid = 0
    soldier_count = 0
    civilian_count = 0

    with open(os.path.join(folder3, "test.txt"), "w") as test_txt, \
            open(os.path.join(folder3, "test_voc.txt"), "w") as voc_test_txt:
        for x, single_path in enumerate(tqdm(label_files)):
            # only darknet label files are of interest
            if not single_path.endswith(".txt"):
                continue

            label_path = os.path.join(gt_dir, single_path)
            if os.stat(label_path).st_size == 0:
                non_valid += 1
                continue

            stem = single_path[:-4]
            image = cv2.imread(os.path.join(image_dir, stem + ".png"))
            # cv2.imread signals a missing/unreadable image by returning None
            if image is None:
                non_valid += 1
                continue

            clean_path = os.path.join(gt_dir, stem + "_clean.txt")
            if _clean_darknet_labels(label_path, clean_path) == 0:
                non_valid += 1
                continue

            valid += 1
            voc_path = os.path.join(voc_dir, stem + ".xml")
            soldiers, civilians = _count_voc_classes(voc_path)
            soldier_count += soldiers
            civilian_count += civilians

            _export_sample(x, image, clean_path, voc_path,
                           folder3, darknet_test_dir, test_txt, voc_test_txt)

    # Same six-value layout as the train/val summary; the two validation slots
    # are always 0 because the test set has no validation part.
    print (soldier_count, 0, civilian_count, 0, valid, non_valid)
    return valid, non_valid

# Build the test set from the data under path_test.
x = load_images_from_folder_test(path_test)