In [16]:
import os
import shutil
import random
import math
import re
import numpy as np
import cv2
from collections import defaultdict, Counter

In [28]:
from object_detection.utils import label_map_util

# Load the label map file, then derive the label-name -> id dictionary that the
# label-counting cell uses to decide which labels to keep.
label_map = label_map_util.load_labelmap(
    '../annotations/label_map.pbtxt'
)
label_map_dict = label_map_util.get_label_map_dict(label_map)
label_map_dict

{'car': 1, 'truck': 2, 'bus': 3, 'bike': 4, 'person': 5}

Convert the label files to our default XML format, dropping labels that we don't care about and removing images that do not contain any object of interest.

In [17]:
def addObject(label, coords, truncated=0, difficult=0):
    """Build the XML text of a single ``<object>`` annotation entry.

    Args:
        label: Class name written into ``<name>``. XML special characters
            (``&``, ``<``, ``>``) are escaped.
        coords: Indexable holding ``[xmin, ymin, xmax, ymax]``. Each value is
            inserted via ``str.format``, so strings and numbers both work.
        truncated: Value written into ``<truncated>`` (default 0).
        difficult: Value written into ``<difficult>`` (default 0).

    Returns:
        str: The tab-indented ``<object>`` element, terminated by a newline.
    """
    from xml.sax.saxutils import escape

    # Assemble the element line by line rather than with a triple-quoted
    # template: the template picked up the four spaces of source indentation on
    # every continuation line, so the output mixed spaces and tabs.
    rows = [
        '\t<object>',
        '\t\t<name>{}</name>'.format(escape(str(label))),
        '\t\t<pose>Unspecified</pose>',
        '\t\t<truncated>{}</truncated>'.format(truncated),
        '\t\t<difficult>{}</difficult>'.format(difficult),
        '\t\t<bndbox>',
        '\t\t\t<xmin>{}</xmin>'.format(coords[0]),
        '\t\t\t<ymin>{}</ymin>'.format(coords[1]),
        '\t\t\t<xmax>{}</xmax>'.format(coords[2]),
        '\t\t\t<ymax>{}</ymax>'.format(coords[3]),
        '\t\t</bndbox>',
        '\t</object>',
    ]
    return '\n'.join(rows) + '\n'

def convertToXMLFormat(path, func, flags=cv2.IMREAD_COLOR):
    """Print an ``<annotation>`` XML document describing one image.

    The image is loaded only to read its dimensions; the per-object entries
    embedded in the document are produced by ``func``.

    Args:
        path: Path to the image file. It is stripped and lower-cased first, and
            that normalised form is what gets opened, passed to ``func`` and
            written into ``<path>``.
        func: Callable that receives the normalised image path and returns the
            ``<object>`` XML text to embed.
        flags: Read mode forwarded to ``cv2.imread``.

    Raises:
        OSError: If ``cv2.imread`` could not load the image.
    """
    from xml.sax.saxutils import escape

    # NOTE(review): lower-casing the path is harmless on case-insensitive
    # filesystems but will miss mixed-case files on case-sensitive ones.
    # Kept because the written <path>/<filename> values depend on it - confirm.
    path = path.strip().lower()
    parts = path.replace('\\', '/').split('/')
    file = parts[-1]
    # A bare filename has no parent component; avoid an IndexError in that case.
    folder = parts[-2] if len(parts) > 1 else ''

    img = cv2.imread(path, flags)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not read image: {!r}'.format(path))

    height, width = img.shape[0], img.shape[1]
    # Two-dimensional arrays (e.g. grayscale reads) carry no channel axis.
    depth = img.shape[2] if img.ndim == 3 else 1

    # Built line by line so the source indentation of this function does not
    # leak into the emitted XML.
    header = [
        '<annotation>',
        '\t<folder>{}</folder>'.format(escape(folder)),
        '\t<filename>{}</filename>'.format(escape(file)),
        '\t<path>{}</path>'.format(escape(path)),
        '\t<source>',
        '\t\t<database>Unknown</database>',
        '\t</source>',
        '\t<size>',
        '\t\t<width>{}</width>'.format(width),
        '\t\t<height>{}</height>'.format(height),
        '\t\t<depth>{}</depth>'.format(depth),
        '\t</size>',
        '\t<segmented>0</segmented>',
    ]
    out = '{}\n{}</annotation>'.format('\n'.join(header), func(path))
    print(out)

In [18]:
def convertFromXDark(path):
    """Build the ``<object>`` XML entries from the label file next to an image.

    The label file is looked up at the image path with its extension replaced
    by ``.txt``. Every non-blank line must contain exactly five
    whitespace-separated fields: ``label xmin ymin xmax ymax``.

    Args:
        path: Path to the image file.

    Returns:
        str: Concatenated ``addObject`` output, one entry per label line
        (empty string when the file has no label lines).

    Raises:
        ValueError: If a non-blank line does not have exactly five fields.
    """
    # splitext only touches the final extension; chained str.replace calls
    # would also rewrite '.png' / '.jpg' occurring inside a directory name.
    label_path = os.path.splitext(path)[0] + '.txt'

    entries = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.strip().lower().split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            lbl, xmin, ymin, xmax, ymax = fields
            entries.append(addObject(lbl, [xmin, ymin, xmax, ymax]))
    return ''.join(entries)

In [126]:
# Preview the generated XML for one sample image.
# NOTE(review): absolute, machine-specific path - consider a path relative to
# the notebook so the cell runs on other machines.
convertToXMLFormat(r'E:\Cloud\GIT\CVEET\images\temp\e6_nostvet_11-01-15-32.jpg', convertFromXDark)

<annotation>
    	<folder>temp</folder>
    	<filename>e6_nostvet_11-01-15-32.jpg</filename>
    	<path>e:\cloud\git\cveet\images\temp\e6_nostvet_11-01-15-32.jpg</path>
    	<source>
    		<database>Unknown</database>
    	</source>
    	<size>
    		<width>800</width>
    		<height>600</height>
    		<depth>3</depth>
    	</size>
    	<segmented>0</segmented>
	<object>
    		<name>truck</name>
    		<pose>Unspecified</pose>
    		<truncated>0</truncated>
    		<difficult>0</difficult>
    		<bndbox>
    			<xmin>210</xmin>
    			<ymin>119</ymin>
    			<xmax>259</xmax>
    			<ymax>175</ymax>
    		</bndbox>
	</object>
	<object>
    		<name>car</name>
    		<pose>Unspecified</pose>
    		<truncated>0</truncated>
    		<difficult>0</difficult>
    		<bndbox>
    			<xmin>168</xmin>
    			<ymin>119</ymin>
    			<xmax>188</xmax>
    			<ymax>142</ymax>
    		</bndbox>
	</object>
	<object>
    		<name>truck</name>
    		<pose>Unspecified</pose>
    		<truncated>0</truncated>
    		<dif

In [14]:
# Write one 'raw/<stem>' line per distinct lower-cased file stem in ../images/test.
# os.path.splitext replaces filename[:-4], which assumed a three-character
# extension and mangled names such as 'x.jpeg'.
# The directory is listed before the output file is opened, so a failing
# listing no longer leaves a truncated dark.txt behind.
stems = sorted({
    os.path.splitext(filename)[0].lower()
    for filename in os.listdir('../images/test')
})
with open('../images/dark.txt', 'w') as f:
    for v in stems:
        f.write('raw/{}\n'.format(v))

Rename files of interest

In [24]:
# Rename every entry in ../images/temp to '<stem>.xml', where <stem> is the
# stripped, lower-cased text before the first '.' of the original name.
temp_dir = '../images/temp'
for src_name in os.listdir(temp_dir):
    dst_name = '{}.xml'.format(src_name.strip().lower().split('.')[0])
    # The source path uses the name exactly as listed: building it from the
    # normalised name points at a non-existent file whenever normalisation
    # changes it (mixed case on a case-sensitive filesystem, stray whitespace).
    src_path = '{}/{}'.format(temp_dir, src_name)
    dst_path = '{}/{}'.format(temp_dir, dst_name)
    if dst_name != src_name:
        # os.rename silently replaces an existing target on POSIX but raises on
        # Windows; fail consistently so two sources mapping to the same stem
        # (e.g. 'a.jpg.txt' and 'a.png.txt') cannot overwrite each other.
        if os.path.exists(dst_path) and not os.path.samefile(src_path, dst_path):
            raise FileExistsError(
                'refusing to overwrite {!r} with {!r}'.format(dst_path, src_path)
            )
        os.rename(src_path, dst_path)
    print(src_name, '--->', dst_name)

2015_01879.jpg.txt ---> 2015_01879.xml
2015_01880.png.txt ---> 2015_01880.xml
2015_01881.png.txt ---> 2015_01881.xml
2015_01882.png.txt ---> 2015_01882.xml
2015_01883.jpg.txt ---> 2015_01883.xml
2015_01884.png.txt ---> 2015_01884.xml
2015_01885.png.txt ---> 2015_01885.xml
2015_01886.png.txt ---> 2015_01886.xml
2015_01887.png.txt ---> 2015_01887.xml
2015_01888.jpg.txt ---> 2015_01888.xml
2015_01889.jpg.txt ---> 2015_01889.xml
2015_01890.png.txt ---> 2015_01890.xml
2015_01891.jpg.txt ---> 2015_01891.xml
2015_01892.jpg.txt ---> 2015_01892.xml
2015_01893.jpg.txt ---> 2015_01893.xml
2015_01894.jpg.txt ---> 2015_01894.xml
2015_01895.jpg.txt ---> 2015_01895.xml
2015_01896.jpg.txt ---> 2015_01896.xml
2015_01897.jpg.txt ---> 2015_01897.xml
2015_01898.jpg.txt ---> 2015_01898.xml
2015_01899.jpg.txt ---> 2015_01899.xml
2015_01900.jpg.txt ---> 2015_01900.xml
2015_01901.jpg.txt ---> 2015_01901.xml
2015_01902.jpg.txt ---> 2015_01902.xml
2015_01903.jpg.txt ---> 2015_01903.xml
2015_01904.jpg.txt ---> 2

2015_02174.jpg.txt ---> 2015_02174.xml
2015_02175.jpg.txt ---> 2015_02175.xml
2015_02176.jpg.txt ---> 2015_02176.xml
2015_02177.jpg.txt ---> 2015_02177.xml
2015_02178.jpg.txt ---> 2015_02178.xml
2015_02179.jpg.txt ---> 2015_02179.xml
2015_02180.jpg.txt ---> 2015_02180.xml
2015_02181.jpg.txt ---> 2015_02181.xml
2015_02182.jpg.txt ---> 2015_02182.xml
2015_02183.jpg.txt ---> 2015_02183.xml
2015_02184.jpg.txt ---> 2015_02184.xml
2015_02185.jpg.txt ---> 2015_02185.xml
2015_02186.jpg.txt ---> 2015_02186.xml
2015_02187.jpg.txt ---> 2015_02187.xml
2015_02188.jpg.txt ---> 2015_02188.xml
2015_02189.jpg.txt ---> 2015_02189.xml
2015_02190.jpg.txt ---> 2015_02190.xml
2015_02191.jpg.txt ---> 2015_02191.xml
2015_02192.jpg.txt ---> 2015_02192.xml
2015_02193.jpg.txt ---> 2015_02193.xml
2015_02194.jpg.txt ---> 2015_02194.xml
2015_02195.jpg.txt ---> 2015_02195.xml
2015_02196.jpg.txt ---> 2015_02196.xml
2015_02197.jpg.txt ---> 2015_02197.xml
2015_02198.jpg.txt ---> 2015_02198.xml
2015_02199.jpg.txt ---> 2

2015_02464.jpg.txt ---> 2015_02464.xml
2015_02465.jpg.txt ---> 2015_02465.xml
2015_02466.jpg.txt ---> 2015_02466.xml
2015_02467.jpg.txt ---> 2015_02467.xml
2015_02468.jpg.txt ---> 2015_02468.xml
2015_02469.jpg.txt ---> 2015_02469.xml
2015_02470.jpg.txt ---> 2015_02470.xml
2015_02471.jpg.txt ---> 2015_02471.xml
2015_02472.jpg.txt ---> 2015_02472.xml
2015_02473.jpg.txt ---> 2015_02473.xml
2015_02474.jpg.txt ---> 2015_02474.xml
2015_02475.jpg.txt ---> 2015_02475.xml
2015_02476.jpg.txt ---> 2015_02476.xml
2015_02477.jpg.txt ---> 2015_02477.xml
2015_02478.jpg.txt ---> 2015_02478.xml
2015_02479.jpg.txt ---> 2015_02479.xml
2015_02480.jpg.txt ---> 2015_02480.xml
2015_02481.jpg.txt ---> 2015_02481.xml
2015_02482.jpg.txt ---> 2015_02482.xml
2015_02483.jpg.txt ---> 2015_02483.xml
2015_02484.jpg.txt ---> 2015_02484.xml
2015_02485.jpg.txt ---> 2015_02485.xml
2015_02486.jpg.txt ---> 2015_02486.xml
2015_02487.jpg.txt ---> 2015_02487.xml
2015_02488.jpg.txt ---> 2015_02488.xml
2015_02489.jpg.txt ---> 2

2015_02734.jpg.txt ---> 2015_02734.xml
2015_02735.jpg.txt ---> 2015_02735.xml
2015_02736.jpg.txt ---> 2015_02736.xml
2015_02737.jpg.txt ---> 2015_02737.xml
2015_02738.jpg.txt ---> 2015_02738.xml
2015_02739.jpg.txt ---> 2015_02739.xml
2015_02740.jpg.txt ---> 2015_02740.xml
2015_02741.jpg.txt ---> 2015_02741.xml
2015_02742.jpg.txt ---> 2015_02742.xml
2015_02743.jpg.txt ---> 2015_02743.xml
2015_02744.jpg.txt ---> 2015_02744.xml
2015_02745.jpg.txt ---> 2015_02745.xml
2015_02746.jpg.txt ---> 2015_02746.xml
2015_02747.jpg.txt ---> 2015_02747.xml
2015_02748.jpg.txt ---> 2015_02748.xml
2015_02749.jpg.txt ---> 2015_02749.xml
2015_02750.jpg.txt ---> 2015_02750.xml
2015_02751.jpg.txt ---> 2015_02751.xml
2015_02752.jpg.txt ---> 2015_02752.xml
2015_02753.jpg.txt ---> 2015_02753.xml
2015_02754.jpg.txt ---> 2015_02754.xml
2015_02755.jpg.txt ---> 2015_02755.xml
2015_02756.jpg.txt ---> 2015_02756.xml
2015_02757.jpg.txt ---> 2015_02757.xml
2015_02758.jpg.txt ---> 2015_02758.xml
2015_02759.jpg.txt ---> 2

2015_02991.jpg.txt ---> 2015_02991.xml
2015_02992.jpg.txt ---> 2015_02992.xml
2015_02993.jpg.txt ---> 2015_02993.xml
2015_02994.jpg.txt ---> 2015_02994.xml
2015_02995.jpg.txt ---> 2015_02995.xml
2015_02996.jpg.txt ---> 2015_02996.xml
2015_02997.jpg.txt ---> 2015_02997.xml
2015_02998.jpg.txt ---> 2015_02998.xml
2015_02999.jpg.txt ---> 2015_02999.xml
2015_03000.jpg.txt ---> 2015_03000.xml
2015_03001.jpg.txt ---> 2015_03001.xml
2015_03002.jpg.txt ---> 2015_03002.xml
2015_03003.jpg.txt ---> 2015_03003.xml
2015_03004.png.txt ---> 2015_03004.xml
2015_03005.png.txt ---> 2015_03005.xml
2015_03006.png.txt ---> 2015_03006.xml
2015_03007.png.txt ---> 2015_03007.xml
2015_03008.png.txt ---> 2015_03008.xml
2015_03009.png.txt ---> 2015_03009.xml
2015_03010.png.txt ---> 2015_03010.xml
2015_03011.png.txt ---> 2015_03011.xml
2015_03012.png.txt ---> 2015_03012.xml
2015_03013.png.txt ---> 2015_03013.xml
2015_03014.png.txt ---> 2015_03014.xml
2015_03015.png.txt ---> 2015_03015.xml
2015_03016.png.txt ---> 2

2015_05970.jpg.txt ---> 2015_05970.xml
2015_05971.jpg.txt ---> 2015_05971.xml
2015_05972.jpg.txt ---> 2015_05972.xml
2015_05973.jpg.txt ---> 2015_05973.xml
2015_05974.jpg.txt ---> 2015_05974.xml
2015_05975.jpg.txt ---> 2015_05975.xml
2015_05976.jpg.txt ---> 2015_05976.xml
2015_05977.jpg.txt ---> 2015_05977.xml
2015_05978.jpg.txt ---> 2015_05978.xml
2015_05979.jpg.txt ---> 2015_05979.xml
2015_05980.jpg.txt ---> 2015_05980.xml
2015_05981.jpg.txt ---> 2015_05981.xml
2015_05982.jpg.txt ---> 2015_05982.xml
2015_05983.jpg.txt ---> 2015_05983.xml
2015_05984.jpg.txt ---> 2015_05984.xml
2015_05985.jpg.txt ---> 2015_05985.xml
2015_05986.jpg.txt ---> 2015_05986.xml
2015_05987.jpg.txt ---> 2015_05987.xml
2015_05988.jpg.txt ---> 2015_05988.xml
2015_05989.jpg.txt ---> 2015_05989.xml
2015_05990.jpg.txt ---> 2015_05990.xml
2015_05991.jpg.txt ---> 2015_05991.xml
2015_05992.jpg.txt ---> 2015_05992.xml
2015_05993.jpg.txt ---> 2015_05993.xml
2015_05994.jpg.txt ---> 2015_05994.xml
2015_05995.jpg.txt ---> 2

2015_06274.jpg.txt ---> 2015_06274.xml
2015_06275.jpg.txt ---> 2015_06275.xml
2015_06276.jpg.txt ---> 2015_06276.xml
2015_06277.jpg.txt ---> 2015_06277.xml
2015_06278.jpg.txt ---> 2015_06278.xml
2015_06279.jpg.txt ---> 2015_06279.xml
2015_06280.jpg.txt ---> 2015_06280.xml
2015_06281.jpg.txt ---> 2015_06281.xml
2015_06282.jpg.txt ---> 2015_06282.xml
2015_06283.jpg.txt ---> 2015_06283.xml
2015_06284.jpg.txt ---> 2015_06284.xml
2015_06285.jpg.txt ---> 2015_06285.xml
2015_06286.jpg.txt ---> 2015_06286.xml
2015_06287.jpg.txt ---> 2015_06287.xml
2015_06288.jpg.txt ---> 2015_06288.xml
2015_06289.jpg.txt ---> 2015_06289.xml
2015_06290.jpg.txt ---> 2015_06290.xml
2015_06291.jpg.txt ---> 2015_06291.xml
2015_06292.jpg.txt ---> 2015_06292.xml
2015_06293.jpg.txt ---> 2015_06293.xml
2015_06294.jpg.txt ---> 2015_06294.xml
2015_06295.jpg.txt ---> 2015_06295.xml
2015_06296.jpg.txt ---> 2015_06296.xml
2015_06297.jpg.txt ---> 2015_06297.xml
2015_06298.jpg.txt ---> 2015_06298.xml
2015_06299.jpg.txt ---> 2

2015_06734.jpg.txt ---> 2015_06734.xml
2015_06735.jpg.txt ---> 2015_06735.xml
2015_06736.jpg.txt ---> 2015_06736.xml
2015_06737.jpg.txt ---> 2015_06737.xml
2015_06738.jpg.txt ---> 2015_06738.xml
2015_06739.jpg.txt ---> 2015_06739.xml
2015_06740.jpg.txt ---> 2015_06740.xml
2015_06741.jpg.txt ---> 2015_06741.xml
2015_06742.jpg.txt ---> 2015_06742.xml
2015_06743.jpg.txt ---> 2015_06743.xml
2015_06744.jpg.txt ---> 2015_06744.xml
2015_06745.jpg.txt ---> 2015_06745.xml
2015_06746.jpg.txt ---> 2015_06746.xml
2015_06747.jpg.txt ---> 2015_06747.xml
2015_06748.jpg.txt ---> 2015_06748.xml
2015_06749.jpg.txt ---> 2015_06749.xml
2015_06750.jpg.txt ---> 2015_06750.xml
2015_06751.jpg.txt ---> 2015_06751.xml
2015_06752.jpg.txt ---> 2015_06752.xml
2015_06753.jpg.txt ---> 2015_06753.xml
2015_06754.jpg.txt ---> 2015_06754.xml
2015_06755.jpg.txt ---> 2015_06755.xml
2015_06756.jpg.txt ---> 2015_06756.xml
2015_06757.jpg.txt ---> 2015_06757.xml
2015_06758.jpg.txt ---> 2015_06758.xml
2015_06759.jpg.txt ---> 2

Read the contents and write them out in XML format, with label changes etc.

In [31]:
# Count how often each label of interest occurs across the files in
# ../images/temp. Labels are lower-cased, two aliases are folded into their
# label-map names, and anything not present in label_map_dict is ignored.
label_aliases = {'motorbike': 'bike', 'people': 'person'}
labels = defaultdict(int)
for f in os.listdir('../images/temp'):
    with open('../images/temp/{}'.format(f), 'r') as file:
        # The first line of every file is skipped (presumably a header - TODO confirm).
        next(file, None)
        for line in file:
            fields = line.strip().lower().split()
            if not fields:
                # Blank lines previously raised IndexError on split()[0].
                continue
            name = label_aliases.get(fields[0], fields[0])
            if name not in label_map_dict:
                continue
            labels[name] += 1

In [37]:
# Display the per-label occurrence counts accumulated in `labels`.
labels

defaultdict(int, {'bus': 685, 'person': 4074, 'car': 2360, 'bike': 1000})