This repository has been archived by the owner on Feb 17, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
create-dataset.py
99 lines (72 loc) · 2.89 KB
/
create-dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import xml.etree.ElementTree as et
import os
import csv
import shutil
import random
from pipeline import ANNOTATIONS_DIRECTORY, DATASET_DIR, IMAGES_DIRECTORY, TESTING_DIRECTORY, TRAINING_DIRECTORY
# Prepare the output locations, then randomly split the images (and their
# annotation files) 80/20 into the training and testing directories.

# Manifest files written by the CSV step at the end of this script.
train_csv_path = "./" + DATASET_DIR + "/train_labels.csv"
test_csv_path = "./" + DATASET_DIR + "/test_labels.csv"

# Remove each stale manifest on its own. Requiring *both* to exist before
# deleting would leave a lone leftover file in place to be added to later.
for stale_csv in (train_csv_path, test_csv_path):
    if os.path.isfile(stale_csv):
        os.remove(stale_csv)

# Create each split directory on its own. Requiring *both* to be missing
# would leave the script to fail in os.listdir when only one of them exists.
for split_dir in (TRAINING_DIRECTORY, TESTING_DIRECTORY):
    os.makedirs(split_dir, exist_ok=True)

# this segment of the script will randomize the images
image_paths = os.listdir(IMAGES_DIRECTORY)
random.shuffle(image_paths)

# Header row for the CSV manifests.
# NOTE(review): the order must match the tuple returned by getData.
row_labels = (
    "filename", "height", "width",
    "class", "xmin", "ymin", "xmax", "ymax"
)

# NOTE(review): these destinations are presumably the same directories as
# TRAINING_DIRECTORY / TESTING_DIRECTORY (which are the ones created above and
# listed below) - confirm against the pipeline module.
train_destination = DATASET_DIR + "/train"
test_destination = DATASET_DIR + "/test"

# The first 80% of the shuffled list goes to training, the rest to testing.
split_index = int(len(image_paths) * 0.8)

# NOTE(review): files copied by earlier runs are not cleared, so repeated runs
# with a different shuffle may leave the same image in both splits - confirm
# whether the split directories should be emptied first.
for i, image_path in enumerate(image_paths):
    print(image_path)
    destination = train_destination if i < split_index else test_destination

    # Swap only the extension; str.replace("jpg", "xml") would also rewrite a
    # "jpg" that happens to appear elsewhere in the file name.
    annotation_name = os.path.splitext(image_path)[0] + ".xml"

    # os.path.join picks the right separator instead of a hard-coded "\\".
    shutil.copy(os.path.join(IMAGES_DIRECTORY, image_path), destination)
    shutil.copy(os.path.join(ANNOTATIONS_DIRECTORY, annotation_name), destination)

# List the split directories only AFTER the copy, so that the manifest step
# sees the files that were just placed there.
files_train = os.listdir(TRAINING_DIRECTORY)
files_test = os.listdir(TESTING_DIRECTORY)

# Track whether each manifest has received its header row yet.
test_labels_written = False
train_labels_written = False
# start of the manifest creation script
def getData(file: str, path: str):
    """Read one annotation XML file and return its manifest row.

    Values are taken from fixed child positions of the XML root, so the
    document must have at least seven top-level children laid out as the
    indices below expect.

    Args:
        file: Name of the annotation file, used exactly as given.
        path: Directory that contains ``file``.

    Returns:
        A tuple of eight strings:
        ``(name, height, width, label, xmin, ymin, xmax, ymax)``.

    Raises:
        IndexError: If the document has fewer children than the fixed
            positions require.
    """
    # Use the file name unchanged: str.title() would alter its case
    # ("img_one.xml" -> "Img_One.Xml") and miss the file on case-sensitive
    # filesystems. os.path.join replaces the hard-coded "\\" separator.
    tree = et.parse(os.path.join(path, file))
    root = tree.getroot()

    size = root[4]
    annotated_object = root[6]
    box = annotated_object[4]

    name = root[1].text
    # NOTE(review): Pascal VOC files usually list <width> before <height>
    # inside <size>; if these annotations follow that layout, the two values
    # below are swapped - confirm against a sample file.
    height = size[0].text
    width = size[1].text
    label = annotated_object[0].text
    xmin = box[0].text
    ymin = box[1].text
    xmax = box[2].text
    ymax = box[3].text
    return name, height, width, label, xmin, ymin, xmax, ymax
def _write_manifest(csv_path, xml_dir, filenames):
    """Write one CSV manifest for the annotation files of a split directory.

    Args:
        csv_path: Destination CSV file. An existing file is overwritten, so
            the manifest always holds exactly one header and this run's rows.
        xml_dir: Directory that holds the annotation files.
        filenames: Directory entries to consider; only names ending in
            "xml" are read.

    Returns:
        True if a manifest was written, False if there were no annotation
        files (in which case no file is created).
    """
    rows = [
        getData(name, xml_dir)
        for name in filenames
        if name.endswith("xml")
    ]
    if not rows:
        return False

    # Open the file once and let the context manager close it; reopening it
    # for every row would leave one unclosed handle per annotation file.
    with open(csv_path, newline="", mode="w") as manifest:
        writer = csv.writer(manifest, "excel")
        writer.writerow(row_labels)
        writer.writerows(rows)
    return True


train_labels_written = _write_manifest(
    "./" + DATASET_DIR + "/train_labels.csv", TRAINING_DIRECTORY, files_train
)
test_labels_written = _write_manifest(
    "./" + DATASET_DIR + "/test_labels.csv", TESTING_DIRECTORY, files_test
)