/
saveHDF5.py
90 lines (67 loc) · 3.36 KB
/
saveHDF5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import numpy as np
import cv2
import math
import h5py
import os
import argparse
parser = argparse.ArgumentParser(description="Data initialization -- write the train/val/test data into a hdf5 file.")
parser.add_argument("image_dir", metavar="IN_DIR", type=str, help="Path to original images")
parser.add_argument("label_dir", metavar="LABEL_DIR", type=str, help="Path to annotations")
parser.add_argument("hdf5_dir", metavar="OUT_DIR", type=str, help="Path to output folder")
parser.add_argument("hdf5_name", metavar="OUT_NAME", type=str, help="Name of the hdf5 file")
class SaveHDF5(object):
"""
Save the image and labels in hdf5
The file has 7 data groups:
"im": image data
"label": label image data
"im_name": name of image file (e.g. 2016_1204_08_30)
"set_index": indicates which set the image file belongs to
(0, 1, 2 represents training, validation, testing respectively)
tuple of integers giving the index of the image file in the set construction ([set, i_period, i_file])
"class_weighting": the weight of the classes, calculated from the training set
"mean": mean patch value of each data set
"im", "label", "im_name" are defined in writeHDF5, and stays the same with the actual data
"set", "index", "class_weighting", "mean" changes with the data_loader setting
"set" and "index" are defined in data_loader
"class_weighting", "mean" are calculated in dataGenerator
"""
def __init__(self, in_dir, label_dir, out_dir, out_name):
im_list = os.listdir(label_dir)
im0 = cv2.imread(in_dir+im_list[0]) # read an image to get the image dimension
(self.r, self.c, _) = im0.shape
# self.r = 324
# self.c = 1209
num_im = len(im_list)
# HDF5 file settings
hdf5_path = out_dir+out_name
self.hdf5_file = h5py.File(hdf5_path, mode='w')
self.hdf5_file.create_dataset("im", (num_im, self.r, self.c, 3), np.uint8)
self.hdf5_file.create_dataset("label", (num_im, self.r, self.c), np.uint8)
self.hdf5_file.create_dataset("im_name", (num_im,), "S15")
self.hdf5_file.create_dataset("set_index", (num_im, 3), np.int16)
self.hdf5_file.create_dataset("im_class", (num_im,), np.int)
self.hdf5_file.create_dataset("class_weight", (4,), np.float16)
self.in_dir = in_dir
self.im_list = im_list
self.label_dir = label_dir
def writeHDF5(self):
"""
write data in the hdf5 file (data group "im", "label", "im_name")
"""
class_values = [127, 191, 255, 64]
for i in range(len(self.im_list)):
im = cv2.imread(self.in_dir + self.im_list[i])[0:self.r, 0:self.c, :]
im_label = cv2.imread(self.label_dir + self.im_list[i])[0:self.r, 0:self.c, 0]
self.hdf5_file["im"][i, ...] = im[None]
self.hdf5_file["label"][i, ...] = im_label[None]
unique, counts = np.unique(im_label, return_counts=True)
self.hdf5_file["im_class"][i] = np.where(class_values == unique[np.argmax(counts)])[0][0]
self.hdf5_file["im_name"][i] = np.string_(self.im_list[i][0:15])
self.hdf5_file.close()
def main():
args = parser.parse_args()
hdf5_file = SaveHDF5(args.image_dir, args.label_dir, args.hdf5_dir, args.hdf5_name)
hdf5_file.writeHDF5()
if __name__=='__main__':
main()