/
preprocess.py
276 lines (230 loc) · 11.1 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
###################################################
#
# Video and ROI preprocessing functions
#
# Authors: Noah Apthorpe and Alex Riordan
#
# Description: Video contrast improvement
# and ROI centroid conversion
#
# Usage: Call main() function with path to
# configuration file to downsample,
# time equalize, improve contrast,
# and find centroids.
#
###################################################
import sys
import shutil
import numpy as np
import skimage.io
import os.path
from load import load_data, load_stack
import tifffile
from PIL import Image
import ConfigParser
from scipy.ndimage import zoom
def improve_contrast(stks, upper_contrast, lower_contrast):
    '''Clip each stack to the given intensity percentiles and rescale to [0, 1].

    upper_contrast/lower_contrast are percentiles (0-100) computed over all
    pixels of a stack; values outside them are clipped before normalization.
    Returns a new list; the input stacks are not modified.
    '''
    adjusted = []
    for stk in stks:
        flat = stk.flatten()
        low_cutoff = np.percentile(flat, lower_contrast)
        high_cutoff = np.percentile(flat, upper_contrast)
        clipped = np.clip(stk, low_cutoff, high_cutoff)
        # mean-center, then min-max normalize into [0, 1]
        centered = clipped - clipped.mean()
        lo = np.min(centered)
        span = np.max(centered) - lo
        adjusted.append(np.divide(centered - lo, span))
    return adjusted
def get_centroids(input_rois, radius, img_width, img_height):
    '''Replace each ImageJ ROI mask with a filled circle at its centroid.

    For every ROI the centroid of its nonzero pixels is computed and a disk of
    the given radius is drawn there, which gives the convnet a cleaner
    boundary target.  Returns a new list of float32 arrays, one per input.
    '''
    # coordinate grids are the same for every ROI, so build them once
    grid_r, grid_c = np.ogrid[0:img_width, 0:img_height]
    result = []
    for rois in input_rois:
        centroid_rois = np.zeros(rois.shape, dtype='float32')
        for k in range(rois.shape[0]):
            rows, cols = np.where(rois[k] != 0)
            center_r = int(rows.mean())
            center_c = int(cols.mean())
            disk = (grid_r - center_r) ** 2 + (grid_c - center_c) ** 2 <= radius ** 2
            centroid_rois[k, disk] = 1
        result.append(centroid_rois)
    return result
def save_image_tifs(stks, file_names, directory):
    '''Write each image stack to <directory>/<file_name>.tif'''
    out_dir = add_pathsep(directory)
    for i, stk in enumerate(stks):
        out_path = out_dir + file_names[i] + ".tif"
        tifffile.imsave(out_path, stk.squeeze())
def save_roi_tifs(rois, file_names, directory):
    '''Write each ROI set to <directory>/<file_name>_ROI.tif as one flat mask.

    The per-ROI axis is collapsed with a max projection so all ROIs appear in
    a single 2D image.
    '''
    out_dir = add_pathsep(directory)
    for i, roi_stack in enumerate(rois):
        flat_mask = roi_stack.max(axis=0)
        tifffile.imsave(out_dir + file_names[i] + "_ROI.tif", flat_mask)
def add_pathsep(directory_name):
    '''Return directory_name guaranteed to end with the OS path separator.

    Uses str.endswith instead of indexing the last character, so the empty
    string no longer raises IndexError (it becomes just os.sep).
    '''
    if directory_name.endswith(os.sep):
        return directory_name
    return directory_name + os.sep
def is_labeled(dir_path):
    '''Return True if dir_path's final directory component contains 'labeled'.

    Expects a path ending in a separator (or a file path); the component
    checked is the basename of the path's dirname.
    '''
    final_component = os.path.basename(os.path.dirname(dir_path))
    return 'labeled' in final_component
def downsample_helper(files_list, img_width, img_height, mean_proj_bins, max_proj_bins):
    '''Mean- then max-project the frames of the files in files_list into one
    downsampled (time, img_width, img_height) float32 array.

    Frames from all files are concatenated in order; mean projections are
    taken over groups of mean_proj_bins frames, then max projections over
    groups of max_proj_bins mean frames.
    '''
    mean_stack = np.zeros((0, img_width, img_height), dtype=np.float32)
    full_stack = np.zeros((0, img_width, img_height), dtype=np.float32)
    # mean projections
    for f in files_list:
        # accumulate this file's frames onto any remainder carried over
        full_stack = np.vstack((full_stack, load_stack(f)))
        for i in range(0, full_stack.shape[0], mean_proj_bins):
            # Last file and less than one extra full bin would remain after
            # this bin: fold ALL trailing frames into one final mean frame.
            if full_stack.shape[0] - (i + mean_proj_bins) < mean_proj_bins and f == files_list[-1]:
                m = np.mean(full_stack[i:, :, :], axis=0)
                m = np.expand_dims(m, axis=0)
                mean_stack = np.vstack((mean_stack, m))
                break
            # Fewer than mean_proj_bins frames left (only possible on the
            # final loop iteration): keep the remainder in full_stack so it
            # is combined with the next file's frames.
            # NOTE(review): this reassigns full_stack mid-iteration; it works
            # because this branch can only trigger on the last value of i —
            # confirm before restructuring.
            elif full_stack.shape[0] - i < mean_proj_bins:
                full_stack = full_stack[i:, :, :]
            else:
                # full bin: one mean frame per mean_proj_bins raw frames
                m = np.mean(full_stack[i:i+mean_proj_bins, :, :], axis=0)
                m = np.expand_dims(m, axis=0)
                mean_stack = np.vstack((mean_stack, m))
    # max projections
    max_stack = np.zeros((0, img_width, img_height), dtype=np.float32)
    for i in range(0, mean_stack.shape[0], max_proj_bins):
        # less than one extra full bin would remain: fold all trailing mean
        # frames into one final max frame
        if mean_stack.shape[0] - (i + max_proj_bins) < max_proj_bins:
            m = np.max(mean_stack[i:, :, :], axis=0)
            m = np.expand_dims(m, axis=0)
            max_stack = np.vstack((max_stack, m))
            break
        else:
            m = np.max(mean_stack[i:i+max_proj_bins, :, :], axis=0)
            m = np.expand_dims(m, axis=0)
            max_stack = np.vstack((max_stack, m))
    return max_stack
def downsample(src_dir, dst_dir, img_width, img_height, mean_proj_bins, max_proj_bins):
    '''Downsample every image stack in src_dir into dst_dir.

    ROI .zip files are copied through unchanged (when src and dst differ);
    .tif/.tiff videos are downsampled individually; subdirectories are treated
    as one video split into time chunks and produce a single <name>.tif.
    '''
    do_copy = src_dir != dst_dir
    tif_exts = ('.tif', '.tiff')
    for entry in os.listdir(src_dir):
        ext = os.path.splitext(entry)[1].lower()
        # entries without an extension (e.g. hidden files) are skipped
        if ext.strip() == '':
            continue
        # ROI archives pass through without modification
        if ext == '.zip' and do_copy:
            shutil.copy(src_dir + entry, dst_dir)
        # a single video file
        elif ext in tif_exts:
            projected = downsample_helper([src_dir + entry], img_width, img_height,
                                          mean_proj_bins, max_proj_bins)
            tifffile.imsave(dst_dir + entry, projected.squeeze())
        # a directory holding one video split into smaller time chunks
        elif os.path.isdir(src_dir + entry):
            chunk_dir = src_dir + add_pathsep(entry)
            chunk_paths = [chunk_dir + v for v in os.listdir(src_dir + entry)
                           if os.path.splitext(v)[1].lower() in tif_exts]
            projected = downsample_helper(chunk_paths, img_width, img_height,
                                          mean_proj_bins, max_proj_bins)
            tifffile.imsave(dst_dir + entry + '.tif', projected.squeeze())
def time_equalize(src_dir, dst_dir, img_width, img_height, new_time_depth):
    '''Resample every video in src_dir to new_time_depth frames; write to dst_dir.

    ROI .zip files are copied through unchanged when src and dst differ.
    '''
    do_copy = src_dir != dst_dir
    for entry in os.listdir(src_dir):
        ext = os.path.splitext(entry)[1].lower()
        # ROI archives pass through without modification
        if ext == '.zip' and do_copy:
            shutil.copy(src_dir + entry, dst_dir)
        # interpolate each video along the time axis only
        elif ext in ('.tif', '.tiff'):
            stack = load_stack(src_dir + entry)
            time_scale = float(new_time_depth) / stack.shape[0]
            equalized = zoom(stack, (time_scale, 1, 1))
            tifffile.imsave(dst_dir + entry, equalized.squeeze())
def remove_ds_store(file_list):
    '''Remove the macOS '.DS_Store' entry from file_list in place, if present.'''
    if '.DS_Store' in file_list:
        file_list.remove('.DS_Store')
def get_labeled_split(already_split_dir):
    '''Build a dict describing the train/test/val split in already_split_dir.

    Keys are the filenames found in the 'training', 'test', and 'validation'
    subdirectories; each value is the name of the subdirectory containing
    that file.
    '''
    split_root = add_pathsep(already_split_dir)
    split_dict = {}
    for subdir in ['training', 'test', 'validation']:
        entries = os.listdir(split_root + subdir)
        remove_ds_store(entries)
        for fname in entries:
            split_dict[fname] = subdir
    return split_dict
def split_labeled_directory(split_dict, dir_to_split, is_ROI_tif, is_post_process):
    '''Move files in dir_to_split into train/test/validation subdirectories.

    split_dict maps original filenames to one of 'training'/'test'/
    'validation' (see get_labeled_split).  A file is moved when its basename
    matches a split_dict entry directly or with a '_ROI', '_output',
    '_output_0', or '_output_1' suffix, so ROI tifs and network outputs
    travel with their videos.  is_ROI_tif and is_post_process are kept for
    interface compatibility and are not used here.

    Fixes: removed leftover debug print statements (which also broke
    Python 3 parsing), replaced Python-2-only itervalues() with values(),
    and hoisted the loop-invariant dict_base computation out of the inner
    directory scan.
    '''
    dir_to_split = add_pathsep(dir_to_split)
    # create each destination subdirectory once, up front
    for subdir in set(split_dict.values()):
        subdir_path = dir_to_split + subdir
        if not os.path.exists(subdir_path):
            os.makedirs(subdir_path)
    for fname, subdir in split_dict.items():
        dict_base = os.path.splitext(fname)[0]
        # basenames derived from the labeled file that must move with it
        matches = (dict_base, dict_base + '_ROI', dict_base + '_output',
                   dict_base + '_output_0', dict_base + '_output_1')
        for f in os.listdir(dir_to_split):
            if not os.path.isfile(dir_to_split + f):
                continue
            if os.path.splitext(f)[0] in matches:
                os.rename(dir_to_split + f, dir_to_split + subdir + os.sep + f)
def put_labeled_at_end_of_path_if_not_there(fpath):
    '''Return fpath guaranteed to end in a 'labeled' directory component.

    Fixes: uses os.sep instead of a hard-coded '/' so the path is built
    correctly on Windows, consistent with the rest of this module.
    '''
    # drop a trailing separator so is_labeled sees the final component
    if fpath.endswith(os.sep):
        fpath = fpath[:-1]
    if not is_labeled(fpath):
        return fpath + os.sep + 'labeled'
    return fpath
def main(main_config_fpath='../data/example/main_config.cfg'):
    '''Run the full preprocessing pipeline driven by main_config.cfg:
    optional downsampling, time equalization, contrast improvement, and
    (for labeled data) ROI centroid conversion.
    '''
    cfg_parser = ConfigParser.SafeConfigParser()
    cfg_parser.readfp(open(main_config_fpath, 'r'))
    # get directory paths
    data_dir = add_pathsep(cfg_parser.get('general', 'data_dir'))
    # intermediate and final outputs go to sibling directories of data_dir
    downsample_dir = data_dir[0:-1] + "_downsampled" + os.sep
    preprocess_dir = data_dir[0:-1] + "_preprocessed" + os.sep
    ttv_list = ['training' + os.sep, 'validation' + os.sep, 'test' + os.sep]
    # ensure directories exist
    if not os.path.isdir(data_dir):
        sys.exit("Specified data directory " + data_dir + " does not exist.")
    # labeled data keeps its train/validation/test structure; otherwise the
    # whole directory is processed flat (the single '' entry)
    for ttv in ttv_list if is_labeled(data_dir) else ['']:
        if not os.path.isdir(downsample_dir + ttv):
            os.makedirs(downsample_dir + ttv)
        if not os.path.isdir(preprocess_dir + ttv):
            os.makedirs(preprocess_dir + ttv)
    # get remaining preprocessing parameters
    img_width = cfg_parser.getint('general', 'img_width')
    img_height = cfg_parser.getint('general', 'img_height')
    mean_proj_bins = cfg_parser.getint('preprocessing', 'mean_proj_bin')
    max_proj_bins = cfg_parser.getint('preprocessing', 'max_proj_bin')
    new_time_depth = cfg_parser.getint('preprocessing', 'time_equalize')
    upper_contrast = cfg_parser.getfloat('preprocessing', 'upper_contrast')
    lower_contrast = cfg_parser.getfloat('preprocessing', 'lower_contrast')
    centroid_radius = cfg_parser.getint('preprocessing', 'centroid_radius')
    # run preprocessing
    for ttv in ttv_list if is_labeled(data_dir) else ['']:
        # downsampling is optional; time equalization always runs, reading
        # from data_dir directly when downsampling is skipped
        if cfg_parser.getboolean('general', 'do_downsample'):
            downsample(data_dir + ttv, downsample_dir + ttv,
                       img_width, img_height, mean_proj_bins, max_proj_bins)
            time_equalize(downsample_dir + ttv, downsample_dir + ttv,
                          img_width, img_height, new_time_depth)
        else:
            time_equalize(data_dir + ttv, downsample_dir + ttv,
                          img_width, img_height, new_time_depth)
        # labeled data also gets ROI centroid conversion and ROI tif output
        if is_labeled(data_dir):
            stks, rois, file_names = load_data(downsample_dir + ttv, img_width, img_height)
            stks = improve_contrast(stks, upper_contrast, lower_contrast)
            rois = get_centroids(rois, centroid_radius, img_width, img_height)
            save_image_tifs(stks, file_names, preprocess_dir + ttv)
            save_roi_tifs(rois, file_names, preprocess_dir + ttv)
        else:
            stks, file_names = load_data(downsample_dir + ttv, img_width, img_height, no_rois=True)
            stks = improve_contrast(stks, upper_contrast, lower_contrast)
            save_image_tifs(stks, file_names, preprocess_dir + ttv)
# Run the pipeline with the default example configuration when executed
# as a script.
if __name__ == "__main__":
    main()