-
Notifications
You must be signed in to change notification settings - Fork 80
/
preprocess.py
108 lines (78 loc) · 2.74 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import pickle
import os
import numpy as np
import nibabel as nib
# Modality file suffixes, in the order the volumes are stacked on the last axis.
modalities = ('flair', 't1ce', 't1', 't2')

# Each dataset description carries:
#   root      - directory that holds the subject folders and the list file
#   flist     - name of the text file (inside root) listing one subject per line
#   has_label - whether a segmentation volume is expected for each subject

# Training split (with labels).
train_set = dict(
    root='path to training set',
    flist='all.txt',
    has_label=True,
)

# Validation split (no labels).
valid_set = dict(
    root='path to validation set',
    flist='valid.txt',
    has_label=False,
)

# Testing split (no labels).
test_set = dict(
    root='path to testing set',
    flist='test.txt',
    has_label=False,
)
def nib_load(file_name):
    """Load an image file with nibabel and return its voxel array.

    Args:
        file_name: Path of the image file to read.

    Returns:
        The image data as an array. No dtype is forced here; callers in
        this file cast the result with ``np.array(..., dtype=...)``.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
    """
    if not os.path.exists(file_name):
        # Fail right away and name the offending path, instead of printing
        # a generic message and letting the loader fail afterwards.
        raise FileNotFoundError(
            'Invalid file name, can not find the file: {}'.format(file_name))

    proxy = nib.load(file_name)
    # ``get_data()`` is deprecated and removed in nibabel 5.0; reading the
    # ``dataobj`` through numpy is the documented drop-in replacement.
    data = np.asanyarray(proxy.dataobj)
    # Drop anything the image object may have cached so that only ``data``
    # keeps the voxels alive.
    proxy.uncache()
    return data
def process_i16(path, has_label=True):
    """Save the original 3D MRI images of one subject with dtype=int16.

    No normalization is applied. The result is pickled to
    ``path + 'data_i16.pkl'``.

    Args:
        path: Subject file prefix; ``<modality>.nii.gz`` (and
            ``seg.nii.gz`` when labelled) are appended to it.
        has_label: When True the segmentation is loaded as uint8 and the
            pickle holds ``(images, label)``; when False no segmentation
            file is read and the pickle holds ``images`` only, matching
            what ``process_f32b0`` writes.
    """
    label = None
    if has_label:
        # Only touch seg.nii.gz when the split actually ships labels.
        label = np.array(nib_load(path + 'seg.nii.gz'), dtype='uint8', order='C')

    # Stack one volume per modality along a new trailing axis.
    images = np.stack([
        np.array(nib_load(path + modal + '.nii.gz'), dtype='int16', order='C')
        for modal in modalities], -1)

    output = path + 'data_i16.pkl'

    with open(output, 'wb') as f:
        print(output)
        if has_label:
            # Original author's note: e.g. (240,240,155,4) , (240,240,155)
            print(images.shape, type(images), label.shape, type(label))
            pickle.dump((images, label), f)
        else:
            print(images.shape, type(images))
            pickle.dump(images, f)
def process_f32b0(path, has_label=True):
    """Save one subject's volumes as float32 with per-modality z-scoring.

    Each modality is normalized to zero mean / unit standard deviation
    using only the foreground voxels; background voxels are left at their
    original values (zero for a zero background). The result is pickled to
    ``path + 'data_f32b0.pkl'``.

    Args:
        path: Subject file prefix; ``<modality>.nii.gz`` (and
            ``seg.nii.gz`` when labelled) are appended to it.
        has_label: When True the segmentation is loaded as uint8 and the
            pickle holds ``(images, label)``; otherwise it holds ``images``.
    """
    if has_label:
        label = np.array(nib_load(path + 'seg.nii.gz'), dtype='uint8', order='C')

    # Stack one volume per modality along a new trailing axis.
    images = np.stack([
        np.array(nib_load(path + modal + '.nii.gz'), dtype='float32', order='C')
        for modal in modalities], -1)

    output = path + 'data_f32b0.pkl'

    # Foreground = voxels whose intensities summed over all modalities are > 0.
    mask = images.sum(-1) > 0

    if mask.any():
        # Iterate over however many modalities were stacked rather than a
        # hard-coded 4, so the function follows ``modalities``.
        for k in range(images.shape[-1]):
            channel = images[..., k]      # view: in-place ops update ``images``
            foreground = channel[mask]    # copy of the pre-normalization values
            # NOTE(review): the original carried an unexplained "0.8885"
            # annotation at this point.
            mean, std = foreground.mean(), foreground.std()
            channel[mask] -= mean
            if std > 0:
                # A constant foreground has std == 0; dividing would turn
                # every foreground voxel into NaN, so leave it centered.
                channel[mask] /= std

    with open(output, 'wb') as f:
        print(output)

        if has_label:
            pickle.dump((images, label), f)
        else:
            pickle.dump(images, f)
def doit(dset):
    """Run ``process_f32b0`` on every subject listed for a dataset.

    Args:
        dset: Mapping with the keys ``'root'`` (dataset directory),
            ``'flist'`` (name of the subject list file inside ``root``, one
            subject path per line) and ``'has_label'`` (forwarded to
            ``process_f32b0``).
    """
    root, has_label = dset['root'], dset['has_label']
    file_list = os.path.join(root, dset['flist'])

    # Read the list inside a context manager so the handle is always closed.
    with open(file_list) as f:
        lines = f.read().splitlines()

    # Ignore blank / whitespace-only lines (e.g. a trailing empty line);
    # they would otherwise turn into a bogus "<root>/_" subject prefix.
    subjects = [line.strip() for line in lines if line.strip()]

    for sub in subjects:
        # Files inside a subject folder are prefixed with the folder name,
        # i.e. "<root>/<sub>/<name>_<modality>.nii.gz".
        name = sub.split('/')[-1]
        process_f32b0(os.path.join(root, sub, name + '_'), has_label)
if __name__ == '__main__':
    # Preprocess the training split, then the validation split.
    for dataset in (train_set, valid_set):
        doit(dataset)
    # The testing split is left disabled:
    # doit(test_set)