-
Notifications
You must be signed in to change notification settings - Fork 419
/
massachusetts_road_segm.py
185 lines (164 loc) · 9.41 KB
/
massachusetts_road_segm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
__author__ = 'Fabian Isensee'
import numpy as np
import lasagne
import os
import sys
import fnmatch
import matplotlib.pyplot as plt
sys.path.append("../../modelzoo/")
from Unet import *
import theano.tensor as T
import theano
import cPickle
from time import sleep
from generators import batch_generator, threaded_generator, random_crop_generator, center_crop_generator
from massachusetts_road_dataset_utils import prepare_dataset
from sklearn.metrics import roc_auc_score
def plot_some_results(pred_fn, test_generator, n_images=10):
    """Save side-by-side figures of input patch, ground truth and prediction.

    Batches are drawn from ``test_generator`` and passed to ``pred_fn`` until
    ``n_images`` figures have been written to the current working directory
    as ``road_segmentation_result_XXX.png`` (XXX is a zero-padded counter
    starting at 000). The function returns as soon as the requested number of
    figures has been saved, so it also terminates on generators that never
    run out of batches.

    Args:
        pred_fn: callable that takes a data batch and returns one prediction
            per sample; each prediction is displayed with ``plt.imshow``.
        test_generator: iterable yielding ``(data, seg)`` batch pairs. Every
            data sample is transposed with ``(1, 2, 0)`` before display
            (presumably channels-first images -- confirm against the
            generator), and ``seg_sample[0]`` is shown as ground truth.
        n_images: maximum number of figures to save. Nothing is drawn (and
            no batch is consumed) if this is not positive.
    """
    if n_images <= 0:
        return

    fig_ctr = 0
    for data, seg in test_generator:
        res = pred_fn(data)
        for d, s, r in zip(data, seg, res):
            fig = plt.figure(figsize=(12, 6))

            plt.subplot(1, 3, 1)
            plt.imshow(d.transpose(1, 2, 0))
            plt.title("input patch")

            plt.subplot(1, 3, 2)
            plt.imshow(s[0])
            plt.title("ground truth")

            plt.subplot(1, 3, 3)
            plt.imshow(r)
            plt.title("segmentation")

            plt.savefig("road_segmentation_result_%03.0f.png" % fig_ctr)
            # close the figure explicitly so memory does not pile up
            plt.close(fig)

            fig_ctr += 1
            # A plain `break` would only leave the per-sample loop and the
            # outer loop would keep pulling batches; return instead so we
            # stop after exactly n_images figures.
            if fig_ctr >= n_images:
                return
def main():
    """Train a U-Net for road segmentation and plot a few test predictions.

    The function
      1. prepares the dataset via ``prepare_dataset()`` if
         ``target_test.npy`` is not present in the working directory,
      2. memory-maps the train/valid/test arrays from the ``*.npy`` files,
      3. builds the network with ``build_UNet`` and compiles Theano functions
         for training, validation, segmentation and class probabilities,
      4. trains for ``N_EPOCHS`` epochs of ``N_BATCHES_PER_EPOCH`` batches,
         printing train/validation metrics, decaying the learning rate by a
         factor of 0.8 and pickling the parameters after every epoch
         (``UNet_params_epXXX.pkl``),
      5. writes example segmentations of test patches as PNG files via
         ``plot_some_results``.

    Takes no arguments and returns nothing; all results are files written to
    the current working directory and text printed to stdout.
    """
    # Only download the dataset once. This takes a while.
    # Heuristic to make sure the dataset is only downloaded and prepared once.
    if not os.path.isfile("target_test.npy"):
        prepare_dataset()

    # Hyper parameters. Reduce BATCH_SIZE if you run out of GPU memory.
    BATCH_SIZE = 8  # this works if you have ~ 8GB VRAM. Use smaller BATCH_SIZE for other GPUs
    N_EPOCHS = 50
    N_BATCHES_PER_EPOCH = 100
    PATCH_SIZE = 512

    # Load the prepared data. They have been converted to np arrays because
    # they are much faster to load than single image files.
    # With mmap_mode='r' the arrays are not loaded into memory entirely; only
    # the bits that are currently needed are read from disk.
    # Set mmap_mode to None if you want to load the data into RAM.
    mmap_mode = 'r'
    data_train = np.load("data_train.npy", mmap_mode=mmap_mode)
    target_train = np.load("target_train.npy", mmap_mode=mmap_mode)
    data_valid = np.load("data_valid.npy", mmap_mode=mmap_mode)
    target_valid = np.load("target_valid.npy", mmap_mode=mmap_mode)
    data_test = np.load("data_test.npy", mmap_mode=mmap_mode)
    target_test = np.load("target_test.npy", mmap_mode=mmap_mode)

    # We are using pad='same' for simplicity (otherwise we would have to crop
    # our ground truth). Keep in mind that this may not be ideal.
    net = build_UNet(n_input_channels=3, BATCH_SIZE=None, num_output_classes=2, pad='same',
                     nonlinearity=lasagne.nonlinearities.rectify, input_dim=(PATCH_SIZE, PATCH_SIZE),
                     base_n_filters=16, do_dropout=False)
    output_layer_for_loss = net["output_flattened"]

    # This is np.sum(target_train == 0) and np.sum(target_train == 1).
    # No need to compute this every time.
    class_frequencies = np.array([2374093357., 118906643.])
    # We reweight the loss to put more focus on road pixels (because of class
    # imbalance). This is a simple approach and could be improved if you also
    # have a class imbalance in your experiments.
    # Indexing with [1, 0] swaps the two frequencies so that the rarer class
    # gets the larger weight. We take **0.25 because we want the net to focus
    # more on the road pixels but not too much (otherwise it would not be
    # penalized enough for misclassifying terrain pixels, which results in
    # too many false positives).
    class_weights = (class_frequencies[[1, 0]]) ** 0.25
    class_weights = class_weights / np.sum(class_weights) * 2.
    class_weights = class_weights.astype(np.float32)

    # If you wish to load pretrained weights you can uncomment this code.
    # val accuracy:  0.966384  val loss:  0.0947428  val AUC score:  0.980004909707
    # You can also change the lower part of this code to load your own
    # pretrained params (pickles must be opened in binary mode).
    # if not os.path.isfile('UNet_params_pretrained.pkl'):
    #     import urllib
    #     import zipfile
    #     urllib.urlretrieve("https://s3.amazonaws.com/lasagne/recipes/pretrained/UNet_mass_road_segm_params.zip", 'pretrained_weights.zip')
    #     zip_ref = zipfile.ZipFile('pretrained_weights.zip', 'r')
    #     zip_ref.extractall("./")
    #     zip_ref.close()
    # with open("UNet_params_pretrained.pkl", 'rb') as f:
    #     params = cPickle.load(f)
    #     lasagne.layers.set_all_param_values(output_layer_for_loss, params)

    # Symbolic inputs: image batch, flattened integer labels and the
    # per-element loss weights.
    x_sym = T.tensor4()
    seg_sym = T.ivector()
    w_sym = T.vector()

    # add some weight decay
    l2_loss = lasagne.regularization.regularize_network_params(output_layer_for_loss, lasagne.regularization.l2) * 1e-4

    # the distinction between prediction_train and test is important only if we enable dropout
    prediction_train = lasagne.layers.get_output(output_layer_for_loss, x_sym, deterministic=False,
                                                 batch_norm_update_averages=False, batch_norm_use_averages=False)
    # We could use a binary loss but categorical crossentropy is used so that
    # less code has to be changed if your application has more than two classes.
    loss = lasagne.objectives.categorical_crossentropy(prediction_train, seg_sym)
    # We multiply our loss by a weight map. In this example the weight map
    # simply increases the loss for road pixels and decreases the loss for
    # other pixels, so that the network puts more focus on getting the roads
    # right.
    loss *= w_sym
    loss = loss.mean()
    loss += l2_loss
    acc_train = T.mean(T.eq(T.argmax(prediction_train, axis=1), seg_sym), dtype=theano.config.floatX)

    prediction_test = lasagne.layers.get_output(output_layer_for_loss, x_sym, deterministic=True,
                                                batch_norm_update_averages=False, batch_norm_use_averages=False)
    loss_val = lasagne.objectives.categorical_crossentropy(prediction_test, seg_sym)
    loss_val *= w_sym
    loss_val = loss_val.mean()
    loss_val += l2_loss
    acc = T.mean(T.eq(T.argmax(prediction_test, axis=1), seg_sym), dtype=theano.config.floatX)

    # learning rate has to be a shared variable because we decrease it with every epoch
    params = lasagne.layers.get_all_params(output_layer_for_loss, trainable=True)
    learning_rate = theano.shared(np.float32(0.001))
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)

    # Convenience function to get the segmentation. The same batch-norm flags
    # as for prediction_test are passed: the running averages are never
    # updated during training (batch_norm_update_averages=False above), so if
    # the network contains batch normalization layers, relying on those
    # averages here would make the plotted segmentations disagree with the
    # validated predictions. Layers that do not know these flags ignore them.
    seg_output = lasagne.layers.get_output(net["output_segmentation"], x_sym, deterministic=True,
                                           batch_norm_update_averages=False, batch_norm_use_averages=False)
    seg_output = seg_output.argmax(1)

    train_fn = theano.function([x_sym, seg_sym, w_sym], [loss, acc_train], updates=updates)
    val_fn = theano.function([x_sym, seg_sym, w_sym], [loss_val, acc])
    get_segmentation = theano.function([x_sym], seg_output)
    # we need this for calculating the AUC score
    get_class_probas = theano.function([x_sym], prediction_test)

    # Some data augmentation. If you want better results you should invest
    # more effort here. Rotations and deformations were left out for the sake
    # of speed and simplicity.
    train_generator = random_crop_generator(batch_generator(data_train, target_train, BATCH_SIZE, shuffle=True), PATCH_SIZE)
    train_generator = threaded_generator(train_generator, num_cached=10)

    # do the actual training
    for epoch in range(N_EPOCHS):
        print(epoch)
        losses_train = []
        accuracies_train = []
        for n_batches, (data, target) in enumerate(train_generator, 1):
            # The flattened network output has one row of class scores per
            # label (presumably per pixel -- see build_UNet). We therefore
            # need to flatten the segmentation so that we can match it with
            # the prediction via the crossentropy loss function.
            target_flat = target.ravel()
            batch_loss, batch_acc = train_fn(data.astype(np.float32), target_flat, class_weights[target_flat])
            losses_train.append(batch_loss)
            accuracies_train.append(batch_acc)
            # stop after exactly N_BATCHES_PER_EPOCH batches
            if n_batches >= N_BATCHES_PER_EPOCH:
                break
        print("epoch:  %s \ntrain accuracy:  %s  train loss:  %s"
              % (epoch, np.mean(accuracies_train), np.mean(losses_train)))

        losses_val = []
        accuracies_val = []
        auc_val = []
        # There is no need for data augmentation on the validation set.
        # However we need patches of the same size, which is why we are using
        # the center crop generator.
        # Since the validation generator does not loop around we need to
        # reinstantiate it for every epoch.
        validation_generator = center_crop_generator(batch_generator(data_valid, target_valid, BATCH_SIZE, shuffle=False), PATCH_SIZE)
        validation_generator = threaded_generator(validation_generator, num_cached=10)
        for data, target in validation_generator:
            target_flat = target.ravel()
            batch_loss, batch_acc = val_fn(data.astype(np.float32), target_flat, class_weights[target_flat])
            losses_val.append(batch_loss)
            accuracies_val.append(batch_acc)
            # roc_auc_score raises a ValueError if only one class is present
            # in the labels; skip such batches instead of aborting training.
            if target_flat.min() != target_flat.max():
                auc_val.append(roc_auc_score(target_flat, get_class_probas(data)[:, 1]))
        mean_auc = np.mean(auc_val) if auc_val else float("nan")
        print("val accuracy:  %s  val loss:  %s  val AUC score:  %s"
              % (np.mean(accuracies_val), np.mean(losses_val), mean_auc))

        # Decay the learning rate. This must change the value stored in the
        # shared variable: `learning_rate *= 0.8` would merely rebind the
        # Python name to a new symbolic expression and leave the value used
        # by the compiled train_fn untouched.
        learning_rate.set_value(np.float32(learning_rate.get_value() * 0.8))

        # save trained weights after each epoch (pickles are binary data)
        with open("UNet_params_ep%03.0f.pkl" % epoch, 'wb') as f:
            cPickle.dump(lasagne.layers.get_all_param_values(output_layer_for_loss), f)

    # create some png files showing (raw image, ground truth, prediction). Of course we use the test set here ;-)
    test_gen = random_crop_generator(batch_generator(data_test, target_test, BATCH_SIZE), PATCH_SIZE)
    plot_some_results(get_segmentation, test_gen, 15)
# Run the training script only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()