In [1]:
import os
import tensorflow as tf
import dask
import dask.array as da
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# Lazily open the four pre-split .npy stacks as dask arrays.
# Order matters: it must match the unpacking targets on the next statement.
stack_dirs = [
    '/home/skyolia/JupyterProjects/segmentation/TGS Salt/dataset/train_x',
    '/home/skyolia/JupyterProjects/segmentation/TGS Salt/dataset/train_y',
    '/home/skyolia/JupyterProjects/segmentation/TGS Salt/dataset/test_x',
    '/home/skyolia/JupyterProjects/segmentation/TGS Salt/dataset/test_y',
]
train_x, train_y, test_x, test_y = map(da.from_npy_stack, stack_dirs)

# Show the array summaries (shape / dtype / chunking) as the cell output.
train_x, train_y, test_x, test_y

(dask.array<from-npy-stack, shape=(2400, 128, 128, 1), dtype=float32, chunksize=(2400, 128, 128, 1)>,
 dask.array<from-npy-stack, shape=(2400, 128, 128, 1), dtype=float32, chunksize=(2400, 128, 128, 1)>,
 dask.array<from-npy-stack, shape=(1600, 128, 128, 1), dtype=float32, chunksize=(1600, 128, 128, 1)>,
 dask.array<from-npy-stack, shape=(1600, 128, 128, 1), dtype=float32, chunksize=(1600, 128, 128, 1)>)

In [3]:
def build_block(input_layer, filters, norm=True, k=(3, 3)):
    """Apply one convolution -> (optional batch norm) -> ELU stage.

    Args:
        input_layer: Keras tensor the convolution is applied to.
        filters: Number of output channels of the convolution.
        norm: When true, a BatchNormalization layer follows the convolution
            and the convolution is created without a bias term.
        k: Convolution kernel size.

    Returns:
        The Keras tensor produced by the final ELU activation.
    """
    conv = tf.keras.layers.Conv2D(
        filters,
        kernel_size=k,
        padding='same',
        # The bias is dropped exactly when batch norm is applied afterwards.
        use_bias=not norm,
        kernel_initializer='glorot_normal',
    )
    features = conv(input_layer)
    if norm:
        features = tf.keras.layers.BatchNormalization()(features)
    return tf.keras.layers.Activation('elu')(features)

def build_unet(n_filters=16, dropout=0.5):
    """Build the U-Net style segmentation model used in this notebook.

    The network takes a (128, 128, 1) input, runs four down-sampling stages,
    a bottleneck, and four up-sampling stages that each concatenate the
    matching encoder feature map, then ends in a 1x1 sigmoid convolution.

    Args:
        n_filters: Channel count of the first stage; deeper stages use
            2x, 4x, 8x and (bottleneck) 16x this value.
        dropout: Rate passed to every SpatialDropout2D layer.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input image to a
        single-channel sigmoid output.
    """
    layers = tf.keras.layers
    image_input = tf.keras.Input(shape=(128, 128, 1), name='input_layer')

    # Contracting path: two conv blocks, remember the result for the skip
    # connection, then pool and drop.
    skips = []
    x = image_input
    for mult in (1, 2, 4, 8):
        x = build_block(x, n_filters * mult)
        x = build_block(x, n_filters * mult)
        skips.append(x)
        x = layers.MaxPooling2D(padding='same')(x)
        x = layers.SpatialDropout2D(dropout)(x)

    # Bottleneck at the coarsest resolution.
    x = build_block(x, n_filters * 16)
    x = build_block(x, n_filters * 16)

    # Expanding path: upsample, merge with the most recent unused skip
    # (popped in reverse order of creation), drop, then two conv blocks.
    for mult in (8, 4, 2, 1):
        x = layers.Convolution2DTranspose(
            n_filters * mult, (3, 3), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skips.pop()])
        x = layers.SpatialDropout2D(dropout)(x)
        x = build_block(x, n_filters * mult)
        x = build_block(x, n_filters * mult)

    # Per-pixel probability map.
    output = layers.Conv2D(
        1, (1, 1), kernel_initializer='glorot_normal', activation='sigmoid')(x)
    return tf.keras.Model(inputs=image_input, outputs=output)

In [4]:
# Instantiate the network with the builder's default settings, spelled out
# explicitly, and print the layer-by-layer summary.
model = build_unet(n_filters=16, dropout=0.5)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        (None, 128, 128, 1)  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 128, 128, 16) 144         input_layer[0][0]                
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 128, 128, 16) 64          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 128, 128, 16) 0           batch_normalization[0][0]        
__________________________________________________________________________________________________
conv2d_1 (

In [5]:
# --- Training configuration ---------------------------------------------------
epochs, batch_size, lr, filepath = 10000, 16, 0.001, "day_1.weights.best.hdf5"
# Round up so the final, possibly smaller, batch is still consumed each epoch.
steps_per_epoch = int(np.ceil(train_y.shape[0]/batch_size))
validation_steps = int(np.ceil(test_y.shape[0]/batch_size))

# --- Augmentation -------------------------------------------------------------
# Images and masks get their own generator built from identical arguments so
# that, with a shared seed, both receive the same random shifts and flips.
data_gen_args = dict(width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True, vertical_flip=True)
image_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
mask_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)

# Provide the same seed and keyword arguments to the fit and flow methods
# NOTE(review): per the Keras docs, fit() is only required for featurewise
# normalisation / ZCA whitening, none of which is enabled here -- confirm
# whether these two calls can be dropped.
seed = 1
image_datagen.fit(train_x, augment=True, seed=seed)
mask_datagen.fit(train_y, augment=True, seed=seed)

image_generator = image_datagen.flow(x=train_x, batch_size=batch_size, seed=seed)
mask_generator = mask_datagen.flow(x=train_y, batch_size=batch_size, seed=seed)
# Yields (image_batch, mask_batch) pairs; the pairing is only valid while both
# iterators are advanced in lockstep.
train_generator = zip(image_generator, mask_generator)
# Validation data is fed un-augmented.
test_generator = tf.keras.preprocessing.image.ImageDataGenerator().flow(x=test_x, y=test_y, batch_size=batch_size)

# --- Callbacks ----------------------------------------------------------------
# Keep only the weights with the best validation accuracy seen so far.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Fixed: the callback class is `TensorBoard`; the previous spelling
# (`TensorBozard`) raised AttributeError before training could start.
tb = tf.keras.callbacks.TensorBoard(log_dir=os.getcwd())

# --- Compile & train ----------------------------------------------------------
# Pass the configured learning rate (previously `lr` was defined but unused).
# It is given positionally because the keyword name differs between Keras
# releases (`lr` vs `learning_rate`).
opt = tf.keras.optimizers.Adam(lr)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

model.fit_generator(train_generator,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs,
                    verbose=1,
                    validation_data=test_generator,
                    validation_steps=validation_steps,
                    use_multiprocessing=False,
                    # A zipped pair of seeded iterators must be consumed by a
                    # single worker: with several threads the image and mask
                    # batches can be drawn out of step and receive different
                    # random transforms.
                    workers=1,
                    shuffle=True,
                    #initial_epoch=65,
                    callbacks=[checkpoint, tb])

Epoch 1/10000
Epoch 00001: val_acc improved from -inf to 0.23291, saving model to day_1.weights.best.hdf5
Epoch 2/10000
Epoch 00002: val_acc improved from 0.23291 to 0.27694, saving model to day_1.weights.best.hdf5
Epoch 3/10000
Epoch 00003: val_acc improved from 0.27694 to 0.78389, saving model to day_1.weights.best.hdf5
Epoch 4/10000
Epoch 00004: val_acc did not improve from 0.78389
Epoch 5/10000
Epoch 00005: val_acc improved from 0.78389 to 0.80565, saving model to day_1.weights.best.hdf5
Epoch 6/10000
Epoch 00006: val_acc improved from 0.80565 to 0.84989, saving model to day_1.weights.best.hdf5
Epoch 7/10000
Epoch 00007: val_acc improved from 0.84989 to 0.86189, saving model to day_1.weights.best.hdf5
Epoch 8/10000
Epoch 00008: val_acc did not improve from 0.86189
Epoch 9/10000
Epoch 00009: val_acc improved from 0.86189 to 0.87064, saving model to day_1.weights.best.hdf5
Epoch 10/10000
Epoch 00010: val_acc improved from 0.87064 to 0.87392, saving model to day_1.weights.best.hdf5
Ep

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00605: val_acc did not improve from 0.94167
Epoch 606/10000
Epoch 00606: val_acc did not improve from 0.94167
Epoch 607/10000
Epoch 00607: val_acc did not improve from 0.94167
Epoch 608/10000
Epoch 00608: val_acc did not improve from 0.94167
Epoch 609/10000
Epoch 00609: val_acc did not improve from 0.94167
Epoch 610/10000
Epoch 00610: val_acc did not improve from 0.94167
Epoch 611/10000
Epoch 00611: val_acc did not improve from 0.94167
Epoch 612/10000
Epoch 00612: val_acc did not improve from 0.94167
Epoch 613/10000
Epoch 00613: val_acc did not improve from 0.94167
Epoch 614/10000
Epoch 00614: val_acc did not improve from 0.94167
Epoch 615/10000
Epoch 00615: val_acc did not improve from 0.94167
Epoch 616/10000
Epoch 00616: val_acc did not improve from 0.94167
Epoch 617/10000
Epoch 00617: val_acc did not improve from 0.94167
Epoch 618/10000
Epoch 00618: val_acc did not improve from 0.94167
Epoch 619/10000
Epoch 00619: val_acc did not improve from 0.94167
Epoch 620/10000
Epoch 0062

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00667: val_acc did not improve from 0.94167
Epoch 668/10000
Epoch 00668: val_acc did not improve from 0.94167
Epoch 669/10000
Epoch 00669: val_acc did not improve from 0.94167
Epoch 670/10000
Epoch 00670: val_acc did not improve from 0.94167
Epoch 671/10000
Epoch 00671: val_acc did not improve from 0.94167
Epoch 672/10000
Epoch 00672: val_acc improved from 0.94167 to 0.94170, saving model to day_1.weights.best.hdf5
Epoch 673/10000
Epoch 00673: val_acc improved from 0.94170 to 0.94185, saving model to day_1.weights.best.hdf5
Epoch 674/10000
Epoch 00674: val_acc did not improve from 0.94185
Epoch 675/10000
Epoch 00675: val_acc did not improve from 0.94185
Epoch 676/10000
Epoch 00676: val_acc did not improve from 0.94185
Epoch 677/10000
Epoch 00677: val_acc did not improve from 0.94185
Epoch 678/10000
Epoch 00678: val_acc did not improve from 0.94185
Epoch 679/10000
Epoch 00679: val_acc did not improve from 0.94185
Epoch 680/10000
Epoch 00680: val_acc did not improve from 0.94185
Ep

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00729: val_acc did not improve from 0.94303
Epoch 730/10000
Epoch 00730: val_acc did not improve from 0.94303
Epoch 731/10000
Epoch 00731: val_acc improved from 0.94303 to 0.94428, saving model to day_1.weights.best.hdf5
Epoch 732/10000
Epoch 00732: val_acc did not improve from 0.94428
Epoch 733/10000
Epoch 00733: val_acc did not improve from 0.94428
Epoch 734/10000
Epoch 00734: val_acc did not improve from 0.94428
Epoch 735/10000
Epoch 00735: val_acc did not improve from 0.94428
Epoch 736/10000
Epoch 00736: val_acc did not improve from 0.94428
Epoch 737/10000
Epoch 00737: val_acc did not improve from 0.94428
Epoch 738/10000
Epoch 00738: val_acc did not improve from 0.94428
Epoch 739/10000
Epoch 00739: val_acc did not improve from 0.94428
Epoch 740/10000
Epoch 00740: val_acc did not improve from 0.94428
Epoch 741/10000
Epoch 00741: val_acc did not improve from 0.94428
Epoch 742/10000
Epoch 00742: val_acc did not improve from 0.94428
Epoch 743/10000
Epoch 00743: val_acc did not im

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00792: val_acc did not improve from 0.94428
Epoch 793/10000
Epoch 00793: val_acc did not improve from 0.94428
Epoch 794/10000
Epoch 00794: val_acc did not improve from 0.94428
Epoch 795/10000
Epoch 00795: val_acc did not improve from 0.94428
Epoch 796/10000
Epoch 00796: val_acc did not improve from 0.94428
Epoch 797/10000
Epoch 00797: val_acc did not improve from 0.94428
Epoch 798/10000
Epoch 00798: val_acc did not improve from 0.94428
Epoch 799/10000
Epoch 00799: val_acc did not improve from 0.94428
Epoch 800/10000
Epoch 00800: val_acc did not improve from 0.94428
Epoch 801/10000
Epoch 00801: val_acc did not improve from 0.94428
Epoch 802/10000
Epoch 00802: val_acc did not improve from 0.94428
Epoch 803/10000
Epoch 00803: val_acc did not improve from 0.94428
Epoch 804/10000
Epoch 00804: val_acc did not improve from 0.94428
Epoch 805/10000
Epoch 00805: val_acc did not improve from 0.94428
Epoch 806/10000
Epoch 00806: val_acc did not improve from 0.94428
Epoch 807/10000
Epoch 0080

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00853: val_acc did not improve from 0.94428
Epoch 854/10000
Epoch 00854: val_acc did not improve from 0.94428
Epoch 855/10000
Epoch 00855: val_acc did not improve from 0.94428
Epoch 856/10000
Epoch 00856: val_acc did not improve from 0.94428
Epoch 857/10000
Epoch 00857: val_acc did not improve from 0.94428
Epoch 858/10000
Epoch 00858: val_acc did not improve from 0.94428
Epoch 859/10000
Epoch 00859: val_acc did not improve from 0.94428
Epoch 860/10000
Epoch 00860: val_acc did not improve from 0.94428
Epoch 861/10000
Epoch 00861: val_acc did not improve from 0.94428
Epoch 862/10000
Epoch 00862: val_acc did not improve from 0.94428
Epoch 863/10000
Epoch 00863: val_acc did not improve from 0.94428
Epoch 864/10000
Epoch 00864: val_acc did not improve from 0.94428
Epoch 865/10000
Epoch 00865: val_acc did not improve from 0.94428
Epoch 866/10000
Epoch 00866: val_acc did not improve from 0.94428
Epoch 867/10000
Epoch 00867: val_acc did not improve from 0.94428
Epoch 868/10000
Epoch 0086

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00915: val_acc did not improve from 0.94567
Epoch 916/10000
Epoch 00916: val_acc did not improve from 0.94567
Epoch 917/10000
Epoch 00917: val_acc did not improve from 0.94567
Epoch 918/10000
Epoch 00918: val_acc did not improve from 0.94567
Epoch 919/10000
Epoch 00919: val_acc did not improve from 0.94567
Epoch 920/10000
Epoch 00920: val_acc did not improve from 0.94567
Epoch 921/10000
Epoch 00921: val_acc did not improve from 0.94567
Epoch 922/10000
Epoch 00922: val_acc did not improve from 0.94567
Epoch 923/10000
Epoch 00923: val_acc did not improve from 0.94567
Epoch 924/10000
Epoch 00924: val_acc did not improve from 0.94567
Epoch 925/10000
Epoch 00925: val_acc did not improve from 0.94567
Epoch 926/10000
Epoch 00926: val_acc did not improve from 0.94567
Epoch 927/10000
Epoch 00927: val_acc did not improve from 0.94567
Epoch 928/10000
Epoch 00928: val_acc did not improve from 0.94567
Epoch 929/10000
Epoch 00929: val_acc did not improve from 0.94567
Epoch 930/10000
Epoch 0093

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00956: val_acc did not improve from 0.94567
Epoch 957/10000
Epoch 00957: val_acc did not improve from 0.94567
Epoch 958/10000
Epoch 00958: val_acc did not improve from 0.94567
Epoch 959/10000
Epoch 00959: val_acc did not improve from 0.94567
Epoch 960/10000
Epoch 00960: val_acc did not improve from 0.94567
Epoch 961/10000
Epoch 00961: val_acc did not improve from 0.94567
Epoch 962/10000
Epoch 00962: val_acc did not improve from 0.94567
Epoch 963/10000
Epoch 00963: val_acc did not improve from 0.94567
Epoch 964/10000
Epoch 00964: val_acc did not improve from 0.94567
Epoch 965/10000
Epoch 00965: val_acc did not improve from 0.94567
Epoch 966/10000
Epoch 00966: val_acc did not improve from 0.94567
Epoch 967/10000
Epoch 00967: val_acc did not improve from 0.94567
Epoch 968/10000
Epoch 00968: val_acc did not improve from 0.94567
Epoch 969/10000
Epoch 00969: val_acc did not improve from 0.94567
Epoch 970/10000
Epoch 00970: val_acc did not improve from 0.94567
Epoch 971/10000
Epoch 0097

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/skyolia/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_generator.py", line 171, in fit_generator
    x, y, sample_weight=sample_weight, class_weight=class_weight)
  File "/home/skyolia/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1828, in train_on_batch
    outputs = self.train_function(ins)
  File "/home/skyolia/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/site-packages/tensorflow/python/keras/backend.py", line 2978, in __call__
    run_metadata=self.run_metadata)
  File "/home/skyolia/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1399, in __call__
    run_metadata_ptr)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/skyolia/anaconda3/envs/tensorflow_gpuenv/lib/python3.6/s

TypeError: must be str, not list