# Test U-Net

## U-Net architecture

In [1]:

# Copyright (C) 2022 Langlois Quentin, UCLouvain, INGI. All rights reserved.
# Licenced under the Affero GPL v3 Licence (the "Licence").
# you may not use this file except in compliance with the License.
# See the "LICENCE" file at the root of the directory for the licence information.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf

from custom_architectures.current_blocks import _get_var, Conv2DBN, Conv3DBN

# Factories for the layer that merges an upsampled tensor with its skip connection.
# Each entry is a zero-argument callable returning a fresh Keras layer, so every
# decoder stage gets its own layer instance.
#   - 'concat' : stacks both tensors along the last axis.
#   - 'add'    : element-wise sum (both tensors must have identical shapes).
_concat_layers = {
    'concat' : lambda: tf.keras.layers.Concatenate(axis = -1),
    'add'    : lambda: tf.keras.layers.Add()
}

def UNet(input_shape  = 512,
         output_dim   = 1,

         n_stages     = 5,
         repeat_conv  = 2,

         filters      = [32, 64, 128, 256, 512],
         kernel_size  = 3,
         strides      = 1,
         use_bias     = True,
         padding      = 'same',
         activation   = 'relu',
         pool_type    = 'max',
         pool_strides = 2,
         bnorm        = 'never',
         drop_rate    = 0.3,

         concat_mode      = 'concat',
         final_activation = 'sigmoid',

         name     = None,
         ** kwargs
        ):
    """
        Builds a U-Net style encoder / decoder `tf.keras.Model`.

        The encoder applies `n_stages` convolutional blocks ; all but the last one are
        followed by a pooling layer and a `Dropout`. The decoder then runs `n_stages - 1`
        steps, each made of a transposed convolution, a merge with the encoder output of
        the same stage (skip connection) and a convolutional block.

        Most hyper-parameters are read with `_get_var(value, i)` where `i` is the stage
        index (presumably allowing a scalar, a per-stage list or a callable ; check
        `_get_var` for the exact rules).

        Arguments :
            - input_shape   : a `list` / `tuple` used as-is as input shape (without batch
                              dimension), or a single value `s` expanded to `(s, s, 3)`.
                              A shape of length 3 selects the 2D layers, any other length
                              selects the 3D ones.
            - output_dim    : number of filters of the final convolution
            - n_stages      : number of encoder stages
            - repeat_conv   : number of convolutions in each block
            - filters / kernel_size / strides / use_bias / padding / activation / bnorm :
                              forwarded to the convolutional blocks (`strides` is only
                              forwarded to the encoder blocks)
            - pool_type     : 'avg' / 'average' selects average pooling ; any other value
                              (including the default 'max') selects max pooling
            - pool_strides  : pooling size of each encoder stage, also used as stride of
                              the matching transposed convolution
            - drop_rate     : rate of the `Dropout` applied after each pooling layer
            - concat_mode   : key of `_concat_layers` selecting the skip-connection merge
            - final_activation  : activation of the final convolution
            - name          : name of the returned model
            - kwargs        : accepted but unused
        Return :
            - model : the `tf.keras.Model` mapping `input_image` to the final convolution
        Raise :
            - KeyError  : if `concat_mode` is not a key of `_concat_layers`
    """
    concat_layer = _concat_layers[concat_mode]

    # Lists are accepted as well as tuples : they were previously mistaken for a scalar
    # size and expanded to `(list, list, 3)`
    if isinstance(input_shape, (list, tuple)):
        input_shape = tuple(input_shape)
    else:
        input_shape = (input_shape, input_shape, 3)
    inputs  = tf.keras.layers.Input(shape = input_shape, name = 'input_image')

    # `pool_type` used to be ignored ; unknown values keep the historical max pooling
    use_avg_pool = isinstance(pool_type, str) and pool_type.lower() in ('avg', 'average')

    if len(input_shape) == 3:
        conv_fn = Conv2DBN
        pool_fn = tf.keras.layers.AveragePooling2D if use_avg_pool else tf.keras.layers.MaxPooling2D
        upsample_fn = tf.keras.layers.Conv2DTranspose
    else:
        conv_fn = Conv3DBN
        pool_fn = tf.keras.layers.AveragePooling3D if use_avg_pool else tf.keras.layers.MaxPooling3D
        upsample_fn = tf.keras.layers.Conv3DTranspose

    # ---------- Encoder ----------
    residuals = []
    x = inputs
    for i in range(n_stages):
        x = conv_fn(
            x,
            filters     = [_get_var(filters, i)] * _get_var(repeat_conv, i),
            kernel_size = _get_var(kernel_size, i),
            strides     = _get_var(strides, i),
            use_bias    = _get_var(use_bias, i),
            padding     = _get_var(padding, i),

            activation  = _get_var(activation, i),

            bnorm       = _get_var(bnorm, i),
            # dropout is applied after the pooling layer, not inside the block
            drop_rate   = 0.,

            bn_name = 'down_bn{}'.format(i + 1),
            name    = 'down_conv{}'.format(i + 1)
        )
        # kept for the skip connection of the matching decoder step
        residuals.append(x)

        # the last stage is the bottleneck : no pooling / dropout after it
        if i < n_stages - 1:
            x = pool_fn(_get_var(pool_strides, i))(x)
            x = tf.keras.layers.Dropout(_get_var(drop_rate, i))(x)

    # ---------- Decoder ----------
    for i in reversed(range(n_stages - 1)):
        # The stride mirrors the pooling factor applied after encoder stage `i`, so that
        # the upsampled tensor matches `residuals[i]` (it was hard-coded to 2 before,
        # which only worked with the default `pool_strides`)
        x = upsample_fn(
            _get_var(filters, i),
            kernel_size = 3,
            strides     = _get_var(pool_strides, i),
            padding     = 'same',
            name        = 'upsampling_{}'.format(i)
        )(x)
        x = concat_layer()([x, residuals[i]])
        x = conv_fn(
            x,
            filters     = [_get_var(filters, i)] * _get_var(repeat_conv, i),
            kernel_size = _get_var(kernel_size, i),
            use_bias    = _get_var(use_bias, i),
            padding     = _get_var(padding, i),

            bnorm       = _get_var(bnorm, i),

            activation  = _get_var(activation, i),

            bn_name = 'up_bn{}'.format(i + 1),
            name    = 'up_conv{}'.format(i + 1)
        )

    # NOTE(review) : `Conv2D` is used for the output projection on the 3D path as well ;
    # left unchanged to keep the existing weight shapes, confirm this is intended
    out = tf.keras.layers.Conv2D(output_dim, kernel_size = 1, strides = 1, activation = final_activation)(x)
    return tf.keras.Model(inputs = inputs, outputs = out, name = name)


# Maps the architecture name to its builder function.
custom_functions = {'UNet' : UNet}

2023-03-21 09:45:37.171168: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 09:45:37.268425: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-03-21 09:45:37.292138: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [1]:
import importlib
import tensorflow as tf

from custom_architectures import unet_arch

def show_memory(msg = ''):
    """
        Prints the memory statistics reported by TensorFlow for 'GPU:0', each value being
        divided by 1024 ** 3 and formatted with 3 decimals. A non-empty `msg` is printed
        as a prefix, followed by a tab and a colon.
    """
    prefix = msg + '\t: ' if msg else msg
    stats  = tf.config.experimental.get_memory_info('GPU:0')

    formatted = {}
    for key, value in stats.items():
        formatted[key] = '{:.3f}'.format(value / 1024 ** 3)

    print('{}{}'.format(prefix, formatted))
    
# Restrict TensorFlow to the first physical GPU. The guard avoids an IndexError on
# `gpus[0]` when no GPU is detected.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.set_visible_devices([gpus[0]], 'GPU')
else:
    print('No GPU detected : the GPU memory statistics will not be available')

2023-03-21 11:18:34.863736: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 11:18:34.960541: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-03-21 11:18:34.984052: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Test U-Net 2D

In [4]:
# Reload the architecture module so that edits made to it are taken into account
importlib.reload(unet_arch)

# 2D U-Net (input shape of length 3) : 1 convolution in the first stage, 2 in the others,
# and no dropout for stage index 4
model = unet_arch.UNet(
    input_shape = (512, 512, 1),
    output_dim  = 1,
    n_stages    = 5,
    repeat_conv = lambda i: 2 if i != 0 else 1,
    drop_rate   = lambda i: 0.25 if i != 4 else 0.
)
print(model.count_params())
model.compile(optimizer = 'adam', loss = 'binary_crossentropy')

# Memory statistics right after building / compiling the model
show_memory()

2023-03-21 10:00:34.425733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 10:00:34.799312: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14783 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:17:00.0, compute capability: 7.5


8611425
{'current': '0.032', 'peak': '0.035'}


In [None]:
batch_size = 24

show_memory('Before')

# Random inputs (2 batches) with all-ones targets of the same shape as the inputs
inp = tf.random.uniform((2 * batch_size, * model.input_shape[1:]), minval = 0., maxval = 1.)
out = tf.ones_like(inp)

# The string below is a disabled step-by-step memory probe : it is a bare expression
# and has no effect when the cell runs
"""show_memory('Batch init')

model(inp[:batch_size], training = True)

show_memory('Simple call')

with tf.GradientTape() as tape:
    pred = model(inp[:batch_size], training = True)
    l = model.compiled_loss(out[:batch_size], pred)

show_memory('With gradient')

del tape, l, pred

show_memory('After deleting tape')
"""
_ = model.fit(x = inp, y = out, batch_size = batch_size, epochs = 5)

show_memory('After fit')
# Reset the statistics tracked by TensorFlow for 'GPU:0' before the next experiment
tf.config.experimental.reset_memory_stats('GPU:0')

In [None]:
# Print the layer-by-layer summary of the current `model`
model.summary()

In [None]:
# Print the layer-by-layer summary of the current `model` (same call as the previous cell)
model.summary()

## Test U-Net 3D

In [2]:
# Reload the architecture module so that edits made to it are taken into account
importlib.reload(unet_arch)

# 3D U-Net (input shape of length 4) with 6 stages ; the arguments are forwarded as-is
# to the reloaded `unet_arch.UNet`
model = unet_arch.UNet(
    input_shape      = (128, 512, 512, 1),
    output_dim       = 1,
    n_stages         = 6,
    n_conv_per_stage = [1] * 3 + [2] * 3,
    drop_rate        = lambda i: 0.25 if i != 5 else 0.,
    concat_mode      = 'add',
    pool_strides     = [2] * 5,
    filters          = [8 * 2 ** stage for stage in range(6)]
)
print(model.count_params())
model.compile(optimizer = 'adam', loss = 'binary_crossentropy')

# Memory statistics right after building / compiling the model
show_memory()

2023-03-21 10:47:32.945584: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 10:47:33.323590: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14783 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:17:00.0, compute capability: 7.5


5823497
{'current': '0.022', 'peak': '0.026'}


In [3]:
batch_size = 1

show_memory('Before')

# Random inputs (2 batches) with all-ones targets of the same shape as the inputs
inp = tf.random.uniform((2 * batch_size, * model.input_shape[1:]), minval = 0., maxval = 1.)
out = tf.ones_like(inp)

# The string below is a disabled step-by-step memory probe : it is a bare expression
# and has no effect when the cell runs
"""show_memory('Batch init')

model(inp[:batch_size], training = True)

show_memory('Simple call')

with tf.GradientTape() as tape:
    pred = model(inp[:batch_size], training = True)
    l = model.compiled_loss(out[:batch_size], pred)

show_memory('With gradient')

del tape, l, pred

show_memory('After deleting tape')
"""
_ = model.fit(x = inp, y = out, batch_size = batch_size, epochs = 5)

show_memory('After fit')
# Reset the statistics tracked by TensorFlow for 'GPU:0' before the next experiment
tf.config.experimental.reset_memory_stats('GPU:0')

Before	: {'current': '0.022', 'peak': '0.026'}
Epoch 1/5


2023-03-21 10:47:35.758813: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8204


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
After fit	: {'current': '0.905', 'peak': '13.136'}


In [4]:
# Print the layer-by-layer summary of the current `model`
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_image (InputLayer)       [(None, 128, 512, 5  0           []                               
                                12, 1)]                                                           
                                                                                                  
 down_conv1 (Conv3D)            (None, 128, 512, 51  224         ['input_image[0][0]']            
                                2, 8)                                                             
                                                                                                  
 activation (Activation)        (None, 128, 512, 51  0           ['down_conv1[0][0]']             
                                2, 8)                                                         

## Test U-Net 3D with strides

In [2]:
# Reload the architecture module so that edits made to it are taken into account
importlib.reload(unet_arch)

# 3D U-Net (input shape of length 4) with 6 stages, `pool_type = None` and
# `strides = 2` ; the arguments are forwarded as-is to the reloaded `unet_arch.UNet`
model = unet_arch.UNet(
    input_shape      = (128, 512, 512, 1),
    output_dim       = 1,
    n_stages         = 6,
    n_conv_per_stage = 2,
    drop_rate        = lambda i: 0.25 if i != 5 else 0.,
    concat_mode      = 'add',
    pool_type        = None,
    strides          = 2,
    filters          = [8 * 2 ** stage for stage in range(6)]
)
print(model.count_params())
model.compile(optimizer = 'adam', loss = 'binary_crossentropy')

# Memory statistics right after building / compiling the model
show_memory()

2023-03-21 11:18:44.599095: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 11:18:44.979307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14783 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:17:00.0, compute capability: 7.5


0 (None, 128, 512, 512, 8)
1 (None, 64, 256, 256, 16)
2 (None, 32, 128, 128, 32)
3 (None, 16, 64, 64, 64)
4 (None, 8, 32, 32, 128)
5 (None, 4, 16, 16, 256)
4 (None, 8, 32, 32, 128) (None, 8, 32, 32, 128) 128
3 (None, 16, 64, 64, 64) (None, 16, 64, 64, 64) 64
2 (None, 32, 128, 128, 32) (None, 32, 128, 128, 32) 32
1 (None, 64, 256, 256, 16) (None, 64, 256, 256, 16) 16
0 (None, 128, 512, 512, 8) (None, 128, 512, 512, 8) 8
5896185
{'current': '0.022', 'peak': '0.026'}


In [3]:
batch_size = 1

show_memory('Before')

# Random inputs (2 batches) with all-ones targets of the same shape as the inputs
inp = tf.random.uniform((2 * batch_size, * model.input_shape[1:]), minval = 0., maxval = 1.)
out = tf.ones_like(inp)

# The string below is a disabled step-by-step memory probe : it is a bare expression
# and has no effect when the cell runs
"""show_memory('Batch init')

model(inp[:batch_size], training = True)

show_memory('Simple call')

with tf.GradientTape() as tape:
    pred = model(inp[:batch_size], training = True)
    l = model.compiled_loss(out[:batch_size], pred)

show_memory('With gradient')

del tape, l, pred

show_memory('After deleting tape')
"""
_ = model.fit(x = inp, y = out, batch_size = batch_size, epochs = 5)

show_memory('After fit')
# Reset the statistics tracked by TensorFlow for 'GPU:0' before the next experiment
tf.config.experimental.reset_memory_stats('GPU:0')

Before	: {'current': '0.022', 'peak': '0.026'}
Epoch 1/5


2023-03-21 11:18:53.017038: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8204


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
After fit	: {'current': '0.910', 'peak': '13.031'}


In [13]:
# Print the layer-by-layer summary of the current `model`
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_image (InputLayer)       [(None, 128, 512, 5  0           []                               
                                12, 1)]                                                           
                                                                                                  
 down_conv1 (Conv3D)            (None, 128, 512, 51  224         ['input_image[0][0]']            
                                2, 8)                                                             
                                                                                                  
 activation_123 (Activation)    (None, 128, 512, 51  0           ['down_conv1[0][0]']             
                                2, 8)                                                       