# Training the UNET

## Import

In [1]:
import os
import numpy as np
import cv2
from glob import glob
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Reshape, Dense, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

## Seeding

In [2]:
os.environ["PYTHONHASHSEED"] = str(42)
np.random.seed(42)
tf.random.set_seed(42)

## Hyperparameters

In [3]:
height = 384
width = 512

batch_size = 8
lr = 1e-4 ## 0.0001
epochs = 100
num_classes = 8+1

## Path

In [4]:
dataset_path = "dataset"

files_dir = "files"
model_file = os.path.join(files_dir, "unet.h5")
log_file = os.path.join(files_dir, "log.csv")

## Creating Folder

In [5]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [6]:
create_dir(files_dir)

## Building UNET

### CBAM

In [7]:
def channel_attention_module(x, ratio=8):
    channel = x.shape[-1]
    
    l1 = Dense(channel//ratio, activation="relu", use_bias=False)
    l2 = Dense(channel, use_bias=False)
    
    x1 = GlobalAveragePooling2D()(x)
    x1 = l1(x1)
    x1 = l2(x1)
    
    x2 = GlobalMaxPooling2D()(x)
    x2 = l1(x2)
    x2 = l2(x2)
    
    feats = x1 + x2
    feats = Activation("sigmoid")(feats)
    
    feats = Multiply()([x, feats])
    return feats

In [8]:
def spatial_attention_module(x):
    x1 = tf.reduce_mean(x, axis=-1)
    x1 = tf.expand_dims(x1, axis=-1)
    
    x2 = tf.reduce_max(x, axis=-1)
    x2 = tf.expand_dims(x2, axis=-1)
    
    feats = Concatenate()([x1, x2])
    feats = Conv2D(1, kernel_size=7, padding="same", activation="sigmoid")(feats)
    
    feats = Multiply()([x, feats])
    return feats

In [9]:
def cbam(x):
    x = channel_attention_module(x)
    x = spatial_attention_module(x)
    return x

### Conv Block

In [10]:
def conv_block(inputs, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(inputs)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    
    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    
    x = cbam(x)
    
    return x

### Decoder Block

In [11]:
def decoder_block(inputs, skip, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(inputs)
    x = Concatenate()([x, skip])
    x = conv_block(x, num_filters)
    return x

### UNET

In [12]:
def build_unet(input_shape, num_classes):
    """ Inputs """
    inputs = Input(input_shape)
    
    """ ResNet50 Encoder """
    resnet50 = ResNet50(include_top=False, weights="imagenet", input_tensor=inputs)
    
    s1 = resnet50.get_layer("input_1").output
    s2 = resnet50.get_layer("conv1_relu").output
    s3 = resnet50.get_layer("conv2_block3_out").output
    s4 = resnet50.get_layer("conv3_block4_out").output
    
    """ Bridge """
    b1 = resnet50.get_layer("conv4_block6_out").output
    
    """ Decoder """
    d1 = decoder_block(b1, s4, 512)
    d2 = decoder_block(d1, s3, 256)
    d3 = decoder_block(d2, s2, 128)
    d4 = decoder_block(d3, s1, 64)
    
    outputs = Conv2D(num_classes, 1, padding="same", activation="softmax")(d4) ##
    
    model = Model(inputs, outputs, name="UNET")
    return model

## Dataset Pipeline 

### Loading the training and validation dataset

In [13]:
def load_data(path):
    train_x = sorted(glob(os.path.join(path, "train", "images", "*")))
    train_y = sorted(glob(os.path.join(path, "train", "masks", "*")))
    
    valid_x = sorted(glob(os.path.join(path, "valid", "images", "*")))
    valid_y = sorted(glob(os.path.join(path, "valid", "masks", "*")))
    
    return (train_x, train_y), (valid_x, valid_y)

### Colormap

In [14]:
COLORMAP = [
    [0, 0, 0],
    [128, 0, 64],
    [192, 0, 192],
    [0, 64, 64],
    [128, 64, 128],
    [192, 0, 0],
    [192, 128, 64],
    [128, 64, 192],
    [192, 128, 192],
]

new_classes = [
    "Background",
    "Car",
    "MotorcycleScooter",
    "Pedestrian",
    "Road",
    "Sidewalk",
    "SUVPickupTruck",
    "Train",
    "Truck_Bus"
]

### Reading Images

In [15]:
def read_image(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    x = cv2.resize(x, (width, height))
    x = x/255.0
    x = x.astype(np.float32)
    return x

### Reading Mask

In [16]:
def read_mask(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    x = cv2.resize(x, (width, height))
    
    output = []
    for color in COLORMAP:
        cmap = np.all(np.equal(x, color), axis=-1)
        output.append(cmap)
    output = np.stack(output, axis=-1)
    output = output.astype(np.int64)

    return output

### tf.data pipeline

In [17]:
def tf_parse(x, y):
    def _parse(x, y):
        x = read_image(x)
        y = read_mask(y)
        return x, y
    
    x, y = tf.numpy_function(_parse, [x, y], [tf.float32, tf.int64])
    x.set_shape([height, width, 3])
    y.set_shape([height, width, num_classes]) ##
    
    return x, y

In [18]:
def tf_dataset(x, y, batch=8):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

## Training

In [19]:
(train_x, train_y), (valid_x, valid_y) = load_data(dataset_path)
print(f"Train: {len(train_x)} - {len(train_y)}")
print(f"Valid: {len(valid_x)} - {len(valid_y)}")

Train: 561 - 561
Valid: 70 - 70


In [20]:
train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

2022-08-27 16:28:19.772127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-27 16:28:19.777085: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-27 16:28:19.777856: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-27 16:28:19.817677: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [21]:
input_shape = (height, width, 3)
model = build_unet(input_shape, num_classes)

In [22]:
model.summary()

Model: "UNET"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 384, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 390, 518, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 192, 256, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                               

                                                                                                  
 conv2_block3_1_conv (Conv2D)   (None, 96, 128, 64)  16448       ['conv2_block2_out[0][0]']       
                                                                                                  
 conv2_block3_1_bn (BatchNormal  (None, 96, 128, 64)  256        ['conv2_block3_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv2_block3_1_relu (Activatio  (None, 96, 128, 64)  0          ['conv2_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv2_block3_2_conv (Conv2D)   (None, 96, 128, 64)  36928       ['conv2_block3_1_relu[0][0]']    
          

                                                                                                  
 conv3_block2_out (Activation)  (None, 48, 64, 512)  0           ['conv3_block2_add[0][0]']       
                                                                                                  
 conv3_block3_1_conv (Conv2D)   (None, 48, 64, 128)  65664       ['conv3_block2_out[0][0]']       
                                                                                                  
 conv3_block3_1_bn (BatchNormal  (None, 48, 64, 128)  512        ['conv3_block3_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv3_block3_1_relu (Activatio  (None, 48, 64, 128)  0          ['conv3_block3_1_bn[0][0]']      
 n)                                                                                               
          

 conv4_block1_add (Add)         (None, 24, 32, 1024  0           ['conv4_block1_0_bn[0][0]',      
                                )                                 'conv4_block1_3_bn[0][0]']      
                                                                                                  
 conv4_block1_out (Activation)  (None, 24, 32, 1024  0           ['conv4_block1_add[0][0]']       
                                )                                                                 
                                                                                                  
 conv4_block2_1_conv (Conv2D)   (None, 24, 32, 256)  262400      ['conv4_block1_out[0][0]']       
                                                                                                  
 conv4_block2_1_bn (BatchNormal  (None, 24, 32, 256)  1024       ['conv4_block2_1_conv[0][0]']    
 ization)                                                                                         
          

                                                                                                  
 conv4_block4_add (Add)         (None, 24, 32, 1024  0           ['conv4_block3_out[0][0]',       
                                )                                 'conv4_block4_3_bn[0][0]']      
                                                                                                  
 conv4_block4_out (Activation)  (None, 24, 32, 1024  0           ['conv4_block4_add[0][0]']       
                                )                                                                 
                                                                                                  
 conv4_block5_1_conv (Conv2D)   (None, 24, 32, 256)  262400      ['conv4_block4_out[0][0]']       
                                                                                                  
 conv4_block5_1_bn (BatchNormal  (None, 24, 32, 256)  1024       ['conv4_block5_1_conv[0][0]']    
 ization) 

 global_average_pooling2d (Glob  (None, 512)         0           ['activation_1[0][0]']           
 alAveragePooling2D)                                                                              
                                                                                                  
 global_max_pooling2d (GlobalMa  (None, 512)         0           ['activation_1[0][0]']           
 xPooling2D)                                                                                      
                                                                                                  
 dense (Dense)                  (None, 64)           32768       ['global_average_pooling2d[0][0]'
                                                                 , 'global_max_pooling2d[0][0]']  
                                                                                                  
 dense_1 (Dense)                (None, 512)          32768       ['dense[0][0]',                  
          

 tf.math.reduce_mean_1 (TFOpLam  (None, 96, 128)     0           ['multiply_2[0][0]']             
 bda)                                                                                             
                                                                                                  
 tf.math.reduce_max_1 (TFOpLamb  (None, 96, 128)     0           ['multiply_2[0][0]']             
 da)                                                                                              
                                                                                                  
 tf.expand_dims_2 (TFOpLambda)  (None, 96, 128, 1)   0           ['tf.math.reduce_mean_1[0][0]']  
                                                                                                  
 tf.expand_dims_3 (TFOpLambda)  (None, 96, 128, 1)   0           ['tf.math.reduce_max_1[0][0]']   
                                                                                                  
 concatena

                                                                                                  
 concatenate_6 (Concatenate)    (None, 384, 512, 67  0           ['conv2d_transpose_3[0][0]',     
                                )                                 'input_1[0][0]']                
                                                                                                  
 conv2d_9 (Conv2D)              (None, 384, 512, 64  38656       ['concatenate_6[0][0]']          
                                )                                                                 
                                                                                                  
 batch_normalization_6 (BatchNo  (None, 384, 512, 64  256        ['conv2d_9[0][0]']               
 rmalization)                   )                                                                 
                                                                                                  
 activatio

In [23]:
opt = tf.keras.optimizers.Adam(lr)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["acc"])

In [24]:
callbacks = [
        ModelCheckpoint(model_file, verbose=1, save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
        CSVLogger(log_file),
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)
    ]

In [25]:
model.fit(
    train_dataset, 
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/100


2022-08-27 16:28:25.875165: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-08-27 16:28:27.736044: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 1: val_loss improved from inf to 2.10252, saving model to files/unet.h5
Epoch 2/100
Epoch 2: val_loss improved from 2.10252 to 2.09015, saving model to files/unet.h5
Epoch 3/100
Epoch 3: val_loss improved from 2.09015 to 2.07214, saving model to files/unet.h5
Epoch 4/100
Epoch 4: val_loss improved from 2.07214 to 1.98986, saving model to files/unet.h5
Epoch 5/100
Epoch 5: val_loss improved from 1.98986 to 1.79884, saving model to files/unet.h5
Epoch 6/100
Epoch 6: val_loss improved from 1.79884 to 1.61537, saving model to files/unet.h5
Epoch 7/100
Epoch 7: val_loss improved from 1.61537 to 1.48289, saving model to files/unet.h5
Epoch 8/100
Epoch 8: val_loss improved from 1.48289 to 1.45982, saving model to files/unet.h5
Epoch 9/100
Epoch 9: val_loss improved from 1.45982 to 0.98852, saving model to files/unet.h5
Epoch 10/100
Epoch 10: val_loss improved from 0.98852 to 0.79103, saving model to files/unet.h5
Epoch 11/100
Epoch 11: val_loss did not improve from 0.79103
Epoch 12/100


Epoch 28/100
Epoch 28: val_loss did not improve from 0.12580
Epoch 29/100
Epoch 29: val_loss did not improve from 0.12580
Epoch 30/100
Epoch 30: val_loss did not improve from 0.12580
Epoch 31/100
Epoch 31: val_loss did not improve from 0.12580
Epoch 32/100
Epoch 32: val_loss improved from 0.12580 to 0.12222, saving model to files/unet.h5
Epoch 33/100
Epoch 33: val_loss improved from 0.12222 to 0.11860, saving model to files/unet.h5
Epoch 34/100
Epoch 34: val_loss improved from 0.11860 to 0.11640, saving model to files/unet.h5
Epoch 35/100
Epoch 35: val_loss improved from 0.11640 to 0.11527, saving model to files/unet.h5
Epoch 36/100
Epoch 36: val_loss improved from 0.11527 to 0.11464, saving model to files/unet.h5
Epoch 37/100
Epoch 37: val_loss improved from 0.11464 to 0.11425, saving model to files/unet.h5
Epoch 38/100
Epoch 38: val_loss improved from 0.11425 to 0.11411, saving model to files/unet.h5
Epoch 39/100
Epoch 39: val_loss improved from 0.11411 to 0.11397, saving model to fi

Epoch 57/100
Epoch 57: val_loss did not improve from 0.11397
Epoch 58/100
Epoch 58: val_loss did not improve from 0.11397
Epoch 59/100
Epoch 59: val_loss did not improve from 0.11397


<keras.callbacks.History at 0x7f41415d5ab0>