In [6]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, concatenate
from IPython.display import SVG
from tensorflow.keras.utils import model_to_dot
from natsort import natsorted
import matplotlib.pyplot as plt
from glob import glob
import json
from functools import partial
import numpy as np
from cachier import cachier
from tqdm import tqdm
import imageio
from skimage import img_as_float32
import cv2 as cv2
from math import pi, e, sqrt, cos, sin
from scipy.stats import norm, multivariate_normal
from scipy.spatial.distance import euclidean
from sklearn.metrics import accuracy_score

In [7]:
ztqdm = partial(tqdm, position=0, leave=True)
cachier = partial(cachier, pickle_reload=False, cache_dir='data/cache')

In [9]:
SIZE = (768, 1024)

DATA_PATH_PAIRS = list(zip(
    natsorted(glob(f'../data/images-{SIZE[1]}x{SIZE[0]}/*.png')),
    natsorted(glob(f'../data/masks-{SIZE[1]}x{SIZE[0]}/*.png')),
))
DATA_IMGS = np.array(
    [img_as_float32(imageio.imread(img_path)) for img_path, _ in tqdm(DATA_PATH_PAIRS, 'Loading Images')])
DATA_MSKS = np.array(
    [img_as_float32(imageio.imread(msk_path)) for _, msk_path in tqdm(DATA_PATH_PAIRS, 'Loading Masks')])

assert DATA_IMGS.shape == (48, SIZE[0], SIZE[1], 3)
assert DATA_MSKS.shape == (48, SIZE[0], SIZE[1])

Loading Images: 100%|██████████████████████████████████████████████████████████████████| 48/48 [00:01<00:00, 28.97it/s]
Loading Masks: 100%|██████████████████████████████████████████████████████████████████| 48/48 [00:00<00:00, 206.90it/s]


## VGG-16

The VGG16 model proposed was tested using the ImageNet data set, which contains over 15 million hand labeled high-resolution images, that belong to around 22-thousand categories. The model was trained for weeks, and pre-trained models are commonly available using deep learning APIs.

In [35]:
vgg16_pretrained = VGG16(include_top=False, weights='imagenet', input_shape=(768,1024,3))
vgg16_pretrained.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 768, 1024, 3)]    0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 768, 1024, 64)     1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 768, 1024, 64)     36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 384, 512, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 384, 512, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 384, 512, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 192, 256, 128)     0     

In [11]:
vgg16_pretrained.layers[0]

<tensorflow.python.keras.engine.input_layer.InputLayer at 0x1d9fdf6ac48>

## The U-Net architecture
U-Net was originally published as a convolutional network for biomedical image segmentation.
The UNet model was named after its U-shape that consists of two paths, which are commonly reffered to in deep learning as the encoder- and
decoder networks. UNet's encoder captures the context of input images, and it contains a symmetric decoder that allows to localization using a technique called upsampling (fractional convolution or transposed convolution.) 
<br><br>
![Unet](resources/UNet.PNG)


### VGG16-U-Net
The idea is to use the pre-trained VGG16 model as the encoder to a U-Net model, and also to construct a symmetric decoder network.

In [45]:
class VGGUnet(object):
    
    def __init__(self, segmentation_shape=(768, 1024, 3)):
        self.segmentation_shape = segmentation_shape
        self.model = self._build_model(self.segmentation_shape)
        self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        
    def summary(self):
        print(self.model.summary())
    
    def _build_model(self, segmentation_shape):
        """Builds a UNet model from Keras' pretrained VGG16 model."""
        encoder = VGG16(include_top=False, weights='imagenet', input_shape=segmentation_shape)
        
        # Decoder block1, from Encoder output
        encoder_output = encoder.output
        conv_0 = Conv2D(512, (3, 3), activation='relu', padding='same')(encoder_output)
        conv_0_up = Conv2DTranspose(256, (3, 3), strides=(2,2), activation='relu', padding='same')(conv_0)
        
        # Decoder block 1, concat block with encoder block 5
        concat_1 = concatenate([encoder.get_layer('block5_conv3').output, conv_0_up])
        conv_1 = Conv2D(512, (3, 3), activation='relu', padding='same')(concat_1)
        conv_1_up =  Conv2DTranspose(256, (3, 3), strides=(2,2), activation='relu', padding='same')(conv_1)
        
        # Decoder block 2, concat block with encoder block 4
        concat_2 = concatenate([encoder.get_layer('block4_conv3').output, conv_1_up])
        conv_2 = Conv2D(512, (3, 3), activation='relu', padding='same')(concat_2)
        conv_2_up =  Conv2DTranspose(256, (3, 3), strides=(2,2), activation='relu', padding='same')(conv_2)
        
        # Decoder block 3, concat block with encoder block 3
        concat_3 = concatenate([encoder.get_layer('block3_conv3').output, conv_2_up])
        conv_3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concat_3)
        conv_3_up =  Conv2DTranspose(128, (3, 3), strides=(2,2), activation='relu', padding='same')(conv_3)
        
        # Decoder block 4, concat block with encoder block 2
        concat_4 = concatenate([encoder.get_layer('block2_conv2').output, conv_3_up])
        conv_4 = Conv2D(128, (3, 3), activation='relu', padding='same')(concat_4)
        conv_4_up =  Conv2DTranspose(64, (3, 3), strides=(2,2), activation='relu', padding='same')(conv_4)
        
        # Decoder block 5, concat block with encoder block 1
        concat_5 = concatenate([encoder.get_layer('block1_conv2').output, conv_4_up])
        conv_5 = Conv2D(128, (3, 3), activation='relu', padding='same')(concat_5)
        return Model(inputs=[encoder.layers[0].input], outputs=[conv_5])
        #return encoder
    
    def fit(self, X, y):
        self.model.fit(X, y)

In [46]:
vgg_unet = VGGUnet()

In [47]:
vgg_unet.summary()

Model: "functional_11"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           [(None, 768, 1024, 3 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 768, 1024, 64 1792        input_11[0][0]                   
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 768, 1024, 64 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 384, 512, 64) 0           block1_conv2[0][0]               
______________________________________________________________________________________

In [48]:
vgg_unet.fit(np.expand_dims(DATA_IMGS[0], axis=0), np.expand_dims(DATA_MSKS[0], axis=0))

ValueError: in user code:

    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:749 train_step
        y, y_pred, sample_weight, regularization_losses=self.losses)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
        losses = ag_call(y_true, y_pred)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:1535 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\backend.py:4687 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 768, 1024) and (None, 768, 1024, 128) are incompatible


In [32]:
.shape

(1, 768, 1024, 3)

In [27]:
np.array([DATA_IMGS[0]]).shape

(1, 768, 1024, 3)

In [28]:
DATA_MSKS[0].shape

(768, 1024)