<a href="https://colab.research.google.com/github/Po-Hsuan-Huang/CSCI699_hw2_data/blob/master/train_online.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import tensorflow module**

In [0]:
! pip install -q pyyaml h5py
! pip install tensorflow==2

In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
import tensorflow as tf
from tensorflow import keras
from __future__ import absolute_import, division, print_function, unicode_literals


print(tf.version.VERSION)

**Define DeepLabv3 Model From Git repository**


In [0]:
#@title DeepLabV3+ Module {display-mode: "form"}

# This code will be hidden when the notebook is loaded.

# -*- coding: utf-8 -*-

""" Deeplabv3+ model for Keras.
This model is based on TF repo:
https://github.com/tensorflow/models/tree/master/research/deeplab
On Pascal VOC, original model gets to 84.56% mIOU

MobileNetv2 backbone is based on this repo:
https://github.com/JonathanCMitchell/mobilenet_v2_keras

# Reference
- [Encoder-Decoder with Atrous Separable Convolution
    for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
- [Xception: Deep Learning with Depthwise Separable Convolutions]
    (https://arxiv.org/abs/1610.02357)
- [Inverted Residuals and Linear Bottlenecks: Mobile Networks for
    Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381)
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.keras.models import Model
from tensorflow.python.keras import layers
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.layers import Lambda
from tensorflow.python.keras.layers import Activation
from tensorflow.python.keras.layers import Concatenate
from tensorflow.python.keras.layers import Add
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.layers import BatchNormalization
from tensorflow.python.keras.layers import Conv2D
from tensorflow.python.keras.layers import DepthwiseConv2D
from tensorflow.python.keras.layers import ZeroPadding2D
from tensorflow.python.keras.layers import GlobalAveragePooling2D
from tensorflow.python.keras.utils.layer_utils import get_source_inputs
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.activations import relu
from tensorflow.python.keras.applications.imagenet_utils import preprocess_input

# Download URLs of the pre-trained weight files, one per backbone (Xception / MobileNetV2).
# The plain names are fetched by Deeplabv3 for weights='pascal_voc', the `_CS` ones for weights='cityscapes'.
WEIGHTS_PATH_X = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5"
WEIGHTS_PATH_MOBILE = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5"
WEIGHTS_PATH_X_CS = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.2/deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5"
WEIGHTS_PATH_MOBILE_CS = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.2/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5"


def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
    """ SepConv with BN between depthwise & pointwise. Optionally add activation after BN
        Implements right "same" padding for even kernel sizes
        Args:
            x: input tensor
            filters: num of filters in pointwise convolution
            prefix: prefix before name
            stride: stride at depthwise conv
            kernel_size: kernel size for depthwise convolution
            rate: atrous rate for depthwise convolution
            depth_activation: flag to use activation between depthwise & pointwise convs
            epsilon: epsilon to use in BN layer
        Returns:
            output tensor of the pointwise convolution (after BN and, when
            `depth_activation` is set, a final ReLU)
    """

    if stride == 1:
        depth_padding = 'same'
    else:
        # With a stride, pad explicitly and convolve with 'valid' padding so the
        # amount of padding depends only on the (dilated) kernel size.
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # A flat 2-tuple would be read by ZeroPadding2D as
        # (symmetric_height_pad, symmetric_width_pad); the nested form pads
        # `pad_beg` before and `pad_end` after along BOTH spatial axes.
        # For odd kernel sizes pad_beg == pad_end, so both forms agree.
        x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
        depth_padding = 'valid'

    # Either a single ReLU before the depthwise conv, or one after each BN.
    if not depth_activation:
        x = Activation('relu')(x)
    x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
                        padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
    x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)
    x = Conv2D(filters, (1, 1), padding='same',
               use_bias=False, name=prefix + '_pointwise')(x)
    x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)

    return x


def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    """Implements right 'same' padding for even kernel sizes
        Without this there is a 1 pixel drift when stride = 2
        Args:
            x: input tensor
            filters: num of filters of the convolution
            prefix: name given to the Conv2D layer
            stride: stride of the convolution
            kernel_size: kernel size of the convolution
            rate: atrous (dilation) rate of the convolution
        Returns:
            output tensor of the bias-free Conv2D layer
    """
    if stride == 1:
        padding = 'same'
    else:
        # Pad explicitly and use 'valid' so the padding depends only on the
        # (dilated) kernel size, not on the input size.
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # Nested tuples = ((top, bottom), (left, right)). A flat 2-tuple would
        # be interpreted as (symmetric_height_pad, symmetric_width_pad), which
        # is only equivalent when pad_beg == pad_end (odd kernel sizes).
        x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
        padding = 'valid'

    return Conv2D(filters,
                  (kernel_size, kernel_size),
                  strides=(stride, stride),
                  padding=padding, use_bias=False,
                  dilation_rate=(rate, rate),
                  name=prefix)(x)


def _xception_block(inputs, depth_list, prefix, skip_connection_type, stride,
                    rate=1, depth_activation=False, return_skip=False):
    """ Basic building block of modified Xception network
        Args:
            inputs: input tensor
            depth_list: number of filters in each SepConv layer. len(depth_list) == 3
            prefix: prefix before name
            skip_connection_type: one of {'conv','sum','none'}
            stride: stride at last depthwise conv
            rate: atrous rate for depthwise convolution
            depth_activation: flag to use activation between depthwise & pointwise convs
            return_skip: flag to return additional tensor after 2 SepConvs for decoder
            """
    residual = inputs
    for i in range(3):
        residual = SepConv_BN(residual,
                              depth_list[i],
                              prefix + '_separable_conv{}'.format(i + 1),
                              stride=stride if i == 2 else 1,
                              rate=rate,
                              depth_activation=depth_activation)
        if i == 1:
            skip = residual
    if skip_connection_type == 'conv':
        shortcut = _conv2d_same(inputs, depth_list[-1], prefix + '_shortcut',
                                kernel_size=1,
                                stride=stride)
        shortcut = BatchNormalization(name=prefix + '_shortcut_BN')(shortcut)
        outputs = layers.add([residual, shortcut])
    elif skip_connection_type == 'sum':
        outputs = layers.add([residual, inputs])
    elif skip_connection_type == 'none':
        outputs = residual
    if return_skip:
        return outputs, skip
    else:
        return outputs


def relu6(x):
    """ReLU activation whose output is clipped at 6 (used by the MobileNetV2 blocks)."""
    clipped = relu(x, max_value=6)
    return clipped


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
    """MobileNetV2 inverted residual block: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    Args:
        inputs: input tensor
        expansion: multiplier applied to the input channel count in the expand conv
        stride: stride of the depthwise convolution
        alpha: width multiplier applied to `filters`
        filters: base number of output filters (before `alpha` and rounding to a multiple of 8)
        block_id: index used in layer names; block 0 has no expand stage
        skip_connection: when truthy, the block input is added to the projected output
        rate: atrous rate of the depthwise convolution
    Returns:
        output tensor of the block
    """
    input_channels = inputs.shape[-1]  # inputs._keras_shape[-1]
    projected_channels = _make_divisible(int(filters * alpha), 8)

    if block_id:
        name_prefix = 'expanded_conv_{}_'.format(block_id)
        # Expand
        y = Conv2D(expansion * input_channels, kernel_size=1, padding='same',
                   use_bias=False, activation=None,
                   name=name_prefix + 'expand')(inputs)
        y = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name=name_prefix + 'expand_BN')(y)
        y = Activation(relu6, name=name_prefix + 'expand_relu')(y)
    else:
        # The very first block skips the expand stage and uses un-numbered names.
        name_prefix = 'expanded_conv_'
        y = inputs

    # Depthwise
    y = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                        use_bias=False, padding='same', dilation_rate=(rate, rate),
                        name=name_prefix + 'depthwise')(y)
    y = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=name_prefix + 'depthwise_BN')(y)
    y = Activation(relu6, name=name_prefix + 'depthwise_relu')(y)

    # Project (linear bottleneck: no activation after the BN)
    y = Conv2D(projected_channels,
               kernel_size=1, padding='same', use_bias=False, activation=None,
               name=name_prefix + 'project')(y)
    y = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=name_prefix + 'project_BN')(y)

    if not skip_connection:
        return y
    return Add(name=name_prefix + 'add')([inputs, y])


def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(500, 500, 3), classes=21, backbone='mobilenetv2',
              OS=16, alpha=1., activation=None):
    """Builds a DeepLabv3+ Keras model on top of an Xception or MobileNetV2 backbone.

    Args:
        weights: one of 'pascal_voc', 'cityscapes' or None (random initialization).
            For the two named options the matching weight file is downloaded
            and loaded by layer name.
        input_tensor: optional Keras tensor to use as the image input. When
            None, a new `Input(shape=input_shape)` is created.
        input_shape: shape of the image input, used only when `input_tensor`
            is None.
        classes: number of channels produced by the final 1x1 convolution. When
            it does not match the chosen pre-trained weights (21 for
            'pascal_voc', 19 for 'cityscapes') the last layer gets a different
            name, so `load_weights(..., by_name=True)` leaves it untouched.
        backbone: 'xception' or 'mobilenetv2'.
        OS: output stride. Only read for the xception backbone, where 8 selects
            the denser settings and any other value the stride-16 settings; it
            is ignored for mobilenetv2.
        alpha: width multiplier for the MobileNetV2 filter counts.
        activation: 'softmax' or 'sigmoid' to append that activation to the
            output; any other value leaves the raw logits.

    Returns:
        A Keras `Model` named 'deeplabv3plus' whose output is resized to the
        spatial size of the image input.

    Raises:
        ValueError: for an unsupported `weights` or `backbone` value.
    """

    if not (weights in {'pascal_voc', 'cityscapes', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `pascal_voc`, or `cityscapes` '
                         '(pre-trained on PASCAL VOC)')

    if not (backbone in {'xception', 'mobilenetv2'}):
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    # The image is the model's only input. An extra, never-connected mask Input
    # used to be created here; deriving its shape via tf.TensorShape(input_tensor)
    # broke the `input_tensor` path, so it has been removed.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    if backbone == 'xception':

        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3), strides=(2, 2),
                   name='entry_flow_conv1_1', use_bias=False, padding='same')(img_input)

        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                            skip_connection_type='conv', stride=2,
                            depth_activation=False)
        # skip1 is the low-level feature map later consumed by the decoder
        x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                                   skip_connection_type='conv', stride=2,
                                   depth_activation=False, return_skip=True)

        x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                            skip_connection_type='conv', stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum', stride=1, rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                            skip_connection_type='conv', stride=1, rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                            skip_connection_type='none', stride=1, rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        # MobileNetV2 backbone: strides/rates below are fixed, `OS` is not consulted.
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2), padding='same',
                   use_bias=False, name='Conv')(img_input)
        x = BatchNormalization(
            epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
        x = Activation(relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                                expansion=1, block_id=0, skip_connection=False)

        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                                expansion=6, block_id=1, skip_connection=False)
        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                                expansion=6, block_id=2, skip_connection=True)

        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                                expansion=6, block_id=3, skip_connection=False)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=4, skip_connection=True)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=5, skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,  # 1!
                                expansion=6, block_id=6, skip_connection=False)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=7, skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=8, skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=9, skip_connection=True)

        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=10, skip_connection=False)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=11, skip_connection=True)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=12, skip_connection=True)

        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=2,  # 1!
                                expansion=6, block_id=13, skip_connection=False)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=14, skip_connection=True)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=15, skip_connection=True)

        x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=16, skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    b4 = GlobalAveragePooling2D()(x)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(256, (1, 1), padding='same',
                use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    # upsample. have to use compat because of the option align_corners
    size_before = tf.keras.backend.int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(x, size_before[1:3],
                                                    method='bilinear', align_corners=True))(b4)
    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x, 256, 'aspp1',
                        rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x, 256, 'aspp2',
                        rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x, 256, 'aspp3',
                        rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1), padding='same',
               use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)
    # DeepLab v.3+ decoder

    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        # upsample the ASPP output to the spatial size of the low-level features
        x = Lambda(lambda xx: tf.compat.v1.image.resize(xx,
                                                        skip1.shape[1:3],
                                                        method='bilinear', align_corners=True))(x)

        dec_skip1 = Conv2D(48, (1, 1), padding='same',
                           use_bias=False, name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(
            name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x, 256, 'decoder_conv0',
                       depth_activation=True, epsilon=1e-5)
        x = SepConv_BN(x, 256, 'decoder_conv1',
                       depth_activation=True, epsilon=1e-5)

    # you can use it with arbitary number of classes
    # (a differently named last layer is skipped by the by-name weight loading below)
    if (weights == 'pascal_voc' and classes == 21) or (weights == 'cityscapes' and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    # resize the logits back to the spatial size of the image input
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(xx,
                                                    size_before3[1:3],
                                                    method='bilinear', align_corners=True))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    model = Model(inputs, x, name='deeplabv3plus')

    # load weights

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_X,
                                    cache_subdir='models')
        else:
            weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_MOBILE,
                                    cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':
            weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                                    WEIGHTS_PATH_X_CS,
                                    cache_subdir='models')
        else:
            weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                                    WEIGHTS_PATH_MOBILE_CS,
                                    cache_subdir='models')
        model.load_weights(weights_path, by_name=True)

    return model
        
   

def preprocess_input(x):
    """Preprocesses a numpy array encoding a batch of images.
    # Arguments
        x: a 4D numpy array consists of RGB values within [0, 255].
    # Returns
        Input array scaled to [-1.,1.]
    """
    # This definition shadows the `preprocess_input` imported from Keras at the
    # top of the cell, so calling that name here would call this function again
    # (and fail on the unexpected `mode` keyword). Import the Keras helper under
    # an alias instead.
    from tensorflow.python.keras.applications.imagenet_utils import (
        preprocess_input as _keras_preprocess_input)
    return _keras_preprocess_input(x, mode='tf')


**Data Loading functions**

In [0]:
#@title get_filename_data_readers(image_ids_file) { display-mode: "form" }

# This code will be hidden when the notebook is loaded.

def get_filename_data_readers(image_ids_file, get_labels=False,
                              x_jpg_dir=None, y_png_dir=None):
  """Given image IDs file, returns Datasets, which generate image paths.

  The goal of this function is to convert from image IDs to image paths.
  Specifically, the return type should be:
    if get_labels is falsy, return type should be tf.data.Dataset.
    Otherwise, return type should be pair (tf.data.Dataset, tf.data.Dataset).
  In both cases, the Dataset objects are "not batched".

  For example, if the file contains 2 lines: "0000\n0001", then the image
  dataset yields "<x_jpg_dir>/0000.jpg" first and "<x_jpg_dir>/0001.jpg" second.

  Args:
    image_ids_file: text with one image ID per line. Blank lines are skipped
      and surrounding whitespace is stripped from each ID.
    get_labels: If set, returns 2 Datasets: the first containing the image
      files (x) and the second containing the segmentation labels (y). If not,
      returns only the first.
    x_jpg_dir: Directory where each image lives. Specifically, image with
      ID "image1" will live on "x_jpg_dir/image1.jpg". Defaults to
      "<FLAGS.data_dir>/images".
    y_png_dir: Directory where each segmentation mask lives. Specifically,
      image with ID "image1" will live on "y_png_dir/image1.png". Defaults to
      "<FLAGS.data_dir>/tf_segmentation"; only resolved when labels are
      requested.

  Returns:
    instance of tf.data.Dataset, or pair of instances (if get_labels is set).
  """
  x_jpg_dir = x_jpg_dir or os.path.join(FLAGS.data_dir, 'images')

  # Read the IDs once; both datasets are derived from the same list so that
  # images and labels stay aligned.
  with open(image_ids_file, 'r') as ids_file:
    image_ids = [line.strip() for line in ids_file if line.strip()]

  def paths_dataset(directory, extension):
    """Dataset of '<directory>/<id><extension>' strings, one per image ID."""
    paths = [os.path.join(directory, image_id + extension)
             for image_id in image_ids]
    # An explicit string dtype keeps an empty ID list from becoming a float dataset.
    return tf.data.Dataset.from_tensor_slices(tf.constant(paths, dtype=tf.string))

  image_paths = paths_dataset(x_jpg_dir, '.jpg')
  if not get_labels:
    return image_paths

  y_png_dir = y_png_dir or os.path.join(FLAGS.data_dir, 'tf_segmentation')
  return image_paths, paths_dataset(y_png_dir, '.png')

def decode_image_with_padding(im_file, decode_fn=tf.image.decode_jpeg,
                              channels=3, pad_upto=500):
  """Loads one image file and zero-pads it to a square of side `pad_upto`.

  Args:
    im_file: tf.string tensor holding the path of the image file.
    decode_fn: TensorFlow decoder applied to the raw file contents; it is
      called as `decode_fn(contents, channels=channels)`.
    channels: Number of channels to decode. Use 3 (RGB) for input images and 1
      for segmentation masks, whose channels carry identical information.
    pad_upto: Height and width of the padded result, in pixels.

  Returns:
    Pair of Tensors:
      a vector with 2 entries, the height and width of the decoded image
        before padding;
      a tf.int32 tensor of shape (pad_upto, pad_upto, channels) with the image
        in the top-left corner and zeros elsewhere.
  """
  decoded = decode_fn(tf.io.read_file(im_file), channels=channels)
  original_hw = tf.shape(decoded)[:2]
  # Offsets (0, 0): all padding goes to the bottom and right edges.
  padded = tf.image.pad_to_bounding_box(decoded, 0, 0, pad_upto, pad_upto)
  return (original_hw, tf.cast(padded, tf.int32))
  


def make_loss_mask(shapes, pad_upto=500):
  """Given int matrix with shape [N, 2], make N 2D binary masks.

  These binary masks will be used "to mask the loss". Specifically, if the
  image is shaped as (300 x 400) and therefore so its labels, we only want
  to penalize the model for misclassifying within the image boundary (300 x 400)
  and ignore values outside (e.g. at pixel [350, 380]).

  Args:
    shapes: int array-like (numpy array, nested list or eager tensor) with
      shape [N, 2]. Entry shapes[i] is the vector [image height, image width].
    pad_upto: side length of the padded images, i.e. of each mask.

  Returns:
    tf.float32 mask of shape [N, pad_upto, pad_upto], with mask[i, h, w] set to
    1.0 iff h < shapes[i, 0] and w < shapes[i, 1]. Heights or widths larger
    than `pad_upto` simply give a fully-set row/column range.
  """
  hw = np.asarray(shapes).reshape(-1, 2)
  heights = hw[:, 0].reshape(-1, 1, 1)
  widths = hw[:, 1].reshape(-1, 1, 1)
  row_index = np.arange(pad_upto).reshape(1, -1, 1)
  col_index = np.arange(pad_upto).reshape(1, 1, -1)
  # Broadcasts to [N, pad_upto, pad_upto]: a pixel is inside the image when
  # both its row and its column are within the original extent.
  inside = (row_index < heights) & (col_index < widths)
  return tf.convert_to_tensor(inside.astype(np.float32), dtype=tf.float32)
      

def read_image_pair_with_padding(x_im_file, y_im_file, pad_upto=500):
  """Reads image pair (image & segmentation). You might find it useful.

  It only works properly, if you implemented `decode_image_with_padding`. You
  do not have to use this function, if you do not find it useful.

  Args:
    x_im_file: Full path to jpg image to be segmented.
    y_im_file: Full path to png image, containing ground-truth segmentation.
    pad_upto: The padding of the images; forwarded to both decode calls.

  Returns:
    tuple of tensors with 3 entries:
      the [height, width] vector of the jpg image before padding,
      the padded jpg image (3 channels),
      the padded png segmentation (1 channel).
  """
  shape, im_x = decode_image_with_padding(x_im_file, pad_upto=pad_upto)
  # Only the image's shape is returned; the mask's own shape is discarded.
  _, im_y = decode_image_with_padding(y_im_file, tf.image.decode_png,
                                      channels=1, pad_upto=pad_upto)
  return shape, im_x, im_y



class SegmentationModel:
  """Class that can segment images into 21 classes (class 0 being background).

  You must implement the following in this class:
  + load()
  + predict()
  which will be called by the auto-grader

  + train()
  + save()
  which will NOT be called by the auto-grader, but you must implement them for
  repeatability. After the grading period, we will re-train all models to make
  sure their present training will get their results.
  """

  # File names written by save() and read back by load(), inside `model_dir`.
  CONFIG_FILENAME = 'model_config.json'
  WEIGHTS_FILENAME = 'path_to_my_weights.h5'
  # Data location used by train() when no `data_dir` is passed.
  DEFAULT_DATA_DIR = '/home/pohsuanh/Documents/Lectures/CSCI699/hw2/hw2_data'

  def __init__(self):
    # 0: background class, {1, .., 20} are for class labels.
    self.num_classes = 21
    self.batch_size = 100  # You can change or remove
    self.num_iter = 2000
    self.model_dir = '/home/pohsuanh/Documents/Lectures/CSCI699/hw2/hw2_models/'
    # Set by train() or load().
    self.model = None

  def save(self, model_dir):
    """Saves the model architecture (JSON) and weights (HDF5) into `model_dir`.

    The directory is created when missing. The files are the ones load() reads.
    """
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    # Save JSON config to disk
    with open(os.path.join(model_dir, self.CONFIG_FILENAME), 'w') as json_file:
        json_file.write(self.model.to_json())
    # Save weights to disk
    self.model.save_weights(os.path.join(model_dir, self.WEIGHTS_FILENAME))

    print("Model saved in path: %s" % model_dir)

  def load(self, model_dir):
    """Restores the model saved by save() from `model_dir` into `self.model`."""
    # Reload the model from the 2 files we saved
    with open(os.path.join(model_dir, self.CONFIG_FILENAME)) as json_file:
        json_config = json_file.read()
    # `relu6` is a custom activation that Keras cannot resolve by name on its
    # own; `tf` and `K` are the globals used inside the Lambda layer bodies.
    restored = tf.keras.models.model_from_json(
        json_config, custom_objects={'relu6': relu6, 'tf': tf, 'K': K})
    restored.load_weights(os.path.join(model_dir, self.WEIGHTS_FILENAME))
    self.model = restored

    print("Model laoded from path: %s" % model_dir)

  def predict(self, images, batch_size=2):
    """Predicts segments of some images.

    This method WILL BE CALLED by auto-grader. Please do not change signature of
    function [though adding optional arguments is fine].

    Args:
      images: List of images. All will be padded to 500x500 by autograder. The
        list can be a primitive list or a numpy array. To go from the former to
        the latter, just wrap as np.array(images).
      batch_size: number of images pushed through the network at once.

    Returns:
      Numpy array of per-class scores as produced by the model. For 500x500
      inputs its shape is N x 500 x 500 x 21, where N == len(images). Our
      evaluation script will take the argmax on the last column for accuracy
      (and might calculate AUC).
    """
    # Same input conversion as in training: a plain cast to float32.
    batch = np.asarray(images, dtype=np.float32)
    return np.asarray(self.model.predict(batch, batch_size=batch_size))

  @staticmethod
  def _masked_targets(shapes, segs, void_label=255):
    """Turns padded label images into (labels, per-pixel loss weights).

    Args:
      shapes: int tensor [N, 2] with the original [height, width] of each image.
      segs: int tensor [N, H, W, 1] with the zero-padded label images.
      void_label: label value that must not contribute to the loss.

    Returns:
      labels: int32 tensor [N, H, W]; void pixels are replaced by 0 so every
        label is a valid class index.
      weights: float32 tensor [N, H, W]; 1.0 for pixels inside the original
        image that are not void, 0.0 elsewhere.
    """
    labels = tf.cast(tf.squeeze(segs, axis=-1), tf.int32)
    padded = tf.shape(labels)
    rows_inside = tf.sequence_mask(shapes[:, 0], maxlen=padded[1])  # [N, H]
    cols_inside = tf.sequence_mask(shapes[:, 1], maxlen=padded[2])  # [N, W]
    inside = tf.logical_and(rows_inside[:, :, tf.newaxis],
                            cols_inside[:, tf.newaxis, :])
    labelled = tf.not_equal(labels, void_label)
    weights = tf.cast(tf.logical_and(inside, labelled), tf.float32)
    labels = tf.where(labelled, labels, tf.zeros_like(labels))
    return labels, weights

  def train(self, train_ids_file, data_dir=None):
    """Trains the model.

    This method WILL BE CALLED by our scripts, after submission period. Please
    do not add required arguments. Feel free to completely erase its body.

    Builds the model, then runs a custom training loop in which padding pixels
    and void (255) pixels are excluded from both the loss and the accuracy.
    The masks are computed by `_masked_targets` rather than by the notebook's
    `make_loss_mask`, which is defined twice with different signatures.

    Args:
      train_ids_file: file containing image IDs.
      data_dir: directory containing the 'images' and 'tf_segmentation'
        folders. Defaults to `DEFAULT_DATA_DIR`.
    """
    data_dir = data_dir or self.DEFAULT_DATA_DIR

    # Load data
    img_path, label_path = get_filename_data_readers(
        train_ids_file,
        x_jpg_dir=os.path.join(data_dir, 'images'),
        y_png_dir=os.path.join(data_dir, 'tf_segmentation'),
        get_labels=True)
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    Data = tf.data.Dataset.zip((img_path, label_path)).map(
        read_image_pair_with_padding, num_parallel_calls=AUTOTUNE)

    # NOTE(review): only the first 10 examples are used (8 train / 2 test);
    # this looks like a debugging subset -- confirm before a real training run.
    DATASET_SIZE = 10
    train_size = int(0.8 * DATASET_SIZE)
    test_size = int(0.2 * DATASET_SIZE)

    # No .repeat(): each pass over train_ds is exactly one epoch, so the epoch
    # loop below terminates. The test split starts after the training examples.
    train_ds = (Data.take(train_size)
                .shuffle(1000).batch(2).prefetch(buffer_size=AUTOTUNE))
    test_ds = (Data.skip(train_size).take(test_size)
               .batch(2).prefetch(buffer_size=AUTOTUNE))

    # Build model
    self.model = self.build_train_model()

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    optimizer = tf.keras.optimizers.Adam()

    # NOTE(review): the model is called without `training=True`, as before;
    # confirm whether BatchNorm/Dropout are meant to run in training mode.
    @tf.function
    def train_step(images, labels, weights):
      with tf.GradientTape() as tape:
        predictions = self.model(tf.cast(images, tf.float32))
        loss = loss_object(labels, predictions, sample_weight=weights)
      gradients = tape.gradient(loss, self.model.trainable_variables)
      optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
      train_loss(loss)
      train_accuracy(labels, predictions, sample_weight=weights)

    @tf.function
    def test_step(images, labels, weights):
      predictions = self.model(tf.cast(images, tf.float32))
      loss = loss_object(labels, predictions, sample_weight=weights)
      test_loss(loss)
      test_accuracy(labels, predictions, sample_weight=weights)

    EPOCHS = 5
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    for epoch in range(EPOCHS):
      # Start every epoch with fresh metrics.
      for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

      for shapes, images, segs in train_ds:
        labels, weights = self._masked_targets(shapes, segs)
        train_step(images, labels, weights)

      for shapes, images, segs in test_ds:
        labels, weights = self._masked_targets(shapes, segs)
        test_step(images, labels, weights)

      print(template.format(epoch + 1,
                            train_loss.result(),
                            train_accuracy.result() * 100,
                            test_loss.result(),
                            test_accuracy.result() * 100))

  # TODO(student): Feel free to remove if you do not use.
  def build_train_model(self):
     """Returns the DeepLabv3+ network with PASCAL VOC weights and a softmax output."""
     return Deeplabv3(input_shape=(500, 500, 3), classes=21, weights= 'pascal_voc',activation='softmax')
         

In [0]:
#@title decode_image_with_padding(im_file) {display-mode: "form"}

# This code will be hidden when the notebook is loaded.
def decode_image_with_padding(im_file, decode_fn=tf.image.decode_jpeg,
                              channels=3, pad_upto=500):
  """Loads an image file, decodes it and zero-pads it to a square canvas.

  Args:
    im_file: tf.string tensor containing the path of the image file.
    decode_fn: TensorFlow decoding function applied to the raw file contents;
      it is invoked as `decode_fn(contents, channels=channels)`.
    channels: Number of channels to decode. Use 3 (RGB) for input images and 1
      for segmentation masks, whose other channels carry identical information.
    pad_upto: Target height and width, in pixels, of the padded output.

  Returns:
    Pair of tensors `(shape, image)`:
      shape: the first two entries of the decoded image's shape, i.e. its
        height and width before padding.
      image: tf.int32 tensor holding the decoded image at the top-left corner
        of a zero-filled canvas of pad_upto x pad_upto pixels.
  """
  decoded = decode_fn(tf.io.read_file(im_file), channels=channels)
  original_hw = tf.shape(decoded)[:2]
  # Offsets (0, 0) keep the image in the top-left corner; zeros are added
  # below and to the right.
  padded = tf.image.pad_to_bounding_box(decoded, 0, 0, pad_upto, pad_upto)
  return (original_hw, tf.cast(padded, tf.int32))

In [0]:
#@title make_loss_mask(shapes, labels) {display-mode: "form"}

# This code will be hidden when the notebook is loaded.

def make_loss_mask(shapes, labels, pad_upto=500, ignore_label=255):
  """Given image sizes and padded label maps, makes N 2D binary loss masks.

  These binary masks are used "to mask the loss". Specifically, if an image is
  shaped as (300 x 400), and therefore so are its labels, the model should only
  be penalized for misclassifying pixels inside the image boundary (300 x 400);
  padded pixels outside of it (e.g. pixel [350, 380]) are ignored. Pixels whose
  label equals `ignore_label` are ignored as well.

  Args:
    shapes: Tensor or array with shape [N, 2]. Entry shapes[i] is the vector
      [image height, image width]. Non-integer values are truncated to
      integers, so float tensors are accepted too.
    labels: Tensor or array holding N padded label maps, with shape
      [N, pad_upto, pad_upto] or [N, pad_upto, pad_upto, 1].
    pad_upto: Height and width of the padded images and of the masks.
    ignore_label: Label value that must not contribute to the loss.

  Returns:
    tf.float32 mask of shape [N, pad_upto, pad_upto], with mask[i, h, w] set to
    1.0 iff h < shapes[i, 0] and w < shapes[i, 1] and
    labels[i, h, w] != ignore_label, and to 0.0 otherwise.
  """
  # Sizes are pixel counts; cast so that float inputs cannot break the
  # comparisons below.
  heights_widths = np.asarray(shapes).astype(np.int64).reshape(-1, 2)
  # Drops a trailing channel axis of size 1 if present.
  label_maps = np.asarray(labels).reshape(-1, pad_upto, pad_upto)

  pixel_index = np.arange(pad_upto)
  inside_rows = pixel_index[None, :, None] < heights_widths[:, 0, None, None]
  inside_cols = pixel_index[None, None, :] < heights_widths[:, 1, None, None]
  masks = inside_rows & inside_cols & (label_maps != ignore_label)

  return tf.convert_to_tensor(masks.astype(np.float32), dtype=tf.float32)



In [0]:
#@title read_image_pair_with_padding(x_im_file) {display-mode: "form"}

# This code will be hidden when the notebook is loaded.

def read_image_pair_with_padding(x_im_file, y_im_file, pad_upto=500):
  """Reads an image pair (image & segmentation) and pads both.

  It only works properly if `decode_image_with_padding` is implemented.

  Args:
    x_im_file: Full path to jpg image to be segmented.
    y_im_file: Full path to png image, containing ground-truth segmentation.
    pad_upto: The padding of the images; forwarded to
      `decode_image_with_padding` for both files.

  Returns:
    Tuple of tensors with 3 entries `(shape, im_x, im_y)`:
      shape: the shape entry returned by `decode_image_with_padding` for the
        jpg image.
      im_x: the padded jpg image, decoded with the default decoder and
        channel count.
      im_y: the padded png segmentation, decoded with 1 channel.
  """
  shape, im_x = decode_image_with_padding(x_im_file, pad_upto=pad_upto)
  # The shape reported for the segmentation file is discarded; only the one
  # from the jpg image is returned.
  _, im_y = decode_image_with_padding(y_im_file, tf.image.decode_png,
                                      channels=1, pad_upto=pad_upto)
  return shape, im_x, im_y


**Clone Data Repository**

In [0]:
# Select git's "cache" credential helper in the global git config.
! git config --global credential.helper cache
# NOTE(review): this writes the placeholder value "foo" to a credential.* key;
# its purpose is not evident from the notebook — confirm it is still needed.
! git config --global credential.https://github.com.Po-Hsuan-Huang foo
# Fetch the homework data repository (the next cell lists CSCI699_hw2_data).
! git clone https://github.com/Po-Hsuan-Huang/CSCI699_hw2_data.git

In [0]:
# List the contents of the cloned data repository.
! ls CSCI699_hw2_data

In [0]:
! ls
!ls

**Define Data_dir**


**Define Segmentation Model Class**

In [0]:
class SegmentationModel:
  """Class that can segment images into 21 classes (class 0 being background).

  The following methods are called by the auto-grader:
  + load()
  + predict()

  The following methods are NOT called by the auto-grader, but are implemented
  for repeatability (models are re-trained after the grading period to check
  that training reproduces the submitted results):
  + train()
  + save()
  """

  def __init__(self):
    # 0: background class, {1, .., 20} are for class labels.
    self.num_classes = 21
    self.batch_size = 100  # You can change or remove
    self.num_iter = 2000
    # Directory where save() writes and load() reads the model files.
    self.model_dir = 'CSCI699_hw2_data/model_weights'
    # Root of the cloned data repository (relative to the working directory).
    self.data_dir = 'CSCI699_hw2_data'
    # Set by load() or train(); predict() needs it.
    self.model = None

  def save(self, model_dir, epoch):
    """Saves model architecture and parameters to disk.

    Writes `model_config.json` (architecture) and `weights_<epoch>.h5`
    (weights, `epoch` zero-padded to 4 digits) into `model_dir`, creating the
    directory first if it does not exist.

    Args:
      model_dir: Directory to write the two files into.
      epoch: Integer used to number the weights file.
    """
    if not os.path.isdir(model_dir):
      os.makedirs(model_dir)
    # Serialize model architecture to JSON.
    with open(os.path.join(model_dir, 'model_config.json'), "w") as json_file:
      json_file.write(self.model.to_json())
    # Serialize weights to HDF5.
    weights_name = 'weights_{:04d}.h5'.format(epoch)
    self.model.save_weights(os.path.join(model_dir, weights_name))
    print("Saved model to disk")

  @staticmethod
  def _latest_weights_file(model_dir):
    """Returns the most recently changed `*.h5` file inside `model_dir`.

    Raises:
      ValueError: if `model_dir` contains no `*.h5` file.
    """
    candidates = glob.glob(os.path.join(model_dir, '*.h5'))
    if not candidates:
      raise ValueError(
          'No *.h5 weight files found in {}'.format(model_dir))
    return max(candidates, key=os.path.getctime)

  def load(self, model_dir, filename=None):
    """Restores the model architecture and parameters from `model_dir`.

    Args:
      model_dir: Directory containing `model_config.json` and weight files.
      filename: Name of the weights file inside `model_dir`. If not specified,
        the latest `*.h5` file in `model_dir` is used.

    Returns:
      The restored model, which is also stored in `self.model`.

    Raises:
      ValueError: if `filename` is not given and `model_dir` has no `*.h5` file.
    """
    # Rebuild the architecture from the JSON file written by save().
    with open(os.path.join(model_dir, 'model_config.json')) as json_file:
      json_config = json_file.read()
    self.model = keras.models.model_from_json(json_config)

    # Reload weights.
    if filename:
      weights_path = os.path.join(model_dir, filename)
      self.model.load_weights(weights_path)
      print('Load ', weights_path)
    else:
      latest_file = self._latest_weights_file(model_dir)
      self.model.load_weights(latest_file)
      print('Load the latest model:', latest_file)
    return self.model

  def predict(self, images):
    """Predicts segments of some images.

    This method WILL BE CALLED by auto-grader. Please do not change signature of
    function [though adding optional arguments is fine].

    Args:
      images: List of images. All will be padded to 500x500 by autograder. The
        list can be a primitive list or a numpy array.

    Returns:
      Numpy array with the model output for `images`. For the network built by
      `build_train_model` this is the per-class softmax, expected to be of
      shape N x 500 x 500 x 21, where N == len(images). The evaluation script
      takes the argmax over the last axis.

    Raises:
      RuntimeError: if no model has been loaded or trained yet.
    """
    if getattr(self, 'model', None) is None:
      raise RuntimeError('No model available: call load() or train() first.')
    # Same float32 conversion that the training step applies to its inputs.
    images = np.asarray(images, dtype=np.float32)
    softmax = self.model(images)
    return np.asarray(softmax)

  def train(self, train_ids_file):
    """Trains the model.

    This method WILL BE CALLED by our scripts, after submission period. Please
    do not add required arguments.

    Resumes from the latest weights in `self.model_dir` when any `*.h5` file is
    present there; otherwise starts from `build_train_model()`. Checkpoints are
    written to `self.model_dir` during training.

    Args:
      train_ids_file: file containing image IDs.
    """
    # Load data.
    img_path, label_path = get_filename_data_readers(
        train_ids_file,
        x_jpg_dir=os.path.join(self.data_dir, 'images'),
        y_png_dir=os.path.join(self.data_dir, 'tf_segmentation'),
        get_labels=True)
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    data = tf.data.Dataset.zip((img_path, label_path)).map(
        read_image_pair_with_padding, num_parallel_calls=AUTOTUNE)

    # NOTE(review): hard-coded; presumably the number of IDs in the file used
    # during development — confirm against `train_ids_file`.
    DATASET_SIZE = 20
    BATCH = 2
    SAVE_EVERY = 2  # Checkpoint every SAVE_EVERY training iterations.
    EPOCHS = 5

    train_size = int(0.8 * DATASET_SIZE)
    test_size = int(0.2 * DATASET_SIZE)

    train_ds = data.take(train_size)
    # Hold out the examples that come AFTER the training split, so the two
    # splits do not overlap.
    test_ds = data.skip(train_size).take(test_size)

    # Shuffle and batch. No repeat(): the epoch loop below re-iterates the
    # dataset, and an endlessly repeated dataset would never finish an epoch.
    train_ds = train_ds.shuffle(1000).batch(BATCH).prefetch(buffer_size=AUTOTUNE)
    test_ds = test_ds.batch(BATCH).prefetch(buffer_size=AUTOTUNE)

    # Build model.
    if not os.path.isdir(self.model_dir):
      print('folder {:s} does not exist'.format(self.model_dir))
      os.makedirs(self.model_dir)
    if glob.glob(os.path.join(self.model_dir, '*.h5')):
      self.load(self.model_dir)
    else:
      print("model_dir is empty, initialize weights.")
      self.model = self.build_train_model()

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def train_step(images, labels, loss_masks):
      images = tf.cast(images, tf.float32)
      # tf.boolean_mask expects a boolean mask; the loss masks are 0.0 / 1.0.
      valid = tf.cast(loss_masks, tf.bool)
      with tf.GradientTape() as tape:
        softmax = self.model(images)
        # Keep only the pixels that contribute to the loss.
        predictions = tf.boolean_mask(softmax, valid)
        targets = tf.boolean_mask(labels, valid)
        loss = loss_object(targets, predictions)

      gradients = tape.gradient(loss, self.model.trainable_variables)
      optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

      train_loss(loss)
      train_accuracy(targets, predictions)

    @tf.function
    def test_step(images, labels, loss_masks):
      images = tf.cast(images, tf.float32)
      valid = tf.cast(loss_masks, tf.bool)
      softmax = self.model(images)
      predictions = tf.boolean_mask(softmax, valid)
      targets = tf.boolean_mask(labels, valid)
      loss = loss_object(targets, predictions)

      test_loss(loss)
      test_accuracy(targets, predictions)

    for epoch in range(EPOCHS):
      print('training...')
      for it, (shapes, images, labels) in enumerate(train_ds):
        loss_masks = make_loss_mask(shapes, labels)
        train_step(images, labels, loss_masks)
        template = 'Iteration {}, Loss: {}, Accuracy: {}'
        print(template.format(it+1,
                              train_loss.result(),
                              train_accuracy.result()*100))
        if (it+1) % SAVE_EVERY == 0:
          self.save(self.model_dir, it+1)

      print('evaluation...')
      for test_shapes, test_images, test_labels in test_ds:
        # Masks must come from the evaluation batch itself, not from the last
        # training batch.
        test_masks = make_loss_mask(test_shapes, test_labels)
        test_step(test_images, test_labels, test_masks)

      template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
      print(template.format(epoch+1,
                            train_loss.result(),
                            train_accuracy.result()*100,
                            test_loss.result(),
                            test_accuracy.result()*100))

      # Reset the metrics for the next epoch
      train_loss.reset_states()
      train_accuracy.reset_states()
      test_loss.reset_states()
      test_accuracy.reset_states()

  def build_train_model(self):
    """Builds the DeepLabV3+ network that train() starts from.

    Returns:
      The object returned by `Deeplabv3` for 500x500 RGB inputs,
      `self.num_classes` output classes, 'pascal_voc' weights and a softmax
      activation.
    """
    return Deeplabv3(weights='pascal_voc', input_shape=(500, 500, 3),
                     classes=self.num_classes, activation='softmax')
         

In [0]:
  # TODO(student): Feel free to write whatever here. Our auto-grader will *not*
  # invoke this at all.
  
model = SegmentationModel()
# Train on the image IDs listed in test.txt inside the data directory.
model.train(os.path.join(model.data_dir, 'test.txt'))




In [0]:
# Smoke test for make_loss_mask on random labels that include the ignore value 255.
np.random.seed(0)  # Make the random labels reproducible across runs.
labels = tf.constant(np.random.choice([0,1,2,3,4,255],[2,500,500]))
# Image sizes are pixel counts used as slice bounds, so they must be integers.
shapes = tf.constant(np.asarray([[500,500],[500,500]]), dtype = tf.int32)
tf.print(labels)
masks = make_loss_mask(shapes, labels)
# With full-size images, a pixel is masked out exactly when its label is 255.
assert np.array_equal(masks.numpy() == 0.0, labels.numpy() == 255)
print(masks)

In [0]:
# Sanity check of the ignore-label rule: 255 maps to 0, every other label to 1.
l = np.array([0, 5, 6, 9, 255])
where = (l != 255).astype(int)
print(where)

In [0]:
# List the current working directory.
!ls



In [0]:
# Show every entry currently inside the model weights directory.
glob.glob('CSCI699_hw2_data/model_weights/*')