In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
%reload_ext tensorboard
import datetime

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
import tensorflow.keras.losses as losses
import tensorflow.keras.optimizers as optimizers

import tensorflow_datasets as tfds

from tensorflow.data import AUTOTUNE



C:\Users\andre\anaconda3\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\Users\andre\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


In [3]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3000)])
  except RuntimeError as e:
    print(e)
#if gpus:
#  try:
#    for gpu in gpus:
#      tf.config.experimental.set_memory_growth(gpu, True)
#  except RuntimeError as e:
#    print(e)

Found GPU at: /device:GPU:0


In [7]:
import tensorflow as tf

import tensorflow.keras.layers as layers
import tensorflow.keras.losses as losses
import tensorflow.keras.metrics as metrics
import tensorflow.keras.models as models
import tensorflow.keras.optimizers as optimizers
from tensorflow.keras import regularizers



# default input shape
INPUT_SHAPE = (512, 1024, 3)


def ge_layer(x_in, c, e=6, stride=1):
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    if stride == 2:
        x = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), strides=2, padding='same')(x)
        x = layers.BatchNormalization()(x)
        
        y = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), strides=2, padding='same')(x_in)
        y = layers.BatchNormalization()(y)
        y = layers.Conv2D(filters=c, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(y)
        y = layers.BatchNormalization()(y)
    else:
        y = x_in
        
    x = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(filters=c, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Add()([x, y])
    x = layers.Activation('relu')(x)
    return x


def stem(x_in, c):
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), strides=2, padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x_split = layers.Activation('relu')(x)
    
    #x = layers.MaxPool2D(pool_size=(1, 1), padding='same')(x_split)
    x = layers.Conv2D(filters=c // 2, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x_split)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), strides=2, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    y = layers.MaxPooling2D()(x_split)
    
    x = layers.Concatenate()([x, y])
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    return x


def detail_conv2d(x_in, c, stride=1):
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), strides=stride, padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    return x


def context_embedding(x_in, c):
    x = layers.GlobalAveragePooling2D()(x_in)
    x = layers.BatchNormalization()(x)
    
    x = layers.Reshape((1,1,c))(x)
    
    x = layers.Conv2D(filters=c, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    # broadcasting no needed
    
    x = layers.Add()([x, x_in])
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    return x


def bilateral_guided_aggregation(detail, semantic, c):
    # detail branch
    detail_a = layers.DepthwiseConv2D(kernel_size=(3,3), padding='same')(detail)
    detail_a = layers.BatchNormalization()(detail_a)
    
    detail_a = layers.Conv2D(filters=c, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(detail_a)
    
    #detail_a = layers.MaxPool2D(pool_size=(3,3), strides=2, padding='same')(detail_a)
    
    detail_b = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), strides=2, padding='same')(detail)
    detail_b = layers.BatchNormalization()(detail_b)
    
    detail_b = layers.AveragePooling2D((3,3), strides=2, padding='same')(detail_b)
    
    # semantic branch
    semantic_a = layers.DepthwiseConv2D(kernel_size=(3,3), padding='same')(semantic)
    semantic_a = layers.BatchNormalization()(semantic_a)
    
    #semantic_a = layers.MaxPool2D(pool_size=(1, 1), padding='same')(semantic_a)
    semantic_a = layers.Conv2D(filters=c, kernel_size=(1,1),kernel_regularizer=regularizers.l2(5e-4), padding='same')(semantic_a)
    semantic_a = layers.Activation('sigmoid')(semantic_a)
    
    semantic_b = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(semantic)
    semantic_b = layers.BatchNormalization()(semantic_b)
    
    #semantic_b = layers.MaxPool2D(pool_size=(3,3), strides=2, padding='same')(semantic_b)
    
    semantic_b = layers.UpSampling2D((4,4), interpolation='bilinear')(semantic_b)
    semantic_b = layers.Activation('sigmoid')(semantic_b)
    
    # combining
    detail = layers.Multiply()([detail_a, semantic_b])
    semantic = layers.Multiply()([semantic_a, detail_b])
    
    # this layer is not mentioned in the paper !?
    #semantic = layers.UpSampling2D((4,4))(semantic)
    semantic = layers.UpSampling2D((4,4), interpolation='bilinear')(semantic)
    
    x = layers.Add()([detail, semantic])
    x = layers.Conv2D(filters=c, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    return x


def seg_head(x_in, c_t, s, n):
    x = layers.Conv2D(filters=c_t, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    x = layers.Conv2D(filters=n, kernel_size=(3,3),kernel_regularizer=regularizers.l2(5e-4), padding='same')(x)
    x = layers.UpSampling2D((s,s), interpolation='bilinear')(x)
    
    return x


class ArgmaxMeanIOU(metrics.MeanIoU):
    def update_state(self, y_true, y_pred, sample_weight=None):
        return super().update_state(tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1), sample_weight)


def bisenetv2(num_classes=2, out_scale=8, input_shape=INPUT_SHAPE, l=4, seghead_expand_ratio=2):
    x_in = layers.Input(input_shape)

    # semantic branch
    # S1 + S2
    x = stem(x_in, 16 // 1)
    
    # S3
    x = ge_layer(x, 32 // 1, stride=2)
    x = ge_layer(x, 32 // 1, stride=1)

    # S4
    #x = ge_layer(x, 64, stride=2)
    #x = ge_layer(x, 64, stride=1)

    # S5
    #x = ge_layer(x, 128, stride=2)

    #x = ge_layer(x, 128, stride=1)
    #x = ge_layer(x, 128, stride=1)
    #x = ge_layer(x, 128, stride=1)

    #x = context_embedding(x, 128)
    x = context_embedding(x, 32)

    # detail branch
    # S1
    y = detail_conv2d(x_in, 64, stride=2)
    y = detail_conv2d(y, 64, stride=1)

    # S2
    #y = detail_conv2d(y, 64, stride=2)
    #y = detail_conv2d(y, 64, stride=1)
    #y = detail_conv2d(y, 64, stride=1)

    # S3
    #y = detail_conv2d(y, 128, stride=2)
    #y = detail_conv2d(y, 128, stride=1)
    #y = detail_conv2d(y, 128, stride=1)

    #x = bilateral_guided_aggregation(y, x, 128)
    x = bilateral_guided_aggregation(y, x, 64)

    x = seg_head(x, num_classes * seghead_expand_ratio, out_scale, num_classes)
    
    model = models.Model(inputs=[x_in], outputs=[x])
    
    # set weight initializers
    for layer in model.layers:
        if hasattr(layer, 'kernel_initializer'):
            layer.kernel_initializer = tf.keras.initializers.HeNormal()
        if hasattr(layer, 'depthwise_initializer'):
            layer.depthwise_initializer = tf.keras.initializers.HeNormal()

    return model


def bisenetv2_compiled(num_classes, decay_steps=5e4, momentum=0.9, weight_decay=0.0005, **kwargs):
    model = bisenetv2(num_classes, **kwargs)

    schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-2,
        decay_steps=decay_steps,
        power=0.9
    )

    try:
        import tensorflow_addons as tfa

        sgd = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=schedule,
            momentum=momentum,
        )
    except ImportError:
        print('tensorflow_addons not available, not using weight-decay')

        sgd = optimizers.SGD(
            learning_rate=schedule,
            momentum=momentum,
        )

    cce = losses.CategoricalCrossentropy(from_logits=True)
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
    model.compile(sgd, loss=loss,
                  metrics=['accuracy', ArgmaxMeanIOU(num_classes)]) 
    
    return model


def bisenetv2_output_shape(num_classes, scale, input_shape=INPUT_SHAPE):
    return ((input_shape[0] // 8) * scale, 
            (input_shape[1] // 8) * scale, 
            num_classes)


In [8]:
import json
import tensorflow as tf
import numpy as np
import cv2


def class_map_road(seg):
    # map class 0=anything, 1=road
    return tf.where(seg == 7, [0, 1.0], [1.0, 0])


def cityscapes_prep(output_shape, input_shape=INPUT_SHAPE, class_map_func=None, float_range=True, standardize=True):
    def prep_map(sample):
        img = sample['image_left']
        seg = sample['segmentation_label']

        if float_range:
            img /= 255
        if standardize:
            img = tf.image.per_image_standardization(img)

        img = tf.image.resize(img, input_shape[0:2])
        seg = tf.image.resize(seg, output_shape[0:2])
        
        if callable(class_map_func):
            seg = class_map_func(seg)
        else:
            seg = tf.one_hot(tf.cast(seg, tf.int32), output_shape[-1], axis=2)
            seg = tf.cast(seg, tf.float32)
            seg = tf.squeeze(seg)
            #seg = tf.keras.utils.to_categorical(seg, num_classes=output_shape[-1])

        return img, seg

    return prep_map

In [9]:
import matplotlib.pyplot as plt


def segmentation_to_image(pred):
    img = tf.argmax(pred, axis=-1)
    img = img[..., tf.newaxis]
    return tf.keras.preprocessing.image.array_to_img(img)

        
def predict_tf(model):
    def predict_func(sample):
        print(sample[0])
        pred = model.predict(tf.expand_dims(sample[0], axis=0))
        return sample[0], pred[0]
    
    return predict_func


def display_dataset(ds, pred_func):
    for sample in ds:
        imgs = pred_func(sample)
        fig, axes = plt.subplots(1, len(imgs))
        
        for ax, img in zip(axes, imgs):
            if img.shape[-1] != 3:
                img = segmentation_to_image(img)
                
            ax.imshow(img)

In [None]:
# Test a image input
import cv2
import time
def img_pred(src,model,image_size):
  src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
  #cv2_imshow(src)
  image = cv2.resize(src,image_size,interpolation=cv2.INTER_CUBIC)
  #cv2_imshow(src)
  image = image/255
  data = tf.convert_to_tensor(image, dtype=tf.float32)
  data = tf.expand_dims(data, axis=0)
  pred = model.predict(data)
  seg = tf.argmax(pred[0], axis=-1)
  seg = seg[..., tf.newaxis]
  seg = tf.keras.preprocessing.image.array_to_img(seg)
  #plt.imshow(seg)
  seg = cv2.cvtColor(np.array(seg), cv2.COLOR_BGR2RGB)
  seg = cv2.resize(seg,(src.shape[1],src.shape[0]),interpolation=cv2.INTER_CUBIC)
  result = cv2.addWeighted(np.array(src), 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)
  return src, seg, result



In [None]:
def gen_seg_vid(cap,out,shape,model):
  #while(cap.isOpened()):
  for _ in range(500):
    ret, frame = cap.read()
    if ret == True:
      start = time.time()
      img_seg = img_pred(frame,model,(256,128))[2]
      end = time.time()
      fps = 1/(end - start)
      x = 'FPS:'
      text = "{}{:.3f}".format(x,fps)
      font = cv2.FONT_HERSHEY_SIMPLEX
      #cv2_imshow(img_seg)
      img_seg = cv2.cvtColor(img_seg, cv2.COLOR_BGR2RGB)
      cv2.putText(img_seg, text , (800,900), font, 1, (0, 255, 255), 2, cv2.LINE_AA)
      out.write(img_seg)
      if cv2.waitKey(100) & 0xFF == ord('q'):
        break
    else:
      break
  cap.release()
  out.release()
  cv2.destroyAllWindows()

In [None]:
INPUT_SHAPE = (128, 256, 3)
NUM_CLASSES = 2
SCALE = 2

In [None]:
OUTPUT_SHAPE = bisenetv2_output_shape(NUM_CLASSES, SCALE, input_shape=INPUT_SHAPE)
m = bisenetv2_compiled(num_classes=NUM_CLASSES, out_scale=SCALE, input_shape=INPUT_SHAPE)

In [None]:
m.summary()

In [None]:

import tensorflow_addons as tfa

#model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/bisenet_small_aug.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU}, compile=False)


decay_steps=5e4
momentum=0.9
weight_decay=0.0005

schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=1e-2,
        decay_steps=decay_steps,
        power=0.9
    )

schedule_cosine= optimizers.schedules.CosineDecay(
    initial_learning_rate = 1e-2, decay_steps=decay_steps, alpha=0.0)


sgd = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=schedule,
            momentum=momentum,
        )
cce = losses.CategoricalCrossentropy(from_logits=True)
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
adam = tf.keras.optimizers.Adam(learning_rate=5e-5)
yogi =  tfa.optimizers.Yogi(learning_rate = 0.0005)
adamw = tfa.optimizers.AdamW(learning_rate=1e-5 , weight_decay=0.0005)
swa = tfa.optimizers.SWA(adamw)

m.compile(adam, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)] )

In [None]:
print(OUTPUT_SHAPE)

In [None]:
tf.keras.utils.plot_model(m, show_shapes=True)

In [4]:
cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="E:\EE\project\FPGA\cityscapes",download=False)

In [19]:
OUTPUT_SHAPE = (128, 256, 2)
train_ds = cityscapes['train'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
valid_ds = cityscapes['validation'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
test_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))

In [16]:
print(train_ds)

<MapDataset shapes: ((128, 256, 3), (128, 256, 2)), types: (tf.float32, tf.float32)>


In [None]:
image,label = next(iter(zoom_ds))
#level = np.round(np.random.uniform(-0.2, 0.2), 3)
#print(level)
#print(label)
#image = tfa.image.shear_y(image, level, tf.constant(0))
#_ = plt.imshow(image)
seg = tf.argmax(label, axis=-1)
seg = seg[..., tf.newaxis]
seg = tf.keras.preprocessing.image.array_to_img(seg)
plt.imshow(seg)
plt.show()

In [11]:
seed = 123

Crop_img = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomCrop(128, 256, seed=seed)
])
Crop_seg= tf.keras.Sequential([
  layers.experimental.preprocessing.RandomCrop(128, 256, seed=seed)
])
Rotate_img = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomRotation(0.06, fill_mode='constant', seed = seed)
])
Rotate_seg = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomRotation(0.06, fill_mode='constant', seed = seed)
])
Zoom_img = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomZoom((-0.75,0.25), fill_mode='constant', seed = seed)
])
Zoom_seg = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomZoom((-0.75,0.25), fill_mode='constant', seed = seed)
])

In [12]:
def zoom_prep(output_shape, input_shape=INPUT_SHAPE, class_map_func=None, float_range=True, standardize=True):
    def prep_map(sample):
        img = sample['image_left']
        seg = sample['segmentation_label']

        if float_range:
            img /= 255
        if standardize:
            img = tf.image.per_image_standardization(img)

        img = tf.image.resize(img, input_shape[0:2])
        seg = tf.image.resize(seg, output_shape[0:2])
        
        img = img[None, ...]
        seg = seg[None, ...]
        img = Zoom_img(img)
        seg = Zoom_seg(seg)
        img = tf.squeeze(img)
        seg = tf.squeeze(seg,0)
        
        if callable(class_map_func):
            seg = class_map_func(seg)
        else:
            seg = tf.one_hot(tf.cast(seg, tf.int32), output_shape[-1], axis=2)
            seg = tf.cast(seg, tf.float32)
            seg = tf.squeeze(seg)
            #seg = tf.keras.utils.to_categorical(seg, num_classes=output_shape[-1])
        print(seg)
        return img, seg

    return prep_map

OUTPUT_SHAPE = (128, 256, 2)
INPUT_SHAPE = (128, 256, 3)

zoom_ds = cityscapes['train'].map(zoom_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
zoom_ds_flipped = zoom_ds.shuffle(500).take(np.round(len(zoom_ds)/2)).map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))


Tensor("SelectV2:0", shape=(128, 256, 2), dtype=float32)


In [13]:
def Crop_prep(output_shape, input_shape=INPUT_SHAPE, class_map_func=None, float_range=True, standardize=True):
    def prep_map(sample):
        img = sample['image_left']
        seg = sample['segmentation_label']

        if float_range:
            img /= 255
        if standardize:
            img = tf.image.per_image_standardization(img)

        img = tf.image.resize(img, input_shape[0:2])
        seg = tf.image.resize(seg, output_shape[0:2])
        
        img = img[None, ...]
        seg = seg[None, ...]
        img = Crop_img(img)
        seg = Crop_seg(seg)
        img = tf.squeeze(img)
        seg = tf.squeeze(seg,0)
        
        if callable(class_map_func):
            seg = class_map_func(seg)
        else:
            seg = tf.one_hot(tf.cast(seg, tf.int32), output_shape[-1], axis=2)
            seg = tf.cast(seg, tf.float32)
            seg = tf.squeeze(seg)
            #seg = tf.keras.utils.to_categorical(seg, num_classes=output_shape[-1])
        
        return img, seg

    return prep_map

input_shape = (200,400,3)
output_shape = (200,400,2)
crop_ds = cityscapes['train']

crop_ds = crop_ds.map(Crop_prep(output_shape, input_shape, class_map_road))
crop_ds_flipped = crop_ds.shuffle(500).take(np.round(len(crop_ds)/2)).map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))

#display_dataset(tmp_ds.take(1), lambda s: (s[0], s[1]))
#display_dataset(crop_ds_flipped.take(5), lambda s: (s[0], s[1]))
#display_dataset(crop_ds.take(5), lambda s: (s[0], s[1]))
#plt.show()

aug_ds = crop_ds_flipped.concatenate(zoom_ds_flipped)

In [None]:
print(len(aug_ds))

In [20]:
BATCH_SIZE = 16

USE_TINYCAR_DATA = False
USE_AUGMENTATION = True

rng = tf.random.Generator.from_seed(123, alg='philox')

if USE_TINYCAR_DATA:
    from data_prep import labelme_prep, uwula_prep

    ds = tf.data.Dataset.list_files('../paperstreet/1/*.json')
    ds = ds.map(labelme_prep(OUTPUT_SHAPE, INPUT_SHAPE))


    ds2 = tf.data.Dataset.list_files('/media/mldata/tinycar/Mikrowunderland_1k/*.jpg')
    ds2 = ds2.map(uwula_prep(OUTPUT_SHAPE, INPUT_SHAPE))

    ds = ds.concatenate(ds2)
    
    tc_valid_ds = ds.take(24)
    tc_train_ds = ds.skip(24)
    
    train_ds = train_ds.concatenate(tc_train_ds)
    valid_ds = train_ds.concatenate(tc_valid_ds)

if USE_AUGMENTATION:

    def augment(img, seg, seed):
        #seed = tf.random.uniform((2,))
        print(seg)
        img = tf.image.stateless_random_brightness(img, 0.2, seed=seed)
        img = tf.image.stateless_random_contrast(img, 0.8, 1.2, seed=seed)
        img = tf.image.stateless_random_saturation(img, 0.0, 1.5, seed=seed)
        img = tf.image.stateless_random_hue(img, 0.2, seed=seed)
        img = tf.clip_by_value(img, 0.0, 1.0) # clip values outside 0..1

        return img, seg

    def randomize(img, seg):
        seed = rng.make_seeds(2)[0]
        img, seg = augment(img, seg, seed)
        return img, seg
    
    train_ds_flipped_lr = train_ds.map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))
    valid_ds_flipped_lr = valid_ds.map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))
    
    
    
    train_ds_randomized = train_ds.map(randomize)
    #train_ds_shear = train_ds.map(shear)
    crop_ds_randomized = crop_ds.map(randomize)
    zoom_ds_randomized = zoom_ds.map(randomize)
    valid_ds_randomized = valid_ds.map(randomize)
    
    train_ds_randomized_flipped = train_ds_flipped_lr.map(randomize)
    valid_ds_randomized_flipped = valid_ds_flipped_lr.map(randomize)
    aug_ds_rand = aug_ds.map(randomize)
    
    train_ds = train_ds.concatenate(train_ds_flipped_lr).concatenate(train_ds_randomized).concatenate(train_ds_randomized_flipped)
    valid_ds = valid_ds.concatenate(valid_ds_flipped_lr).concatenate(valid_ds_randomized).concatenate(valid_ds_randomized_flipped)

    #train_ds = train_ds.concatenate(train_ds_flipped_lr).concatenate(train_ds_randomized).concatenate(crop_ds_flipped).concatenate(train_ds_randomized).concatenate(zoom_ds_flipped)
    
    #train_ds = train_ds.concatenate(aug_ds).concatenate(train_ds_randomized).concatenate(aug_ds_rand)

print(train_ds)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
valid_ds = valid_ds.batch(4).prefetch(AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

print(valid_ds)
#display_dataset(valid_ds.unbatch().take(1), lambda s: (s[0], s[1]))

Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
Tensor("args_1:0", shape=(128, 256, 2), dtype=float32)
<ConcatenateDataset shapes: ((128, 256, 3), (128, 256, 2)), types: (tf.float32, tf.float32)>
<PrefetchDataset shapes: ((None, 128, 256, 3), (None, 128, 256, 2)), types: (tf.float32, tf.float32)>


In [None]:
import tensorflow.keras.backend as K
from keras.layers import Flatten, Reshape
def DiceLoss(targets, inputs, smooth=1e-6):
    
    #flatten label and prediction tensors
    #inputs = Flatten()(inputs)
    #targets = Flatten()(targets)
    targets = tf.argmax(targets, axis=-1)
    targets = targets[..., tf.newaxis]
    inputs = tf.argmax(inputs, axis=-1)
    inputs = inputs[..., tf.newaxis]
    targets = tf.cast(targets, tf.float32)
    inputs = tf.cast(inputs, tf.float32)
    
    print(targets, inputs)
    intersection = K.sum(targets * inputs, axis=[1,2])
    union = K.sum(targets, axis=[1,2,3]) + K.sum(inputs, axis=[1,2])
    dice = (2. * intersection + smooth) / (union + smooth)
    dice = K.mean(dice,axis=0)
    dice = K.mean(dice)
    return 1 - dice


def DiceCCELoss(targets, inputs, smooth=1e-6):    
    
    #flatten label and prediction tensors
    I = inputs
    T = targets
    #inputs = Flatten()(inputs)
    inputs = Reshape((-1,))(inputs)
    #targets = Flatten()(targets)
    targets = Reshape((-1,))(targets)
    #print(inputs)
    #print(targets)
    #print(I,T)
    print(targets,inputs)
    CCE =  K.categorical_crossentropy(targets, inputs, from_logits = True)
    targets = T
    inpits = I
    #intersection = K.sum(targets*inputs, axis=(1,2,3))
    #union = K.sum(targets, axis=(1,2,3)) + K.sum(inputs, axis=(1,2,3))
    intersection = K.sum(targets * inputs, axis=[1,2,3])
    total = K.sum(targets, axis=[1,2,3]) + K.sum(inputs, axis=[1,2,3])
    union = total - intersection
    dice = k.mean((2. * intersection + smooth) / (union + smooth),axis = 0)
    dice_loss = 1 - dice  
    Dice_CCE = CCE + dice_loss
    #IoU = IoULoss(targets,inputs,1e-5)
    #Loss = CCE + IoU
    
    return Dice_CCE

def IoULoss(targets, inputs, smooth=1e-6):
    
    #flatten label and prediction tensors
    #inputs = Flatten()(inputs)
    #targets = Flatten()(targets)
    
    #intersection = K.sum(K.dot(targets, inputs))
    intersection = K.sum(targets * inputs, axis=[1,2])
    #total = K.sum(targets) + K.sum(inputs)
    total = K.sum(targets, axis=[1,2]) + K.sum(inputs, axis=[1,2])
    union = total - intersection
    IoU = K.mean((intersection + smooth) / (union + smooth), axis = 0)
    #IoU = (intersection + smooth)/(union + smooth)
    #return 1 - IoU
    return 1 - K.mean(IoU)

def CCE(targets, inputs):
    return K.categorical_crossentropy(targets, inputs, from_logits = True)

def GIoU_CCE(targets,inputs):
    print(targets,inputs)
    giou = tfa.losses.giou_loss(targets,inputs)
    cce = losses.categorical_crossentropy(targets, inputs, from_logits = True)
    return giou + cce

In [None]:
#train_ds = train_ds.shuffle(500)
print(len(valid_ds))
#display_dataset(train_ds_randomized_flipped.take(1), lambda s: (s[0], s[1]))

In [None]:
# warm-up training
import tensorflow_addons as tfa
from typing import Callable

class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):

    def __init__(
    self,
    initial_learning_rate: float,
    decay_schedule_fn: Callable,
    warmup_steps: int,
    power: float = 1.0,
    name: str = None,
):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.warmup_steps = warmup_steps
        self.power = power
        self.decay_schedule_fn = decay_schedule_fn
        self.name = name
    
    def __call__(self, step):
        with tf.name_scope(self.name or "WarmUp") as name:
            # Implements polynomial warmup. i.e., if global_step < warmup_steps, the
            # learning rate will be `global_step/num_warmup_steps * init_lr`.
            global_step_float = tf.cast(step, tf.float32)
            warmup_steps_float = tf.cast(self.warmup_steps, tf.float32)
            warmup_percent_done = global_step_float / warmup_steps_float
            warmup_learning_rate = (self.initial_learning_rate) * tf.math.pow(warmup_percent_done, self.power)
            return tf.cond(
                global_step_float < warmup_steps_float,
                lambda: warmup_learning_rate,
                lambda: self.decay_schedule_fn(step - self.warmup_steps),
                name=name,
            )

    def get_config(self):
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_schedule_fn": self.decay_schedule_fn,
            "warmup_steps": self.warmup_steps,
            "power": self.power,
            "name": self.name,
        }
    
decay_steps=5e4
momentum=0.9
weight_decay=0.0005
initial_lr = 5e-2
schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-2,
        decay_steps=decay_steps,
        power=0.9
    )
schedule_cosine= optimizers.schedules.CosineDecay(
    initial_learning_rate = 5e-2, decay_steps=decay_steps, alpha=0.0)

lr_schedule = WarmUp(
            initial_learning_rate=initial_lr,
            decay_schedule_fn=schedule,
            warmup_steps=744
        )

cce = losses.CategoricalCrossentropy(from_logits=True)
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
giou = tfa.losses.GIoULoss()

adamw = tfa.optimizers.AdamW(learning_rate=lr_schedule , weight_decay=weight_decay)
sgdw = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=lr_schedule,
            momentum=momentum
        )
sgd = optimizers.SGD(
            learning_rate=lr_schedule,
            momentum=momentum,
        )

#m.compile(sgd ,loss =IoULoss ,metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)] )

#EPOCHS = 2

#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

#history = m.fit(train_ds,
#                validation_data=valid_ds,
#                epochs=EPOCHS,
#                callbacks=[tensorboard_callback])

In [None]:
model_copy = tf.keras.models.clone_model(m)
model_copy.set_weights(m.get_weights())


In [None]:
# Formal Training


decay_steps=5e4
momentum=0.9
weight_decay=0.0005

schedule_cosine= optimizers.schedules.CosineDecay(
    initial_learning_rate = 5e-2, decay_steps=decay_steps, alpha=0.0)

schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-2,
        decay_steps=decay_steps,
        power=0.9,
        cycle = True
    )
lr_schedule = WarmUp(
            initial_learning_rate=initial_lr,
            decay_schedule_fn=schedule_cosine,
            warmup_steps=1500,
        )

sgdw = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=lr_schedule,
            momentum=momentum
        )

sgd = optimizers.SGD(
            learning_rate=lr_schedule,
            momentum=momentum,
        )

opt = tfa.optimizers.MovingAverage(sgd)

m.compile(opt ,loss = cce , metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)] )

EPOCHS = 10

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

history = m.fit(train_ds,
                validation_data=valid_ds,
                epochs=EPOCHS,
                callbacks=[tensorboard_callback])

In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
#m_copy.save('E:/EE/project/FPGA/128x256/bisenet_small_aug.tf')
m.save('E:/EE/project/FPGA/128x256/model_untrained.tf')

In [None]:
#m_copy.compile(sgd, loss = cce, metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)] )
model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/bisenet_small_2.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
OUTPUT_SHAPE = (128, 256, 2)
eval_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
eval_ds = eval_ds.batch(16)
print("Evaluate on valid data")
results = model.evaluate(eval_ds)

In [None]:
model.summary()
#display_dataset(test_ds.unbatch().take(1), predict_tf(m))
#display_dataset(valid_ds.unbatch().take(20), predict_tf(m))

In [None]:
model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/stripped_pruned_small_model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU}, compile=False)

In [None]:
# Test Image
src = cv2.imread('E:/EE/project/FPGA/test_01.png')
start = time.time()
result = img_pred(src,pruned_model,(256,128))[2]
end = time.time()
fps = 1/(end-start)
print('fps =',fps)
cv2.imshow("result",result)
cv2.waitKey (0)

In [None]:
cap = cv2.VideoCapture('E:\EE\project\FPGA\Driving_data.mp4')
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
out = cv2.VideoWriter('E:/EE/project/FPGA/128x256/Dirve_test_P_small_2_GPU.mp4',fourcc,10,(1920,1080))
gen_seg_vid(cap,out,(1920,1080),stripped_pruned_model)

In [None]:
import tensorflow_addons as tfa

decay_steps=10e3
momentum=0.9
weight_decay=0.0005

schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-5,
        decay_steps=decay_steps,
        power=0.9
    )
sgd = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=schedule,
            momentum=momentum,
        )
cce = losses.CategoricalCrossentropy(from_logits=True)

In [None]:
import tensorflow_addons as tfa

#model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/original_model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU}, compile=False)


decay_steps=10e3
momentum=0.9
weight_decay=0.0005

schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-2,
        decay_steps=decay_steps,
        power=0.9
    )


sgd = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=schedule,
            momentum=momentum,
        )
cce = losses.CategoricalCrossentropy(from_logits=True)

#model.compile(sgd, loss=cce,
#                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)] )

In [None]:
# Prune the original TF model
import tensorflow_model_optimization as tfmot
import tensorflow as tf

model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/bisenet_small_2.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})

#model = m 

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

epochs = 10
end_step = np.ceil(len(train_ds)).astype(np.int32)*epochs
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.5,
                                    final_sparsity=0.7,
                                    begin_step=0,
                                    end_step=end_step)}
 #     'pruning_policy': tfmot.sparsity.keras.PruneForLatencyOnXNNPack()}
#pruning_params = {
#      'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.7, begin_step=0, frequency=100)
#  }
# Try to apply pruning wrapper with pruning policy parameter.
try:
    pruned_model = prune_low_magnitude(model, **pruning_params)
    print('Success')
except ValueError as e:
    print(e)

opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
pruned_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)]) 
pruned_model.summary()

In [None]:
# Fine-tuning on trained tf model
import tempfile

logdir = tempfile.mkdtemp()
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep()
]   
history = pruned_model.fit(train_ds,
                validation_data=valid_ds,
                epochs=epochs,
                callbacks=callbacks) 

In [None]:
pruned_model.save('E:/EE/project/FPGA/128x256/pruned_small_2_model.tf')

In [None]:
#pruned_model = tf.keras.models.load_model("E:\EE\project\FPGA\pruned_model_SparsityDecay.tf",custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})

stripped_pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)
stripped_pruned_model_copy = tf.keras.models.clone_model(stripped_pruned_model)
stripped_pruned_model_copy.set_weights(stripped_pruned_model.get_weights())

In [None]:
stripped_pruned_model.summary()

In [None]:
def print_model_weights_sparsity(model):

    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Wrapper):
            weights = layer.trainable_weights
        else:
            weights = layer.weights
        for weight in weights:
            if "kernel" not in weight.name or "centroid" in weight.name:
                continue
            weight_size = weight.numpy().size
            zero_num = np.count_nonzero(weight == 0)
            print(
                f"{weight.name}: {zero_num/weight_size:.2%} sparsity ",
                f"({zero_num}/{weight_size})",
            )

In [None]:
print_model_weights_sparsity(stripped_pruned_model)

In [None]:
stripped_pruned_model.save('E:/EE/project/FPGA/128x256/stripped_pruned_small_3_model.tf', include_optimizer=False)

In [None]:
stripped_pruned_model_copy = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/stripped_pruned_small_2_model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU}) 

In [None]:
import tensorflow_model_optimization as tfmot

# Clustering
cluster_weights = tfmot.clustering.keras.cluster_weights
CentroidInitialization = tfmot.clustering.keras.CentroidInitialization

#clustering_params = {
#  'number_of_clusters': 8,
#  'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS
#}

#clustered_model = cluster_weights(stripped_pruned_model, **clustering_params)

#clustered_model.compile(sgd, loss=cce,
 #                 metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)]) 

#print('Train clustering model:')
#clustered_model.fit(train_ds, validation_data=valid_dsvalid_ds, epochs=5)


#stripped_pruned_model.save("/content/drive/MyDrive/Colab_Notebooks/stripped_pruned_model_clustered.tf")


# Sparsity preserving clustering
from tensorflow_model_optimization.python.core.clustering.keras.experimental import (
    cluster,
)

cluster_weights = cluster.cluster_weights

clustering_params = {
  'number_of_clusters': 8,
  'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,
  'preserve_sparsity': True
}

sparsity_clustered_model = cluster_weights(stripped_pruned_model_copy, **clustering_params)
opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
sparsity_clustered_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])

print('Train sparsity preserving clustering model:')
sparsity_clustered_model.fit(train_ds, validation_data=valid_ds, epochs=5)
sparsity_clustered_model.save("E:/EE/project/FPGA/128x256/sparsity_clustered_small_2_model.tf")

In [None]:
stripped_sparsity_clustered_model = tfmot.clustering.keras.strip_clustering(sparsity_clustered_model)
stripped_sparsity_clustered_model.save("E:/EE/project/FPGA/128x256/stripped_sparsity_clustered_small_2_model.tf",include_optimizer=False)

In [None]:
stripped_sparsity_clustered_model.summary()

In [None]:
#m = tf.keras.models.load_model('E:/EE/project/FPGA/512x256/model8.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
#stripped_pruned_model_copy = tf.keras.models.load_model('E:/EE/project/FPGA/512x256/stripped_pruned_model_SparsityDecay.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
#stripped_sparsity_clustered_model = tf.keras.models.load_model('E:/EE/project/FPGA/128x256/stripped_sparsity_clustered_model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})

opt = tf.keras.optimizers.Adam(learning_rate=5e-6)

stripped_pruned_model_copy.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])
stripped_sparsity_clustered_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])

In [None]:
def test_fps(model):
    src = cv2.imread('E:/EE/project/FPGA/test_01.png')
    start = time.time()
    result = img_pred(src,model,(256,128))[2]
    end = time.time()
    fps = 1/(end-start)
    return fps

In [None]:
print("Evaluate model on valid data")
results1 = m.evaluate(valid_ds)
print("Evaluate pruned model on valid data")
results2 = stripped_pruned_model_copy.evaluate(valid_ds)
print("Evaluate pruned clustered model on valid data")
results2 = stripped_sparsity_clustered_model.evaluate(valid_ds)

In [None]:
print('Original fps: ', test_fps(m))
print('Pruned fps: ', test_fps(stripped_pruned_model_copy))
print('Pruned Clustered fps: ', test_fps(stripped_sparsity_clustered_model))


In [None]:
# Test Image
src = cv2.imread('E:/EE/project/FPGA/test_01.png')
start = time.time()
result = img_pred(src,stripped_sparsity_clustered_model)[2]
end = time.time()
fps = 1/(end-start)
print('fps =',fps)
cv2.imshow("result",result)
cv2.waitKey (0)

In [None]:
# Quantize model to a tflite int8 model speed up for ARM

model = tf.keras.models.load_model('E:EE/project/FPGA/128x256/stripped_sparsity_clustered_small_2_model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
#model = stripped_sparsity_clustered_model
cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="E:\EE\project\FPGA\cityscapes",download=False)
test_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))

def representative_dataset():
  for data in test_ds.take(100).batch(1):
      yield [data[0]]

converter = tf.lite.TFLiteConverter.from_keras_model(model)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
#converter.optimizations = [tf.lite.Optimize.EXPERIMENTAL_SPARSITY]

converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

#converter.inference_input_type = tf.uint8
#converter.inference_output_type = tf.uint8
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32


#tflite_quant_model_fullyINT8 = converter.convert()
tflite_quant_model_fullyINT8 = converter.convert()

In [None]:
# Check input/output dtype
interpreter = tf.lite.Interpreter(model_content=tflite_quant_model_fullyINT8)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

In [None]:
# Save float and int tflite model
import pathlib

tflite_models_dir = pathlib.Path("E:/EE/project/FPGA/128x256/cityscapes_tflite_models")
tflite_models_dir.mkdir(exist_ok=True, parents=True)


tflite_model_quant_fullINT_file = tflite_models_dir/"cityscapes_f32_small_2_pc.tflite"
tflite_model_quant_fullINT_file.write_bytes(tflite_quant_model_fullyINT8)

In [None]:
# Helper function to run inference on a TFLite model with dataset
import cv2
import time
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

def run_tflite_model_valid_MIoU(tflite_file, test, size):
    cnt = 0
    for sample in test:
        cnt += 1
        gt = sample[1]
        test_image = sample[0]
  
        gt = tf.argmax(gt, axis=-1)
        gt = gt[..., tf.newaxis]
  
  # Initialize the interpreter
        interpreter = tf.lite.Interpreter(model_path=str(tflite_file),num_threads=4)
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]

        test_image = cv2.cvtColor(np.array(test_image), cv2.COLOR_BGR2RGB)
        test_image = cv2.resize(test_image,size,interpolation=cv2.INTER_CUBIC)
        image = test_image/255
        data = tf.convert_to_tensor(image, dtype=tf.float32)
        data = tf.expand_dims(data, axis=0)
        interpreter.set_tensor(input_details["index"], data)
  
        s = time.time()
        interpreter.invoke()
        e = time.time()
        fps = 1/(e-s)
        
        output = interpreter.get_tensor(output_details["index"])[0]

        seg = tf.argmax(output, axis=-1)
        seg = seg[..., tf.newaxis]
        m = tf.keras.metrics.MeanIoU(num_classes=2)
        m.update_state(seg, gt)
        
        x = 'fps: '
        text = "{}{:.3f}".format(x,fps)
        y = "MIoU: "
        text2 = "{}{:.3f}".format(y,m.result().numpy())
        z = "#"
        text3 = "{}{}".format(z,cnt)
        
        seg = tf.keras.preprocessing.image.array_to_img(seg)

        seg = cv2.cvtColor(np.array(seg), cv2.COLOR_BGR2RGB)
        result = cv2.addWeighted(test_image, 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)
        if size == (512,256):
            result = cv2.resize(result,(512,256),interpolation=cv2.INTER_CUBIC)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(result, text , (100,250), font, 0.7, (0, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(result, text2 , (250,250), font, 0.7, (100, 100, 255), 2, cv2.LINE_AA)
        cv2.putText(result, text3 , (10,50), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('result',result)
        if cv2.waitKey(300) & 0xFF == ord('q') or cnt == 100:
            break
    cv2.destroyAllWindows()

In [None]:
INPUT_SHAPE = (128, 256, 3)
NUM_CLASSES = 2
SCALE = 8
OUTPUT_SHAPE = (128,256,2)

cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="E:\EE\project\FPGA\cityscapes",download=False)
test_ds = cityscapes['validation'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road, float_range=False))
path = "E:/EE/project/FPGA/128x256/cityscapes_tflite_models/cityscapes_f32_small_2.tflite"

run_tflite_model_valid_MIoU(path,test_ds.shuffle(100),(512,256))


In [None]:
# Helper function to run inference on a TFLite model with image
import cv2

def run_tflite_model(tflite_file, test_image):
    
  
  # Initialize the interpreter
  interpreter = tf.lite.Interpreter(model_path=str(tflite_file),num_threads=4)
  interpreter.allocate_tensors()

  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]

  test_image = cv2.cvtColor(np.array(test_image), cv2.COLOR_BGR2RGB)
  test_image = cv2.resize(test_image,(512,256),interpolation=cv2.INTER_CUBIC)
  image = test_image/255
  data = tf.convert_to_tensor(image, dtype=tf.float32)
  data = tf.expand_dims(data, axis=0)
  interpreter.set_tensor(input_details["index"], data)
  
  print('start')
  s = time.time()
  interpreter.invoke()
  e = time.time()
  print('end')  

  print(f'invoke: {e-s:.3f}s ({1/(e-s):.2f} fps)')

  output = interpreter.get_tensor(output_details["index"])[0]

  seg = tf.argmax(output, axis=-1)
  seg = seg[..., tf.newaxis]
  seg = tf.keras.preprocessing.image.array_to_img(seg)

  seg = cv2.cvtColor(np.array(seg), cv2.COLOR_BGR2RGB)
  result = cv2.addWeighted(test_image, 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)
  
  return src, seg, result

In [None]:
src = cv2.imread('E:/EE/project/FPGA/test_01.png')
path = "E:/EE/project/FPGA/512x256/cityscapes_tflite_models/cityscapes_EXPiof32.tflite"
#path = "E:/EE/project/FPGA/512x256/cityscapes_tflite_models/cityscapes_fullyINT8.tflite"

result = run_tflite_model(path,src)[2]

cv2.imshow('result',result)
cv2.waitKey(0)

In [None]:
# helper function to evaluate on the tf dataset
import time 

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


interpreter = tf.lite.Interpreter(model_path=str(path),num_threads=4)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

INPUT_SHAPE = output_details[0]["shape"][1:4]

input_details

OUTPUT_SHAPE = output_details[0]["shape"][1:4]

output_details

cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="E:\EE\project\FPGA\cityscapes",download=False)
test_ds = cityscapes['validation'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road, float_range=False))

def pred_func_tflite(sample):
    input_data = tf.expand_dims(sample[0], axis=0)
    input_data = tf.cast(input_data, tf.dtypes.float32)
    
    #truth_data = sample[1]
    
    interpreter.set_tensor(input_details[0]["index"], input_data)
    
    print('start')
    s = time.time()
    interpreter.invoke()
    e = time.time()
    print('end')
    
    print(f'invoke: {e-s:.3f}s ({1/(e-s):.2f} fps)')
    output_data = interpreter.get_tensor(output_details[0]['index'])
    seg = tf.argmax(output_data[0], axis=-1)
    seg = seg[..., tf.newaxis]
    seg = tf.keras.preprocessing.image.array_to_img(seg)
    #plt.imshow(seg)
    seg = cv2.cvtColor(np.array(seg), cv2.COLOR_BGR2RGB)
    result = cv2.addWeighted(src, 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)
    return input_data[0], output_data[0]

display_dataset(test_ds.take(10), pred_func_tflite)