In [1]:
%reload_ext tensorboard
import datetime

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
import tensorflow.keras.losses as losses
import tensorflow.keras.optimizers as optimizers

import tensorflow_datasets as tfds

from tensorflow.data import AUTOTUNE



In [2]:
# Abort early unless TensorFlow reports a GPU named '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# Expose the first physical GPU as a single virtual device with memory_limit=1024
# (presumably megabytes -- confirm against the TensorFlow docs).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)])
  except RuntimeError as e:
    # NOTE(review): the error is only printed, so a failed configuration does not
    # stop the notebook -- check the cell output to see whether the limit applied.
    print(e)
# Alternative kept for reference: let GPU memory grow on demand instead of a fixed limit.
#if gpus:
#  try:
#    for gpu in gpus:
#      tf.config.experimental.set_memory_growth(gpu, True)
#  except RuntimeError as e:
#    print(e)

Found GPU at: /device:GPU:0


In [3]:
import tensorflow as tf

import tensorflow.keras.layers as layers
import tensorflow.keras.losses as losses
import tensorflow.keras.metrics as metrics
import tensorflow.keras.models as models
import tensorflow.keras.optimizers as optimizers


# default input shape
INPUT_SHAPE = (512, 1024, 3)


def ge_layer(x_in, c, e=6, stride=1):
    """Build one GE block (main path plus residual shortcut) on top of `x_in`.

    Presumably the "Gather-and-Expansion" layer of BiSeNetV2 -- confirm against the paper.

    Args:
        x_in: input Keras tensor.
        c: number of filters of the regular (3x3 and 1x1) convolutions.
        e: depth multiplier passed to every depthwise convolution of the block.
        stride: when 2, a strided depthwise stage is added to the main path and the
            shortcut becomes a strided depthwise + 1x1 projection; for any other
            value the shortcut is `x_in` itself.

    Returns:
        Keras tensor: ReLU applied to the sum of main path and shortcut.
    """
    x = layers.Conv2D(filters=c, kernel_size=(3,3), padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    if stride == 2:
        # main path: extra depthwise convolution that halves the resolution
        x = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), strides=2, padding='same')(x)
        x = layers.BatchNormalization()(x)
        
        # shortcut: downsample x_in and project it to c channels so it can be added
        y = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), strides=2, padding='same')(x_in)
        y = layers.BatchNormalization()(y)
        y = layers.Conv2D(filters=c, kernel_size=(1,1), padding='same')(y)
        y = layers.BatchNormalization()(y)
    else:
        # identity shortcut -- assumes x_in already has c channels (TODO confirm at call sites)
        y = x_in
        
    # depthwise expansion followed by a 1x1 projection back to c channels
    x = layers.DepthwiseConv2D(depth_multiplier=e, kernel_size=(3,3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(filters=c, kernel_size=(1,1), padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Add()([x, y])
    x = layers.Activation('relu')(x)
    return x


def stem(x_in, c):
    """Build the stem block: a strided convolution followed by two parallel paths.

    Args:
        x_in: input Keras tensor.
        c: number of filters of the first, the strided and the fusing convolution
            (the 1x1 bottleneck in between uses c // 2).

    Returns:
        Keras tensor produced by the fusing 3x3 conv + BN + ReLU.
    """
    # shared entry: stride-2 convolution whose output feeds both paths
    x = layers.Conv2D(filters=c, kernel_size=(3,3), strides=2, padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x_split = layers.Activation('relu')(x)
    
    # path 1: 1x1 bottleneck, then a second stride-2 convolution
    x = layers.Conv2D(filters=c // 2, kernel_size=(1,1), padding='same')(x_split)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    x = layers.Conv2D(filters=c, kernel_size=(3,3), strides=2, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    # path 2: max pooling with the layer's default arguments
    y = layers.MaxPooling2D()(x_split)
    
    # fuse both paths along the channel axis and mix them with a 3x3 convolution
    x = layers.Concatenate()([x, y])
    x = layers.Conv2D(filters=c, kernel_size=(3,3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    return x


def detail_conv2d(x_in, c, stride=1):
    """Apply a 3x3 convolution, batch normalization and ReLU to `x_in`.

    Args:
        x_in: input Keras tensor.
        c: number of convolution filters.
        stride: stride of the convolution.

    Returns:
        The activated Keras tensor.
    """
    x = layers.Conv2D(filters=c, kernel_size=(3,3), strides=stride, padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    return x


def context_embedding(x_in, c):
    """Add a globally pooled context vector to `x_in` and mix the sum with a 3x3 conv.

    Args:
        x_in: input Keras tensor; the Reshape below requires it to have `c` channels.
        c: channel count used for the reshape and for both convolutions.

    Returns:
        Keras tensor output of the final 3x3 convolution (no BN/activation after it).
    """
    x = layers.GlobalAveragePooling2D()(x_in)
    x = layers.BatchNormalization()(x)
    
    # restore two spatial axes of size 1 so the 1x1 convolution can be applied
    x = layers.Reshape((1,1,c))(x)
    
    x = layers.Conv2D(filters=c, kernel_size=(1,1), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    # no explicit broadcasting/tiling is done here: the (1, 1, c) tensor is
    # handed to Add together with x_in as-is
    
    x = layers.Add()([x, x_in])
    x = layers.Conv2D(filters=c, kernel_size=(3,3), padding='same')(x)
    return x


def bilateral_guided_aggregation(detail, semantic, c):
    """Fuse the detail and semantic feature maps by mutual sigmoid gating.

    The code downsamples `detail` twice by a factor of 2 and upsamples `semantic`
    by a factor of 4, so `detail` is expected to have 4x the spatial resolution
    of `semantic`.

    Args:
        detail: Keras tensor from the detail branch.
        semantic: Keras tensor from the semantic branch.
        c: number of filters of every regular convolution in the block.

    Returns:
        Keras tensor at the resolution of `detail` (3x3 conv + BN of the fused sum).
    """
    # detail branch
    # detail_a keeps the detail resolution: depthwise 3x3 + BN + 1x1 conv
    detail_a = layers.DepthwiseConv2D(kernel_size=(3,3), padding='same')(detail)
    detail_a = layers.BatchNormalization()(detail_a)
    
    detail_a = layers.Conv2D(filters=c, kernel_size=(1,1), padding='same')(detail_a)
    
    # detail_b is reduced by 4 in total (stride-2 conv, then stride-2 average pooling)
    detail_b = layers.Conv2D(filters=c, kernel_size=(3,3), strides=2, padding='same')(detail)
    detail_b = layers.BatchNormalization()(detail_b)
    
    detail_b = layers.AveragePooling2D((3,3), strides=2, padding='same')(detail_b)
    
    # semantic branch
    # semantic_a: sigmoid gate at the semantic resolution
    semantic_a = layers.DepthwiseConv2D(kernel_size=(3,3), padding='same')(semantic)
    semantic_a = layers.BatchNormalization()(semantic_a)
    
    semantic_a = layers.Conv2D(filters=c, kernel_size=(1,1), padding='same')(semantic_a)
    semantic_a = layers.Activation('sigmoid')(semantic_a)
    
    # semantic_b: sigmoid gate upsampled by 4 to the detail resolution
    semantic_b = layers.Conv2D(filters=c, kernel_size=(3,3), padding='same')(semantic)
    semantic_b = layers.BatchNormalization()(semantic_b)
    
    semantic_b = layers.UpSampling2D((4,4), interpolation='bilinear')(semantic_b)
    semantic_b = layers.Activation('sigmoid')(semantic_b)
    
    # combining
    # each branch is gated by the other one at matching resolution
    detail = layers.Multiply()([detail_a, semantic_b])
    semantic = layers.Multiply()([semantic_a, detail_b])
    
    # Author's note: this upsampling is not mentioned in the paper, but it is
    # needed here so both products have the same resolution before the Add.
    #semantic = layers.UpSampling2D((4,4))(semantic)
    semantic = layers.UpSampling2D((4,4), interpolation='bilinear')(semantic)
    
    x = layers.Add()([detail, semantic])
    x = layers.Conv2D(filters=c, kernel_size=(3,3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    return x


def seg_head(x_in, c_t, s, n):
    """Build the segmentation head: 3x3 conv + BN + ReLU, 3x3 conv to `n` maps, upsampling.

    Args:
        x_in: input Keras tensor.
        c_t: number of filters of the intermediate convolution.
        s: integer upsampling factor applied to both spatial axes.
        n: number of output channels (one per class at the call site in `bisenetv2`).

    Returns:
        Keras tensor with `n` channels; no activation follows the last convolution,
        so the values are raw scores.
    """
    x = layers.Conv2D(filters=c_t, kernel_size=(3,3), padding='same')(x_in)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    x = layers.Conv2D(filters=n, kernel_size=(3,3), padding='same')(x)
    x = layers.UpSampling2D((s,s), interpolation='bilinear')(x)
    
    return x


class ArgmaxMeanIOU(metrics.MeanIoU):
    """MeanIoU that accepts per-class score tensors instead of class indices."""

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Reduce both tensors to class indices over the last axis, then delegate."""
        true_ids = tf.argmax(y_true, axis=-1)
        pred_ids = tf.argmax(y_pred, axis=-1)
        return super().update_state(true_ids, pred_ids, sample_weight)


def bisenetv2(num_classes=2, out_scale=8, input_shape=INPUT_SHAPE, l=4, seghead_expand_ratio=2):
    """Assemble the (uncompiled) BiSeNetV2 Keras model.

    Args:
        num_classes: number of output channels of the segmentation head.
        out_scale: upsampling factor applied by the segmentation head.
        input_shape: shape of one input image, without the batch axis.
        l: divisor applied to the channel counts 64 and 128 of the stem and of
            stage S3 of the semantic branch.
        seghead_expand_ratio: the head's intermediate convolution uses
            num_classes * seghead_expand_ratio filters.

    Returns:
        A `tf.keras` functional model mapping the image to per-class raw scores.
        All convolution kernels are He-normal initialised.
    """
    x_in = layers.Input(input_shape)

    # semantic branch
    # S1 + S2
    x = stem(x_in, 64 // l)

    # S3
    x = ge_layer(x, 128 // l, stride=2)
    x = ge_layer(x, 128 // l, stride=1)

    # S4
    x = ge_layer(x, 64, stride=2)
    x = ge_layer(x, 64, stride=1)

    # S5
    x = ge_layer(x, 128, stride=2)

    x = ge_layer(x, 128, stride=1)
    x = ge_layer(x, 128, stride=1)
    x = ge_layer(x, 128, stride=1)

    x = context_embedding(x, 128)

    # detail branch
    # S1
    y = detail_conv2d(x_in, 64, stride=2)
    y = detail_conv2d(y, 64, stride=1)

    # S2
    y = detail_conv2d(y, 64, stride=2)
    y = detail_conv2d(y, 64, stride=1)
    y = detail_conv2d(y, 64, stride=1)

    # S3
    y = detail_conv2d(y, 128, stride=2)
    y = detail_conv2d(y, 128, stride=1)
    y = detail_conv2d(y, 128, stride=1)

    x = bilateral_guided_aggregation(y, x, 128)

    x = seg_head(x, num_classes * seghead_expand_ratio, out_scale, num_classes)

    model = models.Model(inputs=[x_in], outputs=[x])

    # set weight initializers
    # Every layer was already built (and its weights drawn) when it was called
    # above, so replacing the *_initializer attribute alone has no effect on the
    # weights. Record the initializer AND re-draw the existing kernel from it.
    initializer_to_weights = (
        ('kernel_initializer', ('kernel',)),
        ('depthwise_initializer', ('depthwise_kernel', 'kernel')),
    )
    for layer in model.layers:
        for init_attr, weight_attrs in initializer_to_weights:
            if not hasattr(layer, init_attr):
                continue
            # first weight attribute that exists on this layer (names differ
            # between regular and depthwise convolutions)
            weight = next(
                (w for w in (getattr(layer, name, None) for name in weight_attrs)
                 if w is not None),
                None,
            )
            if weight is None:
                continue
            # fresh instance per weight so no two kernels share a random stream
            initializer = tf.keras.initializers.HeNormal()
            setattr(layer, init_attr, initializer)
            weight.assign(initializer(shape=weight.shape, dtype=weight.dtype))

    return model


def bisenetv2_compiled(num_classes, decay_steps=10e3, momentum=0.9, weight_decay=0.0005, **kwargs):
    """Build BiSeNetV2 and compile it for training.

    The optimizer is momentum SGD with a polynomially decaying learning rate
    starting at 5e-2. When `tensorflow_addons` can be imported, its SGDW variant
    with `weight_decay` is used; otherwise plain SGD without weight decay.

    Args:
        num_classes: number of classes, forwarded to `bisenetv2` and the IoU metric.
        decay_steps: number of steps of the learning-rate decay.
        momentum: SGD momentum.
        weight_decay: weight decay, only used with `tensorflow_addons`.
        **kwargs: forwarded to `bisenetv2`.

    Returns:
        The compiled Keras model.
    """
    net = bisenetv2(num_classes, **kwargs)

    lr_schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-2, decay_steps=decay_steps, power=0.9)

    try:
        import tensorflow_addons as tfa

        optimizer = tfa.optimizers.SGDW(
            weight_decay=weight_decay, learning_rate=lr_schedule, momentum=momentum)
    except ImportError:
        print('tensorflow_addons not available, not using weight-decay')

        optimizer = optimizers.SGD(learning_rate=lr_schedule, momentum=momentum)

    # the network has no final activation, hence from_logits=True
    loss_fn = losses.CategoricalCrossentropy(from_logits=True)
    tracked_metrics = ['accuracy', ArgmaxMeanIOU(num_classes)]

    net.compile(optimizer, loss=loss_fn, metrics=tracked_metrics)

    return net


def bisenetv2_output_shape(num_classes, scale, input_shape=INPUT_SHAPE):
    """Return the (height, width, channels) shape of the model output.

    Height and width are the input size floor-divided by 8 and multiplied by
    `scale`; the channel count is `num_classes`.
    """
    in_height, in_width = input_shape[0], input_shape[1]
    out_height = (in_height // 8) * scale
    out_width = (in_width // 8) * scale
    return (out_height, out_width, num_classes)


In [4]:
import json
import tensorflow as tf
import numpy as np
import cv2


def class_map_road(seg):
    """Turn a label map into a two-channel one-hot map: channel 0 = anything else, channel 1 = road.

    Pixels whose label equals 7 are treated as road.
    """
    road_one_hot = [0, 1.0]
    other_one_hot = [1.0, 0]
    return tf.where(seg == 7, road_one_hot, other_one_hot)


def cityscapes_prep(output_shape, input_shape=INPUT_SHAPE, class_map_func=None, float_range=True):
    """Create a `Dataset.map` function that converts a Cityscapes sample to (image, target).

    Args:
        output_shape: (height, width, num_classes) of the training target.
        input_shape: (height, width, channels) of the network input.
        class_map_func: optional callable mapping the resized label map to the
            target tensor; when it is not callable the labels are one-hot encoded
            with `output_shape[-1]` classes.
        float_range: when true, the image is divided by 255 before resizing.

    Returns:
        A function taking a sample dict with the keys 'image_left' and
        'segmentation_label' and returning the tuple (img, seg).
    """
    def prep_map(sample):
        img = sample['image_left']
        seg = sample['segmentation_label']

        if float_range:
            img /= 255

        img = tf.image.resize(img, input_shape[0:2])
        # Class ids are categorical: the default bilinear resize would blend
        # neighbouring ids into values that are no valid class (or a wrong one),
        # so use nearest neighbour. The cast keeps the float32 dtype the bilinear
        # resize used to return, so existing class_map_func callables still work.
        seg = tf.image.resize(seg, output_shape[0:2], method='nearest')
        seg = tf.cast(seg, tf.float32)

        if callable(class_map_func):
            seg = class_map_func(seg)
        else:
            seg = tf.one_hot(tf.cast(seg, tf.int32), output_shape[-1], axis=2)
            seg = tf.cast(seg, tf.float32)
            # drop the size-1 axis left over from the label map's channel dimension
            seg = tf.squeeze(seg)

        return img, seg

    return prep_map

In [5]:
import matplotlib.pyplot as plt


def segmentation_to_image(pred):
    """Convert per-class scores (last axis = classes) into a single-channel image of class ids."""
    class_ids = tf.argmax(pred, axis=-1)
    # array_to_img needs an explicit channel axis
    class_map = class_ids[..., tf.newaxis]
    return tf.keras.preprocessing.image.array_to_img(class_map)

        
def predict_tf(model):
    """Create a function that runs `model` on the image of a single (image, target) sample.

    Args:
        model: object with a Keras-style `predict` method.

    Returns:
        A function mapping a sample to the tuple (image, prediction for that image).
    """
    def predict_func(sample):
        # (a leftover debug print of the whole image tensor was removed here)
        image = sample[0]
        # the model expects a batch axis; take the single prediction back out
        pred = model.predict(tf.expand_dims(image, axis=0))
        return image, pred[0]

    return predict_func


def display_dataset(ds, pred_func):
    """Plot, for every sample of `ds`, the images returned by `pred_func` side by side.

    Args:
        ds: iterable of samples.
        pred_func: callable mapping a sample to a sequence of image-like arrays.
            Arrays whose last axis is not of size 3 are treated as per-class
            scores and converted with `segmentation_to_image`.
    """
    for sample in ds:
        imgs = pred_func(sample)
        # squeeze=False always yields a 2-D array of Axes; without it a single
        # image gives back a bare Axes object that cannot be zipped.
        fig, axes = plt.subplots(1, len(imgs), squeeze=False)

        for ax, img in zip(axes[0], imgs):
            if img.shape[-1] != 3:
                img = segmentation_to_image(img)

            ax.imshow(img)

In [6]:
# Test a image input
import cv2
import time
def img_pred(src, model, size=(512, 256)):
  """Segment one BGR image and blend the class mask over it.

  Args:
    src: image as returned by cv2.imread (BGR channel order).
    model: object with a Keras-style `predict` method.
    size: (width, height) the image is resized to before prediction.

  Returns:
    Tuple (resized RGB image, 3-channel mask image, blended image).
  """
  src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
  src = cv2.resize(src, size, interpolation=cv2.INTER_CUBIC)
  image = src/255
  data = tf.convert_to_tensor(image, dtype=tf.float32)
  data = tf.expand_dims(data, axis=0)
  pred = model.predict(data)
  seg = tf.argmax(pred[0], axis=-1)
  seg = seg[..., tf.newaxis]
  seg = np.array(tf.keras.preprocessing.image.array_to_img(seg))
  if seg.ndim == 2:
    # single-channel mask: COLOR_BGR2RGB only accepts 3/4-channel input
    seg = cv2.cvtColor(seg, cv2.COLOR_GRAY2RGB)
  else:
    seg = cv2.cvtColor(seg, cv2.COLOR_BGR2RGB)
  if seg.shape[:2] != src.shape[:2]:
    # addWeighted needs equally sized inputs; nearest keeps the mask values intact
    seg = cv2.resize(seg, (src.shape[1], src.shape[0]), interpolation=cv2.INTER_NEAREST)
  result = cv2.addWeighted(src, 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)
  return src, seg, result



In [7]:
def gen_seg_vid(cap,out,shape,model):
  """Segment every frame read from `cap` and write the annotated frames to `out`.

  Args:
    cap: opened cv2.VideoCapture-like object (isOpened/read/release).
    out: cv2.VideoWriter-like object (write/release).
    shape: (width, height) of the written frames; must match the writer's size.
    model: model handed to `img_pred`.

  `cap` and `out` are always released, also when processing fails.
  """
  width, height = int(shape[0]), int(shape[1])
  font = cv2.FONT_HERSHEY_SIMPLEX
  # label position; equals the former fixed (800, 900) for 2048x1024 frames
  text_pos = (width * 800 // 2048, height * 900 // 1024)
  try:
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        break
      start = time.time()
      img_seg = img_pred(frame,model)[2]
      elapsed = time.time() - start
      # guard against a zero reading on coarse clocks
      fps = 1/elapsed if elapsed > 0 else float('inf')
      img_seg = cv2.resize(img_seg,(width, height),interpolation=cv2.INTER_CUBIC)
      text = "{}{:.3f}".format('FPS:',fps)
      # img_pred converts its input with COLOR_BGR2RGB; swap the channels back for the writer
      img_seg = cv2.cvtColor(img_seg, cv2.COLOR_BGR2RGB)
      cv2.putText(img_seg, text , text_pos, font, 1, (0, 255, 255), 2, cv2.LINE_AA)
      out.write(img_seg)
      if cv2.waitKey(100) & 0xFF == ord('q'):
        break
  finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [8]:
# Notebook-wide configuration. Note that this rebinds INPUT_SHAPE, which was
# set to (512, 1024, 3) in the model-definition cell.
INPUT_SHAPE = (256, 512, 3)  # (height, width, channels) of the network input
NUM_CLASSES = 2              # number of output classes
SCALE = 8                    # passed as `scale` / `out_scale` to the model helpers

In [9]:
# Shape of the training targets, derived from the configuration above.
OUTPUT_SHAPE = bisenetv2_output_shape(NUM_CLASSES, SCALE, input_shape=INPUT_SHAPE)
# Uncomment to train a fresh model instead of loading a saved one.
#m = bisenetv2_compiled(num_classes=NUM_CLASSES, out_scale=SCALE, input_shape=INPUT_SHAPE)

In [None]:
# Load the trained model. Raw string: the Windows path contains backslash
# sequences (\E, \p, \F, \m) that are not valid escapes in a normal string literal.
m = tf.keras.models.load_model(r'E:\EE\project\FPGA\model8.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})


In [None]:
# Print the layer table of the loaded model.
m.summary()

In [None]:
#tf.keras.utils.plot_model(m, show_shapes=True)

In [10]:
# Load the locally prepared Cityscapes dataset (download=False: nothing is fetched).
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir=r"E:\EE\project\FPGA\cityscapes",download=False)

In [11]:
# Map every split to (image, two-channel road target) pairs at the configured shapes.
train_ds = cityscapes['train'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
valid_ds = cityscapes['validation'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))
test_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))

In [None]:
# Show one training image next to its target (the lambda passes the pair through unchanged).
display_dataset(train_ds.take(1), lambda s: (s[0], s[1]))

In [12]:
BATCH_SIZE = 16

# optional extra data / augmentation switches
USE_TINYCAR_DATA = False
USE_AUGMENTATION = False

# seeded generator that supplies the seeds for the stateless augmentation ops
rng = tf.random.Generator.from_seed(123, alg='philox')

if USE_TINYCAR_DATA:
    from data_prep import labelme_prep, uwula_prep

    # NOTE(review): list_files shuffles by default -- confirm that the take/skip
    # split below selects the same files on every pass over the dataset.
    ds = tf.data.Dataset.list_files('../paperstreet/1/*.json')
    ds = ds.map(labelme_prep(OUTPUT_SHAPE, INPUT_SHAPE))


    ds2 = tf.data.Dataset.list_files('/media/mldata/tinycar/Mikrowunderland_1k/*.jpg')
    ds2 = ds2.map(uwula_prep(OUTPUT_SHAPE, INPUT_SHAPE))

    ds = ds.concatenate(ds2)

    # first 24 samples for validation, the rest for training
    tc_valid_ds = ds.take(24)
    tc_train_ds = ds.skip(24)

    train_ds = train_ds.concatenate(tc_train_ds)
    # Extend the validation split itself. Concatenating onto train_ds (as before)
    # put the complete training set into the validation data.
    valid_ds = valid_ds.concatenate(tc_valid_ds)

if USE_AUGMENTATION:
    def augment(img, seg, seed):
        """Apply seeded colour jitter to `img`; `seg` is returned unchanged."""
        #seed = tf.random.uniform((2,))
        img = tf.image.stateless_random_brightness(img, 0.2, seed=seed)
        img = tf.image.stateless_random_contrast(img, 0.8, 1.2, seed=seed)
        img = tf.image.stateless_random_saturation(img, 0.0, 1.5, seed=seed)
        img = tf.image.stateless_random_hue(img, 0.2, seed=seed)
        img = tf.clip_by_value(img, 0.0, 1.0) # clip values outside 0..1

        return img, seg

    def randomize(img, seg):
        """Draw a fresh seed from `rng` and augment the pair with it."""
        seed = rng.make_seeds(2)[0]
        img, seg = augment(img, seg, seed)
        return img, seg

    # horizontally mirrored copies (image and target are flipped together)
    train_ds_flipped_lr = train_ds.map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))
    valid_ds_flipped_lr = valid_ds.map(lambda img, seg: (tf.image.flip_left_right(img), tf.image.flip_left_right(seg)))

    train_ds_randomized = train_ds.map(randomize)
    valid_ds_randomized = valid_ds.map(randomize)

    train_ds_randomized_flipped = train_ds_flipped_lr.map(randomize)
    valid_ds_randomized_flipped = valid_ds_flipped_lr.map(randomize)

    # original + flipped + jittered + flipped-and-jittered
    train_ds = train_ds.concatenate(train_ds_flipped_lr).concatenate(train_ds_randomized).concatenate(train_ds_randomized_flipped)
    valid_ds = valid_ds.concatenate(valid_ds_flipped_lr).concatenate(valid_ds_randomized).concatenate(valid_ds_randomized_flipped)


# NOTE: this rebinds the *_ds names to batched datasets, so the cell must not be
# run twice without re-running the cell that creates the unbatched splits.
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
valid_ds = valid_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

#display_dataset(valid_ds.unbatch().take(5), lambda s: (s[0], s[1]))

In [None]:
# train_ds_randomized_flipped is only created when USE_AUGMENTATION is enabled;
# without the guard this cell fails with a NameError.
if USE_AUGMENTATION:
    display_dataset(train_ds_randomized_flipped.take(10), lambda s: (s[0], s[1]))

In [None]:
EPOCHS = 10

# one TensorBoard run directory per training start
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Validate on the validation split, as every other training cell of this
# notebook does; the test split must stay untouched during training.
history = m.fit(train_ds,
                validation_data=valid_ds,
                epochs=EPOCHS,
                callbacks=[tensorboard_callback])

In [None]:
# Save the trained model (path on a mounted Google Drive -- adjust when running elsewhere).
m.save('/content/drive/MyDrive/model.tf')
#m.save('model_untrained.tf')

In [None]:
# Loss and metric values of the model on the validation split.
print("Evaluate on valid data")
results = m.evaluate(valid_ds)

In [None]:
# test_ds is batched at this point, so unbatch it to predict single samples.
display_dataset(test_ds.unbatch().take(1), predict_tf(m))
#display_dataset(valid_ds.unbatch().take(20), predict_tf(m))

In [None]:
# Test image: time one full img_pred call (pre-processing, prediction, blending).
test_image_path = 'E:/EE/project/FPGA/test_01.png'
src = cv2.imread(test_image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if src is None:
    raise FileNotFoundError('could not read image: ' + test_image_path)
start = time.time()
result = img_pred(src,m)[2]
end = time.time()
fps = 1/(end-start)
print('fps =',fps)
cv2.imshow("result",result)
cv2.waitKey (0)

In [None]:
# Segment a driving video frame by frame and write the annotated result.
cap = cv2.VideoCapture('/content/drive/MyDrive/Colab_Notebooks/FPGA_RoadSeg/Driving_data.mp4')
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
# 10 frames per second, frames of 2048x1024; the same size is passed to gen_seg_vid
out = cv2.VideoWriter('/content/drive/MyDrive/Colab_Notebooks/FPGA_RoadSeg/Dirve_test.mp4',fourcc,10,(2048,1024))
gen_seg_vid(cap,out,(2048,1024),m)

In [13]:
# Optimizer and loss objects for the fine-tuning cells below.
import tensorflow_addons as tfa

decay_steps=10e3
momentum=0.9
weight_decay=0.0005

# Same schedule type as in bisenetv2_compiled, but starting at 5e-5 instead of 5e-2.
schedule = optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-5,
        decay_steps=decay_steps,
        power=0.9
    )
# NOTE(review): the active cells below compile with Adam; `sgd` is only
# referenced from commented-out code -- confirm whether it is still needed.
sgd = tfa.optimizers.SGDW(
            weight_decay=weight_decay,
            learning_rate=schedule,
            momentum=momentum,
        )
# from_logits=True: the loss receives raw scores
cce = losses.CategoricalCrossentropy(from_logits=True)

In [None]:
# Length of train_ds; the dataset is batched by now, so this counts batches.
print(len(train_ds))

In [None]:
# Prune the original TF model
import tensorflow_model_optimization as tfmot
import tensorflow as tf

# Raw string: the Windows path contains backslash sequences that are not valid escapes.
model = tf.keras.models.load_model(r'E:\EE\project\FPGA\model8.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

epochs = 5
# total number of training steps: batches per epoch times epochs
end_step = np.ceil(len(train_ds)).astype(np.int32)*epochs
# sparsity rises from 50% at step 0 to 80% at end_step
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.5,
                                    final_sparsity=0.80,
                                    begin_step=0,
                                    end_step=end_step)}
#pruning_params = {
#      'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.5, begin_step=0, frequency=100)
#  }
opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
pruned_model = prune_low_magnitude(model, **pruning_params)

# `cce` comes from the optimizer/loss cell above
pruned_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])
pruned_model.summary()

In [None]:
# Fine-tune the pruning-wrapped model on the training data.

import tempfile

# NOTE(review): logdir is created but not passed to any callback in this cell.
logdir = tempfile.mkdtemp()
# UpdatePruningStep is passed as the only callback of the pruning fine-tuning.
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep()
]   
history = pruned_model.fit(train_ds,
                validation_data=valid_ds,
                epochs=epochs,
                callbacks=callbacks) 

In [None]:
# Compare the original model `m` with the pruned model on the same validation data.
print("Evaluate model on valid data")
results1 = m.evaluate(valid_ds)
print("Evaluate pruned model on valid data")
results2 = pruned_model.evaluate(valid_ds)

In [None]:
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
pruned_model.save(r'E:\EE\project\FPGA\pruned_model_SparsityDecay.tf')

In [None]:
def print_model_weights_sparsity(model):
    """Print, per kernel weight of `model`, the share and count of entries equal to zero.

    For `tf.keras.layers.Wrapper` layers only the trainable weights are inspected,
    for all other layers every weight. Weights whose name lacks "kernel" or
    contains "centroid" are skipped.
    """
    for layer in model.layers:
        is_wrapper = isinstance(layer, tf.keras.layers.Wrapper)
        candidates = layer.trainable_weights if is_wrapper else layer.weights

        kernels = [
            w for w in candidates
            if "kernel" in w.name and "centroid" not in w.name
        ]
        for kernel in kernels:
            total = kernel.numpy().size
            zeros = np.count_nonzero(kernel == 0)
            print(
                f"{kernel.name}: {zeros/total:.2%} sparsity ",
                f"({zeros}/{total})",
            )

In [None]:
# Apply tfmot's strip_pruning to the fine-tuned model and report the resulting sparsity.
stripped_pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)

print_model_weights_sparsity(stripped_pruned_model)

# Independent copy (same architecture, same weights) for the clustering experiment.
stripped_pruned_model_copy = tf.keras.models.clone_model(stripped_pruned_model)
stripped_pruned_model_copy.set_weights(stripped_pruned_model.get_weights())

In [None]:
# Test image: time one full img_pred call with the stripped pruned model.
test_image_path = 'E:/EE/project/FPGA/test_01.png'
src = cv2.imread(test_image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if src is None:
    raise FileNotFoundError('could not read image: ' + test_image_path)
start = time.time()
result = img_pred(src,stripped_pruned_model)[2]
end = time.time()
fps = 1/(end-start)
print('fps =',fps)
cv2.imshow("result",result)
cv2.waitKey (0)

In [None]:
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
stripped_pruned_model.save(r'E:\EE\project\FPGA\stripped_pruned_model_SparsityDecay.tf', include_optimizer=False)

In [None]:
# Reload the stripped pruned model from disk and compile it for further training.
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
stripped_pruned_model_copy = tf.keras.models.load_model(r'E:\EE\project\FPGA\stripped_pruned_model_SparsityDecay.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
stripped_pruned_model_copy.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])

In [None]:
import tensorflow_model_optimization as tfmot

# Clustering
cluster_weights = tfmot.clustering.keras.cluster_weights
CentroidInitialization = tfmot.clustering.keras.CentroidInitialization

# Plain clustering variant, kept for reference.
#clustering_params = {
#  'number_of_clusters': 8,
#  'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS
#}

#clustered_model = cluster_weights(stripped_pruned_model, **clustering_params)

#clustered_model.compile(sgd, loss=cce,
 #                 metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])

#print('Train clustering model:')
#clustered_model.fit(train_ds, validation_data=valid_ds, epochs=5)


#stripped_pruned_model.save("/content/drive/MyDrive/Colab_Notebooks/stripped_pruned_model_clustered.tf")


# Sparsity preserving clustering
from tensorflow_model_optimization.python.core.clustering.keras.experimental import (
    cluster,
)

# Rebinds cluster_weights to the experimental implementation, which is called
# with the 'preserve_sparsity' option below.
cluster_weights = cluster.cluster_weights

clustering_params = {
  'number_of_clusters': 8,
  'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,
  'preserve_sparsity': True
}

sparsity_clustered_model = cluster_weights(stripped_pruned_model_copy, **clustering_params)
opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
sparsity_clustered_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])

print('Train sparsity preserving clustering model:')
sparsity_clustered_model.fit(train_ds, validation_data=valid_ds, epochs=5)
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
sparsity_clustered_model.save(r"E:\EE\project\FPGA\sparsity_clustered_model.tf")

In [None]:
# Apply tfmot's strip_clustering and save the resulting model without optimizer state.
stripped_sparsity_clustered_model = tfmot.clustering.keras.strip_clustering(sparsity_clustered_model)
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
stripped_sparsity_clustered_model.save(r"E:\EE\project\FPGA\stripped_sparsity_clustered_model.tf",include_optimizer=False)

In [15]:
import tensorflow_model_optimization as tfmot


# Second pruning pass, this time on the saved sparsity-clustered model.
# Raw string: the Windows path contains backslash sequences that are not valid escapes.
stripped_sparsity_clustered_model = tf.keras.models.load_model(r"E:\EE\project\FPGA\stripped_sparsity_clustered_model.tf")
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

epochs = 1
# total number of training steps: batches per epoch times epochs
end_step = np.ceil(len(train_ds)).astype(np.int32)*epochs
# sparsity rises from 50% at step 0 to 80% at end_step
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.5,
                                    final_sparsity=0.80,
                                    begin_step=0,
                                    end_step=end_step)}
#pruning_params = {
#      'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.5, begin_step=0, frequency=100)
#  }
opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
# NOTE: rebinds pruned_model, replacing the model of the first pruning pass.
pruned_model = prune_low_magnitude(stripped_sparsity_clustered_model, **pruning_params)

pruned_model.compile(opt, loss=cce,
                  metrics=['accuracy', ArgmaxMeanIOU(NUM_CLASSES)])
pruned_model.summary()





Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 512, 3) 0                                            
__________________________________________________________________________________________________
prune_low_magnitude_conv2d (Pru (None, 128, 256, 16) 882         input_1[0][0]                    
__________________________________________________________________________________________________
prune_low_magnitude_batch_norma (None, 128, 256, 16) 65          prune_low_magnitude_conv2d[0][0] 
__________________________________________________________________________________________________
prune_low_magnitude_activation  (None, 128, 256, 16) 1           prune_low_magnitude_batch_normali
______________________________________________________________________________________________

In [17]:
# Fine-tune the re-pruned model; same procedure as the first pruning pass.
import tempfile
# NOTE(review): logdir is created but not passed to any callback in this cell.
logdir = tempfile.mkdtemp()
# UpdatePruningStep is passed as the only callback of the pruning fine-tuning.
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep()
]   
history = pruned_model.fit(train_ds,
                validation_data=valid_ds,
                epochs=epochs,
                callbacks=callbacks) 

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.




In [20]:
# Apply strip_pruning again; this rebinds stripped_pruned_model to the re-pruned model.
stripped_pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)


In [21]:
# Print the layer table of the stripped model.
stripped_pruned_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 512, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 128, 256, 16) 448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 128, 256, 16) 64          conv2d[2][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 128, 256, 16) 0           batch_normalization[2][0]        
______________________________________________________________________________________________

In [30]:
# Test image: time one full img_pred call with the re-pruned, stripped model.
test_image_path = 'E:/EE/project/FPGA/test_01.png'
src = cv2.imread(test_image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if src is None:
    raise FileNotFoundError('could not read image: ' + test_image_path)
start = time.time()
result = img_pred(src,stripped_pruned_model)[2]
end = time.time()
fps = 1/(end-start)
print('fps =',fps)
cv2.imshow("result",result)
cv2.waitKey (0)

fps = 17.00515307177406


-1

In [None]:
# Quantize the model to TFLite with default optimizations and a representative
# dataset; the integer-only settings stay commented out, so input/output types
# are left at the converter's defaults (intended for faster inference on ARM).

model = tf.keras.models.load_model('model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
# NOTE: rebinds cityscapes and test_ds (unbatched here) from a different data directory.
cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="drive/MyDrive/Colab_Notebooks/dataset",download=False)
test_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))

def representative_dataset():
  """Yield 50 single-image batches from test_ds as calibration data for the converter."""
  for data in test_ds.take(50).batch(1):
      # only the image is needed; the target (data[1]) is dropped
      yield [data[0]]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
#converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
#converter.inference_input_type = tf.uint8
#converter.inference_output_type = tf.uint8
tflite_quant_model_IOfloat32 = converter.convert()

In [None]:
# Check input/output dtype of the first converted model.
interpreter = tf.lite.Interpreter(model_content=tflite_quant_model_IOfloat32)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

In [None]:
# Quantize the model to an integer-only TFLite model with uint8 input and
# output (intended for faster inference on ARM).

model = tf.keras.models.load_model('model.tf',custom_objects={'ArgmaxMeanIOU': ArgmaxMeanIOU})
# NOTE: rebinds cityscapes and test_ds (unbatched here) from a different data directory.
cityscapes = tfds.load('cityscapes/semantic_segmentation',data_dir="drive/MyDrive/Colab_Notebooks/dataset",download=False)
test_ds = cityscapes['test'].map(cityscapes_prep(OUTPUT_SHAPE, INPUT_SHAPE, class_map_road))

def representative_dataset():
  """Yield 50 single-image batches from test_ds as calibration data for the converter."""
  for data in test_ds.take(50).batch(1):
      # only the image is needed; the target (data[1]) is dropped
      yield [data[0]]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# restrict the converter to int8 builtin ops and request uint8 input/output tensors
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_quant_model_fullyINT8 = converter.convert()

In [None]:
# Check input/output dtype of the integer-only model.
interpreter = tf.lite.Interpreter(model_content=tflite_quant_model_fullyINT8)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

In [None]:
# Write both converted TFLite models to disk.
import pathlib

tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/FPGA_RoadSeg/cityscapes_tflite_models/")
# create the target directory (and missing parents) if necessary
tflite_models_dir.mkdir(exist_ok=True, parents=True)

tflite_model_quant_iofloat_file = tflite_models_dir/"cityscapes_iofloat32.tflite"
tflite_model_quant_iofloat_file.write_bytes(tflite_quant_model_IOfloat32)

tflite_model_quant_fullINT_file = tflite_models_dir/"cityscapes_fullyINT8.tflite"
tflite_model_quant_fullINT_file.write_bytes(tflite_quant_model_fullyINT8)

In [None]:
# Helper function to run inference on a TFLite model with image
def run_tflite_model(tflite_file, test_image):
  """Segment one BGR image with a TFLite model and blend the class mask over it.

  Args:
    tflite_file: path of the .tflite model file.
    test_image: image as returned by cv2.imread (BGR channel order).

  Returns:
    Tuple (resized RGB image, 3-channel mask image, blended image).
  """
  # Initialize the interpreter
  interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
  interpreter.allocate_tensors()

  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]

  src = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
  src = cv2.resize(src,(512,256),interpolation=cv2.INTER_CUBIC)
  data = src.astype(np.float32)/255

  # An integer input tensor expects quantized values: map the 0..1 floats with
  # the tensor's scale / zero point instead of truncating them to 0.
  input_dtype = input_details["dtype"]
  if np.issubdtype(input_dtype, np.integer):
    scale, zero_point = input_details["quantization"]
    if scale:
      data = np.round(data / scale + zero_point)
    limits = np.iinfo(input_dtype)
    data = np.clip(data, limits.min, limits.max)
  data = np.expand_dims(data, axis=0).astype(input_dtype)
  interpreter.set_tensor(input_details["index"], data)

  start = time.time()
  interpreter.invoke()
  end = time.time()

  elapsed = end - start
  # guard against a zero reading on coarse clocks
  fps = 1/elapsed if elapsed > 0 else float('inf')
  print(f'invoke: {elapsed:.3f}s ({fps:.2f} fps)')

  output = interpreter.get_tensor(output_details["index"])[0]

  # argmax over the class axis; dequantizing is not needed to find the maximum
  seg = tf.argmax(output, axis=-1)
  seg = seg[..., tf.newaxis]
  seg = np.array(tf.keras.preprocessing.image.array_to_img(seg))
  if seg.ndim == 2:
    # single-channel mask: COLOR_BGR2RGB only accepts 3/4-channel input
    seg = cv2.cvtColor(seg, cv2.COLOR_GRAY2RGB)
  else:
    seg = cv2.cvtColor(seg, cv2.COLOR_BGR2RGB)
  if seg.shape[:2] != src.shape[:2]:
    # addWeighted needs equally sized inputs; nearest keeps the mask values intact
    seg = cv2.resize(seg, (src.shape[1], src.shape[0]), interpolation=cv2.INTER_NEAREST)
  result = cv2.addWeighted(src, 0.6, seg, 0.5, 0, dtype = cv2.CV_8U)

  return src, seg, result

In [None]:
# Run the integer-only TFLite model on a test image and show the blended result.
src = cv2.imread('/content/drive/MyDrive/Colab_Notebooks/FPGA_RoadSeg/test_02.png')

result = run_tflite_model(tflite_model_quant_fullINT_file,src)[2]

# NOTE(review): cv2_imshow is not defined or imported anywhere in this notebook;
# presumably it is Colab's google.colab.patches.cv2_imshow -- confirm and import it.
cv2_imshow(result)

In [None]:
# helper function to evaluate on the tf dataset
import time
interpreter = tf.lite.Interpreter(model_path=str(tflite_model_quant_fullINT_file))
interpreter.allocate_tensors()

# details of the first (only) input/output tensor -- these are dicts, not lists
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
def pred_func(sample):
    """Run the TFLite interpreter on the image of one (image, target) sample.

    Returns:
        Tuple (original image, per-class output of the interpreter).
    """
    image = np.asarray(sample[0], dtype=np.float32)

    # An integer input tensor expects quantized values: map the floats with the
    # tensor's scale / zero point instead of truncating them to 0.
    input_dtype = input_details['dtype']
    if np.issubdtype(input_dtype, np.integer):
        scale, zero_point = input_details['quantization']
        if scale:
            image = np.round(image / scale + zero_point)
        limits = np.iinfo(input_dtype)
        image = np.clip(image, limits.min, limits.max)
    input_data = np.expand_dims(image, axis=0).astype(input_dtype)

    #truth_data = sample[1]

    interpreter.set_tensor(input_details['index'], input_data)

    s = time.time()
    interpreter.invoke()
    e = time.time()
    elapsed = e - s
    # guard against a zero reading on coarse clocks
    fps = 1/elapsed if elapsed > 0 else float('inf')
    print(f'invoke: {elapsed:.3f}s ({fps:.2f} fps)')
    output_data = interpreter.get_tensor(output_details['index'])

    # return the unquantized image so it is displayed with its real colours
    return sample[0], output_data[0]

display_dataset(test_ds.take(10), pred_func)