### ENET FOR IMAGES

In [108]:
# !pip install --user imutils

In [114]:
# import the necessary packages
import numpy as np
import matplotlib.pyplot as plt
import argparse
import imutils
import time
import cv2
import os

In [115]:
# Wall-clock reference for the timing message printed once the results are shown.
start = time.time()
# Target width passed to imutils.resize (the recorded run yields a 300x600 image).
SET_WIDTH = 600

# blobFromImage parameters: the scale factor applied to pixel values and the
# (width, height) the image is resized to before it enters the network.
normalize_image = 1 / 255.0
resize_image_shape = (1024, 512)

sample_img = cv2.imread('./images/example_02.jpg')
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside imutils.resize.
if sample_img is None:
    raise FileNotFoundError("could not read image './images/example_02.jpg'")
sample_img = imutils.resize(sample_img, width=SET_WIDTH)

# swapRB=True swaps the first and last channels; crop=False resizes without cropping.
blob_img = cv2.dnn.blobFromImage(sample_img, normalize_image, resize_image_shape, 0,
                                 swapRB=True, crop=False)

In [116]:
# Report the resized image shape, then the network blob shape, one per line.
print(sample_img.shape, blob_img.shape, sep="\n")

(300, 600, 3)
(1, 3, 512, 1024)


In [117]:
# Load the network stored at ./enet-cityscapes/enet-model.net through
# OpenCV's dnn module; the result is reused by every forward pass below.
print("[INFO] loading model...")
cv_enet_model = cv2.dnn.readNet('./enet-cityscapes/enet-model.net')

[INFO] loading model...


In [118]:
# Hand the preprocessed blob to the network and run a single forward pass.
cv_enet_model.setInput(blob_img)

cv_enet_model_output = cv_enet_model.forward()
# Shown as the cell result; the recorded run reports (1, 20, 512, 1024).
cv_enet_model_output.shape
# cv_enet_model_output[0].shape

(1, 20, 512, 1024)

In [119]:
# Read the class names, one per line. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('./enet-cityscapes/enet-classes.txt') as labels_file:
    label_values = labels_file.read().strip().split("\n")

In [120]:
# Axis 0 of the network output is skipped (it has length 1 in the recorded
# run); axes 1..3 are unpacked into class count, height and width.
IMG_OUTPUT_SHAPE_START = 1
IMG_OUTPUT_SHAPE_END = 4
classes_num, h, w = cv_enet_model_output.shape[
    slice(IMG_OUTPUT_SHAPE_START, IMG_OUTPUT_SHAPE_END)
]

In [121]:
# For every pixel, keep the index of the largest score along the class axis.
class_map = cv_enet_model_output[0].argmax(axis=0)
# Number of rows in the class map (displayed as the cell result).
class_map.shape[0]


512

In [122]:
import os

# Build the color table used to paint the segmentation mask: one row of three
# uint8 values per class, indexed by class ID.
colors_path = './enet-cityscapes/enet-colors.txt'
if os.path.isfile(colors_path):
    # Each line holds three comma-separated integers. The context manager
    # closes the file handle as soon as the text has been read.
    with open(colors_path) as colors_file:
        color_lines = colors_file.read().strip().split("\n")
    CV_ENET_SHAPE_IMG_COLORS = [np.array(c.split(",")).astype("int") for c in color_lines]
    CV_ENET_SHAPE_IMG_COLORS = np.array(CV_ENET_SHAPE_IMG_COLORS, dtype="uint8")
else:
    # No color file: draw a reproducible random palette (fixed seed) for every
    # class except the first, which is pinned to black.
    # Note: randint's upper bound is exclusive, so 255 itself is never drawn.
    np.random.seed(42)
    CV_ENET_SHAPE_IMG_COLORS = np.random.randint(0, 255, size=(len(label_values) - 1, 3),
                                                 dtype="uint8")
    CV_ENET_SHAPE_IMG_COLORS = np.vstack([[0, 0, 0], CV_ENET_SHAPE_IMG_COLORS]).astype("uint8")

In [123]:
# Replace every class index with its row of the color table, producing a
# color image with the same height and width as the class map.
mask_class_map = np.take(CV_ENET_SHAPE_IMG_COLORS, class_map, axis=0)

In [124]:
# cv2.resize takes the target size as (width, height), i.e. shape[1], shape[0].
target_size = (sample_img.shape[1], sample_img.shape[0])

# Nearest-neighbour interpolation keeps the mask colors exact (no blending
# between neighbouring classes).
mask_class_map = cv2.resize(mask_class_map, target_size,
                            interpolation=cv2.INTER_NEAREST)

# Resize the class-index map itself. The original resized the color mask a
# second time and stored it under class_map, discarding the class indices.
# The cast to uint8 gives cv2.resize a dtype it supports; the recorded output
# has 20 classes, so every index fits.
class_map = cv2.resize(class_map.astype("uint8"), target_size,
                       interpolation=cv2.INTER_NEAREST)

In [125]:
# Overlay: 40% input image plus 60% color mask, truncated to uint8.
# Note that this rebinds cv_enet_model_output, which previously held the raw
# network output.
cv_enet_model_output = np.add(0.4 * sample_img, 0.6 * mask_class_map).astype("uint8")

In [126]:
# Black canvas for the legend: one 25-pixel row per class plus one spare row,
# 300 pixels wide, three channels.
my_legend = np.zeros((25 * (len(label_values) + 1), 300, 3), dtype="uint8")

In [127]:
# Paint one legend row per class: the class name on the left and a filled
# swatch of the class color on the right.
for row, (label, color_row) in enumerate(zip(label_values, CV_ENET_SHAPE_IMG_COLORS)):
    top = row * 25
    # OpenCV drawing calls need plain Python ints, not numpy scalars.
    swatch = tuple(int(channel) for channel in color_row)
    cv2.putText(my_legend, label, (5, top + 17),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    # Thickness -1 fills the rectangle.
    cv2.rectangle(my_legend, (100, top), (300, top + 25), swatch, -1)

In [128]:
# Stop the clock before any window opens: cv2.waitKey(0) blocks until a key is
# pressed, so taking the timestamp afterwards would add the user's reaction
# time to the reported figure. `start` was set at the top of the image
# section, so the span covers everything executed since then.
end = time.time()

# Show the legend, the input, the blended overlay and the raw color mask.
cv2.imshow("My_Legend", my_legend)
cv2.imshow("Img_Input", sample_img)
cv2.imshow("CV_Model_Output_weighted", cv_enet_model_output)
cv2.imshow("CV_Model_Output", mask_class_map)
cv2.waitKey(0)
cv2.destroyAllWindows()

print("[INFO] inference took {:.4f} seconds".format(end - start))


[INFO] inference took 35.4877 seconds


### ENET FOR VIDEOS

In [103]:
import os
import time
import cv2
import imutils
import numpy as np


# Display toggle: the frame loop opens preview windows only when this is > 0.
DEFAULT_FRAME = 1
# Width passed to imutils.resize for every video frame.
SET_WIDTH = 600

In [104]:
# Read the class names, one per line. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('./enet-cityscapes/enet-classes.txt') as labels_file:
    class_labels = labels_file.read().strip().split("\n")

In [105]:
# Build the color table used to paint the segmentation mask: one row of three
# uint8 values per class, indexed by class ID.
colors_path = './enet-cityscapes/enet-colors.txt'
if os.path.isfile(colors_path):
    # Each line holds three comma-separated integers. The context manager
    # closes the file handle as soon as the text has been read.
    with open(colors_path) as colors_file:
        color_lines = colors_file.read().strip().split("\n")
    CV_ENET_SHAPE_IMG_COLORS = [np.array(c.split(",")).astype("int") for c in color_lines]
    CV_ENET_SHAPE_IMG_COLORS = np.array(CV_ENET_SHAPE_IMG_COLORS, dtype="uint8")
else:
    # No color file: draw a reproducible random palette (fixed seed) for every
    # class except the first, which is pinned to black.
    # Note: randint's upper bound is exclusive, so 255 itself is never drawn.
    np.random.seed(42)
    CV_ENET_SHAPE_IMG_COLORS = np.random.randint(0, 255, size=(len(class_labels) - 1, 3),
                                                 dtype="uint8")
    CV_ENET_SHAPE_IMG_COLORS = np.vstack([[0, 0, 0], CV_ENET_SHAPE_IMG_COLORS]).astype("uint8")

In [106]:
# Load the network stored at ./enet-cityscapes/enet-model.net through
# OpenCV's dnn module; the frame loop reuses it for every forward pass.
print("[INFO] loading model...")
cv_enet_model = cv2.dnn.readNet('./enet-cityscapes/enet-model.net')

[INFO] loading model...


In [130]:
# Open the input video. The writer starts as None and is created inside the
# frame loop once the first output frame (and therefore its size) is known.
# NOTE(review): the capture is never checked with sv.isOpened(); the recorded
# output of the frame loop reports 0 frames - confirm the path resolves.
sv = cv2.VideoCapture('.//video/taxi.mp4')
sample_video_writer = None

In [131]:
# Debug aid: print the capture object to confirm it was constructed.
print(sv)

<VideoCapture 0000023809CA6470>


In [132]:
# Pick the frame-count property constant; its location in the cv2 namespace
# depends on whether imutils reports an OpenCV 2.x installation.
if imutils.is_cv2():
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT
else:
    prop = cv2.CAP_PROP_FRAME_COUNT

In [133]:
# Query the capture for the selected property and store it as an integer.
total = int(sv.get(prop))

In [134]:
# Display the frame count as the cell result (241 in the recorded run).
total

241

In [136]:

# try to determine the total number of frames in the video file
try:
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
        else cv2.CAP_PROP_FRAME_COUNT
    total = int(sv.get(prop))
    print("[INFO] {} total frames in video".format(total))

# an error occurred while trying to determine the total number of frames in
# the video file; catch Exception rather than using a bare except so that
# KeyboardInterrupt and SystemExit still propagate
except Exception:
    print("[INFO] could not determine # of frames in video")
    total = -1
# Segment every frame of the opened video, blend the color mask over the
# frame, write the result to ./output/output_toronoto.avi and optionally
# preview it on screen.

# Preprocessing parameters are loop-invariant, so they are set once up front.
normalize_image = 1 / 255.0
resize_image_shape = (1024, 512)

# try/finally guarantees that the capture, the writer and the preview windows
# are released even if the loop raises or the kernel is interrupted.
try:
    # loop over frames from the video file stream
    while True:
        # read the next frame from the file
        (grabbed, frame) = sv.read()

        # if the frame was not grabbed, then we have reached the end
        # of the stream
        if not grabbed:
            break

        # construct a blob from the frame and perform a forward pass
        # using the segmentation model; only the forward pass is timed
        video_frame = imutils.resize(frame, width=SET_WIDTH)
        blob_img = cv2.dnn.blobFromImage(frame, normalize_image, resize_image_shape, 0,
                                         swapRB=True, crop=False)
        cv_enet_model.setInput(blob_img)
        start = time.time()
        cv_enet_model_output = cv_enet_model.forward()
        end = time.time()

        # infer the total number of classes along with the spatial
        # dimensions of the mask image via the shape of the output array
        (Classes_num, height, width) = cv_enet_model_output.shape[1:4]

        # the output is num_classes x height x width, so the argmax over the
        # class axis gives the most likely class ID for every pixel
        classMap = np.argmax(cv_enet_model_output[0], axis=0)

        # map each class ID to its corresponding color
        mask_class_map = CV_ENET_SHAPE_IMG_COLORS[classMap]

        # resize the mask so its dimensions match the resized display frame
        mask_class_map = cv2.resize(mask_class_map,
                                    (video_frame.shape[1], video_frame.shape[0]),
                                    interpolation=cv2.INTER_NEAREST)

        # weighted combination of the display frame and the mask; this rebinds
        # cv_enet_model_output from the raw network output to the blended frame
        cv_enet_model_output = ((0.3 * video_frame) + (0.7 * mask_class_map)).astype("uint8")

        # the writer is created on the first frame, once the output size is known
        if sample_video_writer is None:
            print("sample_video_writer is None")
            # initialize our video writer
            fourcc_obj = cv2.VideoWriter_fourcc(*"MJPG")

            # make sure the destination directory exists before the writer
            # tries to create the output file in it
            os.makedirs('./output', exist_ok=True)
            sample_video_writer = cv2.VideoWriter('./output/output_toronoto.avi', fourcc_obj, 15,
                                                  (cv_enet_model_output.shape[1],
                                                   cv_enet_model_output.shape[0]), True)

            # some information on processing single frame
            if total > 0:
                execution_time = (end - start)
                print("[INFO] single video_frame took {:.4f} seconds".format(execution_time))

                print("[INFO] estimated total_time time: {:.4f}".format(
                    execution_time * total))

        # write the output frame to disk
        sample_video_writer.write(cv_enet_model_output)

        # check to see if we should display the output frame to our screen
        if DEFAULT_FRAME > 0:
            cv2.imshow("Video Frame", cv_enet_model_output)
            # NOTE: my_legend is built in the image section of this notebook;
            # that section must have been run before this loop.
            cv2.imshow("My_Legend", my_legend)
            key = cv2.waitKey(1) & 0xFF

            # pressing "q" stops processing early
            if key == ord("q"):
                break
finally:
    print("[INFO] cleaning up...")
    # the writer is still None when no frame was read (empty or unopened
    # video); calling release() on None would raise AttributeError
    if sample_video_writer is not None:
        sample_video_writer.release()
    sv.release()
    cv2.destroyAllWindows()

[INFO] 0 total frames in video
[INFO] cleaning up...
