# DeepLab - Video Segmentation - Xception 65

## Import libraries

In [1]:
from DeepLabModel import DeepLabModel
from glob import glob
from google.colab.patches import cv2_imshow
from imageio import mimread
from io import BytesIO
from matplotlib import  gridspec
from matplotlib.style import use
from PIL import Image
from six.moves import urllib
from sklearn.metrics import  confusion_matrix
from sklearn.model_selection import train_test_split
from tabulate import tabulate
from tqdm import tqdm

import cv2
import IPython
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import os
import random
#import tarfile
import tempfile
import tensorflow as tf

# The plain 'seaborn' style name was deprecated in Matplotlib 3.6 and removed in
# later releases, where the same style is available as 'seaborn-v0_8'. Try the
# old name first and fall back, so the notebook runs on old and new versions.
try:
    use('seaborn')
except OSError:
    use('seaborn-v0_8')

# '2' asks TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): this is set after `import tensorflow`; it may need to be set
# before the import to take full effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

print(f'Tensorflow version: {tf.__version__}')

Tensorflow version: 2.9.2


In [2]:
def create_colormap():
    """
    Build the color palette used to visualize the segmentation results.

    Return:
        NumPy uint8 array of shape (20, 3). Row i holds the three color
        components used to paint class id i.
    """
    palette = (
        (128,  64, 128),   # 0
        (244,  35, 232),   # 1
        ( 70,  70,  70),   # 2
        (102, 102, 156),   # 3
        (190, 153, 153),   # 4
        (153, 153, 153),   # 5
        (250, 170,  30),   # 6
        (220, 220,   0),   # 7
        (120, 155,  42),   # 8
        (152, 251, 152),   # 9
        ( 93, 165, 227),   # 10
        (220,  20,  60),   # 11
        (255,   0,   0),   # 12
        ( 34,  34, 142),   # 13
        (  0,   0,  70),   # 14
        (  0,  60, 100),   # 15
        (  0,  80, 100),   # 16
        (  0,   0, 230),   # 17
        (119,  11,  32),   # 18
        (  0,   0,   0),   # 19
    )
    return np.asarray(palette, dtype=np.uint8)

In [3]:
def label_to_color_image(label):
    """
    Turn a 2D map of class ids into a color image.

    Parameters:
        label: 2D integer array that stores the segmentation label (one class id per element).

    Return:
        Segmentation map: a 3D uint8 array of shape (rows, cols, 3). Every element of `label`
        is replaced by the color that `create_colormap()` assigns to that class id, so the
        result has the layout of the input but shows the network prediction as colors.

    Raises:
        ValueError: if `label` is not 2D, or if it contains a class id that is negative or
        not smaller than the number of colors in the colormap.
    """
    label = np.asarray(label)

    # Error if the input label doesn't have two dimensions.
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')

    colormap = create_colormap()

    # An empty label map has no min/max (np.max would raise on it) and needs no range check.
    if label.size:
        # Error if a label index is larger than the maximum index of the colormap.
        if np.max(label) >= len(colormap):
            raise ValueError('label value too large.')
        # Negative indices would silently wrap around and pick colors from the end of the colormap.
        if np.min(label) < 0:
            raise ValueError('label value must be non-negative.')

    return colormap[label]

## Label names, class identifiers, and the colors associated with them

In [4]:
# Class names ordered by class id: the name at position i belongs to class id i.
LABEL_NAMES = np.asarray([
    'road',            # 0
    'sidewalk',        # 1
    'building',        # 2
    'wall',            # 3
    'fence',           # 4
    'pole',            # 5
    'traffic light',   # 6
    'traffic sign',    # 7
    'vegetation',      # 8
    'terrain',         # 9
    'sky',             # 10
    'person',          # 11
    'rider',           # 12
    'car',             # 13
    'truck',           # 14
    'bus',             # 15
    'train',           # 16
    'motorcycle',      # 17
    'bicycle',         # 18
    'void',            # 19
])

print(f'Number of labels: {len(LABEL_NAMES)}')

Number of labels: 20


In [5]:
# Class identifiers as a column vector: one row per entry of LABEL_NAMES.
FULL_LABEL_MAP = np.arange(len(LABEL_NAMES))[:, np.newaxis]
# Colors associated with the class identifiers (one color per row).
FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)

# Show every class identifier next to its color.
for class_id, class_color in zip(FULL_LABEL_MAP, FULL_COLOR_MAP):
    print(class_id, class_color)

[0] [[128  64 128]]
[1] [[244  35 232]]
[2] [[70 70 70]]
[3] [[102 102 156]]
[4] [[190 153 153]]
[5] [[153 153 153]]
[6] [[250 170  30]]
[7] [[220 220   0]]
[8] [[120 155  42]]
[9] [[152 251 152]]
[10] [[ 93 165 227]]
[11] [[220  20  60]]
[12] [[255   0   0]]
[13] [[ 34  34 142]]
[14] [[ 0  0 70]]
[15] [[  0  60 100]]
[16] [[  0  80 100]]
[17] [[  0   0 230]]
[18] [[119  11  32]]
[19] [[0 0 0]]


## Pre-trained files

In [6]:
# Download URL of the pre-trained model archive (.tar.gz) that is fetched further down.
# NOTE(review): the file name suggests a DeepLab v3 checkpoint trained on Cityscapes — confirm.
MODEL_URL = 'http://download.tensorflow.org/models/deeplabv3_cityscapes_train_2018_02_06.tar.gz'

In [7]:
# File name under which the downloaded model archive is stored.
TARBALL_NAME = 'deeplab_model.tar.gz'
# New temporary directory that will hold the archive (a different one on every run).
model_dir = tempfile.mkdtemp()
# NOTE(review): mkdtemp() already creates the directory, so this call looks redundant — confirm.
tf.io.gfile.makedirs(model_dir)

In [8]:
# Full path of the model archive inside the temporary model directory.
download_path = os.path.join(model_dir, TARBALL_NAME)
# Bare expression: shows the path as the cell output.
download_path

'/tmp/tmpkkdwpewi/deeplab_model.tar.gz'

In [9]:
# Download the model archive from MODEL_URL and save it at download_path.
# The call is the last expression of the cell, so its return value (a tuple of the
# local file name and the HTTP response headers) is displayed as the cell output.
urllib.request.urlretrieve(MODEL_URL, download_path)

('/tmp/tmpkkdwpewi/deeplab_model.tar.gz',
 <http.client.HTTPMessage at 0x7f79058ed8e0>)

## Create the model

In [10]:
# Create the segmentation model from the path of the downloaded archive.
# DeepLabModel is imported from the DeepLabModel module, which is not shown in this notebook;
# presumably it loads the network stored in the tarball — verify against that module.
model = DeepLabModel(download_path)

## Model test

### Load the test video

In [11]:
# -nc (no-clobber): skip the download when the file already exists, so re-running this
# cell does not leave numbered duplicates (mit_driveseg_sample.mp4.1, ...) behind.
!wget -nc https://github.com/lexfridman/mit-deep-learning/raw/master/tutorial_driving_scene_segmentation/mit_driveseg_sample.mp4

--2022-12-28 23:31:49--  https://github.com/lexfridman/mit-deep-learning/raw/master/tutorial_driving_scene_segmentation/mit_driveseg_sample.mp4
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/lexfridman/mit-deep-learning/master/tutorial_driving_scene_segmentation/mit_driveseg_sample.mp4 [following]
--2022-12-28 23:31:49--  https://raw.githubusercontent.com/lexfridman/mit-deep-learning/master/tutorial_driving_scene_segmentation/mit_driveseg_sample.mp4
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28576737 (27M) [application/octet-stream]
Saving to: ‘mit_driveseg_sample.mp4’


2022-12-28 23:31:4



### Result & Segmentation visualization

In [12]:
def video_segmentation_vis(image, segmentation_map, index):
    """
    Show one segmented video frame as cell output and return the blended frame.

    Parameters:
        image: the original frame. It must be accepted by both `imshow` and `np.uint8`
            (`run_video_vis` passes a PIL image).
        segmentation_map: 2D array of class ids for the frame. It is assumed to have the
            same height and width as `image`, because both are blended element by element.
        index: frame number, shown in the figure title.

    Return:
        uint8 array with the blended frame: 30% original image, 70% colored segmentation.
    """
    # Colorize first, so an invalid segmentation map fails before any figure is created.
    seg_image = label_to_color_image(segmentation_map).astype(np.uint8)

    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        ax.imshow(image)
        ax.imshow(seg_image, alpha=0.7)
        ax.axis('off')
        ax.set_title('Segmentation | frame #%d' % index)
        # Pass a real boolean: the string 'off' is truthy and would switch the grid on.
        ax.grid(False)
        fig.tight_layout()

        # Render the figure into an in-memory JPEG and display it in the notebook.
        with BytesIO() as buffer:
            fig.savefig(buffer, format='jpeg')
            IPython.display.display(IPython.display.Image(data=buffer.getvalue()))
    finally:
        # This function runs once per frame; always free the figure, even if rendering fails.
        plt.close(fig)

    segmentated_image = cv2.addWeighted(np.uint8(image), 0.3, np.uint8(seg_image), 0.7, 0)
    return segmentated_image

In [13]:
def run_video_vis(frame, index):
    """
    Segment one video frame, display it, and return the blended frame.

    Parameters:
        frame: image array whose last axis holds the color channels. The channel order is
            reversed before segmentation, and the result is converted back with
            `cv2.COLOR_RGB2BGR`, so a BGR frame is expected.
        index: frame number, forwarded to `video_segmentation_vis` for the figure title.

    Return:
        The blended frame produced by `video_segmentation_vis`, converted from RGB to BGR.
    """
    # Reverse the channel axis before handing the frame to PIL.
    rgb_pixels = frame[..., ::-1]
    pil_frame = Image.fromarray(rgb_pixels)

    # `model` is the module-level model created earlier in the notebook.
    class_map = model.run(pil_frame)
    blended_rgb = video_segmentation_vis(pil_frame, class_map, index)

    return cv2.cvtColor(blended_rgb, cv2.COLOR_RGB2BGR)

In [14]:
def video_config(width, height, filename='result.avi', fps=24, codec='XVID'):
    """
    Create the writer used to save the processed frames to a video file.

    Parameters:
        width: frame width in pixels.
        height: frame height in pixels.
        filename: path of the output video file.
        fps: frame rate of the output video (default 24, the previously hard-coded value).
        codec: four-character codec code (default 'XVID', the previously hard-coded value).

    Return:
        A `cv2.VideoWriter` for `filename`. The caller is responsible for calling `release()`.
    """
    fourcc = cv2.VideoWriter_fourcc(*codec)
    # Cast the frame size to plain integers so that float or NumPy scalar sizes are accepted too.
    frame_size = (int(width), int(height))
    return cv2.VideoWriter(filename, fourcc, fps, frame_size)

In [15]:
video_test = 'mit_driveseg_sample.mp4'

cap = cv2.VideoCapture(video_test)
num_frames = 598    # upper bound on the number of frames read from the video
initial_frame = 0   # frames with a smaller index are read but not processed
current_frame = 0   # number of frames processed and written so far

# Read the first frame up front: its shape gives the frame size for the writer.
connected, video = cap.read()
if not connected:
    cap.release()
    raise RuntimeError(f'Could not read any frame from {video_test!r}')
height, width = video.shape[:2]
video_output = video_config(width, height)

try:
    for i in range(num_frames):
        if i == 0:
            # Reuse the frame that was read above instead of silently dropping it.
            frame = video
        else:
            connected, frame = cap.read()
            if not connected:
                break

        if i < initial_frame:
            continue

        processed_frame = run_video_vis(frame, i)
        # Replace the previously displayed frame instead of stacking outputs.
        IPython.display.clear_output(wait=True)
        current_frame += 1
        video_output.write(processed_frame)

except KeyboardInterrupt:
    plt.close()
    print("Stream stopped.")

finally:
    # Release the reader and finalize the output file, even when processing fails.
    cap.release()
    video_output.release()

print("The end!")

The end!
