In [None]:
import os
import random
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


# deeplabv3 - tensorflow

In [None]:
# Configuration for the TensorFlow DeepLabV3 pipeline.
IMAGE_SIZE = 512  # side length that read_image resizes images and masks to
BATCH_SIZE = 4  # batch size applied in data_generator
NUM_CLASSES = 20  # class count handed to decode_segmentation_mask
DATA_DIR = 'data'  # not referenced by the code visible in this file
NUM_TRAIN_IMAGES = 1000  # not referenced by the code visible in this file
NUM_VAL_IMAGES = 50  # not referenced by the code visible in this file

def read_image(image_path, mask=False):
    """Load a PNG file and resize it to IMAGE_SIZE x IMAGE_SIZE.

    Args:
        image_path: Path of the PNG file to read.
        mask: When True the file is decoded as a single-channel label mask;
            otherwise it is decoded as a 3-channel image.

    Returns:
        A float32 tensor. For masks it has one channel and holds the
        original label values; for images it has three channels and is
        rescaled with ``pixel / 127.5 - 1``.
    """
    image = tf.io.read_file(image_path)
    if mask:
        image = tf.image.decode_png(image, channels=1)
        image.set_shape([None, None, 1])
        # Nearest-neighbour keeps every pixel a valid class id. The default
        # bilinear mode would blend neighbouring labels into fractional
        # values along class boundaries.
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE],
                                method='nearest')
        # Nearest-neighbour resizing preserves the decoded integer dtype;
        # cast so callers keep receiving a float32 mask.
        image = tf.cast(image, tf.float32)
    else:
        image = tf.image.decode_png(image, channels=3)
        image.set_shape([None, None, 3])
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
        image = image / 127.5 - 1
    return image

def load_data(image_list, mask_list):
    """Read one image and its matching mask.

    Despite the parameter names, each argument is forwarded to a single
    ``read_image`` call, so each is one path rather than a list.

    Returns:
        An ``(image, mask)`` tuple as produced by ``read_image``.
    """
    return read_image(image_list), read_image(mask_list, mask=True)

def data_generator(image_list, mask_list):
    """Build a batched ``tf.data`` pipeline of (image, mask) pairs.

    Args:
        image_list: Image file paths.
        mask_list: Mask file paths, aligned element-wise with ``image_list``.

    Returns:
        A dataset yielding batches of BATCH_SIZE decoded pairs; a trailing
        partial batch is dropped.
    """
    path_pairs = tf.data.Dataset.from_tensor_slices((image_list, mask_list))
    decoded_pairs = path_pairs.map(load_data,
                                   num_parallel_calls=tf.data.AUTOTUNE)
    return decoded_pairs.batch(BATCH_SIZE, drop_remainder=True)

# train_dataset = data_generator(train_images, train_masks)
# val_dataset = data_generator(val_images, val_masks)
# print('Train Dataset:', train_dataset)
# print('Validation Dataset:', val_dataset)

def convolution_block(block_input, num_filters=256, kernel_size=3,
                      dilation_rate=1, padding='same', use_bias=False):
    """Apply Conv2D -> BatchNormalization -> ReLU to ``block_input``.

    Args:
        block_input: Input tensor of the block.
        num_filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        dilation_rate: Dilation rate of the convolution.
        padding: Padding mode forwarded to ``Conv2D``.
        use_bias: Whether the convolution has a bias term.

    Returns:
        The activated output tensor.
    """
    x = layers.Conv2D(
        num_filters,
        kernel_size,
        dilation_rate=dilation_rate,
        # Honour the caller's choice instead of always forcing 'same'.
        padding=padding,
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )(block_input)
    x = layers.BatchNormalization()(x)
    # A layer (rather than the raw tf.nn op) keeps the activation part of the
    # Keras graph regardless of how symbolic tensors are implemented.
    return layers.ReLU()(x)

def DilatedSpatialPyramidPooling(dspp_input):
    """Fuse a pooled branch with four parallel convolution branches.

    The pooled branch average-pools over ``dims[-3] x dims[-2]`` (the full
    spatial extent for a channels-last input), applies a 1x1 convolution
    block and is upsampled back. The other branches are a 1x1 block and 3x3
    blocks with dilation rates 6, 12 and 18. All five are concatenated and
    projected by a final 1x1 block.

    Args:
        dspp_input: Feature map with statically known spatial dimensions.

    Returns:
        The fused feature map.
    """
    dims = dspp_input.shape
    x = layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
    x = convolution_block(x, kernel_size=1, use_bias=True)
    # Derive both upsampling factors the same way so the pooled branch is
    # restored to the input's spatial size along height and width alike.
    out_pool = layers.UpSampling2D(
        size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]),
        interpolation='bilinear',
    )(x)

    out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
    out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6)
    out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12)
    out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18)

    x = layers.Concatenate()([out_pool, out_1, out_6, out_12, out_18])
    output = convolution_block(x, kernel_size=1)
    return output

def Deeplabv3(image_size, num_classes):
    """Build a DeepLabV3+-style segmentation model on a ResNet50 backbone.

    Args:
        image_size: Side length of the square, 3-channel model input.
        num_classes: Number of channels of the final 1x1 convolution.

    Returns:
        A ``keras.Model`` whose output is the un-activated result of the
        final convolution (per-pixel class scores).
    """
    model_input = keras.Input(shape=(image_size, image_size, 3))
    resnet50 = keras.applications.ResNet50(
        weights='imagenet', include_top=False, input_tensor=model_input)

    # High-level features, fused by the dilated pyramid and upsampled to a
    # quarter of the input resolution.
    x = resnet50.get_layer('conv4_block6_2_relu').output
    x = DilatedSpatialPyramidPooling(x)
    input_a = layers.UpSampling2D(size=(image_size // 4 // x.shape[1],
                                        image_size // 4 // x.shape[2]),
                                  interpolation='bilinear')(x)
    # Low-level features. ResNet50 activations inside a block are named
    # '<block>_1_relu' / '<block>_2_relu'; there is no 'conv2_block3_relu',
    # so that lookup raises before the model can be built.
    input_b = resnet50.get_layer('conv2_block3_2_relu').output
    input_b = convolution_block(input_b, num_filters=48, kernel_size=1)

    x = layers.Concatenate()([input_a, input_b])
    x = convolution_block(x)
    x = convolution_block(x)
    x = layers.UpSampling2D(size=(image_size // x.shape[1],
                                  image_size // x.shape[2]),
                            interpolation='bilinear')(x)
    model_output = layers.Conv2D(num_classes, kernel_size=1, padding='same')(x)
    return keras.Model(inputs=model_input, outputs=model_output)

# model = Deeplabv3(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
# model.summary()

# Disabled training / plotting script, kept inside a string literal so that
# it does not execute when the cell runs.
'''
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, validation_data=val_dataset, epochs=10)

loss = history.history['loss']
val_loss = history.history['val_loss']

fig = plt.figure(figsize=(10, 5))
ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(loss, color='blue', label='train_loss')
ax1.plot(val_loss, color='red', label='val_loss')
ax1.set_title('Train and Validation Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.grid()
ax1.legend()

accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(accuracy, color='blue', label='train_accuracy')
ax2.plot(val_accuracy, color='red', label='val_accuracy')
ax2.set_title('Train and Validation Accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.grid()
ax2.legend()

colormap = loadmat()['colormap']
colormap = colormap * 100
colormap = colormap.astype(np.uint8)
'''

def infer(model, image_tensor):
    """Predict a per-pixel class map for a single image.

    Args:
        model: Object exposing ``predict(batch)``.
        image_tensor: One image without a batch axis; a leading batch axis
            of size 1 is added before calling ``predict``.

    Returns:
        Array of argmax indices taken over the last axis of the prediction,
        with the batch axis removed.
    """
    batch = np.expand_dims(image_tensor, axis=0)
    predictions = model.predict(batch)
    # Remove only the batch axis. A bare squeeze would also drop any other
    # size-1 axis (single class, height or width of 1) and break the argmax.
    predictions = np.squeeze(predictions, axis=0)
    return np.argmax(predictions, axis=-1)

def decode_segmentation_mask(mask, colormap, n_classes):
    """Turn a class-index mask into a uint8 colour image.

    Args:
        mask: Array of class indices.
        colormap: Array indexed as ``colormap[class_id, channel]`` for
            channels 0, 1 and 2.
        n_classes: Classes ``0 .. n_classes - 1`` are coloured; pixels with
            any other value stay zero.

    Returns:
        The three colour planes stacked along axis 2.
    """
    planes = [np.zeros_like(mask, dtype=np.uint8) for _ in range(3)]
    for class_id in range(n_classes):
        selected = mask == class_id
        for channel, plane in enumerate(planes):
            plane[selected] = colormap[class_id, channel]
    return np.stack(planes, axis=2)

def get_overlay(image, colored_mask):
    """Blend an image with its coloured mask.

    The image is converted through ``array_to_img`` into a uint8 array and
    mixed with ``colored_mask`` using weights 0.35 (image) and 0.65 (mask).

    Returns:
        The blended array produced by ``cv2.addWeighted``.
    """
    pil_image = tf.keras.preprocessing.image.array_to_img(image)
    base = np.array(pil_image).astype(np.uint8)
    return cv2.addWeighted(base, 0.35, colored_mask, 0.65, 0)

def plot_samples_matplotlib(display_list, figsize=(5, 3)):
    """Show the given arrays side by side in a single row.

    Args:
        display_list: Arrays to display. Items whose last dimension is 3 are
            converted with ``array_to_img`` first; others are shown as-is.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    if not display_list:
        # Nothing to draw; a zero-column grid cannot be created.
        return
    # squeeze=False always yields a 2-D axes array, so a single-item list
    # works the same way as a longer one.
    _, axes = plt.subplots(nrows=1, ncols=len(display_list), figsize=figsize,
                           squeeze=False)
    for axis, item in zip(axes[0], display_list):
        if item.shape[-1] == 3:
            axis.imshow(tf.keras.preprocessing.image.array_to_img(item))
        else:
            axis.imshow(item)
    plt.show()

def plot_predictions(image_list, colormap, model):
    """Plot image, overlay and coloured prediction for each file.

    Args:
        image_list: Paths of the images to run through the model.
        colormap: Colour table passed to ``decode_segmentation_mask``.
        model: Model passed to ``infer``.
    """
    for path in image_list:
        tensor = read_image(path)
        class_map = infer(image_tensor=tensor, model=model)
        colored = decode_segmentation_mask(class_map, colormap, NUM_CLASSES)
        blended = get_overlay(tensor, colored)
        plot_samples_matplotlib([tensor, blended, colored], figsize=(15, 5))

# plot_predictions(image_list=val_images[:4], colormap=colormap, model=model)

# deeplabv3 - pytorch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Configuration for the PyTorch DeepLabV3 pipeline.
IMAGE_SIZE = 512  # target size of the Resize transform; also passed to Deeplabv3
BATCH_SIZE = 4  # batch size of the train and validation DataLoaders
NUM_CLASSES = 20  # number of classes passed to Deeplabv3
DATA_DIR = 'data'  # not referenced by the code visible in this file
NUM_TRAIN_IMAGES = 1000  # not referenced by the code visible in this file
NUM_VAL_IMAGES = 50  # not referenced by the code visible in this file

class CustomDataset(Dataset):
    """Dataset of (image, mask) pairs read from parallel lists of paths.

    Args:
        image_list: Sequence of image file paths.
        mask_list: Sequence of mask file paths, aligned with ``image_list``.
        transform: Optional callable applied to each image.
        mask_transform: Optional callable applied to each mask. When omitted
            the mask goes through ``transform`` as well. Pass a dedicated
            callable when ``transform`` interpolates or rescales values,
            since that would alter the label ids stored in the mask.
    """

    def __init__(self, image_list, mask_list, transform=None,
                 mask_transform=None):
        self.image_list = image_list
        self.mask_list = mask_list
        self.transform = transform
        self.mask_transform = (
            transform if mask_transform is None else mask_transform
        )

    def __len__(self):
        """Return the number of images."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Read and optionally transform the pair at position ``idx``."""
        image = read_image(self.image_list[idx])
        # torchvision.io.read_image has no ``mask`` keyword; passing one
        # raises TypeError, so the mask is read like any other image file.
        mask = read_image(self.mask_list[idx])
        if self.transform:
            image = self.transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)
        return image, mask


def convolution_block(block_input, num_filters=256, kernel_size=3,
                      dilation_rate=1, padding=None, use_bias=False):
    """Apply Conv2d -> BatchNorm2d -> ReLU to ``block_input``.

    NOTE: the convolution and batch-norm layers are constructed inside this
    function on every call, so their weights are freshly initialised each
    time and are not registered on any ``nn.Module``.

    Args:
        block_input: Tensor whose dimension 1 is the channel axis.
        num_filters: Number of output channels.
        kernel_size: Integer kernel size (assumed odd when ``padding`` is
            left as ``None``).
        dilation_rate: Integer dilation of the convolution.
        padding: Explicit padding. ``None`` selects
            ``dilation_rate * (kernel_size - 1) // 2``, which keeps the
            spatial size unchanged for odd kernels; for the default 3x3,
            dilation-1 case this equals 1.
        use_bias: Whether the convolution has a bias term.

    Returns:
        The activated output tensor.
    """
    if padding is None:
        # A fixed padding of 1 grows 1x1 outputs and shrinks dilated 3x3
        # outputs, so branches built with different settings could not be
        # concatenated afterwards.
        padding = dilation_rate * (kernel_size - 1) // 2
    conv_layer = nn.Conv2d(block_input.shape[1], num_filters, kernel_size,
                           dilation=dilation_rate, padding=padding,
                           bias=use_bias)
    x = conv_layer(block_input)
    x = nn.BatchNorm2d(num_filters)(x)
    return F.relu(x)


def DilatedSpatialPyramidPooling(dspp_input):
    """Fuse a pooled branch with four parallel convolution branches.

    The pooled branch averages over the last two dimensions, applies a 1x1
    convolution block and is interpolated back to the input's spatial size.
    The other branches are a 1x1 block and 3x3 blocks with dilation rates 6,
    12 and 18. All five are concatenated along dimension 1 and projected by
    a final 1x1 block.

    Args:
        dspp_input: Feature map whose last two dimensions are spatial.

    Returns:
        The fused feature map, with the same spatial size as the input.
    """
    height, width = dspp_input.shape[-2:]
    pooled = F.avg_pool2d(dspp_input, kernel_size=(height, width))
    pooled = convolution_block(pooled, kernel_size=1, padding=0, use_bias=True)
    out_pool = F.interpolate(pooled, size=(height, width), mode='bilinear',
                             align_corners=False)

    # Padding is given per branch so every output keeps the input's spatial
    # size: 0 for 1x1 kernels, and the dilation rate itself for 3x3 kernels.
    out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1,
                              padding=0)
    out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6,
                              padding=6)
    out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12,
                               padding=12)
    out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18,
                               padding=18)

    x = torch.cat([out_pool, out_1, out_6, out_12, out_18], dim=1)
    output = convolution_block(x, kernel_size=1, padding=0)
    return output


def _conv_bn_relu(in_channels, out_channels, kernel_size=3, dilation=1,
                  bias=False):
    """Build a Conv2d -> BatchNorm2d -> ReLU stack that keeps spatial size."""
    padding = dilation * (kernel_size - 1) // 2
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding,
                  dilation=dilation, bias=bias),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )


class _DilatedSpatialPyramidPoolingModule(nn.Module):
    """Dilated spatial pyramid pooling with registered, trainable layers."""

    def __init__(self, in_channels, out_channels=256,
                 dilation_rates=(6, 12, 18)):
        super().__init__()
        self.pool_branch = _conv_bn_relu(in_channels, out_channels,
                                         kernel_size=1, bias=True)
        branches = [_conv_bn_relu(in_channels, out_channels, kernel_size=1)]
        branches.extend(
            _conv_bn_relu(in_channels, out_channels, kernel_size=3,
                          dilation=rate)
            for rate in dilation_rates
        )
        self.branches = nn.ModuleList(branches)
        self.project = _conv_bn_relu(out_channels * (len(branches) + 1),
                                     out_channels, kernel_size=1)

    def forward(self, x):
        size = x.shape[-2:]
        pooled = self.pool_branch(F.adaptive_avg_pool2d(x, 1))
        pooled = F.interpolate(pooled, size=size, mode='bilinear',
                               align_corners=False)
        features = [pooled] + [branch(x) for branch in self.branches]
        return self.project(torch.cat(features, dim=1))


class Deeplabv3(nn.Module):
    """DeepLabV3+-style segmentation network on a ResNet50 backbone.

    All layers are created once in ``__init__`` and registered as
    submodules, so they are reused on every forward pass and exposed through
    ``parameters()``.

    NOTE: the pooled pyramid branch applies batch normalisation to a 1x1
    feature map, so training mode needs more than one sample per batch.

    Args:
        image_size: Nominal input side length. Stored for reference only;
            ``forward`` restores whatever spatial size its input has.
        num_classes: Number of output channels.
    """

    # Index of ``layer1`` inside ``self.resnet50`` (conv1, bn1, relu,
    # maxpool, layer1, ...); its 256-channel output is the low-level feature.
    _LOW_LEVEL_INDEX = 4

    def __init__(self, image_size, num_classes):
        super().__init__()
        self.image_size = image_size
        resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50',
                                  pretrained=True)
        # Drop the average-pool and fully connected head.
        self.resnet50 = nn.Sequential(*list(resnet50.children())[:-2])

        self.dspp = _DilatedSpatialPyramidPoolingModule(2048, 256)
        self.conv1x1_b = nn.Conv2d(256, 48, kernel_size=1)
        self.decoder = nn.Sequential(
            _conv_bn_relu(256 + 48, 256),
            _conv_bn_relu(256, 256),
        )
        self.conv_final = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores with the input's spatial size."""
        input_size = x.shape[-2:]

        # Walk the backbone once, keeping the layer1 activation on the way;
        # nn.Sequential children have no ``.output`` attribute to read later.
        low_level = None
        for index, layer in enumerate(self.resnet50):
            x = layer(x)
            if index == self._LOW_LEVEL_INDEX:
                low_level = x

        x = self.dspp(x)
        # Resize by target size rather than a fixed factor so both feature
        # maps always line up for concatenation.
        input_a = F.interpolate(x, size=low_level.shape[-2:], mode='bilinear',
                                align_corners=False)
        input_b = self.conv1x1_b(low_level)

        x = torch.cat([input_a, input_b], dim=1)
        x = self.decoder(x)
        x = F.interpolate(x, size=input_size, mode='bilinear',
                          align_corners=False)
        return self.conv_final(x)

# Preprocessing for tensors returned by torchvision.io.read_image: resize,
# then convert the integer pixels to float32 scaled to [0, 1].
# ToTensor only accepts PIL images or ndarrays and raises on a torch.Tensor.
# NOTE(review): CustomDataset also sends masks through its ``transform``;
# interpolation and rescaling will alter the label ids stored in a mask.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ConvertImageDtype(torch.float32),
])

# Training data: reshuffled by the loader.
# NOTE(review): train_images / train_masks / val_images / val_masks are not
# defined anywhere in the visible code - confirm they are created earlier.
train_dataset = CustomDataset(train_images, train_masks, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Validation data: loaded without the shuffle flag.
val_dataset = CustomDataset(val_images, val_masks, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Instantiating the model calls torch.hub.load for the ResNet50 backbone.
model = Deeplabv3(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)

# Define training loop, optimizer, loss function, etc.

def infer(model, image_tensor):
    """Run ``model`` on a batch and return the argmax class indices.

    The model is switched to evaluation mode as a side effect and is left in
    that mode afterwards.

    Args:
        model: Module called as ``model(image_tensor)``.
        image_tensor: Input passed to the model unchanged (no batch axis is
            added here).

    Returns:
        NumPy array of argmax indices taken over dimension 1 of the output.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(image_tensor)
        predictions = torch.argmax(predictions, dim=1)
    # Move to host memory first; .numpy() raises for tensors that live on
    # another device.
    return predictions.cpu().numpy()

# Remaining code for plotting, visualization, and training loop goes here.
