### Importing libraries

In [None]:
import os
import numpy as np
from PIL import Image

In [None]:
# Dataset locations, relative to the notebook's working directory:
# the input .jpg images and the per-image .npy masks loaded further below.
image_dir = 'Pascal-part/JPEGImages'
mask_dir = 'Pascal-part/gt_masks'

### Data Preprocessing

In [None]:
def dataset_freq(image_dir, mode='train'):
    """Count how often each image size occurs in one split of the dataset.

    The image ids are read from ``Pascal-part/<mode>_id.txt`` (one id per
    line) and each ``<id>.jpg`` is looked up inside ``image_dir``.

    Parameters
    ----------
    image_dir : str
        Directory that contains the ``.jpg`` images.
    mode : str
        Split name used to build the id-list filename (default ``'train'``).

    Returns
    -------
    dict
        Maps each PIL ``size`` tuple, i.e. ``(width, height)``, to the number
        of images with that size, ordered from most to least frequent.
    """
    ids_path = 'Pascal-part/' + mode + '_id.txt'
    counts = {}
    with open(ids_path, 'r') as ids_file:
        for line in ids_file:
            jpg_path = os.path.join(image_dir, line.strip() + '.jpg')
            with Image.open(jpg_path) as picture:
                dims = picture.size
            counts[dims] = counts.get(dims, 0) + 1
    # Most frequent sizes first; ties keep their first-seen order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)

# dataset_freq returns a dict, which cannot be sliced; show the ten most
# frequent (size, count) pairs by slicing a list of its items instead.
list(dataset_freq(image_dir).items())[:10]

All images must have the same size before we can start training the model.

In [None]:
def dataset_max_image_size(image_dir, mode='train'):
    """Find the largest image height and the largest image width in one split.

    The two maxima are tracked independently, so they may come from
    different images.

    Parameters
    ----------
    image_dir : str
        Directory that contains the ``.jpg`` images.
    mode : str
        Split name; ids are read from ``Pascal-part/<mode>_id.txt``.

    Returns
    -------
    tuple of int
        ``(max_height, max_width)``; ``(0, 0)`` when the id list is empty.
    """
    tallest = widest = 0
    ids_path = 'Pascal-part/' + mode + '_id.txt'
    with open(ids_path, 'r') as ids_file:
        for line in ids_file:
            jpg_path = os.path.join(image_dir, line.strip() + '.jpg')
            with Image.open(jpg_path) as picture:
                tallest = max(tallest, picture.height)
                widest = max(widest, picture.width)
    return tallest, widest

# Largest (height, width) over the images of the default 'train' split.
dataset_max_image_size(image_dir)

(500, 500)

I will zero-pad every image to (500, 500, 3) and every mask to (500, 500). The function below pads a NumPy array up to a target size, keeping the original content centred:

In [None]:
def padding(arr, target_size=(500, 500, 3)):
    """Zero-pad ``arr`` so that its first two axes match ``target_size``.

    The original content is kept centred; when the amount of padding is odd,
    the extra row/column goes to the bottom/right.

    Parameters
    ----------
    arr : np.ndarray
        Array with at least two axes (rows, columns). Any further axes, such
        as colour channels, are never padded.
    target_size : sequence of int
        Desired size. Only the first two entries (rows, columns) are used.

    Returns
    -------
    np.ndarray
        A new array of shape ``(target_size[0], target_size[1], *arr.shape[2:])``.

    Raises
    ------
    ValueError
        If ``arr`` is larger than the target along rows or columns.
    """
    extra_rows = target_size[0] - arr.shape[0]
    extra_cols = target_size[1] - arr.shape[1]
    if extra_rows < 0 or extra_cols < 0:
        raise ValueError(
            "array of shape %s does not fit into target size %s"
            % (arr.shape, tuple(target_size))
        )
    pad_width = [
        (extra_rows // 2, extra_rows - extra_rows // 2),
        (extra_cols // 2, extra_cols - extra_cols // 2),
    ]
    # Add one no-op entry per trailing axis of the array itself, so that
    # np.pad always receives exactly one (before, after) pair per axis,
    # whatever the length of target_size.
    pad_width += [(0, 0)] * (arr.ndim - 2)
    return np.pad(arr, pad_width, 'constant')

Now we can pad each JPEG image and its corresponding mask:

In [None]:
def dataset_padding(image_dir, mask_dir, mode='train', target_size=None):
    """Load one split and zero-pad every image and mask to a common size.

    Ids are read from ``Pascal-part/<mode>_id.txt``. For each id the image
    ``<image_dir>/<id>.jpg`` and the mask ``<mask_dir>/<id>.npy`` are loaded
    and padded with ``padding``.

    Parameters
    ----------
    image_dir : str
        Directory that contains the ``.jpg`` images.
    mask_dir : str
        Directory that contains the ``.npy`` masks.
    mode : str
        Split name used to build the id-list filename (default ``'train'``).
    target_size : sequence of int, optional
        ``(height, width)`` to pad to. When omitted, the largest height and
        width found in this split are used. Pass the same value for every
        split to guarantee identical array shapes across splits.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``X`` of shape ``(n, h, w, 3)`` with the padded images and ``y`` with
        the padded masks, in the order of the id list.
    """
    if target_size is None:
        h, w = dataset_max_image_size(image_dir, mode)
    else:
        h, w = target_size[0], target_size[1]

    dataset = 'Pascal-part/' + mode + '_id.txt'
    X, y = [], []
    with open(dataset, 'r') as file:
        for filename in file:
            sample_id = filename.strip()
            image_path = os.path.join(image_dir, sample_id + '.jpg')
            mask_path = os.path.join(mask_dir, sample_id + '.npy')
            with Image.open(image_path) as image:
                # Force three channels so that grayscale or CMYK files still
                # fit the (h, w, 3) target and stack with the other images.
                img = np.array(image.convert('RGB'))
            X.append(padding(img, (h, w, 3)))
            # Masks are padded without any further conversion.
            y.append(padding(np.load(mask_path), (h, w)))

    return np.array(X), np.array(y)


In [None]:
X_train, y_train = dataset_padding(image_dir, mask_dir, mode='train')
X_val, y_val = dataset_padding(image_dir, mask_dir, mode='val')

# Each split is padded to its own maximum size, so check explicitly that the
# two splits ended up with the same per-sample shape before training on them.
assert X_train.shape[1:] == X_val.shape[1:], (X_train.shape, X_val.shape)
assert y_train.shape[1:] == y_val.shape[1:], (y_train.shape, y_val.shape)
# Every image must have exactly one mask.
assert len(X_train) == len(y_train) and len(X_val) == len(y_val)

Now all images and masks have the same size, so we can train a model.

### Building a Model

I will use Keras here, although PyTorch or plain TensorFlow would work just as well for building and training the model.

Importing libraries

In [None]:
# TF_CPP_MIN_LOG_LEVEL only silences TensorFlow's native logging reliably when
# it is set before TensorFlow is loaded, i.e. before the first keras import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from keras import Input, Model
from keras.layers import *
from keras import backend as be
from keras.optimizers import *
# NOTE: keras.losses and keras.metrics both export classes with identical
# names (e.g. CategoricalCrossentropy); with these wildcard imports the later
# module (metrics) wins, so import a loss class explicitly where one is needed.
from keras.losses import *
from keras.metrics import *

Implement Unet architecture:

In [None]:
def MyUnet(img_size=(500, 500, 3), num_classes=7):
    """Build a U-Net-like encoder/decoder with residual connections.

    The encoder halves the spatial resolution four times (a strided entry
    convolution followed by three separable-convolution blocks) and the
    decoder doubles it four times with transposed-convolution blocks. A final
    softmax convolution produces per-pixel class probabilities.

    Parameters
    ----------
    img_size : tuple
        Input shape ``(height, width, channels)``.
    num_classes : int
        Number of channels of the softmax output.

    Returns
    -------
    keras.Model
        Model whose output has shape ``(batch, height, width, num_classes)``,
        i.e. the same height and width as the input.
    """
    inputs = Input(shape=img_size)

    ### [First half of the network: downsampling inputs] ###

    # Entry block
    x = Conv2D(32, 3, strides=2, padding="same", name="entry_conv2d")(inputs)
    x = BatchNormalization(name="entry_bn")(x)
    x = Activation("relu", name="entry_act")(x)

    previous_block_activation = x  # Set aside residual

    # Blocks 1, 2, 3 are identical apart from the feature depth.
    for filters in [64, 128, 256]:
        x = Activation("relu", name=str(filters) + "_act_1")(x)
        x = SeparableConv2D(filters, 3, padding="same", name=str(filters) + "_sep_1")(x)
        x = BatchNormalization(name=str(filters) + "_bn_1")(x)

        x = Activation("relu", name=str(filters) + "_act_2")(x)
        x = SeparableConv2D(filters, 3, padding="same", name=str(filters) + "_sep_2")(x)
        x = BatchNormalization(name=str(filters) + "_bn_2")(x)

        x = MaxPooling2D(3, strides=2, padding="same", name=str(filters) + "_pool")(x)

        # Project residual
        residual = Conv2D(filters, 1, strides=2, padding="same", name=str(filters) + "_conv2d")(
            previous_block_activation
        )
        x = add([x, residual], name=str(filters) + "_add")  # Add back residual
        previous_block_activation = x  # Set aside next residual

    ### [Second half of the network: upsampling inputs] ###

    for filters in [256, 128, 64, 32]:
        x = Activation("relu", name=str(filters) + "_act_3")(x)
        x = Conv2DTranspose(filters, 3, padding="same", name=str(filters) + "_trans_1")(x)
        x = BatchNormalization(name=str(filters) + "_bn_3")(x)

        x = Activation("relu", name=str(filters) + "_act_4")(x)
        x = Conv2DTranspose(filters, 3, padding="same", name=str(filters) + "_trans_2")(x)
        x = BatchNormalization(name=str(filters) + "_bn_4")(x)

        x = UpSampling2D(2, name=str(filters) + "_up")(x)
        # Project residual
        residual = UpSampling2D(2, name=str(filters) + "_up_res")(previous_block_activation)
        residual = Conv2D(filters, 1, padding="same", name=str(filters) + "_conv2d_res")(residual)
        x = add([x, residual], name=str(filters) + "_add_res")  # Add back residual
        previous_block_activation = x  # Set aside next residual

    # Add a per-pixel classification layer
    outputs = Conv2D(num_classes, 3, activation="softmax", padding="same", name="second_conv2d")(x)

    # "same" padding rounds odd sizes up on the way down (500 -> 250 -> 125 ->
    # 63 -> 32), so after four 2x upsamplings the map is the input size rounded
    # up to a multiple of 16 (512 for 500). Trim the surplus rows/columns from
    # the bottom/right so the prediction has the same height and width as the
    # input and its mask. Dimensions that are not statically known are left
    # untouched.
    in_h, in_w = img_size[0], img_size[1]
    out_h, out_w = outputs.shape[1], outputs.shape[2]
    if None not in (in_h, in_w, out_h, out_w):
        surplus_h = max(out_h - in_h, 0)
        surplus_w = max(out_w - in_w, 0)
        if surplus_h or surplus_w:
            outputs = Cropping2D(
                cropping=((0, surplus_h), (0, surplus_w)), name="output_crop"
            )(outputs)

    # Define the model
    model = Model(inputs=inputs, outputs=outputs)
    return model


### Training a Model

Now we can train our model:

In [None]:
# keras.metrics (star-imported after keras.losses) also exports a class named
# SparseCategoricalCrossentropy, so bind the *loss* class explicitly here.
from keras.losses import SparseCategoricalCrossentropy

NUM_CLASSES = 7

# Reset Keras' global state before the model is built rather than after it,
# so the freshly created model is not affected by the reset.
be.clear_session()
model = MyUnet(num_classes=NUM_CLASSES)
# model.summary()

model.compile(
    optimizer=Adam(learning_rate=1e-4),
    # y_train / y_val hold one 2-D label mask per image (not one-hot tensors;
    # class ids assumed to lie in [0, NUM_CLASSES)), and the network already
    # ends in a softmax, so use the sparse loss on probabilities.
    loss=SparseCategoricalCrossentropy(from_logits=False),
    # sparse_y_pred=False makes the metric take the argmax of the predicted
    # class probabilities before comparing them with the integer labels.
    metrics=[MeanIoU(num_classes=NUM_CLASSES, sparse_y_true=True, sparse_y_pred=False)],
)
print('compiled')

history = model.fit(X_train, y_train, epochs=7, batch_size=1, validation_data=(X_val, y_val))