Preparing dataset

In [19]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

pathname = 'Dataset-small/data/WildScenes/WildScenes2d/V-01/'
sample_size = 5

X_all = []
y_all = []

def one_hot_encode(masks, num_classes):
    masks_encoded = np.zeros((*masks.shape, num_classes), dtype=np.float32)
    for i in range(num_classes):
        masks_encoded[..., i] = (masks == i).astype(np.float32)
    return masks_encoded

for img_name in os.listdir(f'{pathname}image')[:sample_size]:
    if ':' in img_name:
        continue
    img = np.asarray(cv2.imread(f'{pathname}/image/{img_name}')) / 255.0
    mask = np.asarray(cv2.imread(f'{pathname}/indexLabel/{img_name}'))
    mask = mask[:,:,0]
    X_all.append(img)
    y_all.append(mask)

X_all = np.asarray(X_all)
y_all = one_hot_encode(np.asarray(y_all), 15)
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

Creating and compiling the model

In [31]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, concatenate, Input, Cropping2D, ZeroPadding2D
from tensorflow.keras.models import Model

def unet_model(input_size=(1512, 2016, 3), num_classes=15):
    inputs = Input(input_size)

    # Encoding
    c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(c1)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
    c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(c2)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = Conv2D(256, (3, 3), activation='relu', padding='same')(p2)
    c3 = Conv2D(256, (3, 3), activation='relu', padding='same')(c3)
    p3 = MaxPooling2D((2, 2))(c3)

    c4 = Conv2D(512, (3, 3), activation='relu', padding='same')(p3)
    c4 = Conv2D(512, (3, 3), activation='relu', padding='same')(c4)

    # Decoding
    u5 = Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(c4)
    u5 = concatenate([u5, c3])
    c5 = Conv2D(256, (3, 3), activation='relu', padding='same')(u5)
    c5 = Conv2D(256, (3, 3), activation='relu', padding='same')(c5)

    u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = concatenate([u6, c2])
    c6 = Conv2D(128, (3, 3), activation='relu', padding='same')(u6)
    c6 = Conv2D(128, (3, 3), activation='relu', padding='same')(c6)

    u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = concatenate([u7, c1])
    c7 = Conv2D(64, (3, 3), activation='relu', padding='same')(u7)
    c7 = Conv2D(64, (3, 3), activation='relu', padding='same')(c7)

    outputs = Conv2D(num_classes, (1, 1), activation='softmax')(c7)

    model = Model(inputs=[inputs], outputs=[outputs])
    return model

# Create and compile the model
model = unet_model(input_size=(1512, 2016, 3), num_classes=15)

from tensorflow.keras.metrics import MeanIoU

model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=[MeanIoU(num_classes=15)])

Training the model

In [33]:
print(y_train.shape)

model.fit(X_train, y_train, 
          validation_data=(X_val, y_val),
          batch_size=8, 
          epochs=20)

(1, 1512, 2016, 15)
Epoch 1/20
