In [11]:
# import libraries
import os
import glob
import cv2
import numpy as np
from osgeo import gdal

# Number of segmentation classes the network has to distinguish.
n_classes = 3

# Root folders that are scanned for the image tiles and for their label masks.
image_dir = "C:\\vision_impulse\\unet\\dlt_32N_07\\images\\"
label_dir = "C:\\vision_impulse\\unet\\dlt_32N_07\\labels\\"

# Accumulators filled while loading: the pixel arrays and the matching file paths.
train_images, train_images_list = [], []

mode = "RGB" # multispectral or RGB as mode
# defining start and end channel numbers to stack
def image_mode(RasterCount, mode = mode):
    """Describe which raster bands to read for the requested mode.

    Parameters
    ----------
    RasterCount : int
        Number of bands available in the raster.
    mode : str
        "multispectral" selects bands 1..RasterCount in ascending order;
        "RGB" selects bands 4, 3, 2 (in that order, i.e. R, G, B).
        The default is the value the module-level ``mode`` had when this
        function was defined.

    Returns
    -------
    tuple
        ``(start_chnl, end_chnl, step, channels)`` where
        ``range(start_chnl, end_chnl, step)`` yields the band numbers to read
        and ``channels`` is how many bands that range contains.

    Raises
    ------
    ValueError
        If ``mode`` is neither "multispectral" nor "RGB" (previously this
        surfaced as an UnboundLocalError on the return statement).
    """
    if mode == "multispectral":
        # every band, first to last
        return 1, RasterCount + 1, 1, RasterCount
    if mode == "RGB":
        # bands 4, 3, 2 -> counting down from 4 and stopping before 1
        return 4, 1, -1, 3
    raise ValueError(
        "Unsupported mode %r: expected 'multispectral' or 'RGB'" % (mode,)
    )

# Walk image_dir recursively and load every *.tif into a (rows, cols, channels) array.
# NOTE: images and label masks are paired purely by position later on, so the
# files must be visited here in the same order as the masks are loaded.
for root, dirs, files in os.walk(image_dir):
    for img_path in glob.glob(os.path.join(root, "*.tif")):
        dataset = gdal.Open(img_path)
        if dataset is None:
            # gdal.Open returns None (instead of raising) unless exceptions are enabled
            raise IOError("GDAL could not open image: " + img_path)
        start_chnl, end_chnl, step, channels = image_mode(RasterCount = dataset.RasterCount)
        # ReadAsArray() yields (rows, cols) == (RasterYSize, RasterXSize); allocating
        # the buffer as (X, Y) only worked because the tiles happen to be square.
        stacked = np.zeros((dataset.RasterYSize, dataset.RasterXSize, channels), int)
        # Output channel i receives the i-th band of the range: 1,2,3,... in
        # multispectral mode and 4,3,2 (R,G,B) in RGB mode.
        for out_channel, band_no in enumerate(range(start_chnl, end_chnl, step)):
            stacked[..., out_channel] = dataset.GetRasterBand(band_no).ReadAsArray()
        dataset = None  # drop the reference so GDAL can close the file

        train_images.append(stacked)
        train_images_list.append(img_path)

#Convert list to array for training
train_images = np.array(train_images)
print(train_images.shape)

(5683, 64, 64, 3)


In [12]:
# Load every label mask found under label_dir as a single-channel array and
# remember the path it came from.
# NOTE: masks are matched to images purely by position later on, so the files
# must be visited here in the same order as the images were loaded.
train_masks = []
train_masks_list = []
for root, dirs, files in os.walk(label_dir):
    for mask_path in glob.glob(os.path.join(root, "*.tif")):
        mask = cv2.imread(mask_path, 0)  # flag 0 -> read as grayscale
        if mask is None:
            # cv2.imread signals failure by returning None rather than raising
            raise IOError("OpenCV could not read mask: " + mask_path)
        train_masks_list.append(mask_path)
        train_masks.append(mask)

# Stack the individual masks into one array and report its shape.
train_masks = np.array(train_masks)
print(train_masks.shape)

(5683, 64, 64)


In [13]:
# Encoding the levels of categorical features into numeric values such as 0,1,2.
from sklearn.preprocessing import LabelEncoder
from keras.utils import normalize
labelencoder = LabelEncoder()
n, h, w = train_masks.shape
train_masks_reshaped = train_masks.reshape(-1,1)
# LabelEncoder works on 1-D targets; handing it the column vector directly
# triggers a DataConversionWarning, so flatten it for the call.
train_masks_reshaped_encoded = labelencoder.fit_transform(train_masks_reshaped.ravel())
train_masks_encoded_original_shape = train_masks_reshaped_encoded.reshape(n, h, w)

# Show which class ids are present after encoding (the bare expression used
# here before was discarded because it was not the last line of the cell).
print("encoded mask classes:", np.unique(train_masks_encoded_original_shape))

# Only grayscale stacks of shape (n, h, w) need a channel axis. The loader in
# this notebook already produces (n, h, w, channels); adding another axis made
# the data 5-D and grew it again on every re-run of this cell.
if train_images.ndim == 3:
    train_images = np.expand_dims(train_images, axis=-1)
# NOTE(review): axis=1 normalises along the image-height axis rather than across
# channels - confirm this is intended. Left unchanged because the weights loaded
# later were presumably trained on data prepared this way.
train_images = normalize(train_images, axis=1)

# Add a trailing axis so the masks are (n, h, w, 1).
train_masks_input = np.expand_dims(train_masks_encoded_original_shape, axis=3)

In [14]:
# Hold out 10% of all samples as the test set, then take 5% of what remains
# as a small inference set; the rest is used for training.
from sklearn.model_selection import train_test_split
X1, X_test, y1, y_test = train_test_split(
    train_images,
    train_masks_input,
    test_size=0.10,
    random_state=0,
)
X_train, X_inference, y_train, y_inference = train_test_split(
    X1,
    y1,
    test_size=0.05,
    random_state=0,
)

# Report how many samples ended up in each subset.
for caption, subset in (
    ("total no. of train images:", X_train),
    ("total no. of test images:", y_test),
    ("total no. of inference images:", X_inference),
):
    print(caption, len(subset))

total no. of train images: 4858
total no. of test images: 569
total no. of inference images: 256


In [15]:
# One-hot encode the integer class masks so they can be used with the
# categorical_crossentropy loss.
from keras.utils import to_categorical

# Training masks: encode, then pin the result to (samples, height, width, n_classes).
train_masks_cat = to_categorical(y_train, num_classes=n_classes)
train_cat_shape = tuple(y_train.shape[:3]) + (n_classes,)
y_train_cat = train_masks_cat.reshape(train_cat_shape)

# Test masks: same treatment.
test_masks_cat = to_categorical(y_test, num_classes=n_classes)
test_cat_shape = tuple(y_test.shape[:3]) + (n_classes,)
y_test_cat = test_masks_cat.reshape(test_cat_shape)

In [None]:
import tensorflow as tf
import segmentation_models as sm
import keras
from keras.metrics import MeanIoU
from segmentation_models import Unet
from keras.layers import Input, Conv2D
from keras.models import Model

# using ResNet as model backbone with imagenet pretrained weights
Backbone = 'resnet34'
preprocess_input1 = sm.get_preprocessing(Backbone)

# Apply the backbone-specific preprocessing to the train and validation inputs.
X_train1 = preprocess_input1(X_train)
X_test1 = preprocess_input1(X_test)

N = X_train.shape[3] # no. of channels

if mode == "multispectral":
    base_model = Unet(backbone_name=Backbone, encoder_weights='imagenet', classes = n_classes, activation = 'softmax')

    # map N channels data to 3 channels
    inp = Input(shape=(None, None, N))
    l1 = Conv2D(3, (1, 1))(inp)
    out = base_model(l1)

    model = Model(inp, out, name=base_model.name)
elif mode == "RGB":
    model = Unet(backbone_name=Backbone, encoder_weights='imagenet', classes = n_classes, activation = 'softmax')
else:
    # without this branch an unknown mode left `model` undefined (or stale from an earlier run)
    raise ValueError("Unsupported mode %r: expected 'multispectral' or 'RGB'" % (mode,))

#start training with previously trained model weights.
model.load_weights('C:\\vision_impulse\\unet_sandstone\\test_pretrained_with_wt.hdf5')

# Keras documents `metrics` as a list of metrics, so wrap the single IoU metric.
model.compile('Adam', loss='categorical_crossentropy', metrics=[sm.metrics.IOUScore()])

model.summary()

# filepath to save training model checkpoints; one file per mode so an RGB run
# and a multispectral run do not overwrite each other and the saved file name
# reflects the data the weights were trained on.
checkpoint_filepaths = {
    "RGB": "C:\\vision_impulse\\unet_sandstone\\model_on_RGBdata_with_pretrained_weights.hdf5",
    "multispectral": "C:\\vision_impulse\\unet_sandstone\\model_on_12bandsdata_with_pretrained_weights.hdf5",
}
checkpoint_filepath = checkpoint_filepaths[mode]

# checkpoint callback to save model based on best validation IoU.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                            filepath=checkpoint_filepath,
                            save_weights_only=True,
                            monitor='val_iou_score',
                            mode='max',
                            save_best_only=True)

model.fit(X_train1,
           y_train_cat,
           batch_size = 8,
           epochs = 60,
           verbose = 1,
           validation_data = (X_test1, y_test_cat),
           callbacks=[model_checkpoint_callback])

Model: "model_11"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               [(None, None, None,  0                                            
__________________________________________________________________________________________________
bn_data (BatchNormalization)    (None, None, None, 3 9           data[0][0]                       
__________________________________________________________________________________________________
zero_padding2d_170 (ZeroPadding (None, None, None, 3 0           bn_data[0][0]                    
__________________________________________________________________________________________________
conv0 (Conv2D)                  (None, None, None, 6 9408        zero_padding2d_170[0][0]         
___________________________________________________________________________________________

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
 85/608 [===>..........................] - ETA: 10:13 - loss: 0.4229 - iou_score: 0.5443

In [None]:
from matplotlib import pyplot as plt
from keras.models import Model

#mode = "RGB"
# Rebuild the same architecture that was used for training so that the saved
# weights can be loaded into it.
if mode == "multispectral":
    base_model = Unet(backbone_name=Backbone, encoder_weights='imagenet', classes = n_classes, activation = 'softmax')

    # map N channels data to 3 channels
    inp = Input(shape=(None, None, N))
    l1 = Conv2D(3, (1, 1))(inp)
    out = base_model(l1)

    model = Model(inp, out, name=base_model.name)
elif mode == "RGB":
    model = Unet(backbone_name=Backbone, encoder_weights='imagenet', classes = n_classes, activation = 'softmax')
else:
    raise ValueError("Unsupported mode %r: expected 'multispectral' or 'RGB'" % (mode,))

# Load the checkpoint that belongs to the active mode. Always loading the
# 12-band file meant an RGB model was handed weights saved from a different
# architecture (the multispectral model has an extra 1x1 Conv2D in front).
checkpoint_filepaths = {
    "RGB": "C:\\vision_impulse\\unet_sandstone\\model_on_RGBdata_with_pretrained_weights.hdf5",
    "multispectral": "C:\\vision_impulse\\unet_sandstone\\model_on_12bandsdata_with_pretrained_weights.hdf5",
}
checkpoint_filepath = checkpoint_filepaths[mode]
model.load_weights(checkpoint_filepath)

# prediction on random inference images
index = np.random.randint(0, len(X_inference),1)
sample_idx = int(index[0])
# (height, width, channels) taken from the data instead of a hard-coded 64x64
tile_shape = tuple(X_inference.shape[1:4])
pred_img = X_inference[sample_idx].reshape((1,) + tile_shape)
# NOTE(review): X_inference was split from the already-normalised train_images,
# so this second normalisation looks redundant - TODO confirm before removing.
pred_img = normalize(pred_img, axis=1)
# same backbone preprocessing that was applied to the training inputs
pred_img = preprocess_input1(pred_img)
y_pred = model.predict(pred_img)
y_pred_argmax = np.argmax(y_pred, axis=3)

prediction = y_pred_argmax[0]
ground_truth = y_inference[sample_idx].reshape(tile_shape[:2])

# splitting the data to get a same inference label image.
# The path lists are split with the same sizes and random_state as the arrays,
# so position sample_idx refers to the same sample in both.
X1_list, X_test_list, y1_list, y_test_list = train_test_split(train_images_list, train_masks_list, test_size = 0.10, random_state = 0)
X_train_list, X_inference_list, y_train_list, y_inference_list = train_test_split(X1_list, y1_list, test_size = 0.05, random_state = 0)
inference_image_name = y_inference_list[sample_idx]
inference_image = cv2.imread(inference_image_name)
if inference_image is None:
    # cv2.imread signals failure by returning None rather than raising
    raise IOError("OpenCV could not read label image: " + inference_image_name)

# visualization on inference images.
print('\033[1m' +"\t\t\t\t\tPredictions on "+mode+" mode")
fig, axes = plt.subplots(nrows=1,ncols=3,figsize=(12,12))
axes[0].imshow(prediction, cmap="viridis")
axes[0].set_title('prediction')
axes[1].imshow(ground_truth, cmap="viridis")
axes[1].set_title('Ground truth')
axes[2].imshow(inference_image)
axes[2].set_title('Label')
for ax in axes:
    ax.axis("off")
plt.tight_layout()
plt.show()