In [1]:
import pickle
import numpy as np
import pandas as pd
from PIL import Image
import albumentations as A
from IPython.display import SVG
import matplotlib.pyplot as plt
%matplotlib inline
import os, re, sys, random, shutil, cv2

import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
from keras.models import Model
from keras.optimizers import Adam, Nadam
from keras import applications, optimizers
from keras.applications import InceptionResNetV2
from keras.applications.resnet50 import preprocess_input

from keras.metrics import Recall, Precision, MeanSquaredError, MeanIoU

from keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils import model_to_dot, plot_model, Sequence
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger, LearningRateScheduler
from keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, ZeroPadding2D, Dropout

2024-07-19 13:07:12.374823: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-19 13:07:12.374930: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-19 13:07:12.499310: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# !rm -rf /kaggle/working/*

In [3]:
# Пути к папкам с изображениями и масками
input_images_folder = '/kaggle/input/images/images'
input_masks_folder = '/kaggle/input/masks/masks'

output_images_folder = '/kaggle/working/aug_images'
output_masks_folder = '/kaggle/working/aug_masks'

In [4]:
transform = A.Compose([
    A.RandomCrop(width=512, height=512, p=1.0),
    A.HorizontalFlip(p=1.0),
    A.VerticalFlip(p=1.0),
    A.Rotate(limit=[60, 300], p=1.0, interpolation=cv2.INTER_NEAREST),
    A.RandomBrightnessContrast(brightness_limit=[-0.2, 0.3], contrast_limit=0.2, p=1.0),
    A.OneOf([
        A.CLAHE (clip_limit=1.5, tile_grid_size=(8, 8), p=0.5),
        A.GridDistortion(p=0.5),
        A.OpticalDistortion(distort_limit=1, shift_limit=0.5, interpolation=cv2.INTER_NEAREST, p=0.5),
    ], p=1.0),
], p=1.0)

In [5]:
os.makedirs(output_images_folder, exist_ok=True)
os.makedirs(output_masks_folder, exist_ok=True)

image_files = os.listdir(input_images_folder)
# Фильтруем только изображения
image_files = [f for f in image_files if f.endswith('.jpg') or f.endswith('.png')]

In [6]:
image_files = os.listdir(input_images_folder)
for image_file in image_files:
    src_image_path = os.path.join(input_images_folder, image_file)
    dst_image_path = os.path.join(output_images_folder, image_file)
    shutil.copyfile(src_image_path, dst_image_path)

    mask_file = image_file.replace('.jpg', '.png')  # предполагаем, что маски в формате PNG
    src_mask_path = os.path.join(input_masks_folder, mask_file)
    dst_mask_path = os.path.join(output_masks_folder, mask_file)
    shutil.copyfile(src_mask_path, dst_mask_path)

In [7]:
from tqdm import tqdm
# Проходимся по каждому изображению
for image_file in tqdm(image_files, desc='Augmenting images'):
    # Загружаем изображение
    image_path = os.path.join(output_images_folder, image_file)
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Загружаем маску (предполагается, что маски имеют те же имена и тот же формат, что и изображения)
    mask_file = image_file.replace('.jpg', '.png')
    mask_path = os.path.join(output_masks_folder, mask_file)
    mask = cv2.imread(mask_path, cv2.COLOR_BGR2RGB)

    # Применяем аугментацию к изображению и маске
    augmented = transform(image=image, mask=mask)
    augmented_image = augmented['image']
    augmented_mask = augmented['mask']

    # Сохраняем аугментированное изображение и маску
    output_image_path = os.path.join(output_images_folder, f"aug_{image_file}")
    output_mask_path = os.path.join(output_masks_folder, f"aug_{mask_file}")
    cv2.imwrite(output_image_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))  # сохраняем обратно в BGR
    cv2.imwrite(output_mask_path, augmented_mask)

print(f"Augmentation finished. Augmented images and masks are saved in {output_images_folder} and {output_masks_folder}")

Augmenting images: 100%|██████████| 585/585 [00:25<00:00, 23.34it/s]

Augmentation finished. Augmented images and masks are saved in /kaggle/working/aug_images and /kaggle/working/aug_masks





In [8]:
images_dir = output_images_folder
masks_dir = output_masks_folder

from sklearn.model_selection import train_test_split

# Получение всех путей к изображениям и маскам
images = [os.path.join(images_dir, f) for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]
masks = [os.path.join(masks_dir, f) for f in os.listdir(masks_dir) if os.path.isfile(os.path.join(masks_dir, f))]

images.sort()
masks.sort()

# Смешивание изображений и их масок
data = list(zip(images, masks))

# Разделение на train и val
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

train_images, train_masks = zip(*train_data)
val_images, val_masks = zip(*val_data)


In [9]:
class_dict_df = pd.read_csv('/kaggle/input/class_dict.csv', index_col=False, skipinitialspace=True, sep=';')
class_dict_df

Unnamed: 0,name,r,g,b
0,building,255,255,0
1,road,255,0,0
2,vegetation,0,255,0
3,water,0,0,255
4,unlabeled,0,0,0


In [10]:
label_codes = tuple(map(tuple, class_dict_df[['r', 'g', 'b']].values))
code2id = {v:k for k,v in enumerate(label_codes)}
id2code = {k:v for k,v in enumerate(label_codes)}

name2id = {v:k for k,v in enumerate(class_dict_df['name'].values)}
id2name = {k:v for k,v in enumerate(class_dict_df['name'].values)}

In [11]:
def rgb_to_onehot(rgb_image, colormap = id2code):
    '''Function to one hot encode RGB mask labels
        Inputs:
            rgb_image - image matrix (eg. 256 x 256 x 3 dimension numpy ndarray)
            colormap - dictionary of color to label id
        Output: One hot encoded image of dimensions (height x width x num_classes) where num_classes = len(colormap)
    '''
    num_classes = len(colormap)
    shape = rgb_image.shape[:2]+(num_classes,)
    encoded_image = np.zeros( shape, dtype=np.int8 )
    for i, cls in enumerate(colormap):
        encoded_image[:,:,i] = np.all(rgb_image.reshape( (-1,3) ) == colormap[i], axis=1).reshape(shape[:2])
    return encoded_image


def onehot_to_rgb(onehot, colormap = id2code):
    '''Function to decode encoded mask labels
        Inputs:
            onehot - one hot encoded image matrix (height x width x num_classes)
            colormap - dictionary of color to label id
        Output: Decoded RGB image (height x width x 3)
    '''
    single_layer = np.argmax(onehot, axis=-1)
    output = np.zeros( onehot.shape[:2]+(3,) )
    for k in colormap.keys():
        output[single_layer==k] = colormap[k]
    return np.uint8(output)

In [12]:
def histogram_normalization(image):

    image_clahe = np.zeros_like(image, dtype=np.uint8)
    for channel in range(3):
        channel_img = image[:,:,channel].astype(np.uint8)
        clahe = cv2.createCLAHE(clipLimit=5)
        channel_clahe = clahe.apply(channel_img)
        channel_clahe = np.clip(channel_clahe + 30, 0, 255).astype(np.uint8)
        image_clahe[:,:,channel] = channel_clahe


    return image_clahe.astype(np.float32)

In [13]:
data_gen_args = dict(
    preprocessing_function=histogram_normalization,
    rescale=1./255
)
mask_gen_args = dict()

train_frames_datagen = ImageDataGenerator(**data_gen_args)
train_masks_datagen = ImageDataGenerator(**mask_gen_args)
val_frames_datagen = ImageDataGenerator(**data_gen_args)
val_masks_datagen = ImageDataGenerator(**mask_gen_args)

seed = 1

In [14]:
def TrainAugmentGenerator(train_images, train_masks, seed=1, batch_size=8, target_size=(512, 512)):
    train_image_generator = train_frames_datagen.flow(
        np.array([img_to_array(load_img(img, target_size=target_size)).astype(np.float32) for img in train_images]),
        batch_size=batch_size,
        seed=seed
    )

    train_mask_generator = train_masks_datagen.flow(
        np.array([img_to_array(load_img(mask, target_size=target_size)).astype(np.float32) for mask in train_masks]),
        batch_size=batch_size,
        seed=seed
    )

    while True:
        X1i = next(train_image_generator)
        X2i = next(train_mask_generator)

        mask_encoded = [rgb_to_onehot(X2i[x, :, :, :], id2code).astype(np.float32) for x in range(X2i.shape[0])]

        yield X1i, np.asarray(mask_encoded)

def ValAugmentGenerator(val_images, val_masks, seed=1, batch_size=8, target_size=(512, 512)):
    val_image_generator = val_frames_datagen.flow(
        np.array([img_to_array(load_img(img, target_size=target_size)).astype(np.float32) for img in val_images]),
        batch_size=batch_size,
        seed=seed
    )

    val_mask_generator = val_masks_datagen.flow(
        np.array([img_to_array(load_img(mask, target_size=target_size)).astype(np.float32) for mask in val_masks]),
        batch_size=batch_size,
        seed=seed
    )

    while True:
        X1i = next(val_image_generator)
        X2i = next(val_mask_generator)

        mask_encoded = [rgb_to_onehot(X2i[x, :, :, :], id2code).astype(np.float32) for x in range(X2i.shape[0])]

        yield X1i, np.asarray(mask_encoded)


In [15]:
def conv_block(input, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(input)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    return x

def decoder_block(input, skip_features, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    x = Concatenate()([x, skip_features])
    x = conv_block(x, num_filters)
    return x

def build_inception_resnetv2_unet(input_shape):
    """ Input """
    inputs = Input(input_shape)

    """ Pre-trained InceptionResNetV2 Model """
    encoder = InceptionResNetV2(include_top=False, weights="imagenet", input_tensor=inputs)

    """ Encoder """
    s1 = encoder.get_layer("input_layer").output           ## (512 x 512)

    s2 = encoder.get_layer("activation").output        ## (255 x 255)
    s2 = ZeroPadding2D(( (1, 0), (1, 0) ))(s2)         ## (256 x 256)

    s3 = encoder.get_layer("activation_3").output      ## (126 x 126)
    s3 = ZeroPadding2D((1, 1))(s3)                     ## (128 x 128)

    s4 = encoder.get_layer("activation_74").output      ## (61 x 61)
    s4 = ZeroPadding2D(( (2, 1),(2, 1) ))(s4)           ## (64 x 64)

    """ Bridge """
    b1 = encoder.get_layer("activation_161").output     ## (30 x 30)
    b1 = ZeroPadding2D((1, 1))(b1)                      ## (32 x 32)

    """ Decoder """
    d1 = decoder_block(b1, s4, 512)                     ## (64 x 64)
    d2 = decoder_block(d1, s3, 256)                     ## (128 x 128)
    d3 = decoder_block(d2, s2, 128)                     ## (256 x 256)
    d4 = decoder_block(d3, s1, 64)                      ## (512 x 512)

    """ Output """
    dropout = Dropout(0.3)(d4)
    outputs = Conv2D(5, 1, padding="same", activation="softmax")(dropout)

    model = Model(inputs, outputs, name="InceptionResNetV2-UNet")
    return model

In [16]:
batch_size = 16
num_train_samples = len(train_images)
num_val_samples = len(val_images)

steps_per_epoch = int(np.ceil(float(num_train_samples) / float(batch_size)))
print('steps_per_epoch: ', steps_per_epoch)
validation_steps = int(np.ceil(float(4 * num_val_samples) / float(batch_size)))
print('validation_steps: ', validation_steps)

steps_per_epoch:  59
validation_steps:  59


In [17]:
K.clear_session()

In [18]:
strategy = tf.distribute.MirroredStrategy()
print("Number of devices: {}".format(strategy.num_replicas_in_sync))


Number of devices: 2


In [19]:
with strategy.scope():
    model = build_inception_resnetv2_unet(input_shape=(512, 512, 3))

    def dice_coefficient(y_true, y_pred, smooth=1):    
        intersection = K.sum(y_true * y_pred)
        return (2. * intersection + smooth) / (K.sum(y_true) + K.sum(y_pred) + smooth) 

    model.compile(
        optimizer=Adam(learning_rate=0.0001), 
        loss='categorical_crossentropy', 
        metrics=["accuracy", dice_coefficient, Precision(), Recall(), MeanSquaredError(), MeanIoU(num_classes=len(id2code))]
    )

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [20]:
def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1 **(epoch / s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(0.0001, 60)

lr_scheduler = LearningRateScheduler(
    exponential_decay_fn,
    verbose=1
)

checkpoint = ModelCheckpoint(
    filepath = '/kaggle/working/InceptionResNetV2-UNet.keras',
    save_best_only = True, 
#     save_weights_only = False,
    monitor = 'val_loss', 
    mode = 'auto', 
    verbose = 1
)

earlystop = EarlyStopping(
    monitor = 'val_loss',
    min_delta = 0.0005,
    patience = 18,
    mode = 'auto',
    verbose = 1,
    restore_best_weights = True
)

csvlogger = CSVLogger(
    filename= "model_training.csv",
    separator = ",",
    append = False
)

callbacks = [checkpoint, earlystop, csvlogger, lr_scheduler]

In [21]:
history = model.fit(
    TrainAugmentGenerator(train_images = train_images, train_masks = train_masks, target_size = (512, 512)),
    steps_per_epoch=steps_per_epoch,
    validation_data = ValAugmentGenerator(val_images = val_images, val_masks = val_masks, target_size = (512, 512)),
    validation_steps = validation_steps,
    epochs = 60,
    callbacks=callbacks,
    verbose=1
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0001.
Epoch 1/60


2024-07-19 13:11:23.048965: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/cond/else/_3258/cond/StatefulPartitionedCall/InceptionResNetV2-UNet_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2965 - dice_coefficient: 0.2274 - loss: 1.5942 - mean_io_u: 0.4015 - mean_squared_error: 0.1559 - precision: 0.3140 - recall: 0.0184
Epoch 1: val_loss improved from inf to 1.55317, saving model to /kaggle/working/InceptionResNetV2-UNet.keras
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 2s/step - accuracy: 0.2974 - dice_coefficient: 0.2277 - loss: 1.5926 - mean_io_u: 0.4015 - mean_squared_error: 0.1558 - precision: 0.3157 - recall: 0.0186 - val_accuracy: 0.3081 - val_dice_coefficient: 0.2073 - val_loss: 1.5532 - val_mean_io_u: 0.4013 - val_mean_squared_error: 0.1555 - val_precision: 0.9957 - val_recall: 2.0779e-04 - learning_rate: 1.0000e-04

Epoch 2: LearningRateScheduler setting learning rate to 9.623506263980886e-05.
Epoch 2/60
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4864 - dice_coefficient: 0.3290 - loss: 1.2584 - mean_io_u: 0.4014 

In [22]:
model.load_weights("/kaggle/working/InceptionResNetV2-UNet.keras")

In [23]:
images_dir = '/kaggle/input/images/images'
masks_dir = '/kaggle/input/masks/masks'


# Получение всех путей к изображениям и маскам
images = [os.path.join(images_dir, f) for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]
masks = [os.path.join(masks_dir, f) for f in os.listdir(masks_dir) if os.path.isfile(os.path.join(masks_dir, f))]

images.sort()
masks.sort()

# Смешивание изображений и их масок
data = list(zip(images, masks))

# Разделение на train и val
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

train_images, train_masks = zip(*train_data)
val_images, val_masks = zip(*val_data)


In [24]:
testing_gen = ValAugmentGenerator(val_images = val_images, 
                                  val_masks = val_masks, 
                                  target_size = (512, 512))