In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import albumentations as A
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPool2D, UpSampling2D, Concatenate, Add
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from glob import glob

def conv_block(inputs, out_ch, rate=1):
    x = Conv2D(out_ch, 3, padding="same", dilation_rate=1)(inputs)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

def RSU_L(inputs, out_ch, int_ch, num_layers, rate=2):
    """ Initial Conv """
    x = conv_block(inputs, out_ch)
    init_feats = x

    """ Encoder """
    skip = []
    x = conv_block(x, int_ch)
    skip.append(x)

    for i in range(num_layers-2):
        x = MaxPool2D((2, 2))(x)
        x = conv_block(x, int_ch)
        skip.append(x)

    """ Bridge """
    x = conv_block(x, int_ch, rate=rate)

    """ Decoder """
    skip.reverse()

    x = Concatenate()([x, skip[0]])
    x = conv_block(x, int_ch)

    for i in range(num_layers-3):
        x = UpSampling2D(size=(2, 2), interpolation="bilinear")(x)
        x = Concatenate()([x, skip[i+1]])
        x = conv_block(x, int_ch)

    x = UpSampling2D(size=(2, 2), interpolation="bilinear")(x)
    x = Concatenate()([x, skip[-1]])
    x = conv_block(x, out_ch)

    """ Add """
    x = Add()([x, init_feats])
    return x

def RSU_4F(inputs, out_ch, int_ch):
    """ Initial Conv """
    x0 = conv_block(inputs, out_ch, rate=1)

    """ Encoder """
    x1 = conv_block(x0, int_ch, rate=1)
    x2 = conv_block(x1, int_ch, rate=2)
    x3 = conv_block(x2, int_ch, rate=4)

    """ Bridge """
    x4 = conv_block(x3, int_ch, rate=8)

    """ Decoder """
    x = Concatenate()([x4, x3])
    x = conv_block(x, int_ch, rate=4)

    x = Concatenate()([x, x2])
    x = conv_block(x, int_ch, rate=2)

    x = Concatenate()([x, x1])
    x = conv_block(x, out_ch, rate=1)

    """ Addition """
    x = Add()([x, x0])
    return x

def u2net(input_shape, out_ch, int_ch, num_classes=1):
    """ Input Layer """
    inputs = Input(input_shape)
    s0 = inputs

    """ Encoder """
    s1 = RSU_L(s0, out_ch[0], int_ch[0], 7)
    p1 = MaxPool2D((2, 2))(s1)

    s2 = RSU_L(p1, out_ch[1], int_ch[1], 6)
    p2 = MaxPool2D((2, 2))(s2)

    s3 = RSU_L(p2, out_ch[2], int_ch[2], 5)
    p3 = MaxPool2D((2, 2))(s3)

    s4 = RSU_L(p3, out_ch[3], int_ch[3], 4)
    p4 = MaxPool2D((2, 2))(s4)

    s5 = RSU_4F(p4, out_ch[4], int_ch[4])
    p5 = MaxPool2D((2, 2))(s5)

    """ Bridge """
    b1 = RSU_4F(p5, out_ch[5], int_ch[5])
    b2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(b1)

    """ Decoder """
    d1 = Concatenate()([b2, s5])
    d1 = RSU_4F(d1, out_ch[6], int_ch[6])
    u1 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d1)

    d2 = Concatenate()([u1, s4])
    d2 = RSU_L(d2, out_ch[7], int_ch[7], 4)
    u2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d2)

    d3 = Concatenate()([u2, s3])
    d3 = RSU_L(d3, out_ch[8], int_ch[8], 5)
    u3 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d3)

    d4 = Concatenate()([u3, s2])
    d4 = RSU_L(d4, out_ch[9], int_ch[9], 6)
    u4 = UpSampling2D(size=(2, 2), interpolation="bilinear")(d4)

    d5 = Concatenate()([u4, s1])
    d5 = RSU_L(d5, out_ch[10], int_ch[10], 7)

    """ Side Outputs """
    y1 = Conv2D(num_classes, 3, padding="same")(d5)

    y2 = Conv2D(num_classes, 3, padding="same")(d4)
    y2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(y2)

    y3 = Conv2D(num_classes, 3, padding="same")(d3)
    y3 = UpSampling2D(size=(4, 4), interpolation="bilinear")(y3)

    y4 = Conv2D(num_classes, 3, padding="same")(d2)
    y4 = UpSampling2D(size=(8, 8), interpolation="bilinear")(y4)

    y5 = Conv2D(num_classes, 3, padding="same")(d1)
    y5 = UpSampling2D(size=(16, 16), interpolation="bilinear")(y5)

    y6 = Conv2D(num_classes, 3, padding="same")(b1)
    y6 = UpSampling2D(size=(32, 32), interpolation="bilinear")(y6)

    y0 = Concatenate()([y1, y2, y3, y4, y5, y6])
    y0 = Conv2D(num_classes, 3, padding="same")(y0)

    y0 = Activation("sigmoid", name="y0")(y0)
    y1 = Activation("sigmoid", name="y1")(y1)
    y2 = Activation("sigmoid", name="y2")(y2)
    y3 = Activation("sigmoid", name="y3")(y3)
    y4 = Activation("sigmoid", name="y4")(y4)
    y5 = Activation("sigmoid", name="y5")(y5)
    y6 = Activation("sigmoid", name="y6")(y6)

    model = tf.keras.models.Model(inputs, outputs=[y0, y1, y2, y3, y4, y5, y6])
    return model

def build_u2net(input_shape, num_classes=1):
    out_ch = [64, 128, 256, 512, 512, 512, 512, 256, 128, 64, 64]
    int_ch = [32, 32, 64, 128, 256, 256, 256, 128, 64, 32, 16]
    model = u2net(input_shape, out_ch, int_ch, num_classes=num_classes)
    return model

def build_u2net_lite(input_shape, num_classes=1):
    out_ch = [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
    int_ch = [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
    model = u2net(input_shape, out_ch, int_ch, num_classes=num_classes)
    return model




In [2]:
#pip install "numpy>=1.16.5,<1.23.0"

# Data loader

In [3]:
""" Global parameters """
H = 512
W = 512

def load_dataset(path, split=0.1):
    train_x_path = os.path.join(path, "train", "original")
    train_y_path = os.path.join(path, "train", "mask")
    train_x = sorted([os.path.join(train_x_path, x) for x in os.listdir(train_x_path)])
    train_y = sorted([os.path.join(train_y_path, x) for x in os.listdir(train_y_path)])

    valid_x_path = os.path.join(path, "validation", "image")
    valid_y_path = os.path.join(path, "validation", "mask")
    valid_x = sorted([os.path.join(valid_x_path, x) for x in os.listdir(valid_x_path)])
    valid_y = sorted([os.path.join(valid_y_path, x) for x in os.listdir(valid_y_path)])

    return (train_x, train_y), (valid_x, valid_y)


def read_image(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    x = cv2.resize(x, (W, H))
#     x = x / 255.0
#     x = x.astype(np.float32)
    return x


def read_mask(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    x = cv2.resize(x, (W, H))
#     x = x.astype(np.float32)
#     x = np.expand_dims(x, axis=-1)
    return x


In [4]:
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.ElasticTransform(always_apply=False, p=0.2, alpha=1.0, sigma=19.74, alpha_affine=20.39, interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None, approximate=False, same_dxdy=False),
    A.CLAHE(always_apply=False, p=0.4, clip_limit=(1, 4), tile_grid_size=(8, 8))
])


def tf_parse(x, y):
    def _parse(x, y, isTrain= False):
        x = read_image(x)
        y = read_mask(y)
        if isTrain:
            transformed = transform(image=x, mask=y)
            x = transformed['image']
            y = transformed['mask']
            
        x = x / 255.0
        x = x.astype(np.float32)
        
        y = y / 255.0
        y = y.astype(np.float32)
        y = np.expand_dims(y, axis=-1)
        return x, y

    x, y = tf.numpy_function(_parse, [x, y], [tf.float32, tf.float32])
    x.set_shape([H, W, 3])
    y.set_shape([H, W, 1])
    return x, y


def tf_dataset(X, Y, batch=2):
    ds = tf.data.Dataset.from_tensor_slices((X, Y))
    ds = ds.map(tf_parse).batch(batch).prefetch(10)
    return ds


In [5]:
import keras.backend as K
from keras import layers
from keras.losses import binary_crossentropy
import tensorflow as tf

def alpha_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))


def ssim_loss(y_true, y_pred):
    return 1 - tf.reduce_mean(tf.image.ssim(y_true, y_pred, max_val=1.0))

def gradient_loss(y_true, y_pred):
    grad_true_x = tf.abs(y_true[:, :-1, :] - y_true[:, 1:, :])
    grad_true_y = tf.abs(y_true[:, :, :-1] - y_true[:, :, 1:])
    grad_pred_x = tf.abs(y_pred[:, :-1, :] - y_pred[:, 1:, :])
    grad_pred_y = tf.abs(y_pred[:, :, :-1] - y_pred[:, :, 1:])
    return tf.reduce_mean(tf.square(grad_true_x - grad_pred_x)) + tf.reduce_mean(tf.square(grad_true_y - grad_pred_y))


def custom_loss(y_true, y_pred):
    accuracy = tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))
    return alpha_loss(y_true, y_pred) + ssim_loss(y_true, y_pred) + binary_crossentropy(y_true, y_pred)


In [6]:
def SAD(y_true, y_pred):

    # Convert images to float32
    img1 = tf.image.convert_image_dtype(y_true, tf.float32)
    img2 = tf.image.convert_image_dtype(y_pred, tf.float32)

    # Calculate absolute differences
    abs_diff = tf.abs(tf.subtract(img1, img2))

    # Sum of absolute differences
    sad = tf.reduce_sum(abs_diff)

    return sad


def MSE(y_true, y_pred):

    # Convert images to float32
    img1 = tf.image.convert_image_dtype(y_true, tf.float32)
    img2 = tf.image.convert_image_dtype(y_pred, tf.float32)

    # Calculate squared differences
    squared_diff = tf.square(tf.subtract(img1, img2))

    # Mean Squared Error
    mse = tf.reduce_mean(squared_diff)

    return mse


def MAD(y_true, y_pred):
    # Convert images to float32
    img1 = tf.image.convert_image_dtype(y_true, tf.float32)
    img2 = tf.image.convert_image_dtype(y_pred, tf.float32)

    # Calculate absolute differences
    abs_diff = tf.abs(tf.subtract(img1, img2))

    # Mean of Absolute Differences
    mad = tf.reduce_mean(abs_diff)

    return mad

In [7]:
np.random.seed(42)
tf.random.set_seed(42)

""" Hyperparameters """
batch_size = 4
lr = 1e-7
num_epochs = 1
model_path = "U2Net_custom_loss.h5"
csv_path = "log.csv"

In [8]:
dataset_path = "/kaggle/input/p3m10k-portraitsegmentation"
(train_x, train_y), (valid_x, valid_y) = load_dataset(dataset_path)

print(f"Train: {len(train_x)} - {len(train_y)}")
print(f"Valid: {len(valid_x)} - {len(valid_y)}")

train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)


Train: 11321 - 11321
Valid: 1200 - 1200


In [9]:
from tensorflow.keras.metrics import MeanSquaredError
model = build_u2net((H, W, 3))
mse_metric = MeanSquaredError(name='mse')
model.load_weights('/kaggle/input/weight/U2Net_AutoMattingData-0.6424-weights-09.h5')
model.compile(loss=custom_loss, optimizer=Adam(lr), metrics=[MAD, SAD, MSE])

# callbacks = [
#     ModelCheckpoint(filepath='U2Net_AutoMattingData-{val_loss:.4f}-weights-{epoch:02d}.h5', verbose=1, save_best_only=True),
#     ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-12, verbose=1),
#     CSVLogger(csv_path),
#     EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False),
# ]

# history = model.fit(
#     train_dataset,
#     epochs=num_epochs,
#     validation_data=valid_dataset,
#     callbacks=callbacks 
# )


In [10]:
model.evaluate(valid_dataset)



[0.6424411535263062,
 0.07014510780572891,
 0.07048436999320984,
 0.07400575280189514,
 0.08164706081151962,
 0.0921558290719986,
 0.10877583920955658,
 0.1452268660068512,
 0.009744225069880486,
 10217.560546875,
 0.006238456349819899,
 0.009692889638245106,
 10163.7314453125,
 0.006247739773243666,
 0.010094616562128067,
 10584.97265625,
 0.006421877536922693,
 0.01094686146825552,
 11478.6162109375,
 0.0069298529997467995,
 0.01249728910624981,
 13104.357421875,
 0.007880646735429764,
 0.014935364946722984,
 15660.865234375,
 0.009436163119971752,
 0.021145321428775787,
 22172.4765625,
 0.0132065424695611]

In [11]:
print(history.history.keys())

NameError: name 'history' is not defined

In [None]:
import matplotlib.pyplot as plt

plt.plot(history.history['mean_squared_error'])
plt.plot(history.history['val_mean_squared_error'])
plt.title('Model MSE')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
