In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Conv2D, Input, Conv2DTranspose, MaxPooling2D, concatenate, BatchNormalization, Activation, Add, Dropout, DepthwiseConv2D, Flatten, Dense
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

In [None]:
from tensorflow import keras
import tensorflow.keras.backend as K
import gc
from tensorflow.keras import optimizers
from tqdm.notebook import tqdm_notebook

from tensorflow.keras.utils import to_categorical

In [None]:
# Record the TensorFlow version this run uses, so results can be tied to it.
print("tensorflow version: ", tf.__version__)

# LOAD DATA

In [None]:
# ------------------------------------------------------------------------------- #
#                              Important Variables                                #
# ------------------------------------------------------------------------------- #
# Root folder of the competition data: train.csv, depths.csv, Validation_ids.csv and
# the train/images + train/masks folders are all resolved relative to it.
data_dir = "../../DL_data/competition_data"
# Presumably the folder holding the validation files — same location as data_dir here.
validation_data_dir = "../../DL_data/competition_data"
nrows = "all"                 # Positive int = read only that many rows; "all" (any case) = read everything
load_validation = False      # True = keep only the samples listed in Validation_ids.csv
split_train_test = False    # True = split the samples 50/50 with train_test_split
data_augmentation = False   # True = append horizontally flipped copies of the training arrays
# ------------------------------------------------------------------------------- #


# Translate `nrows` into the row limit handed to pandas (None means "read every row").
if isinstance(nrows, str) and nrows.upper() == "ALL":
    train_row_limit = None
elif isinstance(nrows, int) and nrows > 0:
    train_row_limit = nrows
else:
    raise ValueError("Invalid nrows value")

# Only the id column of train.csv is needed; the depth of each sample comes from depths.csv.
train_df = pd.read_csv(data_dir+"/train.csv", index_col="id", usecols=[0], nrows=train_row_limit)
depths_df = pd.read_csv(data_dir+"/depths.csv", index_col="id")
train_df = train_df.join(depths_df)

# Every id that has a depth but was not loaded into train_df is treated as test data.
is_train_id = depths_df.index.isin(train_df.index)
test_df = depths_df[~is_train_id]
        

        
# Function that loads the ids of 'nrows' from the validation data set
def load_validation_data(data_dir, nrows):
    """Read the validation sample ids from ``<data_dir>/Validation_ids.csv``.

    The ``ids`` column is selected by name, so the result does not depend on
    where that column sits in the file (e.g. with or without a leading index
    column), and both ``nrows`` modes go through the same read call.

    Parameters
    ----------
    data_dir : str
        Folder that contains ``Validation_ids.csv``.
    nrows : int or str
        Positive int to read only the first ``nrows`` rows, or the string
        ``"all"`` (case-insensitive) to read the whole file.

    Returns
    -------
    pandas.Series
        The ``ids`` column of the file.

    Raises
    ------
    ValueError
        If ``nrows`` is neither a positive int nor ``"all"``.
    """
    if isinstance(nrows, int) and nrows > 0:
        row_limit = nrows
    elif isinstance(nrows, str) and nrows.upper() == "ALL":
        row_limit = None  # pandas reads every row when nrows is None
    else:
        raise ValueError("Invalid nrows value")

    valid_ids = pd.read_csv(data_dir+"/Validation_ids.csv", usecols=["ids"], nrows=row_limit)
    return valid_ids["ids"]

In [None]:
def _load_scaled_grayscale(path, max_value):
    """Load the image at ``path`` in grayscale mode and divide every pixel by ``max_value``."""
    return np.array(load_img(path, color_mode="grayscale")) / max_value


if load_validation:
    # Keep only the rows whose ids are listed in Validation_ids.csv, in file order.
    # Label-based selection replaces a per-id list.index() scan (quadratic in the
    # number of ids); .copy() makes the subset an independent frame so the column
    # assignments below do not write into a slice of the original frame.
    ids_to_load = load_validation_data(validation_data_dir, nrows)
    train_df = train_df.loc[list(ids_to_load)].copy()
else:
    ids_to_load = train_df.index

# Materialise once so images and masks are loaded in exactly the same order.
sample_ids = list(ids_to_load)

print("Loading images...")
# Images are scaled by 255.
train_df["images"] = [_load_scaled_grayscale(data_dir+"/train/images/{}.png".format(idx), 255)
                      for idx in tqdm_notebook(sample_ids)]
print("Loading masks...")
# NOTE(review): masks are scaled by 65535, i.e. presumably stored as 16-bit PNGs — confirm.
train_df["masks"] = [_load_scaled_grayscale(data_dir+"/train/masks/{}.png".format(idx), 65535)
                     for idx in tqdm_notebook(sample_ids)]

print("done loading images")

# **Preprocessing Data**

#### Resize to a power of 2

In [None]:
# either pad with zeros or resize with interpolation
resize_to = 128  # side length of the square arrays produced by upsample() / pad_zeros()
original_size = 101  # side length of the raw images; upsample() returns its input unchanged when this equals resize_to


def upsample(original_img):
    """Resize ``original_img`` to ``(resize_to, resize_to)`` by interpolation.

    The input is handed back untouched when the configured target size already
    equals ``original_size``. Otherwise it is passed to ``resize`` with
    ``mode='constant'`` and ``preserve_range=True``.
    """
    already_at_target = resize_to == original_size
    if not already_at_target:
        target_shape = (resize_to, resize_to)
        return resize(original_img, target_shape, mode='constant', preserve_range=True)
    return original_img


def pad_zeros(array, target_size=None):
    """Centre a 2-D array inside a square zero-filled array.

    The offsets are derived from the input shape instead of being hard-coded,
    so the function works for any input that fits into the target. For a
    101x101 input and a target of 128 the content lands at ``[13:114, 13:114]``.
    When the padding is odd, the extra row/column goes to the bottom/right.

    Parameters
    ----------
    array : array-like, shape (height, width)
        Image or mask to pad.
    target_size : int, optional
        Side length of the padded square. Defaults to the notebook-level
        ``resize_to``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(target_size, target_size)`` (default float dtype of
        ``np.zeros``) with ``array`` in the centre and zeros elsewhere.

    Raises
    ------
    ValueError
        If ``array`` is not 2-D or does not fit inside the target square.
    """
    if target_size is None:
        target_size = resize_to

    array = np.asarray(array)
    if array.ndim != 2:
        raise ValueError("pad_zeros expects a 2-D array, got shape {}".format(array.shape))

    height, width = array.shape
    if height > target_size or width > target_size:
        raise ValueError(
            "array of shape {} does not fit into a {}x{} target".format(
                array.shape, target_size, target_size))

    top = (target_size - height) // 2
    left = (target_size - width) // 2

    padded_image = np.zeros(shape=(target_size, target_size))
    padded_image[top:top + height, left:left + width] = array
    return padded_image

In [None]:
# Strategy used to bring every sample to resize_to x resize_to.
resizing_function_to_use = pad_zeros

# Batch layout expected downstream: (samples, height, width, 1 channel).
resized_batch_shape = (-1, resize_to, resize_to, 1)

images_resized = np.array(
    [resizing_function_to_use(image) for image in train_df.images]
).reshape(resized_batch_shape)
masks_resized = np.array(
    [resizing_function_to_use(mask) for mask in train_df.masks]
).reshape(resized_batch_shape)

In [None]:
# Plotting
def plot_reshape_example(id_index=0):
    """Show one resized mask (left) next to its resized image (right).

    Parameters
    ----------
    id_index : int, optional
        Position of the sample inside ``images_resized`` / ``masks_resized``.
        Defaults to the first sample. It used to be read from an undefined
        global, so calling the function raised ``NameError``.
    """
    fig_reshape, (axs_reshape_mask, axs_reshape_img) = plt.subplots(1, 2)
    fig_reshape.suptitle("Reshaped data example")
    axs_reshape_img.set(title="Reshaped image")
    axs_reshape_mask.set(title="Reshaped mask")
    # The resized arrays carry a trailing channel axis of size 1; drop it so
    # imshow always receives a plain 2-D array.
    axs_reshape_img.imshow(np.squeeze(images_resized[id_index], axis=-1), cmap='gray')
    axs_reshape_mask.imshow(np.squeeze(masks_resized[id_index], axis=-1), cmap='gray')


#plot_reshape_example()

#### Calculating the salt coverage

In [None]:
# Salt coverage of a sample = sum of its mask values / number of pixels in a mask.
# The mask size is taken from the first mask by position (.iloc[0]); plain [0] on a
# Series indexed by sample id depends on pandas falling back to positional lookup.
mask_rows, mask_cols = train_df["masks"].iloc[0].shape[:2]
train_df["coverage"] = train_df["masks"].map(np.sum) / (mask_rows * mask_cols)

# Generate salt coverage classes
def cov_to_class(val):
    """Map a coverage fraction to a class in 0..10.

    Returns the smallest integer ``i`` in ``0..10`` with ``val * 10 <= i``,
    or ``None`` when no such integer exists (``val`` greater than 1).
    """
    scaled = val * 10
    return next((level for level in range(11) if scaled <= level), None)
        
# Bucket every sample's coverage fraction into its class via cov_to_class.
train_df["coverage_class"] = train_df["coverage"].apply(cov_to_class)

Plotting the salt coverage classes

In [None]:
# Left: distribution of the raw coverage fraction.
# Right: number of samples per coverage class. This panel is labelled "Coverage class"
# but used to plot the raw coverage again.
fig, axs = plt.subplots(1, 2, sharey=True, tight_layout=True)
n_bins = 20
axs[0].hist(train_df.coverage, bins=n_bins)
# One bin per class value 0..10, centred on the integer.
axs[1].hist(train_df.coverage_class, bins=np.arange(12) - 0.5)

fig.suptitle("Salt coverage")
axs[0].set_xlabel("Coverage")
axs[1].set_xlabel("Coverage class")
axs[0].set_ylabel("Number of samples")
plt.show()

#### Split train/dev

In [None]:
if not split_train_test:
    # No hold-out requested: every sample is used for training.
    x_train, y_train = images_resized, masks_resized
    # Empty placeholders so the shape report below can still read x_valid.shape[0].
    x_valid, y_valid = np.array([]), np.array([])
else:
    # 50/50 split, stratified on the coverage class, with a fixed seed.
    split_arrays = train_test_split(
        train_df.index.values,
        images_resized,
        masks_resized,
        train_df.coverage.values,
        train_df.z.values,
        test_size=0.5,
        stratify=train_df.coverage_class,
        random_state=1337,
    )
    (ids_train, ids_valid,
     x_train, x_valid,
     y_train, y_valid,
     cov_train, cov_test,
     depth_train, depth_test) = split_arrays

print("Train/ Valid shape = %d/ %d"%(x_train.shape[0], x_valid.shape[0]))

####  Data augmentation

In [None]:
if data_augmentation:
    # Double the training set: originals first, then left-right mirrored copies.
    # Reversing axis 2 of the (sample, row, column, channel) batch mirrors each sample's columns.
    x_train = np.concatenate([x_train, x_train[:, :, ::-1]], axis=0)
    y_train = np.concatenate([y_train, y_train[:, :, ::-1]], axis=0)

In [None]:
# The backbone below is built for 3-channel input, so the single grayscale channel is
# copied three times. The guard keeps this cell idempotent: re-running it used to
# triple the channel count again (3 -> 9).
if x_train.shape[-1] == 1:
    x_train = np.repeat(x_train, 3, axis=3)

In [None]:
# Sanity check of the arrays that go into training (label typo "x_rain" fixed).
print("x_train shape: ", x_train.shape)
print("y_train shape: ", y_train.shape)

#####  Model definition

In [None]:
# Input geometry of the backbone: 128x128 pixels, 3 channels.
input_shape = (128, 128, 3)

# MobileNetV2 without its classification top, initialised with ImageNet weights.
backbone = MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

# Input tensor of the backbone, reused when building models on top of it.
inputs = backbone.input

backbone.summary()

In [None]:

# cov_to_class produces the classes 0..10.
NUM_COVERAGE_CLASSES = 11

# Build the class labels so they line up with x_train:
#  * after a train/valid split only the training half (cov_train) is relevant;
#  * flip augmentation appends one mirrored copy after the originals, and mirroring
#    does not change the coverage, so the labels are simply repeated once.
if split_train_test:
    coverage_classes = np.array([cov_to_class(cov) for cov in cov_train])
else:
    coverage_classes = np.array(train_df["coverage_class"])
if data_augmentation:
    coverage_classes = np.tile(coverage_classes, 2)

if len(coverage_classes) != len(x_train):
    raise ValueError(
        "coverage labels ({}) and x_train ({}) have different lengths; "
        "re-run the notebook from the data-loading cell".format(len(coverage_classes), len(x_train)))

# num_classes is fixed so the one-hot width always matches the Dense head, even when
# the highest class is missing from the loaded subset.
coverage_label = to_categorical(coverage_classes, num_classes=NUM_COVERAGE_CLASSES)

print(coverage_label.shape)

# Classification head: flatten the 'block_13_expand_relu' activations of the backbone
# and map them to one softmax output per coverage class.
cov_bn = backbone.get_layer('block_13_expand_relu').output
cov_flatten = Flatten()(cov_bn)
cov_output = Dense(NUM_COVERAGE_CLASSES, activation="softmax")(cov_flatten)

cov_model = Model(inputs=[inputs], outputs=[cov_output])
cov_adam_optimizer = optimizers.Adam(learning_rate=0.01)
cov_model.compile(optimizer=cov_adam_optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
cov_model.summary()

#### Training

In [None]:
# Training schedule for the coverage classifier.
cov_epochs = 25
cov_batch_len = 16

# Fit on the whole training array; no validation data is passed.
cov_history = cov_model.fit(
    x=x_train,
    y=coverage_label,
    batch_size=cov_batch_len,
    epochs=cov_epochs,
    shuffle=True,
)

In [None]:
# NOTE(review): plot_history is not defined in the cells visible above — confirm it is
# defined or imported before this cell runs on a fresh kernel.
# The epoch count now follows cov_epochs instead of a hard-coded 40, which did not
# match the 25 epochs the model was trained for.
for cov_metric in ('loss', 'accuracy'):
    plot_history(hs={'Cov_Net': cov_history}, epochs=cov_epochs, metric=cov_metric)

In [None]:
# Persist the trained coverage classifier under the name 'cov_model_v2' (relative to the working directory).
# NOTE(review): newer Keras releases may require a '.keras' or '.h5' suffix here — confirm against the installed version.
cov_model.save('cov_model_v2')

In [None]:
# Free resources held by the coverage model now that it has been saved:
# reset the Keras backend session, drop the notebook's reference to the model,
# then force a garbage-collection pass.
K.clear_session()
del cov_model
gc.collect()