In [None]:
import gc
import glob
import os
import matplotlib.pyplot as plt
import time

import numpy as np
import pandas as pd

from salt_parser import SaltParser

from tqdm import tqdm

from collections import Counter
# del backbone.losses
from backbone.losses import my_iou_metric, my_iou_metric_2, lovasz_loss, focal_loss

from net import UResNet34
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam, SGD
from keras.models import load_model
from keras import Model

% matplotlib inline

# plt.rcParams['figure.figsize'] = (12, 9)
# plt.style.use('ggplot')
!nvidia-smi

In [None]:
# ---- Run configuration ----
image_size = (128, 128)
pretrain_used = True  # ResNet 34
normalize = True

# Keyword arguments for SaltParser. The two variants share the data source and
# target size and differ only in whether images are padded (`pad_images`).
_common_parser_kwargs = {
    'data_src': './input/',
    'image_size': image_size,
}
salt_parameters = {
    'salt_parameters_padding': {**_common_parser_kwargs, 'pad_images': True, 'grayscale': False},
    'salt_parameters_rescale': {**_common_parser_kwargs, 'pad_images': False, 'grayscale': False},
}

salt_parsers_padding = SaltParser(**salt_parameters['salt_parameters_padding'])
salt_parsers_rescale = SaltParser(**salt_parameters['salt_parameters_rescale'])

# Parsers that the rest of the notebook iterates over; 'rescale' is currently disabled.
parsers = {
    'padding': salt_parsers_padding,
#     'rescale': salt_parsers_rescale
}

# ---- Load, normalise and cache the arrays on each parser ----
for key, parser in parsers.items():
    print(key, parser)
    parser.initialize_data()
    X_train, y_train, X_test = parser.load_data()
    train_df = parser.compute_coverage()
    padding_pixels = parser.return_padding_borders()

    if normalize:
        # Scale by 1/255 one array at a time so each source array can be released
        # as soon as its scaled copy exists.
        X_train = X_train / 255.
        y_train = y_train / 255.
        X_test = X_test / 255.
        print('X_train - min: {}, max: {}'.format(np.min(X_train), np.max(X_train)))
        print('y_train - min: {}, max: {}'.format(np.min(y_train), np.max(y_train)))
        print('Train set: {}, {}'.format(X_train.shape, y_train.shape))
        print('X_test - min: {}, max: {}'.format(np.min(X_test), np.max(X_test)))
        print('Test set: {}'.format(X_test.shape))

    # Cast to float32, again sequentially.
    X_train = X_train.astype(np.float32)
    y_train = y_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # Store one image / mask per row next to the coverage columns, and keep the
    # test images on the parser for the prediction cells.
    parser.train_df['X_train'] = list(X_train)
    parser.train_df['y_train'] = list(y_train)
    parser.X_test = X_test


In [None]:
def convert_1d(x):
    """Stack the entries of `x` into an array of shape (-1, *image_size, 1).

    `x` must expose `.tolist()`; in this notebook it is a DataFrame column
    holding one mask per row.
    """
    stacked = np.array(x.tolist())
    return stacked.reshape(-1, *image_size, 1)


def convert_3d(x):
    """Stack the entries of `x` into an array of shape (-1, *image_size, 3).

    `x` must expose `.tolist()`; in this notebook it is a DataFrame column
    holding one image per row.
    """
    stacked = np.array(x.tolist())
    return stacked.reshape(-1, *image_size, 3)


def get_train_val(n_fold_data, fold_idx):
    """Split pre-built folds into training and validation arrays.

    Parameters
    ----------
    n_fold_data : sequence of DataFrames, one per fold, each carrying the
        columns `coverage_class`, `X_train` and `y_train`.
    fold_idx : position of the fold held out for validation; all other folds
        are concatenated into the training set.

    Returns
    -------
    (x_train, x_valid, y_train, y_valid) as arrays produced by `convert_3d`
    (images) and `convert_1d` (masks).

    The coverage-class distribution of both splits is printed as a sanity check.
    """
    train = pd.concat(
        [fold_df for position, fold_df in enumerate(n_fold_data) if position != fold_idx]
    )
    valid = n_fold_data[fold_idx]

    for split in (train, valid):
        print(Counter(split.coverage_class))

    x_train = convert_3d(train['X_train'])
    x_valid = convert_3d(valid['X_train'])
    y_train = convert_1d(train['y_train'])
    y_valid = convert_1d(valid['y_train'])
    return x_train, x_valid, y_train, y_valid


In [None]:
# Stage 1: train one UResNet34 per fold with binary cross-entropy and keep the
# checkpoint with the best validation IoU. Stage 2 reloads these checkpoints.
for key in parsers:
    parser = parsers[key]
    train_df = parser.train_df

    n_fold = 4
    # Sort by coverage class and hand out fold labels round-robin (0, 1, 2, 3, 0, ...)
    # so that every fold receives a similar mix of coverage classes.
    train_df.sort_values('coverage_class', inplace=True)
    train_df['fold'] = np.arange(train_df.shape[0]) % n_fold
    subsets = [train_df[train_df['fold'] == i] for i in range(n_fold)]
    for idx in range(n_fold):
        print('\n\nNow training fold {} of {}'.format(idx + 1, key))
        x_train, x_valid, y_train, y_valid = get_train_val(subsets, idx)
        # Augment the training split (only) with horizontally mirrored copies.
        x_train = np.append(x_train, [np.fliplr(x) for x in x_train], axis=0)
        y_train = np.append(y_train, [np.fliplr(x) for x in y_train], axis=0)

        adam = Adam(lr=0.001)
        # Derive the input shape from the configured image size so the model always
        # matches the arrays produced by convert_3d.
        model = UResNet34(input_shape=tuple(image_size) + (3,))
        model.compile(loss='binary_crossentropy', optimizer=adam, metrics=[my_iou_metric])

        epochs = 30  # number of epochs
        batch_size = 32

        # Select the checkpoint on the *validation* metric. Monitoring the training
        # metric would keep the epoch that fits the training data best and ignore
        # the held-out fold passed to fit() below.
        model_checkpoint = ModelCheckpoint("./ResUNet34-Fold{}-{}.model".format(idx + 1, key.capitalize()),
                                           monitor='val_my_iou_metric', mode='max', save_best_only=True, verbose=1)
#         reduce_lr = ReduceLROnPlateau(factor=0.25, monitor='val_my_iou_metric', mode='max', patience=8, verbose=1)

        history = model.fit(x_train, y_train,
                            validation_data=(x_valid, y_valid),  # Keras documents a tuple here
                            epochs=epochs,
                            batch_size=batch_size,
                            callbacks=[model_checkpoint], shuffle=True,
                            verbose=2)


In [None]:
# Stage 2: Lovasz Loss
# Fine-tune every stage-1 checkpoint with the Lovasz loss on the raw outputs
# (the final activation layer is cut off) and save the best model per fold.

for key in parsers:
    parser = parsers[key]
    train_df = parser.train_df

    n_fold = 4
    # Reuse the fold labels written by stage 1 when they exist, so each fine-tuned
    # model is validated on the same held-out rows as its stage-1 checkpoint.
    if 'fold' not in train_df.columns:
        train_df.sort_values('coverage_class', inplace=True)
        train_df['fold'] = (list(range(n_fold)) * train_df.shape[0])[:train_df.shape[0]]
    subsets = [train_df[train_df['fold'] == i] for i in range(n_fold)]
    for idx in range(n_fold):
        print('\n\nNow training fold {} of {}'.format(idx + 1, key))
        x_train, x_valid, y_train, y_valid = get_train_val(subsets, idx)
        # Augment the training split (only) with horizontally mirrored copies.
        x_train = np.append(x_train, [np.fliplr(x) for x in x_train], axis=0)
        y_train = np.append(y_train, [np.fliplr(x) for x in y_train], axis=0)

        # Load stage 1 model
        model_stage_1 = load_model('./ResUNet34-Fold{}-{}.model'.format(idx + 1, key.capitalize()),
                                   custom_objects={'my_iou_metric': my_iou_metric})
        print('Loaded stage 1 model for', 'ResUNet34-Fold{}-{}.model'.format(idx + 1, key.capitalize()))

        # Remove the last activation layer: the new model ends at the tensor that
        # feeds the final layer of the stage-1 model.
        input_x = model_stage_1.layers[0].input
        output_layer = model_stage_1.layers[-1].input
        model = Model(input_x, output_layer)
        c = SGD(lr=0.005, decay=0.0001, momentum=0.9)

        # lovasz_loss needs inputs in the range (-inf, +inf), hence the removed "sigmoid"
        # activation. The default threshold for a pixel prediction is then 0 instead
        # of 0.5, as in my_iou_metric_2.
        model.compile(loss=lovasz_loss, optimizer=c, metrics=[my_iou_metric_2])

        epochs = 96  # number of epochs
        batch_size = 32

        # All three callbacks watch the validation IoU.
        early_stopping = EarlyStopping(monitor='val_my_iou_metric_2', mode='max', patience=12, verbose=1)
        model_checkpoint = ModelCheckpoint("./ResUNet34-Lovasz-Fold{}-{}.model".format(idx + 1, key.capitalize()),
                                           monitor='val_my_iou_metric_2', mode='max', save_best_only=True, verbose=1)
        reduce_lr = ReduceLROnPlateau(factor=0.5, monitor='val_my_iou_metric_2', mode='max', patience=6, verbose=1,
                                      min_lr=0.0001)

        # early_stopping was previously constructed but never handed to fit(), so it
        # had no effect; it is now part of the callback list.
        history = model.fit(x_train, y_train,
                            validation_data=(x_valid, y_valid),  # Keras documents a tuple here
                            epochs=epochs,
                            batch_size=batch_size,
                            callbacks=[reduce_lr, model_checkpoint, early_stopping], shuffle=True,
                            verbose=1)
    # No `break` here: like stage 1, every configured parser is processed.


In [None]:
# Score the model and optimise the decision threshold by the best IoU.
# src: https://www.kaggle.com/aglotero/another-iou-metric
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Average precision of one binary mask pair over IoU thresholds 0.5 ... 0.95.

    Parameters
    ----------
    y_true_in, y_pred_in : array-like masks of equal size. Values are binned with
        edges [0, 0.5, 1]: [0, 0.5) is background, [0.5, 1] is foreground.
    print_table : when True, print one row per IoU threshold plus the average.

    Returns
    -------
    Mean over the thresholds of tp / (tp + fp + fn). Two empty masks score 1.0.
    """
    # Fixed bin edges: with automatic binning an all-zero mask would get the
    # edges [-0.5, 0, 0.5] and end up in the wrong bin.
    edges = [0, 0.5, 1]

    # 2x2 contingency table (rows: true class, columns: predicted class).
    overlap = np.histogram2d(y_true_in.flatten(), y_pred_in.flatten(), bins=(edges, edges))[0]

    # Per-class pixel counts, shaped to broadcast against the table.
    true_area = np.expand_dims(np.histogram(y_true_in, bins=edges)[0], -1)
    pred_area = np.expand_dims(np.histogram(y_pred_in, bins=edges)[0], 0)
    combined = true_area + pred_area - overlap

    # Drop the background row/column; replace zeros by a tiny value so that an
    # empty/empty pair yields IoU 1 and the division never hits 0/0.
    overlap = overlap[1:, 1:]
    overlap[overlap == 0] = 1e-9
    combined = combined[1:, 1:]
    combined[combined == 0] = 1e-9

    iou = overlap / combined

    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")

    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        hits = iou > t
        tp = np.sum(np.sum(hits, axis=1) == 1)  # true objects matched exactly once
        fp = np.sum(np.sum(hits, axis=0) == 0)  # predicted objects without a match (extra)
        fn = np.sum(np.sum(hits, axis=1) == 0)  # true objects without a match (missed)
        total = tp + fp + fn
        p = tp / total if total > 0 else 0
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        prec.append(p)

    average = np.mean(prec)
    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(average))
    return average

def iou_metric_batch(y_true_in, y_pred_in):
    """Mean of `iou_metric` over the first axis of two equally long batches.

    `y_true_in` must expose `.shape`; both arguments are indexed sample by sample.
    """
    per_sample = [
        iou_metric(y_true_in[i], y_pred_in[i])
        for i in range(y_true_in.shape[0])
    ]
    return np.mean(per_sample)

def predict_result(model, x_test, img_size_target):  # predict both original and reflected x
    """Flip test-time augmentation: average predictions for the images and their mirrors.

    Parameters
    ----------
    model : object with a `predict(batch)` method.
    x_test : batch of images; each item is mirrored with `np.fliplr`.
    img_size_target : side length used to reshape predictions to
        (-1, img_size_target, img_size_target).

    Returns
    -------
    Array of shape (-1, img_size_target, img_size_target) holding the mean of the
    direct prediction and the flipped-back prediction of the mirrored input.
    """
    out_shape = (-1, img_size_target, img_size_target)
    mirrored_inputs = np.array(list(map(np.fliplr, x_test)))

    averaged = model.predict(x_test).reshape(out_shape)
    mirrored_preds = model.predict(mirrored_inputs).reshape(out_shape)

    # Undo the mirroring on the second set of predictions before accumulating.
    averaged += np.array(list(map(np.fliplr, mirrored_preds)))
    return averaged / 2.

In [None]:
# Rebuild the held-out split of the fold whose checkpoint is evaluated. Relying on
# the `x_valid` / `y_valid` left over from the training loops would use the LAST
# fold's split, which the fold-1 model was trained on, and so tune the threshold
# on training data.
eval_key = 'padding'
eval_fold_idx = 0  # zero-based; the checkpoint files are numbered from 1
eval_train_df = parsers[eval_key].train_df
eval_valid_df = eval_train_df[eval_train_df['fold'] == eval_fold_idx]
x_valid = convert_3d(eval_valid_df['X_train'])
y_valid = convert_1d(eval_valid_df['y_train'])

model = load_model("./ResUNet34-Lovasz-Fold{}-{}.model".format(eval_fold_idx + 1, eval_key.capitalize()),
                   custom_objects={'my_iou_metric_2': my_iou_metric_2,
                                   'lovasz_loss': lovasz_loss})

# Flip-averaged predictions on the held-out fold.
preds_valid = predict_result(model, x_valid, image_size[0])

## Scoring for last model, choose threshold by validation data
thresholds_ori = np.linspace(0.3, 0.7, 31)
# Inverse sigmoid (logit): the model's sigmoid activation was removed, so the
# probability thresholds are mapped into the space of the raw outputs.
thresholds = np.log(thresholds_ori / (1 - thresholds_ori))

In [None]:
# Score every candidate threshold on the validation predictions.
ious = np.array([
    iou_metric_batch(y_valid, preds_valid > cutoff)
    for cutoff in tqdm(thresholds)
])
print(ious)

# Instead of the default threshold of 0, keep the candidate with the highest
# validation IoU.
threshold_best_index = ious.argmax()
iou_best = ious[threshold_best_index]
threshold_best = thresholds[threshold_best_index]
threshold_best

In [None]:
# Predict the test set with the same flip averaging that produced `preds_valid`,
# because `threshold_best` was tuned on flip-averaged outputs. predict_result drops
# the channel axis, so it is added back to keep the (N, H, W, 1) layout that
# model.predict returned here before.
# NOTE(review): predict_result builds a mirrored copy of X_test, which roughly
# doubles the input memory for this step.
y_pred_test = predict_result(model, parsers['padding'].X_test, image_size[0])[..., np.newaxis]

# Run-length encode the predictions using the threshold selected on validation data.
y_pred_test_rle = parsers['padding'].predictions_rle_encode(
    y_pred_test, confidence_threshold_best=threshold_best)

In [None]:
# Build the submission table from the run-length-encoded test predictions.
submission = parsers['padding'].generate_submission(y_pred_test_rle)

# Save the submission; the number formatted into the file name labels the run.
submission_path = 'N123ew-submission_{}.csv'.format(1)
submission.to_csv(submission_path)

# Show the table as the cell output.
submission