# <center> **Airbus Kaggle Challenge**</center>

## Part I : Classification

#### Imports:

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread
from preprocess.pre_process import multi_rle_encode, rle_encode, rle_decode, masks_as_image, masks_as_color, balancing_train
from preprocess.pre_process import make_image_gen, create_aug_gen
from sklearn.model_selection import train_test_split
from keras import backend as K
from keras.utils import multi_gpu_model

import keras 
import keras.backend as K
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from models.resnet50_classif import get_resnet50_classif
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard, Callback

Using TensorFlow backend.


#### Models Parameters:

In [20]:
BATCH_SIZE = 16
EDGE_CROP = 16
GAUSSIAN_NOISE = 0.1
UPSAMPLE_MODE = 'SIMPLE'
# downsampling inside the network
NET_SCALING = None
# number of validation images to use
VALID_IMG_COUNT = 900
# maximum number of steps_per_epoch in training
MAX_TRAIN_STEPS = 1000000
MAX_TRAIN_EPOCHS = 5

In [3]:
# Use of tensorflow:
import tensorflow as tf
with tf.Session() as sess:
    devices = sess.list_devices()
for device in devices:
    print(device)
    
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, 637212447713652715)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 5119052921696102644)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 1835051423032854898)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:0, GPU, 15560753152, 11783431029140668752)
Default GPU Device: /device:GPU:0


#### Paths to folders:

In [4]:
ship_dir = '../../data/airbus_ship_detection/'
train_image_dir = os.path.join(ship_dir, 'train')# Images for training
test_image_dir = os.path.join(ship_dir, 'test')# Images for testing
model_weights_dir = "weights_models/"

label_dir = os.path.join(ship_dir, 'train_ship_segmentations_v2_clean.csv')# Images for testing
dataframe = pd.read_csv(label_dir, engine="python") # Markers for ships

In [5]:
data_link_balanced = balancing_train(dataframe, rate_of_has_ship=0.5, ship_dir_train=train_image_dir)
data_link_unbalanced = balancing_train(dataframe, rate_of_has_ship=0.0, ship_dir_train=train_image_dir)

print("data_link_balanced rate:{0}, lenght: {1}".format(data_link_balanced.has_ship.sum()/len(data_link_balanced)
                                                    ,len(data_link_balanced)))
print("data_link_unbalanced rate:{0}, lenght: {1}".format(round(data_link_unbalanced.has_ship.sum()/len(data_link_unbalanced),2)
                                                    ,len(data_link_unbalanced)))

data_link_balanced rate:0.5, lenght: 80432
data_link_unbalanced rate:0.21, lenght: 187098


In [6]:
training_set, validation_set = train_test_split(data_link_balanced, test_size=0.05)
training_set_unbalanced, validation_set_unbalanced = train_test_split(data_link_unbalanced, test_size=0.05)

print("length of training set", len(training_set))
print("length of validation set", len(validation_set))
print("length of unbalanced training set ", len(training_set_unbalanced))
print("length of unbalanced validation set", len(validation_set_unbalanced))

length of training set 76410
length of validation set 4022
length of unbalanced training set  177743
length of unbalanced validation set 9355


In [7]:
train_gen = make_image_gen(training_set, train_image_dir, BATCH_SIZE, (1,1))
train_x, train_y = next(train_gen)
print('x', train_x.shape, train_x.min(), train_x.max())
print('y', train_y.shape, train_x.min(), train_x.max())

x (16, 768, 768, 3) 0.0 1.0
y (16,) 0.0 1.0


### **Classification at low resolution:**

#### First pass of the classification with low resolution images dataset and Imagenet weights
- For the low resolution we use the parameter scaling = 3.
- We don't start from scratch and we use pre-trained weights from ImageNet

In [8]:
def get_list_files(dataframe, ship_dir_train):
    training_set, validation_set = train_test_split(dataframe, test_size=0.05)
    
    training_set_balanced = balancing_train(training_set, rate_of_has_ship=0.5, ship_dir_train=ship_dir_train)
    validation_set_balanced = balancing_train(validation_set, rate_of_has_ship=0.5, ship_dir_train=ship_dir_train)
    training_set_unbalanced = balancing_train(training_set, rate_of_has_ship=0.0, ship_dir_train=ship_dir_train)
    validation_set_unbalanced = balancing_train(validation_set, rate_of_has_ship=0.0, ship_dir_train=ship_dir_train)

    dict_dataset = {'training_set_balanced': training_set_balanced,
                   'validation_set_balanced': validation_set_balanced,
                   'training_set_unbalanced': training_set_unbalanced,
                   'validation_set_unbalanced': validation_set_unbalanced
                   }
    
    return dict_dataset

In [9]:
def callbacks_list(weight_path, scaling):
    checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=0, save_best_only=True, mode='min', save_weights_only=False)

    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                       patience=2, verbose=1, mode='min',
                                       min_delta=0.001, cooldown=1, min_lr=1e-7)

    early = EarlyStopping(monitor="val_loss", mode="min", verbose=2, patience=2)
    tensorboard =  TensorBoard(log_dir="../logs/log{0}".format(str(scaling)), update_freq='batch')
    return [checkpoint, reduceLROnPlat, early]

In [10]:
def get_image_generator(training_set, validation_set, scaling, train_image_dir, batch_size):
    step_count_train = min(MAX_TRAIN_STEPS, training_set.shape[0]//batch_size)
    training_gen = make_image_gen(training_set, train_image_dir, batch_size, (scaling, scaling))
    
    step_count_valid = validation_set.shape[0]//batch_size
    validation_gen = make_image_gen(validation_set, train_image_dir, batch_size, (scaling, scaling))
    
    return step_count_train, training_gen, step_count_valid, validation_gen

In [22]:
def save_model(model, nbr_gpu=1, scaling=1, print_details=False):
    # Sauvegarde des poids
    if nbr_gpu > 1:
        # save weights multi-gpu
        weights_path_clf_scal = model_weights_dir + "model_clf_scal{0}_{1}_weights.best.h5".format(str(scaling), "p_gpu")
        if print_details:
            print("Save weights to :", weights_path_clf_scal)
        model.save_weights(weights_path_clf_scal)  # save multi-gpu model weights
            
        # save weights single-gpu
        s_gpu_model = model.layers[-2]   #get single GPU model weights
        if print_details:
            print("Save weights to :", weights_path_clf_scal)
        weights_path_clf_scal = model_weights_dir + "model_clf_scal{0}_{1}_weights.best.h5".format(str(scaling), "s_gpu")
        s_gpu_model.save_weights(weights_path_clf_scal)  # save single-gpu model weights
    else:
        weights_path_clf_scal = model_weights_dir + "model_clf_scal{0}_{1}_weights.best.h5".format(str(scaling), "s_gpu")
        if print_details:
            print("Save weights to :", weights_path_clf_scal)
        model.save_weights(weights_path_clf_scal)  # save single-gpu model weights

In [28]:
def classification_training(dict_dataset, train_image_dir, weights=None, scaling=1, batch_size=BATCH_SIZE,
                            nbr_gpu=0, print_details=False, epochs=MAX_TRAIN_EPOCHS, nbr_cpu=1):
    train_gen = make_image_gen(dict_dataset['training_set_balanced'], train_image_dir, batch_size, (scaling, scaling))
    train_x, train_y = next(train_gen)
    
    if print_details:
        print('x', train_x.shape, train_x.min(), train_x.max())
        print('y', train_y.shape, train_x.min(), train_x.max())

    # Definition of the model with the input shape
    clf_model = get_resnet50_classif(input_shape=train_x.shape[1:])
    
    # Load weights
    if weights is not None:
        if print_details:
            print("Load weights...")
        clf_model.load_weights(weights)

    weight_path = model_weights_dir + "model_clf_checkpoint_scal{0}_weights.hdf5".format(str(scaling))
    callbacks = callbacks_list(weight_path, scaling)
   
    # Model appliqué sur un jeux équilibré
    if print_details:
        print("Get image generator for balance dataset...")
    step_count_train, training_gen, step_count_valid, validation_gen = get_image_generator(dict_dataset['training_set_balanced'],
                                                                                           dict_dataset['validation_set_balanced'],
                                                                                           scaling, 
                                                                                           train_image_dir,
                                                                                           batch_size)
    if print_details:
        print("step_count_train =", step_count_train)
        print("step_count_valid =", step_count_valid)
    
    if nbr_gpu > 1:
        model = multi_gpu_model(clf_model, gpus=nbr_gpu)
    else:
        model = clf_model
    
    if print_details:
        clf_model.summary()
        
    model.compile(optimizer=Adam(), loss=binary_crossentropy, 
                  metrics=["accuracy"])
    
    if print_details:
        print("Start model...")
    loss_history1 = [model.fit_generator(training_gen,
                                     steps_per_epoch=step_count_train,
                                     epochs=epochs,
                                     callbacks=callbacks,
                                     validation_data=validation_gen,
                                     validation_steps=step_count_valid,
                                     use_multiprocessing=nbr_cpu>1,
                                     workers=nbr_cpu)]
    
    # save weights
    save_model(model, nbr_gpu=nbr_gpu, scaling=scaling, print_details=print_details)
    
    print("Model Evaluation Balanced Data: ", model.evaluate_generator(validation_gen,
                                              step_count_valid,
                                              workers=1,
                                              verbose=1))
    
    
    
    # Model appliqué sur un jeux non équilibré
    if print_details:
        print("Get image generator for balance dataset...")
    step_count_train, training_gen, step_count_valid, validation_gen = get_image_generator(dict_dataset['training_set_unbalanced'],
                                                                                           dict_dataset['validation_set_unbalanced'],
                                                                                           scaling, 
                                                                                           train_image_dir,
                                                                                           batch_size)
    
    if print_details:
        print("step_count_train =", step_count_train)
        print("step_count_valid =", step_count_valid)
    
    if print_details:
        print("Start model...")   
    loss_history2 = [model.fit_generator(training_gen,
                                 steps_per_epoch=step_count_train,
                                 epochs=epochs//2,
                                 callbacks=callbacks,
                                 validation_data=validation_gen,
                                 validation_steps=step_count_valid,
                                 use_multiprocessing=nbr_cpu!=1,
                                 workers=nbr_cpu)]
    
    # save weights
    save_model(model, nbr_gpu=nbr_gpu, scaling=scaling, print_details=print_details)
    
    print("Model Evaluation Unbalanced Data: ", model.evaluate_generator(validation_gen,
                                          step_count_valid,
                                          workers=1,
                                          verbose=1))

    
            


In [13]:
dict_dataset = get_list_files(dataframe, train_image_dir)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df['ships'] = df['EncodedPixels'].map(lambda c_row: 1 if isinstance(c_row, str) else 0)


In [13]:
classification_training(dict_dataset, train_image_dir, weights=None, 
                        scaling=3, batch_size=256, nbr_gpu=4, print_details=True, 
                        epochs=2, nbr_cpu=8)

x (256, 256, 256, 3) 0.0 1.0
y (256,) 0.0 1.0
Instructions for updating:
Colocations handled automatically by placer.




Get image generator for balance dataset...
step_count_train = 304
step_count_valid = 28
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input_Image (InputLayer)        (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 256, 256, 3)  0           Input_Image[0][0]                
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 256, 256, 3)  0           Input_Image[0][0]                
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 256, 256, 3)  0           Input_Image[0][0]                
_____________________



Epoch 2/2
Model Evaluation Balanced Data:  [0.5111086070537567, 0.8715122767857143]
Save weights to : weights_models/model_clf_scal3_p_gpu_weights.best.h5
Save weights to : weights_models/model_clf_scal3_s_gpu_weights.best.h5
Get image generator for balance dataset...
step_count_train = 697
step_count_valid = 42
Start model...
Epoch 1/1
Model Evaluation Unbalanced Data:  [0.3654882641775267, 0.8543526785714286]
Save weights to : weights_models/model_clf_scal3_p_gpu_weights.best.h5
Save weights to : weights_models/model_clf_scal3_s_gpu_weights.best.h5


TypeError: Mismatch between array dtype ('object') and format specifier ('%.18e')

In [14]:
weights_path3 = model_weights_dir + "model_clf_scal{0}_{1}_weights.best.h5".format(str(3), "s_gpu")
classification_training(dict_dataset, train_image_dir, weights=weights_path3, 
                        scaling=2, batch_size=64, nbr_gpu=2, print_details=True, 
                        epochs=MAX_TRAIN_EPOCHS, nbr_cpu=8)

x (64, 384, 384, 3) 0.0 1.0
y (64,) 0.0 1.0
Instructions for updating:
Colocations handled automatically by placer.




Get image generator for balance dataset...
step_count_train = 1216
step_count_valid = 111
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input_Image (InputLayer)        (None, 384, 384, 3)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 384, 384, 3)  0           Input_Image[0][0]                
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 384, 384, 3)  0           Input_Image[0][0]                
__________________________________________________________________________________________________
model_1 (Model)                 (None, 1)            23589761    lambda_1[0][0]                   
                   



Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model Evaluation Balanced Data:  [0.3701113920893755, 0.8735923423423423]
Save weights to : weights_models/model_clf_scal2_p_gpu_weights.best.h5
Save weights to : weights_models/model_clf_scal2_s_gpu_weights.best.h5
Get image generator for balance dataset...
step_count_train = 2787
step_count_valid = 171
Start model...
Epoch 1/2
Epoch 2/2
Model Evaluation Unbalanced Data:  [0.5749671318900516, 0.8343384502923976]
Save weights to : weights_models/model_clf_scal2_p_gpu_weights.best.h5
Save weights to : weights_models/model_clf_scal2_s_gpu_weights.best.h5


In [21]:
weights_path2 = model_weights_dir + "model_clf_scal{0}_{1}_weights.best.h5".format(str(2), "s_gpu")
classification_training(dict_dataset, train_image_dir, weights=weights_path2, 
                        scaling=1, batch_size=8, nbr_gpu=1, print_details=True, 
                        epochs=2, nbr_cpu=1)

x (8, 768, 768, 3) 0.0 1.0
y (8,) 0.0 1.0
Load weights...
Get image generator for balance dataset...
step_count_train = 9730
step_count_valid = 909
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input_Image (InputLayer)     (None, 768, 768, 3)       0         
_________________________________________________________________
resnet50 (Model)             multiple                  23587712  
_________________________________________________________________
avg_pool (GlobalAveragePooli (None, 2048)              0         
_________________________________________________________________
fc1 (Dense)                  (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 23,536,641
Non-trainable params: 53,120
_________________________________________________________________
Start model...
Epoch 1/2
Epoch 2/2


NameError: name 'scaling' is not defined

In [29]:
def temp_classification_training(dict_dataset, train_image_dir, weights=None, scaling=1, batch_size=BATCH_SIZE,
                            nbr_gpu=0, print_details=False, epochs=MAX_TRAIN_EPOCHS, nbr_cpu=1):
    train_gen = make_image_gen(dict_dataset['training_set_balanced'], train_image_dir, batch_size, (scaling, scaling))
    train_x, train_y = next(train_gen)
    
    if print_details:
        print('x', train_x.shape, train_x.min(), train_x.max())
        print('y', train_y.shape, train_x.min(), train_x.max())

    # Definition of the model with the input shape
    clf_model = get_resnet50_classif(input_shape=train_x.shape[1:])
    
    # Load weights
    if weights is not None:
        if print_details:
            print("Load weights...")
        clf_model.load_weights(weights)

    weight_path = model_weights_dir + "model_clf_checkpoint_scal{0}_weights.hdf5".format(str(scaling))
    callbacks = callbacks_list(weight_path, scaling)
   
    # Model appliqué sur un jeux équilibré
    if print_details:
        print("Get image generator for balance dataset...")
    step_count_train, training_gen, step_count_valid, validation_gen = get_image_generator(dict_dataset['training_set_balanced'],
                                                                                           dict_dataset['validation_set_balanced'],
                                                                                           scaling, 
                                                                                           train_image_dir,
                                                                                           batch_size)
    if print_details:
        print("step_count_train =", step_count_train)
        print("step_count_valid =", step_count_valid)
    
    if nbr_gpu > 1:
        model = multi_gpu_model(clf_model, gpus=nbr_gpu)
    else:
        model = clf_model
    
    if print_details:
        clf_model.summary()
        
    model.compile(optimizer=Adam(), loss=binary_crossentropy, 
                  metrics=["accuracy"])
    
    if print_details:
        print("Start model...")
#     loss_history1 = [model.fit_generator(training_gen,
#                                      steps_per_epoch=step_count_train,
#                                      epochs=epochs,
#                                      callbacks=callbacks,
#                                      validation_data=validation_gen,
#                                      validation_steps=step_count_valid,
#                                      use_multiprocessing=nbr_cpu>1,
#                                      workers=nbr_cpu)]
    
    
    # save weights
    save_model(model, nbr_gpu=nbr_gpu, scaling=scaling, print_details=print_details)
    
    print("Model Evaluation Balanced Data: ", model.evaluate_generator(validation_gen,
                                              step_count_valid,
                                              workers=1,
                                              verbose=1))
    
    
    
    # Model appliqué sur un jeux non équilibré
    if print_details:
        print("Get image generator for balance dataset...")
    step_count_train, training_gen, step_count_valid, validation_gen = get_image_generator(dict_dataset['training_set_unbalanced'],
                                                                                           dict_dataset['validation_set_unbalanced'],
                                                                                           scaling, 
                                                                                           train_image_dir,
                                                                                           batch_size)
    
    if print_details:
        print("step_count_train =", step_count_train)
        print("step_count_valid =", step_count_valid)
    
    if print_details:
        print("Start model...")   
    loss_history2 = [model.fit_generator(training_gen,
                                 steps_per_epoch=step_count_train,
                                 epochs=epochs//2,
                                 callbacks=callbacks,
                                 validation_data=validation_gen,
                                 validation_steps=step_count_valid,
                                 use_multiprocessing=nbr_cpu!=1,
                                 workers=nbr_cpu)]
    
    # save weights
    save_model(model, nbr_gpu=nbr_gpu, scaling=scaling, print_details=print_details)
    
    print("Model Evaluation Unbalanced Data: ", model.evaluate_generator(validation_gen,
                                          step_count_valid,
                                          workers=1,
                                          verbose=1))


In [None]:
weights_path2 = model_weights_dir + "model_clf_checkpoint_scal{0}_weights.hdf5".format(str(1))
temp_classification_training(dict_dataset, train_image_dir, weights=weights_path2, 
                        scaling=1, batch_size=8, nbr_gpu=1, print_details=True, 
                        epochs=2, nbr_cpu=1)

x (8, 768, 768, 3) 0.0 1.0
y (8,) 0.0 1.0
Load weights...
Get image generator for balance dataset...
step_count_train = 9730
step_count_valid = 909
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input_Image (InputLayer)     (None, 768, 768, 3)       0         
_________________________________________________________________
resnet50 (Model)             multiple                  23587712  
_________________________________________________________________
avg_pool (GlobalAveragePooli (None, 2048)              0         
_________________________________________________________________
fc1 (Dense)                  (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 23,536,641
Non-trainable params: 53,120
_________________________________________________________________
Start model...
Save weights to : weights_models/model_clf_scal1_s_gpu_weights.best.h5