# Train notebook - Airbus kaggle challenge

# Ship Detection - Project AIRBUS 2019
## Model Parameters
We might want to adjust these later (or do some hyperparameter optimizations)

In [1]:
# ---- batching -----------------------------------------------------------
# Number of images per generator batch.
BATCH_SIZE = 64

# ---- pre-processing -----------------------------------------------------
# Down-sampling applied in preprocessing; one constant per training stage
# (factor 3 for the first stage, factor 2 for the second).
IMG_SCALING_3 = (3, 3)
IMG_SCALING_2 = (2, 2)

# ---- network options ----------------------------------------------------
# Down-sampling inside the network (None = disabled).
NET_SCALING = None
# NOTE(review): the three options below are not referenced by the cells
# visible in this notebook; they may be read elsewhere.
UPSAMPLE_MODE = 'SIMPLE'
GAUSSIAN_NOISE = 0.1
EDGE_CROP = 16

# ---- training budget ----------------------------------------------------
# Upper bound on steps_per_epoch during training.
MAX_TRAIN_STEPS = 10000
MAX_TRAIN_EPOCHS = 5
# Number of validation images to use.
VALID_IMG_COUNT = 900

In [9]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread
from preprocess.pre_process import multi_rle_encode, rle_encode, rle_decode, masks_as_image, masks_as_color, balancing_train
from preprocess.pre_process import make_image_gen, create_aug_gen
from sklearn.model_selection import train_test_split
from keras import backend as K
from keras.utils import multi_gpu_model


import keras.backend as K
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from models.resnet50_classif import get_resnet50_classif
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard, Callback


In [3]:
import tensorflow as tf

# Ask a throw-away session for every device the TensorFlow runtime can see.
with tf.Session() as sess:
    devices = sess.list_devices()

for device in devices:
    print(device)

# Report the default GPU, or remind the user that a GPU build is required.
if not tf.test.gpu_device_name():
    print("Please install GPU version of TF")
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, 5455472318980934254)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 670781362221969246)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_GPU:1, XLA_GPU, 17179869184, 4123523586325071699)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 9452674138670884858)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:0, GPU, 15560753152, 13889844610170148661)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:GPU:1, GPU, 15560753152, 11514604324811355203)
Default GPU Device: /device:GPU:0


In [4]:
# Root folder of the Airbus ship-detection data, relative to this notebook.
ship_dir = '../../data/airbus_ship_detection/'
train_image_dir = os.path.join(ship_dir, 'train')# Images for training
test_image_dir = os.path.join(ship_dir, 'test')# Images for testing
# NOTE: despite its `_dir` suffix, `label_dir` is the path of the labels CSV file, not a folder.
label_dir = os.path.join(ship_dir, 'train_ship_segmentations_v2.csv')# Ship segmentation labels (CSV)
masks = pd.read_csv(label_dir, engine="python") # Markers for ships

In [5]:
# Build two image tables from the label CSV with the project helper `balancing_train`.
# The rate printed by the next cell is 0.5 for the first table and about 0.21 for the
# second, so rate_of_has_ship=0.0 presumably means "no re-balancing" -- TODO confirm
# against preprocess.pre_process.balancing_train.
data_link_balanced = balancing_train(masks, rate_of_has_ship=0.5, ship_dir_train=train_image_dir)
data_link_unbalanced = balancing_train(masks, rate_of_has_ship=0.0, ship_dir_train=train_image_dir)

In [6]:
# Share of rows flagged `has_ship`, and the row count, for each table.
balanced_ship_rate = data_link_balanced.has_ship.sum() / len(data_link_balanced)
print("data_link_balanced rate:{0}, lenght: {1}".format(balanced_ship_rate, len(data_link_balanced)))

# Only the unbalanced rate is rounded (to two decimals) before printing.
unbalanced_ship_rate = round(data_link_unbalanced.has_ship.sum() / len(data_link_unbalanced), 2)
print("data_link_unbalanced rate:{0}, lenght: {1}".format(unbalanced_ship_rate, len(data_link_unbalanced)))

data_link_balanced rate:0.5, lenght: 80432
data_link_unbalanced rate:0.21, lenght: 187099


In [7]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same
# train/validation partition; without it every run trains and scores on a
# different split and the reported metrics are not comparable between runs.
SPLIT_SEED = 42

# 95% / 5% split of the balanced table.
training_set, validation_set = train_test_split(
    data_link_balanced, test_size=0.05, random_state=SPLIT_SEED)
print("length of training set", len(training_set))
print("length of validation set", len(validation_set))

# 95% / 5% split of the unbalanced table.
# NOTE(review): this split is drawn independently of the balanced one. If both
# tables contain the same images (presumably they do, since both are built from
# `masks`), a validation image of one split can be a training image of the
# other -- confirm whether this leaks into the reported validation scores.
training_set_unbalanced, validation_set_unbalanced = train_test_split(
    data_link_unbalanced, test_size=0.05, random_state=SPLIT_SEED)
print("length of unbalanced training set ", len(training_set_unbalanced))
print("length of unbalanced validation set", len(validation_set_unbalanced))

length of training set 76410
length of validation set 4022
length of unbalanced training set  177744
length of unbalanced validation set 9355


In [8]:
# Pull one batch from the scale-3 generator to sanity-check shapes and value ranges.
train_gen = make_image_gen(training_set, train_image_dir, BATCH_SIZE, IMG_SCALING_3)
train_x, train_y = next(train_gen)
print('x', train_x.shape, train_x.min(), train_x.max())
# Report the range of the labels themselves (the original printed train_x's range here).
print('y', train_y.shape, train_y.min(), train_y.max())

x (64, 256, 256, 3) 0.0 1.0
y (64,) 0.0 1.0


### First training step (image scaling 3)

In [None]:
# Build the classifier with the project helper `get_resnet50_classif`.
# The input shape is taken from the sample batch, dropping the batch axis.
model = get_resnet50_classif(input_shape=train_x.shape[1:])
model.summary()

In [12]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard, Callback

# Checkpoint file for this stage; the name embeds the first scaling factor.
weight_path = "weights_models/{0}_weights.hdf5".format('model_1_scal' +str(IMG_SCALING_3[0]))

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    weight_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=0)

# Divide the learning rate by 10 when val_loss stalls for 2 epochs.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    min_delta=0.001,
    cooldown=1,
    min_lr=1e-7,
    verbose=1)

# Stop once val_loss has not improved for 2 epochs
# (probably needs to be more patient, but kaggle time is limited).
early = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=2,
    verbose=2)

# Built for per-batch logging but deliberately left out of callbacks_list below.
tensorboard =  TensorBoard(log_dir="../logs/log1", update_freq='batch')

callbacks_list = [checkpoint, reduceLROnPlat, early]

In [30]:
# Batch generators over the balanced split at scale 3.
# Augmentation through create_aug_gen is currently not applied.
training_gen = make_image_gen(training_set, train_image_dir, BATCH_SIZE, IMG_SCALING_3)
validation_gen = make_image_gen(validation_set, train_image_dir, BATCH_SIZE, IMG_SCALING_3)

# Whole batches per pass; the training count is additionally capped at MAX_TRAIN_STEPS.
step_count_train = min(MAX_TRAIN_STEPS, training_set.shape[0] // BATCH_SIZE)
step_count_valid = validation_set.shape[0] // BATCH_SIZE

print("step_count_train =", step_count_train)
print("step_count_valid =", step_count_valid)

step_count_train = 1193
step_count_valid = 62


In [None]:
# Wrap the classifier for data-parallel training on 2 GPUs.
parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.compile(
    loss=binary_crossentropy,
    optimizer=Adam(),
    metrics=["accuracy"])
#run_opts = tf.RunOptions(report_tensor_allocations_upon_oom = True)

# NOTE(review): callbacks_list holds a ModelCheckpoint, which is attached to
# parallel_model here rather than to the template `model`; confirm that the
# checkpoint file it writes can be loaded back into `model`.
# The History object returned by fit_generator is kept inside a one-element list.
loss_history = [
    parallel_model.fit_generator(
        training_gen,
        steps_per_epoch=step_count_train,
        epochs=MAX_TRAIN_EPOCHS,
        validation_data=validation_gen,
        validation_steps=step_count_valid,
        callbacks=callbacks_list)
]

Train on the whole (unbalanced) data set

In [31]:
# Batch generators over the unbalanced split at scale 3.
# Augmentation through create_aug_gen is currently not applied.
training_gen_unbalanced = make_image_gen(training_set_unbalanced, train_image_dir, BATCH_SIZE, IMG_SCALING_3)
validation_gen_unbalanced = make_image_gen(validation_set_unbalanced, train_image_dir, BATCH_SIZE, IMG_SCALING_3)

# Whole batches per pass; the training count is additionally capped at MAX_TRAIN_STEPS.
step_count_train_unbalanced = min(MAX_TRAIN_STEPS, training_set_unbalanced.shape[0] // BATCH_SIZE)
step_count_valid_unbalanced = validation_set_unbalanced.shape[0] // BATCH_SIZE

print("step_count_train_unbalanced =", step_count_train_unbalanced)
print("step_count_valid_unbalanced =", step_count_valid_unbalanced)

step_count_train_unbalanced = 2777
step_count_valid_unbalanced = 146


In [None]:
# One further epoch on the unbalanced split, continuing from the current weights
# of parallel_model. The same callbacks_list objects are reused, and the
# previous content of `loss_history` is overwritten.
loss_history = [parallel_model.fit_generator(training_gen_unbalanced,
                                 steps_per_epoch=step_count_train_unbalanced,
                                 epochs=1,
                                 callbacks=callbacks_list,
                                 validation_data=validation_gen_unbalanced,
                                 validation_steps=step_count_valid_unbalanced)]

In [None]:
# Persist the template model (not the multi-GPU wrapper) with `save`.
# NOTE: the file name says "weights", but a later cell reads this path with load_model.
model.save("weights_models/{}_weights.best.hdf5".format('clf_Scal3'))

In [32]:
# Reload the file written by the previous cell into the template model, then
# score through parallel_model on both validation generators.
# NOTE(review): this assumes parallel_model shares its weights with `model` -- confirm.
# NOTE(review): workers=-1 is not a documented "use all cores" value for Keras
# generators; verify how the installed Keras version treats a non-positive value.
model.load_weights("weights_models/{}_weights.best.hdf5".format('clf_Scal3'))
print("model evaluation unblanced data: ", parallel_model.evaluate_generator(validation_gen_unbalanced,
                                              step_count_valid_unbalanced,
                                              workers=-1,
                                              verbose=1))
print("model evaluation balanced data: ", parallel_model.evaluate_generator(validation_gen,
                                              step_count_valid,
                                              workers=-1,
                                              verbose=1))

model evaluation unblanced data:  [0.09203402280858526, 0.9665025684931506]
model evaluation balanced data:  [0.12902688703710033, 0.9533770161290323]


In [13]:
# Make weights available for transfert learning
from keras.models import load_model
model_clf_scal3 = load_model("weights_models/{}_weights.best.hdf5".format('clf_Scal3'))
model_clf_scal3_tf_learn = model_clf_scal3.get_layer('resnet50')
model_clf_scal3_tf_learn.summary()

Instructions for updating:
Colocations handled automatically by placer.


KeyboardInterrupt: 

In [17]:
# Destination file for the extracted 'resnet50' sub-model of the scale-3 classifier.
weights_path_clf_scal3 = "weights_models/{}_weights.best.hdf5".format('clf_Scal3_tf_learn')

In [74]:
# Write the extracted sub-model to weights_path_clf_scal3.
model_clf_scal3_tf_learn.save(weights_path_clf_scal3)

### Second training step (image scaling 2, initialised from the scale-3 backbone)

In [14]:
# Create callbacks
weight_path = "weights_models/{0}_weights.hdf5".format('model_1_scal2')

checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=0, save_best_only=True, mode='min', save_weights_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                   patience=2, verbose=1, mode='min',
                                   min_delta=0.001, cooldown=1, min_lr=1e-7)

early = EarlyStopping(monitor="val_loss", mode="min", verbose=2, patience=2) 

tensorboard =  TensorBoard(log_dir="../logs/log1", update_freq='batch')

callbacks_list = [checkpoint, reduceLROnPlat, early]

In [15]:
#Get shape of train
train_gen = make_image_gen(training_set, train_image_dir, BATCH_SIZE, IMG_SCALING_2)
shape_train_data = next(train_gen)[0].shape[1:]

In [18]:
# Rebuild the classifier for the scale-2 input shape and pass it the path of the
# saved scale-3 'resnet50' sub-model as `weights`.
# This rebinds `model`, replacing the scale-3 classifier.
model = get_resnet50_classif(input_shape=shape_train_data, weights=weights_path_clf_scal3)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 384, 384, 3)       0         
_________________________________________________________________
resnet50 (Model)             multiple                  23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 294912)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               37748864  
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 61,336,705
Trainable params: 61,283,585
Non-trainable params: 53,120
_________________________________________________________________


In [22]:
# Make sure that the top layer doesn't have the same name as in the scale 3 classifier.
# Wrap the scale-2 classifier for 2-GPU training; this rebinds `parallel_model`.
parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.compile(optimizer=Adam(), loss=binary_crossentropy, 
              metrics=["accuracy"])

In [23]:
# Batch generators over the balanced split at scale 2.
# Augmentation through create_aug_gen is currently not applied.
training_gen = make_image_gen(training_set, train_image_dir, BATCH_SIZE, IMG_SCALING_2)
validation_gen = make_image_gen(validation_set, train_image_dir, BATCH_SIZE, IMG_SCALING_2)

# Whole batches per pass; the training count is additionally capped at MAX_TRAIN_STEPS.
step_count_train = min(MAX_TRAIN_STEPS, training_set.shape[0] // BATCH_SIZE)
step_count_valid = validation_set.shape[0] // BATCH_SIZE

print("step_count_train =", step_count_train)
print("step_count_valid =", step_count_valid)

step_count_train = 1193
step_count_valid = 62


In [None]:
# Train the scale-2 classifier on the balanced split. The History object
# returned by fit_generator is kept inside a one-element list.
# NOTE(review): workers=-1 is not a documented "use all cores" value for Keras
# generators; verify how the installed Keras version treats a non-positive value.
loss_history1 = [parallel_model.fit_generator(training_gen,
                                 steps_per_epoch=step_count_train,
                                 epochs=MAX_TRAIN_EPOCHS,
                                 callbacks=callbacks_list,
                                 validation_data=validation_gen,
                                 validation_steps=step_count_valid,
                                 workers=-1)]

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
 222/1193 [====>.........................] - ETA: 33:29 - loss: 0.1868 - acc: 0.9367

In [None]:
# loss_history1 is a one-element list wrapping the History object returned by
# fit_generator. Calling np.array() on that list gives an object array, which
# np.savetxt cannot format, so export the per-epoch metrics held in
# History.history (metric name -> one value per epoch) instead.
history_metrics = loss_history1[0].history
metric_names = sorted(history_metrics)
# One row per epoch, one column per metric; the column names go in the header line.
np_loss_history = np.array([history_metrics[name] for name in metric_names]).T
np.savetxt("loss_history1_clf_scal2.txt", np_loss_history, delimiter=",",
           header=",".join(metric_names))

In [29]:
# Batch generators over the unbalanced split at scale 2.
# Augmentation through create_aug_gen is currently not applied.
training_gen_unbalanced = make_image_gen(training_set_unbalanced, train_image_dir, BATCH_SIZE, IMG_SCALING_2)
validation_gen_unbalanced = make_image_gen(validation_set_unbalanced, train_image_dir, BATCH_SIZE, IMG_SCALING_2)

# Whole batches per pass; the training count is additionally capped at MAX_TRAIN_STEPS.
step_count_train_unbalanced = min(MAX_TRAIN_STEPS, training_set_unbalanced.shape[0] // BATCH_SIZE)
step_count_valid_unbalanced = validation_set_unbalanced.shape[0] // BATCH_SIZE

print("step_count_train_unbalanced =", step_count_train_unbalanced)
print("step_count_valid_unbalanced =", step_count_valid_unbalanced)

step_count_train_unbalanced = 2777
step_count_valid_unbalanced = 146


In [30]:
# One further epoch of the scale-2 classifier on the unbalanced split.
# NOTE: the recorded run of this cell aborted with a ValueError about a
# truncated image file, so `loss_history2` was never bound in that session.
# NOTE(review): workers=-1 is not a documented "use all cores" value for Keras
# generators; verify how the installed Keras version treats a non-positive value.
loss_history2 = [parallel_model.fit_generator(training_gen_unbalanced,
                                 steps_per_epoch=step_count_train_unbalanced,
                                 epochs=1,
                                 callbacks=callbacks_list,
                                 validation_data=validation_gen_unbalanced,
                                 validation_steps=step_count_valid_unbalanced,
                                 workers=-1)]

Epoch 1/1

ValueError: Could not load "" 
Reason: "image file is truncated (55 bytes not processed)"
Please see documentation at: http://pillow.readthedocs.io/en/latest/installation.html#external-libraries

In [31]:
# loss_history2 is a one-element list wrapping the History object returned by
# fit_generator. Calling np.array() on that list gives an object array, which
# np.savetxt cannot format, so export the per-epoch metrics held in
# History.history (metric name -> one value per epoch) instead.
history_metrics = loss_history2[0].history
metric_names = sorted(history_metrics)
# One row per epoch, one column per metric; the column names go in the header line.
np_loss_history = np.array([history_metrics[name] for name in metric_names]).T
np.savetxt("loss_history2_clf_scal2.txt", np_loss_history, delimiter=",",
           header=",".join(metric_names))

NameError: name 'loss_history2' is not defined

In [32]:
# Persist the scale-2 template model (not the multi-GPU wrapper) with `save`.
model.save("weights_models/{}_weights.best.hdf5".format('clf_Scal2'))

In [33]:
# Reload the file written by the previous cell into the template model, then
# score through parallel_model on both scale-2 validation generators.
# NOTE(review): this assumes parallel_model shares its weights with `model` -- confirm.
# NOTE(review): workers=-1 is not a documented "use all cores" value for Keras
# generators; verify how the installed Keras version treats a non-positive value.
model.load_weights("weights_models/{}_weights.best.hdf5".format('clf_Scal2'))
print("model evaluation unblanced data: ", parallel_model.evaluate_generator(validation_gen_unbalanced,
                                              step_count_valid_unbalanced,
                                              workers=-1,
                                              verbose=1))
print("model evaluation balanced data: ", parallel_model.evaluate_generator(validation_gen,
                                              step_count_valid,
                                              workers=-1,
                                              verbose=1))

model evaluation unblanced data:  [0.08682599704559535, 0.967679794520548]
model evaluation balanced data:  [0.1644323167781676, 0.9359879032258065]
