# CNN_3channel_Inception

Abstract:
- Conv的kernel_size有(1, 1)、(2, 2)和(3, 3)
- single channel: band_avg
- CNN, small net

Result:
- Kaggle score: 

References:
- https://www.kaggle.com/ivalmian/simple-svd-xgboost-baseline-lb-35
- https://www.kaggle.com/arieltci/a-keras-prototype-0-21174-on-pl

## 1. Preprocess

### Import pkgs

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import log_loss, accuracy_score
from IPython.display import display

%matplotlib inline

In [2]:
import os
import time
import zipfile
import lzma
import pickle
from PIL import Image
from shutil import copy2
import gc

import cv2

### Run name

In [3]:
project_name = 'SC_Iceberg_Classifier'
step_name = 'CNN_3channel_Inception'
date_str = time.strftime("%Y%m%d", time.localtime())
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
run_name = project_name + '_' + step_name + '_' + time_str
print('run_name: ' + run_name)
t0 = time.time()

run_name: SC_Iceberg_Classifier_CNN_3channel_Inception_20180107_125815


### Basic folders

In [4]:
cwd = os.getcwd()
input_path = os.path.join(cwd, 'input')
log_path = os.path.join(cwd, 'log')
model_path = os.path.join(cwd, 'model')
output_path = os.path.join(cwd, 'output')
print('input_path: ' + input_path)
print('log_path: ' + log_path)
print('model_path: ' + model_path)
print('output_path: ' + output_path)

input_path: /data1/Kaggle/statoil-iceberg-classifier-challenge/input
log_path: /data1/Kaggle/statoil-iceberg-classifier-challenge/log
model_path: /data1/Kaggle/statoil-iceberg-classifier-challenge/model
output_path: /data1/Kaggle/statoil-iceberg-classifier-challenge/output


### Basic functions

In [5]:
import sys
def describe(arr):
    print(arr.shape, arr.min(), arr.max(), sys.getsizeof(arr))

### Load data

In [6]:
sample_submission_path = os.path.join(input_path, 'sample_submission.csv')
sample_submission = pd.read_csv(sample_submission_path)
print(sample_submission.shape)
sample_submission.head(2)

(8424, 2)


Unnamed: 0,id,is_iceberg
0,5941774d,0.5
1,4023181e,0.5


In [7]:
is_iceberg_path = os.path.join(input_path, 'is_iceberg.p')
y_data = pickle.load(open(is_iceberg_path, mode='rb'))
describe(y_data)

(1604,) 0 1 25688


In [8]:
%%time
#Load orignal data
inc_angle_data_path = os.path.join(input_path, 'inc_angle_data.p')
inc_angle_test_path = os.path.join(input_path, 'inc_angle_test.p')

inc_angle_data = pickle.load(open(inc_angle_data_path, mode='rb'))
inc_angle_test = pickle.load(open(inc_angle_test_path, mode='rb'))

describe(inc_angle_data)
describe(inc_angle_test)

(1604,) 0.0 45.9375 25688
(8424,) 23.0805 50.66178518 134808
CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 4.66 ms


In [9]:
%%time
#Load orignal data
band1_data_path = os.path.join(input_path, 'band1_data_gray.p')
band2_data_path = os.path.join(input_path, 'band2_data_gray.p')
band1_test_path = os.path.join(input_path, 'band1_test_gray.p')
band2_test_path = os.path.join(input_path, 'band2_test_gray.p')

band1_data = pickle.load(open(band1_data_path, mode='rb'))
band2_data = pickle.load(open(band2_data_path, mode='rb'))
band1_test = pickle.load(open(band1_test_path, mode='rb'))
band2_test = pickle.load(open(band2_test_path, mode='rb'))

band_avg_data = (band1_data + band2_data)/2
band_avg_test = (band1_test + band2_test)/2

describe(band1_data)
describe(band2_data)
describe(band1_test)
describe(band2_test)

describe(band_avg_data)
describe(band_avg_test)

(1604, 75, 75) 0.0 1.0 128
(1604, 75, 75) 0.0 1.0 128
(8424, 75, 75) 0.0 1.0 128
(8424, 75, 75) 0.0 1.0 128
(1604, 75, 75) 0.0114754206838 1.0 72180128
(8424, 75, 75) 0.0 1.0 379080128
CPU times: user 664 ms, sys: 1.08 s, total: 1.74 s
Wall time: 1.73 s


In [10]:
%%time
x_data = np.concatenate([band1_data[:, :, :, np.newaxis],
                         band2_data[:, :, :, np.newaxis],
                         band_avg_data[:, :, :, np.newaxis]], axis=-1)
describe(x_data)
del band1_data
del band2_data
del band_avg_data
gc.collect()
x_test = np.concatenate([band1_test[:, :, :, np.newaxis],
                         band2_test[:, :, :, np.newaxis],
                         band_avg_test[:, :, :, np.newaxis]], axis=-1)
describe(x_test)
del band1_test
del band2_test
del band_avg_test
gc.collect()

(1604, 75, 75, 3) 0.0 1.0 216540144
(8424, 75, 75, 3) 0.0 1.0 1137240144
CPU times: user 832 ms, sys: 564 ms, total: 1.4 s
Wall time: 1.39 s


In [11]:
%%time
x_train, x_val, inc_angle_train, inc_angle_val, y_train, y_val = train_test_split(x_data, inc_angle_data, y_data, test_size=0.1, random_state=31)
describe(x_train)
describe(x_val)
describe(inc_angle_train)
describe(inc_angle_val)
describe(y_train)
describe(y_val)

(1443, 75, 75, 3) 0.0 1.0 194805144
(161, 75, 75, 3) 0.0 1.0 21735144
(1443,) 0.0 45.9375 23112
(161,) 0.0 45.2814 2600
(1443,) 0 1 23112
(161,) 0 1 2600
CPU times: user 92 ms, sys: 72 ms, total: 164 ms
Wall time: 159 ms


In [12]:
# %%time
# def aug(arr):
#     ah = np.zeros((arr.shape))
#     av = np.zeros((arr.shape))
#     ahv = np.zeros((arr.shape))
#     for i, a in enumerate(arr):
#         ah[i] = cv2.flip(a, 0)
#         av[i] = cv2.flip(a, 1)
#         ahv[i] = cv2.flip(a, -1)
#     return np.concatenate([arr, ah, av], axis = 0)

# x_data = aug(x_data)
# describe(x_data)

# describe(y_data)
# y_data = np.concatenate([y_data, y_data.copy(), y_data.copy()], axis = 0)
# describe(y_data)

## 2. Build model

In [13]:
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalMaxPooling2D, BatchNormalization, Input
from keras.layers.merge import Concatenate
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, TensorBoard

Using TensorFlow backend.


In [14]:
def build_model():
    bn_model = 0
    p_activation = "relu"
    input_1 = Input(shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3]), name="bands")
    
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same')(input_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same') (img_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = MaxPooling2D((2,2), padding='same') (img_1)
    img_1 = Dropout(0.4)(img_1)
    
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same')(input_1)
    img_2 = BatchNormalization()(img_2)
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same') (img_2)
    img_2 = BatchNormalization()(img_2)
    img_2 = MaxPooling2D((2,2), padding='same') (img_2)
    img_2 = Dropout(0.4)(img_2)
    
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same')(input_1)
    img_3 = BatchNormalization()(img_3)
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same') (img_3)
    img_3 = BatchNormalization()(img_3)
    img_3 = MaxPooling2D((2,2), padding='same') (img_3)
    img_3 = Dropout(0.4)(img_3)
    
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same')(input_1)
    img_5 = BatchNormalization()(img_5)
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same') (img_5)
    img_5 = BatchNormalization()(img_5)
    img_5 = MaxPooling2D((2,2), padding='same') (img_5)
    img_5 = Dropout(0.4)(img_5)
    
    concat_layer = (Concatenate()([img_1, img_2, img_3, img_5]))
#############################################################################################
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same')(concat_layer)
    img_1 = BatchNormalization()(img_1)
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same') (img_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = MaxPooling2D((2,2), padding='same') (img_1)
    img_1 = Dropout(0.4)(img_1)
    
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same')(concat_layer)
    img_2 = BatchNormalization()(img_2)
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same') (img_2)
    img_2 = BatchNormalization()(img_2)
    img_2 = MaxPooling2D((2,2), padding='same') (img_2)
    img_2 = Dropout(0.4)(img_2)
    
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same')(concat_layer)
    img_3 = BatchNormalization()(img_3)
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same') (img_3)
    img_3 = BatchNormalization()(img_3)
    img_3 = MaxPooling2D((2,2), padding='same') (img_3)
    img_3 = Dropout(0.4)(img_3)
    
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same')(concat_layer)
    img_5 = BatchNormalization()(img_5)
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same') (img_5)
    img_5 = BatchNormalization()(img_5)
    img_5 = MaxPooling2D((2,2), padding='same') (img_5)
    img_5 = Dropout(0.4)(img_5)
    
    concat_layer = (Concatenate()([img_1, img_2, img_3, img_5]))
#############################################################################################
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same')(concat_layer)
    img_1 = BatchNormalization()(img_1)
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same') (img_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = MaxPooling2D((2,2), padding='same') (img_1)
    img_1 = Dropout(0.4)(img_1)
    
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same')(concat_layer)
    img_2 = BatchNormalization()(img_2)
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same') (img_2)
    img_2 = BatchNormalization()(img_2)
    img_2 = MaxPooling2D((2,2), padding='same') (img_2)
    img_2 = Dropout(0.4)(img_2)
    
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same')(concat_layer)
    img_3 = BatchNormalization()(img_3)
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same') (img_3)
    img_3 = BatchNormalization()(img_3)
    img_3 = MaxPooling2D((2,2), padding='same') (img_3)
    img_3 = Dropout(0.4)(img_3)
    
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same')(concat_layer)
    img_5 = BatchNormalization()(img_5)
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same') (img_5)
    img_5 = BatchNormalization()(img_5)
    img_5 = MaxPooling2D((2,2), padding='same') (img_5)
    img_5 = Dropout(0.4)(img_5)
    
    concat_layer = (Concatenate()([img_1, img_2, img_3, img_5]))
#############################################################################################
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same')(concat_layer)
    img_1 = BatchNormalization()(img_1)
    img_1 = Conv2D(16, kernel_size = (1,1), activation=p_activation, padding='same') (img_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = MaxPooling2D((2,2), padding='same') (img_1)
    img_1 = Dropout(0.4)(img_1)
    
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same')(concat_layer)
    img_2 = BatchNormalization()(img_2)
    img_2 = Conv2D(16, kernel_size = (2,2), activation=p_activation, padding='same') (img_2)
    img_2 = BatchNormalization()(img_2)
    img_2 = MaxPooling2D((2,2), padding='same') (img_2)
    img_2 = Dropout(0.4)(img_2)
    
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same')(concat_layer)
    img_3 = BatchNormalization()(img_3)
    img_3 = Conv2D(16, kernel_size = (3,3), activation=p_activation, padding='same') (img_3)
    img_3 = BatchNormalization()(img_3)
    img_3 = MaxPooling2D((2,2), padding='same') (img_3)
    img_3 = Dropout(0.4)(img_3)
    
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same')(concat_layer)
    img_5 = BatchNormalization()(img_5)
    img_5 = Conv2D(16, kernel_size = (5,5), activation=p_activation, padding='same') (img_5)
    img_5 = BatchNormalization()(img_5)
    img_5 = MaxPooling2D((2,2), padding='same') (img_5)
    img_5 = Dropout(0.4)(img_5)
    
    concat_layer = (Concatenate()([img_1, img_2, img_3, img_5]))
#############################################################################################
    img_1 = Conv2D(64, kernel_size = (3,3), activation=p_activation)(concat_layer)
    img_1 = BatchNormalization()(img_1)
    img_1 = Conv2D(64, kernel_size = (3,3), activation=p_activation) (img_1)
    img_1 = BatchNormalization()(img_1)
    img_1 = MaxPooling2D((2,2), padding='same') (img_1)
    img_1 = Dropout(0.4)(img_1)
#############################################################################################
    img_1 = GlobalMaxPooling2D() (img_1)

    dense_ayer = Dense(1024, activation=p_activation)(img_1)
#     dense_ayer = BatchNormalization()(dense_ayer)
    dense_ayer = Dropout(0.4)(dense_ayer)
    dense_ayer = Dense(1024, activation=p_activation)(dense_ayer)
    dense_ayer = Dropout(0.4)(dense_ayer)
    dense_ayer = Dense(128, activation=p_activation)(dense_ayer)
    dense_ayer = Dropout(0.4)(dense_ayer)
    output = Dense(1, activation="sigmoid")(dense_ayer)
    
    model = Model(input_1, output)
#     optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer = Adam(lr=1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [15]:
model = build_model()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
bands (InputLayer)              (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 75, 75, 16)   64          bands[0][0]                      
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 75, 75, 16)   208         bands[0][0]                      
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 75, 75, 16)   448         bands[0][0]                      
__________________________________________________________________________________________________
conv2d_7 (

In [16]:
def saveModel(model, run_name):
    cwd = os.getcwd()
    modelPath = os.path.join(cwd, 'model')
    if not os.path.isdir(modelPath):
        os.mkdir(modelPath)
    weigthsFile = os.path.join(modelPath, run_name + '.h5')
    model.save(weigthsFile)
# saveModel(model, 'saveModel_test')

In [17]:
def get_lr(x):
    lr = round(1e-4 * 0.995 ** x, 6)
    if lr < 5e-5:
        lr = 5e-5
    print(lr, end='  ')
    return lr

# annealer = LearningRateScheduler(lambda x: 1e-3 * 0.9 ** x)
annealer = LearningRateScheduler(get_lr)

log_dir = os.path.join(log_path, run_name)
print('log_dir:' + log_dir)
tensorBoard = TensorBoard(log_dir=log_dir)

log_dir:/data1/Kaggle/statoil-iceberg-classifier-challenge/log/SC_Iceberg_Classifier_CNN_3channel_Inception_20180107_125815


In [18]:
datagen = ImageDataGenerator(
    zoom_range = 0.2,
    rotation_range=20,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    vertical_flip=True
)

In [19]:
%%time
batch_size = 64
# steps_per_epoch = 1 * len(x_train) / batch_size
hist = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
    steps_per_epoch=100,
    epochs=100, #1 for ETA, 0 for silent
    verbose=1,
    max_queue_size=128,
    callbacks=[annealer],
    workers=32,
    validation_data=(x_val, y_val)
)

0.0001  Epoch 1/100
0.0001  Epoch 2/100
9.9e-05  Epoch 3/100
9.9e-05  Epoch 4/100
9.8e-05  Epoch 5/100
9.8e-05  Epoch 6/100
9.7e-05  Epoch 7/100
9.7e-05  Epoch 8/100
9.6e-05  Epoch 9/100
9.6e-05  Epoch 10/100
9.5e-05  Epoch 11/100
9.5e-05  Epoch 12/100
9.4e-05  Epoch 13/100
9.4e-05  Epoch 14/100
9.3e-05  Epoch 15/100
9.3e-05  Epoch 16/100
9.2e-05  Epoch 17/100
9.2e-05  Epoch 18/100
9.1e-05  Epoch 19/100
9.1e-05  Epoch 20/100
9e-05  Epoch 21/100
9e-05  Epoch 22/100

KeyboardInterrupt: 

In [20]:
# %%time
# hist = model.fit(x_train, y_train, 
#                  batch_size = 32, 
#                  verbose = 1,
#                  callbacks = [annealer],
#                  epochs = 100, #1 for ETA, 0 for silent
#                  validation_data = (x_val, y_val))

In [None]:
final_loss, final_acc = model.evaluate(x_val, y_val, verbose=1)
print("Final loss: {0:.4f}, final accuracy: {1:.4f}".format(final_loss, final_acc))

Final loss: 0.5398, final accuracy: 0.7888


In [None]:
val_prob1 = model.predict(x_val)

# print('Val log_loss: {}'.format(log_loss(y_val, val_prob1)))
val_prob1_limit = np.clip(val_prob1, 0.00005, 0.99995)
loss = log_loss(y_val, val_prob1_limit)
print('Val log_loss: {}'.format(loss))

val_prob1_limit = np.clip(val_prob1_limit, 0.05, 0.95)
loss = log_loss(y_val, val_prob1_limit)
print('Val log_loss: {}'.format(loss))

In [None]:
final_acc_str = '{0:0>4}'.format(int(loss*10000))
run_name_acc = project_name + '_' + step_name + '_' + time_str + '_' + final_acc_str
print(run_name_acc)

In [None]:
histories = pd.DataFrame(hist.history)
histories['epoch'] = hist.epoch
print(histories.columns)
histories_file = os.path.join(model_path, run_name_acc + '.csv')
histories.to_csv(histories_file, index=False)

In [None]:
plt.plot(histories['loss'], color='b')
plt.plot(histories['val_loss'], color='r')
plt.show()
plt.plot(histories['acc'], color='b')
plt.plot(histories['val_acc'], color='r')
plt.show()

In [None]:
saveModel(model, run_name_acc)

## 3. Predict

In [None]:
if not os.path.exists(output_path):
    os.mkdir(output_path)
pred_file = os.path.join(output_path, run_name_acc + '.csv')
print(pred_file)

In [None]:
test_prob = model.predict(x_test)
print(test_prob.shape)
print(test_prob[0:2])
test_prob = np.clip(test_prob, 0.05, 0.95)
print(test_prob.shape)
print(test_prob[0:2])

In [None]:
sample_submission['is_iceberg'] = test_prob
print(sample_submission[0:2])
print(sample_submission.shape)
sample_submission.to_csv(pred_file, index=False)

In [None]:
t1 = time.time()
print('time cost: %.2f s' % (t1-t0))
print('Done!')

In [None]:
print(run_name_acc)