In [21]:
# import basic package
import numpy as np
import pandas as pd 

# import plot package
import matplotlib.pyplot as plt
import plotly
%matplotlib inline

# import ML package
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import log_loss, confusion_matrix, accuracy_score

In [2]:
# Preprocessing the data

# Load the data: read the train and test JSON files into DataFrames.
train_data, test_data = [
    pd.read_json(json_path)
    for json_path in ("./data/train.json", "./data/test.json")
]

In [3]:
# Peek at the first five training records.
train_data.head(n=5)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [4]:
def _load_band(frame, column):
    """Return the flattened images in ``frame[column]`` as an (n, 75, 75) float32 array."""
    return np.array([np.array(band).astype(np.float32).reshape(75, 75)
                     for band in frame[column]])


def _stack_bands(band_1, band_2):
    """Return (band_3, stacked): the mean of both bands and the 3-channel image array."""
    band_3 = (band_1 + band_2) / 2
    # Channels go on the last axis: band_1, band_2, then their mean.
    return band_3, np.stack([band_1, band_2, band_3], axis=-1)


def _load_angles(frame):
    """Return ``frame['inc_angle']`` as numbers with unparseable entries filled in.

    Non-numeric values are coerced to NaN and replaced by the previous valid
    angle; a backward fill afterwards covers a gap at the very start, which a
    forward fill alone would leave as NaN.
    """
    angles = pd.to_numeric(frame['inc_angle'], errors='coerce')
    # ffill() is the supported spelling of the deprecated fillna(method='pad').
    return angles.ffill().bfill()


# Generate the training data
train_band_1 = _load_band(train_data, "band_1")
train_band_2 = _load_band(train_data, "band_2")
train_band_3, train_band = _stack_bands(train_band_1, train_band_2)
train_angle = _load_angles(train_data)
train_target = np.array(train_data['is_iceberg'])

# Generate the test data
test_band_1 = _load_band(test_data, "band_1")
test_band_2 = _load_band(test_data, "band_2")
test_band_3, test_band = _stack_bands(test_band_1, test_band_2)
test_angle = _load_angles(test_data)

In [5]:
# Generate training and validation 
#X_train_1, X_valid_1, \
#X_train_2, X_valid_2, \
#X_train_angle, X_valid_angle, \
#y_train_1, y_valid_1 = train_test_split(train_band_1, train_band_2, train_angle, train_target, 
    #random_state = 1, train_size = 0.8)

In [11]:
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)

In [12]:
# Shared styling for the x/y/z axes of every 3-D scene: white grid and zero
# lines, with the background plane switched off.
scene = {
    axis_name: dict(
        gridcolor='rgb(255, 255, 255)',
        zerolinecolor='rgb(255, 255, 255)',
        showbackground=False,
        backgroundcolor='rgb(230, 230,230)'
    )
    for axis_name in ('xaxis', 'yaxis', 'zaxis')
}

# Pick the samples from the labels so each row really shows the class named in
# its annotation (the previously hard-coded sample 3 has is_iceberg == 0).
iceberg_idx = int(np.flatnonzero(train_target == 1)[0])
ship_idx = int(np.flatnonzero(train_target == 0)[0])

fig = tools.make_subplots(rows=2, cols=2,
                          specs=[[{'is_3d': True}, {'is_3d': True}],
                                 [{'is_3d': True}, {'is_3d': True}]])

# One surface per subplot: (sample index, band array, colorscale, row, col).
# Top row is the iceberg, bottom row the ship; band_1 is on the left (HH)
# and band_2 on the right (HV).
panels = [
    (iceberg_idx, train_band_1, 'Viridis', 1, 1),
    (iceberg_idx, train_band_2, 'Viridis', 1, 2),
    (ship_idx, train_band_1, 'RdBu', 2, 1),
    (ship_idx, train_band_2, 'RdBu', 2, 2),
]

fig['layout'].update(height=800, width=800, title="Iceberg and Ship")

for scene_no, (sample_idx, band, colorscale, row, col) in enumerate(panels, start=1):
    scene_key = 'scene%d' % scene_no
    fig.append_trace(dict(type='surface', z=band[sample_idx, :, :], colorscale=colorscale,
                          scene=scene_key, showscale=False), row, col)
    fig['layout'][scene_key].update(scene)

# Captions placed above each subplot.
fig['layout']['annotations'] = [
    dict(x=0.1859205, y=0.95,
         text='Iceberg HH',
         showarrow=False),
    dict(x=0.82, y=0.95,
         text='Iceberg HV',
         showarrow=False),
    dict(x=0.1859205, y=0.4,
         text='Ship HH',
         showarrow=False),
    dict(x=0.82, y=0.4,
         text='Ship HV',
         showarrow=False)
]

py.iplot(fig)



This is the format of your plot grid:
[ (1,1) scene1 ]  [ (1,2) scene2 ]
[ (2,1) scene3 ]  [ (2,2) scene4 ]



In [35]:
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import adam, rmsprop
from keras.layers import Dense, Flatten, Dropout, Concatenate, Input
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.models import Sequential
from keras.applications import VGG16, VGG19, ResNet50, Xception
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model

In [36]:
# Image augmentation used for training batches: random flips, zoom and small
# rotations; width/height/channel shifts are explicitly disabled (0.).
gen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    rotation_range=10,
    width_shift_range=0.,
    height_shift_range=0.,
    channel_shift_range=0.,
)

In [76]:
def generate_flow_input(X1, X_angle, y, batch_size=32, seed=1):
    """Yield augmented training batches for the two-input (image + angle) model.

    Parameters
    ----------
    X1 : array of images passed to ``gen.flow``.
    X_angle : incidence angles aligned with ``X1``.
    y : labels aligned with ``X1``.
    batch_size : number of samples per batch (default 32).
    seed : seed given to both flows (default 1).

    Yields
    ------
    ``([image_batch, angle_batch], label_batch)`` indefinitely, which is the
    ``(inputs, targets)`` layout ``fit_generator`` consumes for a model with
    two inputs.
    """
    # Two flows over the same images with the same seed: one carries the
    # labels, the other carries the angles in the "label" slot. This relies on
    # equal seeds giving both iterators the same sample order, so that the
    # angles stay aligned with the images and labels.
    image_label_flow = gen.flow(X1, y, batch_size=batch_size, seed=seed)
    image_angle_flow = gen.flow(X1, X_angle, batch_size=batch_size, seed=seed)

    # Loop forever: the consumer decides how many batches make up an epoch.
    while True:
        images, labels = next(image_label_flow)
        _, angles = next(image_angle_flow)
        yield [images, angles], labels

In [77]:
def get_callbacks(filepath, patience=10):
    """Build the training callbacks: best-weights checkpointing and early stopping.

    Parameters
    ----------
    filepath : path the checkpoint callback writes the best weights to.
    patience : epochs without ``val_loss`` improvement before training stops.

    Returns
    -------
    list
        ``[ModelCheckpoint, EarlyStopping]``, ready to pass as ``callbacks=``.
    """
    checkpointer = ModelCheckpoint(filepath=filepath, verbose=1, save_best_only=True)
    earlystop = EarlyStopping('val_loss', patience=patience)
    # Previously nothing was returned, so callers received None and no
    # checkpoint file was ever written.
    return [checkpointer, earlystop]

In [78]:
# Per-sample shape of the stacked images (everything after the first axis);
# the same value is passed as input_shape when the Xception model is built.
print(train_band.shape[1:])

(75, 75, 3)


In [79]:
def getXceptionConcatenateAngleModel():
    """Build, compile and summarise the image + incidence-angle classifier.

    The image branch is an ImageNet-initialised Xception without its top,
    globally max-pooled; the angle branch is a one-unit Dense layer on a
    scalar input. Both are concatenated and fed through two 256-unit ReLU
    layers (each followed by 30% dropout) into a single sigmoid output.

    Returns the compiled Keras model with inputs ``[image, angle]``.
    """
    # Angle branch.
    angle_in = Input(shape=[1], name="angle")
    angle_feat = Dense(1, )(angle_in)

    # Image branch: take the activations of Xception's last block and pool them.
    backbone = Xception(include_top=False, weights='imagenet',
                        input_shape=train_band.shape[1:], classes=1)
    image_feat = backbone.get_layer('block14_sepconv2_act').output
    image_feat = GlobalMaxPooling2D()(image_feat)
    print(image_feat.shape)
    print(angle_feat.shape)

    # Joint head on the concatenated features.
    features = Concatenate()([image_feat, angle_feat])
    print(features.shape)
    for fc_name in ('fc1', 'fc2'):
        features = Dense(256, activation='relu', name=fc_name)(features)
        features = Dropout(0.3)(features)
    probability = Dense(1, activation='sigmoid')(features)

    optimizer = adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model = Model(inputs=[backbone.input, angle_in], outputs=probability)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    return model

In [80]:
def myAngleCV(X_train, X_angle, target_train, X_test, X_test_angle=None):
    """Cross-validate the Xception + angle model and average its test predictions.

    The training set is split into 3 stratified folds. For each fold a fresh
    model is trained on augmented batches, the weights stored at
    ``"<fold>_aug_model_weights.hdf5"`` are reloaded, loss/accuracy are
    printed for the fold's training and hold-out parts, and predictions are
    collected for the hold-out part, the full training set and the test set.

    Parameters
    ----------
    X_train : array of training images, indexed along axis 0.
    X_angle : incidence angles aligned with ``X_train`` (array-like or Series).
    target_train : binary labels aligned with ``X_train``.
    X_test : array of test images.
    X_test_angle : incidence angles aligned with ``X_test``. When omitted, the
        notebook-level ``test_angle`` is used so existing four-argument calls
        keep working.

    Returns
    -------
    numpy.ndarray
        Test-set predictions averaged over the folds.
    """
    K = 3
    if X_test_angle is None:
        # The original body read an undefined name here; fall back to the
        # test angles prepared earlier in the notebook.
        X_test_angle = test_angle

    # Work on plain arrays so the fold indices below are positional even if
    # a pandas Series with a non-default index is passed in.
    X_angle = np.asarray(X_angle)
    X_test_angle = np.asarray(X_test_angle)
    target_train = np.asarray(target_train)

    folds = list(StratifiedKFold(n_splits=K, shuffle=True, random_state=16).split(X_train, target_train))
    y_test_pred_log = 0
    y_train_pred_log = 0
    # Out-of-fold predictions: every training sample is filled exactly once.
    y_valid_pred_log = 0.0 * target_train
    for j, (train_idx, test_idx) in enumerate(folds):
        print('\n===================FOLD=', j)
        X_train_cv = X_train[train_idx]
        y_train_cv = target_train[train_idx]
        X_holdout = X_train[test_idx]
        Y_holdout = target_train[test_idx]

        # Angle
        X_angle_cv = X_angle[train_idx]
        X_angle_hold = X_angle[test_idx]

        # define file path and get callbacks
        file_path = "%s_aug_model_weights.hdf5" % j
        callbacks = get_callbacks(filepath=file_path, patience=5)
        gen_flow = generate_flow_input(X_train_cv, X_angle_cv, y_train_cv)
        model = getXceptionConcatenateAngleModel()
        model.fit_generator(
            gen_flow,
            steps_per_epoch=24,
            epochs=100,
            shuffle=True,
            verbose=1,
            validation_data=([X_holdout, X_angle_hold], Y_holdout),
            callbacks=callbacks)

        # Getting the Best Model (the file is expected to have been written
        # by the checkpoint callback during training)
        model.load_weights(filepath=file_path)
        # Getting Training Score
        score = model.evaluate([X_train_cv, X_angle_cv], y_train_cv, verbose=0)
        print('Train loss:', score[0])
        print('Train accuracy:', score[1])
        # Getting Test Score
        score = model.evaluate([X_holdout, X_angle_hold], Y_holdout, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        # Getting validation Score.
        pred_valid = model.predict([X_holdout, X_angle_hold])
        y_valid_pred_log[test_idx] = pred_valid.reshape(pred_valid.shape[0])

        # Getting Test Scores
        temp_test = model.predict([X_test, X_test_angle])
        y_test_pred_log += temp_test.reshape(temp_test.shape[0])

        # Getting Train Scores
        temp_train = model.predict([X_train, X_angle])
        y_train_pred_log += temp_train.reshape(temp_train.shape[0])

    # Average the per-fold predictions.
    y_test_pred_log = y_test_pred_log / K
    y_train_pred_log = y_train_pred_log / K

    print('\n Train Log Loss Validation= ', log_loss(target_train, y_train_pred_log))
    print(' Test Log Loss Validation= ', log_loss(target_train, y_valid_pred_log))
    return y_test_pred_log

In [None]:
# Run the cross-validated training and keep the averaged test predictions.
preds = myAngleCV(
    X_train=train_band,
    X_angle=train_angle,
    target_train=train_target,
    X_test=test_band,
)


(?, 2048)
(?, 1)
(?, 2049)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 37, 37, 32)   864         input_15[0][0]                   
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 37, 37, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 37, 37, 32)   0           block1_conv1_bn[0][0]            
_________________________________________________________________________________

Epoch 1/100
