## Reading data

In [2]:
import numpy as np
import os
import pandas as pd
import cv2

In [3]:
#PRJ = "/home/weiyi/workspace/iceburger"
PRJ = "/workspace/iceburger"
DATA = os.path.join(PRJ, "data/processed")

In [4]:
def image_normalization(x, percentile=1):
    """Normalize the image signal value by rescale data
    
    :param x: :class:`numpy.ndarray` of signal of dimension (height, width, 2)
    :param percentile: signal greater or less than the percentile will be capped
        as 1 and 0 respectively
    :returns: :class:`numpy.ndarray` of normalized 3 channel image with last
        channel totally black
    """
    vmax = np.percentile(x, 100 - percentile)
    vmin = np.percentile(x, percentile)
    x = (x - vmin) / (vmax - vmin)
    x[x > 1] = 1
    x[x < 0] = 0
    return np.concatenate([x, np.zeros(x.shape[:2] + (1,))], axis=-1)[np.newaxis, :, :, :]

In [7]:
def parse_json_data(json_filename):
    """Parse json data to generate trainable matrices
    
    :param json_filename: path to input json file
    :returns: a `tuple` of
        X: :class:`numpy.ndarray` of dimension (nb_samples, height, width, 3)
        X_angle: :class:`numpy.array` of dimension (nb_samples) of incidence
            angles
        y: :class:`numpy.array` of labels
    """
    df = pd.read_json(json_filename)
    dim = int(np.sqrt(len(df.band_1.iloc[0])))
    _X = np.concatenate([
        np.concatenate([np.array(r.band_1).reshape((dim, dim, 1)),
                        np.array(r.band_2).reshape((dim, dim, 1))],
                       axis=-1)[np.newaxis, :, :, :]
        for _, r in df.iterrows()], axis = 0)
    X = np.concatenate([image_normalization(x) for x in _X], axis=0)
    X_angle = df.inc_angle.values
    y = df.is_iceberg.values
    return (X, X_angle, y)

In [64]:
X_train, X_train_angle, y,subset = parse_json_data(os.path.join(DATA, "train.json"))

In [None]:
print(X_train.shape)
print(X_train_angle.shape)
print(y.shape)

In [65]:
json=os.path.join(DATA, "train.json")
df_json = pd.read_json(json)
#df_json.head()
len(df_json.band_1.iloc[0])
df_json.head()
dim = int(np.sqrt(len(df_json.band_1.iloc[0])))


(1604, 75, 75, 3)
(1604,)
(1604,)
75


In [10]:
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [66]:
gen = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         zoom_range = 0.1,
                         rotation_range = 45)

In [67]:
genX1 = gen.flow(X_train, y,  batch_size=32, seed=666)
#genX2 = gen.flow(X_train_angle, y, batch_size=32, seed = 666)

In [69]:
X,Y =genX1.next()
X.shape
Y.shape

(32,)

In [15]:
w = 197
h = 197
XX = cv2.resize(X[0],(w,h))

In [16]:
XX.shape

(197, 197, 3)

In [17]:
# Here is the function that merges our two generators
# We use the exact same generator with the same random seed for both the y and angle arrays
def gen_flow_for_one_input(X1, y):
    genX1 = gen.flow(X1, y, batch_size= 32, seed=666)
    while True:
        X1i = genX1.next()
        yield X1i[0], X1i[1]

#Finally create out generator
gen_flow = gen_flow_for_one_input(X_train, y)

In [26]:
def exponential(args):
    X,angles,Y = args
    return (X[0]+j*X[1])*np.exp(-j*pi*angles/180)

## Resnet Model


In [38]:
import numpy as np
import pandas as pd
import os
import keras
from keras.optimizers import RMSprop
import shutil

from keras.applications.resnet50 import ResNet50
from keras.layers import GlobalMaxPooling2D, Dense, BatchNormalization, GlobalAveragePooling2D, Dropout
from keras.models import Model

In [None]:
def parse_json_data(json_filename):
    """Parse json data to generate trainable matrices

    :param json_filename: path to input json file
    :returns: a `tuple` of
        X: :class:`numpy.ndarray` of dimension (nb_samples, height, width, 3)
        X_angle: :class:`numpy.array` of dimension (nb_samples) of incidence
            angles
        y: :class:`numpy.array` of labels
    """
    df = pd.read_json(json_filename)
    dim = int(np.sqrt(len(df.band_1.iloc[0])))
    _X = np.concatenate([
        np.concatenate([np.array(r.band_1).reshape((dim, dim, 1)),
                        np.array(r.band_2).reshape((dim, dim, 1))],
                       axis=-1)[np.newaxis, :, :, :]
        for _, r in df.iterrows()], axis=0)
    X = np.concatenate([image_normalization(x) for x in _X], axis=0)
    X_angle = df.inc_angle.values
    y = df.is_iceberg.values
    if "set" in df.columns:
        subset = df["set"].values
    else:
        subset = np.array(["train"] * len(y))

    return (X, X_angle, y, subset)

In [73]:
X, X_angle, y, subset = parse_json_data(os.path.join(DATA, "train_valid.json"))
df = pd.read_json(os.path.join(DATA, "train_valid.json"))

In [None]:
print(X.shape)
print(X_angle.shape)
print(y.shape)
print(subset.shape)
print(subset)
df.head()

In [81]:
X_train = np.array([cv2.resize(x,(w,h)) for x in X[subset=='train']])
X_angle_train = X_angle[subset=='train']
y_train = y[subset=='train']
X_valid = np.array([cv2.resize(x,(w,h)) for x in X[subset=='valid']])
X_angle_valid = X_angle[subset=='valid']
y_valid = y[subset=='valid']

In [83]:
X_train.shape

(1321, 197, 197, 3)

In [84]:
gen_train = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         zoom_range = 0.1,
                         rotation_range = 45)

gen_valid = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         zoom_range = 0.1,
                         rotation_range = 45)
# Here is the function that merges our two generators
# We use the exact same generator with the same random seed for both the y and angle arrays
def gen_flow_train_for_one_input(X1, y):
    genX1 = gen_train.flow(X1, y, batch_size= 32, seed=666)
    while True:
        X1i = genX1.next()
        yield X1i[0], X1i[1]

def gen_flow_valid_for_one_input(X1, y):
    genX1 = gen_valid.flow(X1, y, batch_size= 32, seed=444)
    while True:
        X1i = genX1.next()
        yield X1i[0], X1i[1]
#Finally create out generator
gen_flow_train = gen_flow_train_for_one_input(X_train, y_train)
gen_flow_valid = gen_flow_valid_for_one_input(X_valid, y_valid) 

In [60]:
x,y=gen_flow_train.next()

AttributeError: 'generator' object has no attribute 'next'

In [None]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(197,197,3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
#for layer in base_model.layers:
#    layer.trainable = False
for layer in model.layers[:15]:
    layer.trainable = False
for layer in model.layers[15:]:
    layer.trainable = True

from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])
from keras.callbacks import EarlyStopping, ModelCheckpoint
epochs_to_wait_for_improve = 50
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=epochs_to_wait_for_improve)
checkpoint_callback = ModelCheckpoint('BestKerasModelResNet50.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')

In [None]:
model.summary()

In [85]:
#fit the model
model.fit_generator(gen_flow_train, validation_data= gen_flow_valid, validation_steps = int(np.ceil(len(X_valid)/32)), steps_per_epoch=int(np.ceil(len(X_train)/32)), epochs=500, verbose=1, callbacks=[early_stopping_callback, checkpoint_callback])
                    

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_12 (InputLayer)            (None, 197, 197, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_12 (ZeroPadding2D (None, 203, 203, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 99, 99, 64)    9472                                         
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 99, 99, 64)    256                                          
___________________________________________________________________________________________

KeyboardInterrupt: 