## Reading data

In [1]:
import numpy as np
import os
import pandas as pd

In [2]:
# Project root. Defaults to the original workstation path, but can be
# overridden through the ICEBURGER_HOME environment variable so the notebook
# also runs on machines where the project lives elsewhere.
PRJ = os.environ.get("ICEBURGER_HOME", "/home/weiyi/workspace/iceburger")
# Directory containing the processed JSON data files.
DATA = os.path.join(PRJ, "data/processed")

In [3]:
def image_normalization(x, percentile=1):
    """Rescale an image signal to [0, 1] and append an all-zero channel.

    The lower and upper bounds are the ``percentile``-th and
    ``(100 - percentile)``-th percentiles, computed over the whole array (all
    channels together). Values are mapped linearly so that the lower bound
    becomes 0 and the upper bound becomes 1, and anything outside that range
    is clipped.

    :param x: :class:`numpy.ndarray` of signal of dimension
        (height, width, channels); documented for 2-channel input
    :param percentile: signal greater than the ``(100 - percentile)``-th
        percentile or less than the ``percentile``-th percentile is capped
        at 1 and 0 respectively
    :returns: :class:`numpy.ndarray` of dimension
        (1, height, width, channels + 1), i.e. the normalized image with a
        leading batch axis and a last channel that is totally black (zeros)
    """
    x = np.asarray(x)
    vmax = np.percentile(x, 100 - percentile)
    vmin = np.percentile(x, percentile)
    span = vmax - vmin
    if span == 0:
        # Both percentiles coincide (constant or near-constant image).
        # Dividing by zero would put NaN into the output, so map everything
        # at or below the bound to 0 and only values above it to 1.
        scaled = (x > vmax).astype(float)
    else:
        scaled = np.clip((x - vmin) / span, 0, 1)
    # Extra blank channel so the result has one more channel than the input.
    blank = np.zeros(x.shape[:2] + (1,))
    return np.concatenate([scaled, blank], axis=-1)[np.newaxis, :, :, :]

In [6]:
def parse_json_data(json_filename):
    """Parse json data to generate trainable matrices

    Every record is expected to carry two flattened square images in the
    ``band_1`` and ``band_2`` columns. Both bands are reshaped to
    (dim, dim), stacked as channels and passed image by image through
    :func:`image_normalization`.

    :param json_filename: path to input json file
    :returns: a `tuple` of
        X: :class:`numpy.ndarray` of dimension (nb_samples, height, width, 3)
        X_angle: :class:`numpy.array` of dimension (nb_samples) of incidence
            angles, returned as stored in the file (no type coercion)
        y: :class:`numpy.array` of labels, or ``None`` when the file has no
            ``is_iceberg`` column (e.g. unlabeled data)
    """
    df = pd.read_json(json_filename)
    # Side length of the square image, derived from the first record.
    dim = int(np.sqrt(len(df.band_1.iloc[0])))
    nb_samples = len(df)
    # Convert each band column in one go instead of iterating over rows; the
    # explicit sample count makes the reshape fail loudly if a band is not
    # exactly dim * dim long.
    band_1 = np.array(df.band_1.tolist()).reshape((nb_samples, dim, dim))
    band_2 = np.array(df.band_2.tolist()).reshape((nb_samples, dim, dim))
    raw = np.stack([band_1, band_2], axis=-1)
    # Normalization is per image, so each sample gets its own percentiles.
    X = np.concatenate([image_normalization(img) for img in raw], axis=0)
    X_angle = df.inc_angle.values
    # Labels are optional so that unlabeled files can be parsed as well.
    y = df.is_iceberg.values if "is_iceberg" in df.columns else None
    return (X, X_angle, y)

In [7]:
# Load the training file from DATA and unpack the parsed tuple: the image
# array, the incidence angles and the labels.
X_train, X_train_angle, y = parse_json_data(os.path.join(DATA, "train.json"))

In [12]:
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.
  return f(*args, **kwds)


In [13]:
# Augmentation pipeline: random flips on both axes, shifts and zoom of up to
# 10%, and rotations of up to 45 degrees.
gen = ImageDataGenerator(
    rotation_range=45,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)

In [14]:
# Batch iterator over the training images and labels; the fixed seed keeps
# the augmentation sequence repeatable between runs.
genX1 = gen.flow(
    X_train,
    y,
    batch_size=32,
    seed=666,
)

In [15]:
# Draw one augmented batch from the iterator to inspect it.
next(genX1)

(array([[[[ 0.95562428,  0.18664785,  0.        ],
          [ 0.78717524,  0.15958722,  0.        ],
          [ 0.55777597,  0.36569679,  0.        ],
          ..., 
          [ 0.53021997,  0.36588168,  0.        ],
          [ 0.28355649,  0.34641996,  0.        ],
          [ 0.43756217,  0.34642345,  0.        ]],
 
         [[ 0.95562428,  0.18664785,  0.        ],
          [ 0.78717524,  0.15958722,  0.        ],
          [ 0.55777597,  0.36569679,  0.        ],
          ..., 
          [ 0.53021997,  0.36588168,  0.        ],
          [ 0.28355649,  0.34641996,  0.        ],
          [ 0.43756217,  0.34642345,  0.        ]],
 
         [[ 0.95562428,  0.18664785,  0.        ],
          [ 0.78717524,  0.15958722,  0.        ],
          [ 0.55777597,  0.36569679,  0.        ],
          ..., 
          [ 0.53021997,  0.36588168,  0.        ],
          [ 0.28355649,  0.34641996,  0.        ],
          [ 0.43756217,  0.34642345,  0.        ]],
 
         ..., 
         [