In [1]:
import six
import types
import keras
from keras.models import Model
from keras.layers import (
    Input,
    Activation,
    Dense,
    Flatten
)  # so sao
from keras.layers.convolutional import (
    Conv2D,
    MaxPooling2D,
    AveragePooling2D
)
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K

Using TensorFlow backend.


In [2]:
import pandas as pd
import os

In [3]:
import numpy as np

In [4]:
import tqdm

In [5]:
from keras.preprocessing import image

In [6]:
from sklearn.preprocessing import LabelBinarizer

In [7]:
import gc

In [8]:
# All ResNet Things are adapted from https://github.com/raghakot/keras-resnet/blob/master/resnet.py
def _bn_relu(inp):
    """Apply batch normalisation followed by a ReLU activation.

    The normalisation runs along the module-level ``CHANNEL_AXIS``, which is
    assigned by ``_handle_dim_ordering`` and must be set before this is called.
    """
    normalized = BatchNormalization(axis=CHANNEL_AXIS)(inp)
    relu = Activation("relu")
    return relu(normalized)

In [9]:
def _conv_bn_relu(**conv_params):
    """Return a callable that applies conv -> BN -> relu to a tensor.

    Arguments
    ---------
    conv_params: keyword arguments
        ``filters`` and ``kernel_size`` are required. ``strides`` (default
        ``(1, 1)``), ``padding`` (default ``"same"``), ``kernel_initializer``
        (default ``"he_normal"``) and ``kernel_regularizer`` (default
        ``l2(1.e-4)``) are optional. Any other key is ignored.
    """
    # Resolve every option once, up front, so the closure below only has to
    # forward a ready-made keyword dictionary to Conv2D.
    layer_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1)),
        padding=conv_params.get("padding", "same"),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1.e-4))
    )

    def f(inp):
        return _bn_relu(Conv2D(**layer_kwargs)(inp))

    return f

In [10]:
def _bn_relu_conv(**conv_params):
    """Return a callable that applies BN -> relu -> conv to a tensor.

    This is the pre-activation ordering proposed in
    http://arxiv.org/pdf/1603.05027v2.pdf

    Arguments
    ---------
    conv_params: keyword arguments
        ``filters`` and ``kernel_size`` are required. ``strides`` (default
        ``(1, 1)``), ``kernel_initializer`` (default ``"he_normal"``),
        ``padding`` (default ``"same"``) and ``kernel_regularizer`` (default
        ``l2(1.e-4)``) are optional. Any other key is ignored.
    """
    n_filters = conv_params["filters"]
    window = conv_params["kernel_size"]
    step = conv_params.get("strides", (1, 1))
    init = conv_params.get("kernel_initializer", "he_normal")
    pad = conv_params.get("padding", "same")
    reg = conv_params.get("kernel_regularizer", l2(1.e-4))

    def f(inp):
        pre_activated = _bn_relu(inp)
        conv = Conv2D(filters=n_filters,
                      kernel_size=window,
                      strides=step,
                      padding=pad,
                      kernel_initializer=init,
                      kernel_regularizer=reg)
        return conv(pre_activated)

    return f

In [11]:
def _shortcut(inp, residual):
    """Merge ``inp`` and ``residual`` with an element-wise sum.

    When the two tensors already agree in spatial size and channel count the
    input is added unchanged. Otherwise the input first goes through a 1 x 1
    convolution whose strides and filter count are chosen to match the residual.
    """
    in_shape = K.int_shape(inp)
    res_shape = K.int_shape(residual)

    # Ratio of spatial sizes along each axis; this should come out as a whole
    # number if the network architecture is correctly configured.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    same_channels = in_shape[CHANNEL_AXIS] == res_shape[CHANNEL_AXIS]

    # Identity shortcut: nothing to reshape.
    if row_stride <= 1 and col_stride <= 1 and same_channels:
        return add([inp, residual])

    # Projection shortcut: 1 x 1 conv that matches the residual's shape.
    projection = Conv2D(filters=res_shape[CHANNEL_AXIS],
                        kernel_size=(1, 1),
                        strides=(row_stride, col_stride),
                        padding="valid",
                        kernel_initializer="he_normal",
                        kernel_regularizer=l2(0.0001))
    return add([projection(inp), residual])

In [12]:
def _residual_block(block_function, filters, repetitions, is_first_layer=False):
    """Builds a residual block with repeating bottleneck blocks."""
    def f(inp):
        for i in range(repetitions):
            init_strides = (1, 1)
            if i == 0 and not is_first_layer:
                init_strides = (2, 2)
            inp = block_function(filters=filters, init_strides=init_strides,
                                 is_first_block_of_first_layer=(is_first_layer and i == 0))(inp)
        return inp

    return f

In [13]:
def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """Two stacked 3 X 3 convolutions plus a shortcut, for resnets with <= 34 layers.

    Uses the pre-activation scheme from http://arxiv.org/pdf/1603.05027v2.pdf

    Returns
    -------
        A callable mapping an input tensor to the block's output tensor.
    """
    def f(inp):
        if not is_first_block_of_first_layer:
            first = _bn_relu_conv(filters=filters,
                                  kernel_size=(3, 3),
                                  strides=init_strides)(inp)
        else:
            # The stem already ended in bn->relu->maxpool, so start with a
            # plain convolution instead of repeating bn->relu.
            plain_conv = Conv2D(filters=filters,
                                kernel_size=(3, 3),
                                strides=init_strides,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4))
            first = plain_conv(inp)

        second = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(first)
        return _shortcut(inp, second)

    return f

In [14]:
def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """1 X 1 -> 3 X 3 -> 1 X 1 bottleneck block for resnets deeper than 34 layers.

    Uses the pre-activation scheme from http://arxiv.org/pdf/1603.05027v2.pdf

    Returns
    -------
        A callable mapping an input tensor to the block's output tensor; the
        final convolution of the block has ``filters * 4`` filters.
    """
    def f(inp):
        if not is_first_block_of_first_layer:
            reduced = _bn_relu_conv(filters=filters,
                                    kernel_size=(1, 1),
                                    strides=init_strides)(inp)
        else:
            # The stem already ended in bn->relu->maxpool, so start with a
            # plain convolution instead of repeating bn->relu.
            plain_conv = Conv2D(filters=filters,
                                kernel_size=(1, 1),
                                strides=init_strides,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4))
            reduced = plain_conv(inp)

        spatial = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(reduced)
        expanded = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(spatial)
        return _shortcut(inp, expanded)

    return f

In [15]:
def _handle_dim_ordering():
    """Set the module-level axis indices to match the backend's image layout.

    Assigns the globals ``ROW_AXIS``, ``COL_AXIS`` and ``CHANNEL_AXIS``:
    (1, 2, 3) when the backend stores images channels-last, otherwise
    ``CHANNEL_AXIS`` is 1 and the row/column axes are 2 and 3.

    Uses ``K.image_data_format()`` rather than the legacy
    ``K.image_dim_ordering()``; the two report the same setting
    ('channels_last' corresponds to the old 'tf'), but only the former is
    still available in later Keras releases.
    """
    global ROW_AXIS
    global COL_AXIS
    global CHANNEL_AXIS
    if K.image_data_format() == 'channels_last':
        ROW_AXIS = 1
        COL_AXIS = 2
        CHANNEL_AXIS = 3
    else:
        CHANNEL_AXIS = 1
        ROW_AXIS = 2
        COL_AXIS = 3

In [16]:
def _get_block(identifier):
    """Resolve a block function from its name.

    A string is looked up among this module's globals and a ``ValueError`` is
    raised when nothing (or a falsy value) is found under that name. Anything
    that is not a string is returned unchanged.
    """
    if not isinstance(identifier, six.string_types):
        return identifier

    block = globals().get(identifier)
    if block:
        return block
    raise ValueError('Invalid identifier: {}'.format(identifier))

In [17]:
class ResnetBuilder(object):
    """Factory for ResNet-style Keras models assembled from this notebook's block helpers."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions):
        """Builds a custom ResNet like architecture.

        Arguments
        ---------
        input_shape: tuple
            The input shape in the form (nb_channels, nb_rows, nb_cols).
            It is permuted to (nb_rows, nb_cols, nb_channels) when the backend
            stores images channels-last.

        num_outputs: int
            The number of outputs at final softmax layer

        block_fn: types.FunctionType | {'basic_block', 'bottleneck'}
            The block function to use. This is either `basic_block` or `bottleneck`.
            The original paper used basic_block for layers < 50

        repetitions: sequence of int
            One entry per stage, giving how many block units that stage stacks
            (e.g. ``[2, 2, 2, 2]``). The filter count starts at 64 and is
            doubled after every stage.

        Returns
        -------
        model: keras.models.Model
            The keras `Model`.

        Raises
        ------
        ValueError
            If `input_shape` does not have exactly three entries.
        """
        # Validate first so a malformed shape fails before any backend state
        # is touched. ValueError is still caught by `except Exception`.
        if len(input_shape) != 3:
            raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

        _handle_dim_ordering()

        # Permute dimension order if necessary. `image_data_format` is the
        # Keras 2 replacement for the legacy `image_dim_ordering` ('tf').
        if K.image_data_format() == 'channels_last':
            input_shape = (input_shape[1], input_shape[2], input_shape[0])

        # Load function from str if needed.
        block_fn = _get_block(block_fn)

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        inp = Input(shape=input_shape)
        conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(inp)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)

        # Residual stages, doubling the filter count after each one.
        block = pool1
        filters = 64
        for i, r in enumerate(repetitions):
            block = _residual_block(block_fn, filters=filters, repetitions=r,
                                    is_first_layer=(i == 0))(block)
            filters *= 2

        # Last activation
        block = _bn_relu(block)

        # Classifier block: the pooling window spans the whole feature map,
        # so this acts as a global average pool before the softmax layer.
        block_shape = K.int_shape(block)
        pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
                                 strides=(1, 1))(block)
        flatten1 = Flatten()(pool2)
        dense = Dense(units=num_outputs, kernel_initializer="he_normal",
                      activation="softmax")(flatten1)

        model = Model(inputs=inp, outputs=dense)
        return model

    @staticmethod
    def build_resnet_18(input_shape, num_outputs):
        """Build with `basic_block` and repetitions [2, 2, 2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2])

    @staticmethod
    def build_resnet_34(input_shape, num_outputs):
        """Build with `basic_block` and repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_50(input_shape, num_outputs):
        """Build with `bottleneck` and repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_101(input_shape, num_outputs):
        """Build with `bottleneck` and repetitions [3, 4, 23, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])

    @staticmethod
    def build_resnet_152(input_shape, num_outputs):
        """Build with `bottleneck` and repetitions [3, 8, 36, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])

In [18]:
def read_img(img_id, folder, size):
    """Load ``<folder>/<img_id>.jpg``, resized to ``size``, as an array.

    Arguments
    ---------
        img_id: string
            File name of the image without the ``.jpg`` extension.

        folder: string
            Directory containing the image.

        size: tuple
            Target size to resize the original image into.

    Returns
    -------
        img: np.ndarray
            Image as numpy array.
    """
    path = os.path.join(folder, '%s.jpg' % img_id)
    loaded = image.load_img(path, target_size=size)
    return image.img_to_array(loaded)

In [19]:
# Data preparation is adapted from
# https://www.kaggle.com/gaborfodor/dog-breed-pretrained-keras-models-lb-0-3
INPUT_SIZE = 224   # images are resized to INPUT_SIZE x INPUT_SIZE
SEED = 20180407    # seed for the train/validation split
data_dir = '../data/dog_breed'

# Both CSV files live directly under data_dir.
labels, sample_submission = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in ('labels.csv', 'sample_submission.csv')
)
NUM_CLASSES = labels["breed"].nunique()

In [20]:
# Sanity check: the number of files on disk should match the CSV row counts.
train_files = os.listdir(os.path.join(data_dir, 'Train'))
print("Train Image Counts in Directory: {}".format(len(train_files)))
print("Train Image Counts in CSV: {}".format(len(labels)))
test_files = os.listdir(os.path.join(data_dir, 'Test'))
print("Test Image Counts in Directory: {}".format(len(test_files)))
print("Test Image Counts in Sample Submission: {}".format(len(sample_submission)))
print("Class Counts: {}".format(NUM_CLASSES))

Train Image Counts in Directory: 10222
Train Image Counts in CSV: 10222
Test Image Counts in Directory: 10357
Test Image Counts in Sample Submission: 10357
Class Counts: 120


In [21]:
# load images
# INPUT_SIZE and data_dir come from the configuration cell; the training
# folder is derived from data_dir so the dataset location is defined once.
POOLING = 'avg'
train_folder = os.path.join(data_dir, 'Train')
y_train = labels["breed"].values

# Pre-allocate the whole training tensor (channels-last, float32).
x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')

# enumerate() has no length, so pass `total` to give tqdm a real progress bar
# (percentage and ETA) instead of a bare iteration counter.
for i, img_id in tqdm.tqdm(enumerate(labels['id']), total=len(labels)):
    # Pixels are stored exactly as read_img returns them; no further
    # preprocessing is applied here.
    x_train[i] = read_img(img_id, train_folder, (INPUT_SIZE, INPUT_SIZE))
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

10222it [00:30, 338.94it/s]

Train Images shape: (10222, 224, 224, 3) size: 1,538,697,216





In [22]:
# split training and validation set
# Draw one uniform number per sample; values below 0.8 go to training and the
# remainder to validation, giving roughly an 80/20 split.
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
train_idx = rnd < 0.8
valid_idx = np.logical_not(train_idx)

In [23]:
# prepare labels
# The binarizer is fitted on the training labels only and then reused for the
# validation labels so both share the same column order.
ytr, yv = y_train[train_idx], y_train[valid_idx]
lb = LabelBinarizer()
lb.fit(ytr)
ytr_onehot = lb.transform(ytr)
yv_onehot = lb.transform(yv)

In [24]:
# prepare inputs
Xtr, Xv = x_train[train_idx], x_train[valid_idx]

# Boolean-mask indexing copies the data, so the full array is no longer
# needed; drop it and collect garbage to free the memory before training.
del x_train
gc.collect()

0

In [25]:
# Check how many distinct breeds ended up on each side of the split.
n_train_classes = len(np.unique(ytr))
n_valid_classes = len(np.unique(yv))
print("Unique Class Counts in training set: {}".format(n_train_classes))
print("Unique Class Counts in validation set: {}".format(n_valid_classes))

Unique Class Counts in training set: 120
Unique Class Counts in validation set: 120


In [26]:
# The builder takes the shape as (channels, rows, cols).
res18 = ResnetBuilder.build_resnet_18(input_shape=(3, INPUT_SIZE, INPUT_SIZE),
                                      num_outputs=NUM_CLASSES)
res18.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 112, 112, 64)  9472        input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 112, 112, 64)  256         conv2d_1[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 112, 112, 64)  0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [27]:
# The network ends in a softmax over mutually exclusive breeds and the targets
# are one-hot rows, so the matching loss is categorical cross-entropy.
# With binary cross-entropy each output was scored as an independent yes/no
# decision and 'acc' reported a per-output binary accuracy, which is close to
# 1.0 for any prediction when only one of 120 entries is ever positive.
res18.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['acc'])
res18.fit(Xtr, ytr_onehot,  # training is very slow, hence a single epoch
          epochs=1,
          batch_size=32,
          shuffle=True,
          validation_data=(Xv, yv_onehot))
#         callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=10)])

Train on 8162 samples, validate on 2060 samples
Epoch 1/1


<keras.callbacks.History at 0x7f56e974c080>