In [1]:
%matplotlib inline

# Dependency imports
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv3D, Conv2D, Dropout, BatchNormalization
from keras.optimizers import adam
from keras.utils.data_utils import Sequence
import keras.backend.tensorflow_backend as K
from keras import callbacks
from keras.utils import np_utils

import matplotlib.pyplot as plt

# Configure the TensorFlow session used by the Keras backend.
# allow_growth makes TF allocate GPU memory on demand instead of consuming
# all GPU memory up front.
K_CONFIG = K.tf.ConfigProto()
# NOTE(review): soft placement is expected to let TF fall back to another
# device when an op cannot run on the requested one - confirm against TF docs.
K_CONFIG.allow_soft_placement = True
K_CONFIG.gpu_options.allow_growth = True # pylint: disable=E1101
# Install the configured session as the one Keras will use from here on.
K.set_session(K.tf.Session(config=K_CONFIG))

from IPython.core.display import display, HTML
# Inject CSS so the notebook container spans 80% of the browser width.
display(HTML("<style>.container { width:80% !important; }</style>"))

Using TensorFlow backend.


In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from data/MNIST_data/ (the loader output below shows the four
# archives being extracted from that directory).
# one_hot=False: labels are needed as plain digits because generate_data()
# sums them. reshape=False: images are not flattened; the shapes printed
# further down end in (28, 28, 1).
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=False, reshape=False)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
import numpy as np

def generate_data(dataset_size=10, seq_len=16, conv2d=False, valid_size=2000):
    """Build a "sum of digits" classification dataset from MNIST training images.

    Every sample is one batch of ``seq_len`` images drawn with
    ``mnist.train.next_batch``; its target is the sum of the digit labels in
    that batch, bucketed into classes and one-hot encoded.

    Args:
        dataset_size: Total number of samples to generate (train + validation).
        seq_len: Number of MNIST images per sample.
        conv2d: If True, swap the sequence axis with the trailing channel axis
            and drop the (former) channel axis by taking index 0, so a 5-D
            array ``(N, seq_len, H, W, C)`` becomes ``(N, H, W, seq_len)``.
        valid_size: Number of trailing samples held out for validation.
            Defaults to 2000, the value that used to be hard-coded. If it is
            larger than ``dataset_size`` every sample goes to validation and
            the training split is empty.

    Returns:
        Tuple ``(train_data, train_labels, valid_data, valid_labels)``.
        The label arrays always have 6 columns.

    Raises:
        ValueError: If ``valid_size`` is negative.
    """
    if valid_size < 0:
        raise ValueError('valid_size must be non-negative, got %r' % (valid_size,))

    train_data = []
    train_labels = []

    def to_one_hot(arr):
        """Bucket the label sums and return a fixed-width one-hot matrix."""
        bins = [0, 25, 30, 35, 40, 100]

        # digitize yields 1..5 for sums in [0, 100) and 6 for sums >= 100
        # (reachable once seq_len >= 12). Fold that overflow into the last
        # class so the class ids always fit the fixed one-hot width below.
        labels = np.minimum(np.digitize(arr, bins), len(bins) - 1)

        # Pin the width explicitly: without num_classes the number of columns
        # follows the largest label that happens to be present, which can
        # disagree with the 6-unit softmax layer of the models.
        one_hot_labels = np_utils.to_categorical(labels, num_classes=len(bins))

        y = np.bincount(labels)
        ii = np.nonzero(y)[0]
        print('Label distribution', list(zip(ii, y[ii])))

        return one_hot_labels

    for _ in range(dataset_size):
        _data, _labels = mnist.train.next_batch(batch_size=seq_len, shuffle=True)

        train_data.append(_data)
        train_labels.append(_labels.sum())

    all_data = np.array(train_data)

    if conv2d:
        all_data = np.transpose(all_data, (0, 4, 2, 3, 1))[:, 0, :, :, :]

    all_labels = to_one_hot(np.array(train_labels))

    # Use a positive split index: a negative slice such as [0:-0] would return
    # an empty training set when no validation samples are requested.
    n_valid = min(valid_size, len(all_data))
    split = len(all_data) - n_valid

    return all_data[:split], all_labels[:split], all_data[split:], all_labels[split:]

# Generate 12000 samples of 7 digits each, kept in the 5-D layout (conv2d=False),
# and unpack the train/validation split returned by generate_data().
(train_data, train_labels,
 valid_data, valid_labels) = generate_data(conv2d=False, seq_len=7, dataset_size=12000)

# Blank separator line, then the shapes of both splits.
print()
print(
    f'train_data: {train_data.shape}, train_labels: {train_labels.shape}'
)
print(
    f'valid_data: {valid_data.shape}, valid_labels: {valid_labels.shape}'
)

Label distribution [(1, 2394), (2, 2588), (3, 3020), (4, 2346), (5, 1652)]

train_data: (10000, 7, 28, 28, 1), train_labels: (10000, 6)
valid_data: (2000, 7, 28, 28, 1), valid_labels: (2000, 6)


In [4]:
def conv3d_model(dshape, dense_nn=100, kernel_size_0=1, kernel_size_1=7, activation='relu', dropout=False, l_r=0.001):
    """Build and compile a Conv3D classifier with a 6-way softmax output.

    Args:
        dshape: Shape of one input sample; its first four entries are passed
            as ``input_shape`` of the first Conv3D layer.
        dense_nn: Width of the first dense layer; the next two dense layers
            use ``dense_nn // 2`` and ``dense_nn // 5`` units.
        kernel_size_0: Kernel extent along the first axis of ``dshape``.
        kernel_size_1: Kernel extent along the second and third axes.
        activation: Activation used by every conv and hidden dense layer.
        dropout: If True, add Dropout after each hidden dense block
            (rates 0.3, 0.3 and 0.1).
        l_r: Learning rate handed to the adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    kernel = (kernel_size_0, kernel_size_1, kernel_size_1)
    net = Sequential()

    # Convolutional stack: four Conv3D layers, each followed by batch norm.
    # Only the first layer carries the input shape.
    for position, n_filters in enumerate((24, 36, 48, 64)):
        first_layer_args = {}
        if position == 0:
            first_layer_args['input_shape'] = (dshape[0], dshape[1], dshape[2], dshape[3])
        net.add(Conv3D(n_filters, kernel, padding='valid', activation=activation,
                       strides=(1, 1, 1), **first_layer_args))
        net.add(BatchNormalization())

    net.add(Flatten())

    # Dense head: progressively narrower layers, optional dropout after each.
    dense_plan = ((dense_nn, 0.3), (dense_nn // 2, 0.3), (dense_nn // 5, 0.1))
    for n_units, drop_rate in dense_plan:
        net.add(Dense(n_units, activation=activation))
        net.add(BatchNormalization())
        if dropout:
            net.add(Dropout(drop_rate))

    net.add(Dense(6, activation='softmax'))

    net.compile(loss='categorical_crossentropy', optimizer=adam(lr=l_r), metrics=['accuracy'])

    return net

In [5]:
def conv2d_model(dshape, activation='relu', dropout=False, l_r=0.001):
    """Build and compile a Conv2D classifier with a 6-way softmax output.

    Args:
        dshape: Shape of one input sample; its first three entries are passed
            as ``input_shape`` of the first Conv2D layer.
        activation: Activation used by every conv and hidden dense layer.
            Defaults to ``'relu'`` so the signature matches ``conv3d_model``;
            callers that pass it explicitly are unaffected.
        dropout: If True, add Dropout after each hidden dense block
            (rates 0.3, 0.3 and 0.1).
        l_r: Learning rate handed to the adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    seq_model = Sequential()

    # (filters, square kernel size) for each conv layer, in order.
    conv_plan = ((48, 8), (48, 7), (64, 6), (64, 4))
    for position, (n_filters, k_size) in enumerate(conv_plan):
        # Only the first layer declares the input shape.
        first_layer_args = {}
        if position == 0:
            first_layer_args['input_shape'] = (dshape[0], dshape[1], dshape[2])
        seq_model.add(Conv2D(n_filters, (k_size, k_size), padding='valid', activation=activation,
                             strides=(1, 1), **first_layer_args))
        seq_model.add(BatchNormalization())

    seq_model.add(Flatten())

    # (units, dropout rate) for each hidden dense block, in order.
    for n_units, drop_rate in ((100, 0.3), (50, 0.3), (10, 0.1)):
        seq_model.add(Dense(n_units, activation=activation))
        seq_model.add(BatchNormalization())
        if dropout:
            seq_model.add(Dropout(drop_rate))

    seq_model.add(Dense(6, activation='softmax'))

    seq_model.compile(loss='categorical_crossentropy', optimizer=adam(lr=l_r), metrics=['accuracy'])

    return seq_model

In [8]:
# Quick training run of conv3d_model with its default hyperparameters;
# the held-out split is evaluated after every epoch.
keras_model = conv3d_model(dshape=train_data[0].shape)
train_history = keras_model.fit(
    train_data,
    train_labels,
    validation_data=(valid_data, valid_labels),
    epochs=5,
    batch_size=32,
    shuffle=True,
    verbose=1,
)

Train on 10000 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Parameter search

In [6]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [9]:
# Wrap the model factory in the scikit-learn estimator interface so that
# GridSearchCV can build and fit a fresh model for every candidate.
model = KerasClassifier(build_fn=conv3d_model, verbose=0)

# Search space. batch_size and epochs are consumed by fit(); the remaining
# keys are keyword arguments of conv3d_model. dshape is a single-value list
# because it is fixed by the data rather than tuned.
# NOTE: the grid is exhaustive - 5 * 5 * 1 * 2 * 2 * 2 * 3 * 4 = 2400
# candidates, each trained once per cross-validation fold. Trim the lists
# before running this on modest hardware.
param_grid = dict(
    batch_size=[4, 8, 16, 32, 64],
    epochs=[10, 15, 20, 25, 30],
    dshape=[train_data[0].shape],
    activation=['selu', 'relu'],
    dropout=[True, False],
    kernel_size_0=[1, 2],
    kernel_size_1=[4, 6, 7],
    dense_nn=[50, 100, 200, 500]
)

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    return_train_score=True,
    # No `scoring` is configured, so candidates are ranked by the estimator's
    # own score() method. A scorer name such as 'precision_macro' is only
    # meaningful for `refit` together with multi-metric scoring; with a single
    # metric it merely acted as a truthy flag, so state that intent directly.
    refit=True,
    # n_jobs=10
)

In [None]:
# Run the grid search on the training split.
print('Running hyperparameter search.')
grid_result = grid.fit(train_data, train_labels)

# Report the winning configuration first ...
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

# ... then the mean/std test score of every candidate that was evaluated.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for mean, stdev, param in zip(means, stds, params):
    candidate_row = (mean, stdev, param)
    print("%f (%f) with: %r" % candidate_row)

Running hyperparameter search.
